diff options
Diffstat (limited to 'net/sched')
| -rw-r--r-- | net/sched/Kconfig | 17 | ||||
| -rw-r--r-- | net/sched/Makefile | 3 | ||||
| -rw-r--r-- | net/sched/act_api.c | 18 | ||||
| -rw-r--r-- | net/sched/act_bpf.c | 4 | ||||
| -rw-r--r-- | net/sched/act_connmark.c | 1 | ||||
| -rw-r--r-- | net/sched/act_csum.c | 18 | ||||
| -rw-r--r-- | net/sched/act_ife.c | 5 | ||||
| -rw-r--r-- | net/sched/act_mirred.c | 57 | ||||
| -rw-r--r-- | net/sched/act_pedit.c | 3 | ||||
| -rw-r--r-- | net/sched/act_sample.c | 4 | ||||
| -rw-r--r-- | net/sched/act_skbedit.c | 10 | ||||
| -rw-r--r-- | net/sched/act_skbmod.c | 21 | ||||
| -rw-r--r-- | net/sched/act_tunnel_key.c | 32 | ||||
| -rw-r--r-- | net/sched/act_vlan.c | 19 | ||||
| -rw-r--r-- | net/sched/cls_api.c | 614 | ||||
| -rw-r--r-- | net/sched/cls_basic.c | 1 | ||||
| -rw-r--r-- | net/sched/cls_bpf.c | 4 | ||||
| -rw-r--r-- | net/sched/cls_flower.c | 317 | ||||
| -rw-r--r-- | net/sched/sch_cake.c | 15 | ||||
| -rw-r--r-- | net/sched/sch_cbs.c | 134 | ||||
| -rw-r--r-- | net/sched/sch_fq_codel.c | 25 | ||||
| -rw-r--r-- | net/sched/sch_skbprio.c | 320 |
22 files changed, 1362 insertions, 280 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 7af246764a35..e95741388311 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -1,6 +1,6 @@ # # Traffic control configuration. -# +# menuconfig NET_SCHED bool "QoS and/or fair queueing" @@ -251,6 +251,19 @@ config NET_SCH_MQPRIO If unsure, say N. +config NET_SCH_SKBPRIO + tristate "SKB priority queue scheduler (SKBPRIO)" + help + Say Y here if you want to use the SKB priority queue + scheduler. This schedules packets according to skb->priority, + which is useful for request packets in DoS mitigation systems such + as Gatekeeper. + + To compile this driver as a module, choose M here: the module will + be called sch_skbprio. + + If unsure, say N. + config NET_SCH_CHOKE tristate "CHOose and Keep responsive flow scheduler (CHOKE)" help @@ -706,7 +719,7 @@ config NET_CLS_ACT config NET_ACT_POLICE tristate "Traffic Policing" - depends on NET_CLS_ACT + depends on NET_CLS_ACT ---help--- Say Y here if you want to do traffic policing, i.e. strict bandwidth limiting. This action replaces the existing policing diff --git a/net/sched/Makefile b/net/sched/Makefile index 673ee7d26ff2..f0403f49edcb 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o -obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o +obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o @@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o +obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 148a89ab789b..229d63c99be2 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -36,7 +36,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) if (!tp) return -EINVAL; - a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true); + a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index); if (!a->goto_chain) return -ENOMEM; return 0; @@ -44,7 +44,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) static void tcf_action_goto_chain_fini(struct tc_action *a) { - tcf_chain_put(a->goto_chain); + tcf_chain_put_by_act(a->goto_chain); } static void tcf_action_goto_chain_exec(const struct tc_action *a, @@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } +static bool tcf_action_valid(int action) +{ + int opcode = TC_ACT_EXT_OPCODE(action); + + if (!opcode) + return action <= TC_ACT_VALUE_MAX; + return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC; +} + struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } } + if (!tcf_action_valid(a->tcfa_action)) { + NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead"); + a->tcfa_action = TC_ACT_UNSPEC; + } + return a; err_mod: diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 06f743d8ed41..6203eb075c9a 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -196,12 +196,10 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) return -EINVAL; - bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + bpf_ops = kmemdup(nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size, GFP_KERNEL); if (bpf_ops == NULL) return -ENOMEM; - memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); - fprog_tmp.len = bpf_num_ops; fprog_tmp.filter = bpf_ops; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 1e31f0e448e2..2f9bc833d046 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -252,4 +252,3 @@ module_exit(connmark_cleanup_module); MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); MODULE_DESCRIPTION("Connection tracking mark restoring"); MODULE_LICENSE("GPL"); - diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index bd232d3bd022..648a3a35b720 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -97,7 +97,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } params_old = rtnl_dereference(p->params); - params_new->action = parm->action; + p->tcf_action = parm->action; params_new->update_flags = parm->update_flags; rcu_assign_pointer(p->params, params_new); if (params_old) @@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, u32 update_flags; int action; - rcu_read_lock(); - params = rcu_dereference(p->params); + params = rcu_dereference_bh(p->params); tcf_lastuse_update(&p->tcf_tm); bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb); - action = params->action; + action = READ_ONCE(p->tcf_action); if (unlikely(action == TC_ACT_SHOT)) - goto drop_stats; + goto drop; update_flags = params->update_flags; switch (tc_skb_protocol(skb)) { @@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, break; } -unlock: - rcu_read_unlock(); return action; drop: - action = TC_ACT_SHOT; - -drop_stats: qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats)); - goto unlock; + return TC_ACT_SHOT; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, @@ -605,11 +599,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, .index = p->tcf_index, .refcnt = refcount_read(&p->tcf_refcnt) - ref, .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, + .action = p->tcf_action, }; struct tcf_t t; params = rtnl_dereference(p->params); - opt.action = params->action; opt.update_flags = params->update_flags; if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 3d6e265758c0..df4060e32d43 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_params *p; int ret; - rcu_read_lock(); - p = rcu_dereference(ife->params); + p = rcu_dereference_bh(ife->params); if (p->flags & IFE_ENCODE) { ret = tcf_ife_encode(skb, a, res, p); - rcu_read_unlock(); return ret; } - rcu_read_unlock(); return tcf_ife_decode(skb, a, res); } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6afd89a36c69..b26d060da08e 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -25,6 +25,7 @@ #include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> @@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action) } } +static bool tcf_mirred_can_reinsert(int action) +{ + switch (action) { + case TC_ACT_SHOT: + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + return true; + } + return false; +} + static void tcf_mirred_release(struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); @@ -171,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); + struct sk_buff *skb2 = skb; bool m_mac_header_xmit; struct net_device *dev; - struct sk_buff *skb2; int retval, err = 0; + bool use_reinsert; + bool want_ingress; + bool is_redirect; int m_eaction; int mac_len; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); - rcu_read_lock(); m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); m_eaction = READ_ONCE(m->tcfm_eaction); retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference(m->tcfm_dev); + dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); goto out; @@ -197,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, goto out; } - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - goto out; + /* we could easily avoid the clone only if called by ingress and clsact; + * since we can't easily detect the clsact caller, skip clone only for + * ingress - that covers the TC S/W datapath. + */ + is_redirect = tcf_mirred_is_act_redirect(m_eaction); + use_reinsert = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!use_reinsert) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + goto out; + } /* If action's target direction differs than filter's direction, * and devices expect a mac header on xmit, then mac push/pull is * needed. */ - if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) && - m_mac_header_xmit) { + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) { if (!skb_at_tc_ingress(skb)) { /* caught at egress, act ingress: pull mac */ mac_len = skb_network_header(skb) - skb_mac_header(skb); @@ -217,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } } + skb2->skb_iif = skb->dev->ifindex; + skb2->dev = dev; + /* mirror is always swallowed */ - if (tcf_mirred_is_act_redirect(m_eaction)) { + if (is_redirect) { skb2->tc_redirected = 1; skb2->tc_from_ingress = skb2->tc_at_ingress; + + /* let's the caller reinsert the packet, if possible */ + if (use_reinsert) { + res->ingress = want_ingress; + res->qstats = this_cpu_ptr(m->common.cpu_qstats); + return TC_ACT_REINSERT; + } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; - if (!tcf_mirred_act_wants_ingress(m_eaction)) + if (!want_ingress) err = dev_queue_xmit(skb2); else err = netif_receive_skb(skb2); @@ -236,7 +268,6 @@ out: if (tcf_mirred_is_act_redirect(m_eaction)) retval = TC_ACT_SHOT; } - rcu_read_unlock(); return retval; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index cc8ffcd1ddb5..43ba999b2d23 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -286,7 +286,7 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb, default: ret = -EINVAL; break; - }; + } return ret; } @@ -516,4 +516,3 @@ static void __exit pedit_cleanup_module(void) module_init(pedit_init_module); module_exit(pedit_cleanup_module); - diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 3079e7be5bde..2608ccc83e5e 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); retval = READ_ONCE(s->tcf_action); - rcu_read_lock(); - psample_group = rcu_dereference(s->psample_group); + psample_group = rcu_dereference_bh(s->psample_group); /* randomly sample packets according to rate */ if (psample_group && (prandom_u32() % s->rate == 0)) { @@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, skb_pull(skb, skb->mac_len); } - rcu_read_unlock(); return retval; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index da56e6938c9e..a6db47ebec11 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); - rcu_read_lock(); - params = rcu_dereference(d->params); + params = rcu_dereference_bh(d->params); action = READ_ONCE(d->tcf_action); if (params->flags & SKBEDIT_F_PRIORITY) @@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, } if (params->flags & SKBEDIT_F_PTYPE) skb->pkt_type = params->ptype; - -unlock: - rcu_read_unlock(); return action; + err: qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats)); - action = TC_ACT_SHOT; - goto unlock; + return TC_ACT_SHOT; } static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index cdc6bacfb190..c437c6d51a71 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, * then MAX_EDIT_LEN needs to change appropriately */ err = skb_ensure_writable(skb, MAX_EDIT_LEN); - if (unlikely(err)) { /* best policy is to drop on the floor */ - qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); - return TC_ACT_SHOT; - } + if (unlikely(err)) /* best policy is to drop on the floor */ + goto drop; - rcu_read_lock(); action = READ_ONCE(d->tcf_action); - if (unlikely(action == TC_ACT_SHOT)) { - qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); - rcu_read_unlock(); - return action; - } + if (unlikely(action == TC_ACT_SHOT)) + goto drop; - p = rcu_dereference(d->skbmod_p); + p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); @@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); if (flags & SKBMOD_F_ETYPE) eth_hdr(skb)->h_proto = p->eth_type; - rcu_read_unlock(); if (flags & SKBMOD_F_SWAPMAC) { u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ @@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, } return action; + +drop: + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; } static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 3ec585d58762..d42d9e112789 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -31,13 +31,11 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_tunnel_key_params *params; int action; - rcu_read_lock(); - - params = rcu_dereference(t->params); + params = rcu_dereference_bh(t->params); tcf_lastuse_update(&t->tcf_tm); bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); - action = params->action; + action = READ_ONCE(t->tcf_action); switch (params->tcft_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: @@ -53,8 +51,6 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, break; } - rcu_read_unlock(); - return action; } @@ -197,6 +193,8 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, [TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NLA_NESTED }, + [TCA_TUNNEL_KEY_ENC_TOS] = { .type = NLA_U8 }, + [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, }; static int tunnel_key_init(struct net *net, struct nlattr *nla, @@ -216,6 +214,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, int opts_len = 0; __be64 key_id; __be16 flags; + u8 tos, ttl; int ret = 0; int err; @@ -273,6 +272,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } } + tos = 0; + if (tb[TCA_TUNNEL_KEY_ENC_TOS]) + tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]); + ttl = 0; + if (tb[TCA_TUNNEL_KEY_ENC_TTL]) + ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]); + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { __be32 saddr; @@ -281,7 +287,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); - metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl, dst_port, flags, key_id, opts_len); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && @@ -292,7 +298,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); - metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port, + metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port, 0, flags, key_id, 0); } else { @@ -350,7 +356,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_old = rtnl_dereference(t->params); - params_new->action = parm->action; + t->tcf_action = parm->action; params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; @@ -479,13 +485,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .index = t->tcf_index, .refcnt = refcount_read(&t->tcf_refcnt) - ref, .bindcnt = atomic_read(&t->tcf_bindcnt) - bind, + .action = t->tcf_action, }; struct tcf_t tm; params = rtnl_dereference(t->params); opt.t_action = params->tcft_action; - opt.action = params->action; if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -504,6 +510,12 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, !(key->tun_flags & TUNNEL_CSUM)) || tunnel_key_opts_dump(skb, info)) goto nla_put_failure; + + if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos)) + goto nla_put_failure; + + if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl)) + goto nla_put_failure; } tcf_tm_dump(&tm, &t->tcf_tm); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ad37f308175a..15a0ee214c9c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); - rcu_read_lock(); - action = READ_ONCE(v->tcf_action); - p = rcu_dereference(v->vlan_p); + p = rcu_dereference_bh(v->vlan_p); switch (p->tcfv_action) { case TCA_VLAN_ACT_POP: @@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, case TCA_VLAN_ACT_MODIFY: /* No-op if no vlan tag (either hw-accel or in-payload) */ if (!skb_vlan_tagged(skb)) - goto unlock; + goto out; /* extract existing tag (and guarantee no hw-accel tag) */ if (skb_vlan_tag_present(skb)) { tci = skb_vlan_tag_get(skb); @@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, BUG(); } - goto unlock; - -drop: - action = TC_ACT_SHOT; - qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); - -unlock: - rcu_read_unlock(); +out: if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); return action; + +drop: + qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); + return TC_ACT_SHOT; } static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 73d9967c3739..194c2e0b2737 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -39,7 +39,7 @@ static DEFINE_RWLOCK(cls_mod_lock); /* Find classifier type by string name */ -static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind) +static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) { const struct tcf_proto_ops *t, *res = NULL; @@ -57,6 +57,33 @@ static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind) return res; } +static const struct tcf_proto_ops * +tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) +{ + const struct tcf_proto_ops *ops; + + ops = __tcf_proto_lookup_ops(kind); + if (ops) + return ops; +#ifdef CONFIG_MODULES + rtnl_unlock(); + request_module("cls_%s", kind); + rtnl_lock(); + ops = __tcf_proto_lookup_ops(kind); + /* We dropped the RTNL semaphore in order to perform + * the module load. So, even if we succeeded in loading + * the module we have to replay the request. We indicate + * this using -EAGAIN. + */ + if (ops) { + module_put(ops->owner); + return ERR_PTR(-EAGAIN); + } +#endif + NL_SET_ERR_MSG(extack, "TC classifier not found"); + return ERR_PTR(-ENOENT); +} + /* Register(unregister) new classifier type */ int register_tcf_proto_ops(struct tcf_proto_ops *ops) @@ -133,27 +160,9 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, if (!tp) return ERR_PTR(-ENOBUFS); - err = -ENOENT; - tp->ops = tcf_proto_lookup_ops(kind); - if (!tp->ops) { -#ifdef CONFIG_MODULES - rtnl_unlock(); - request_module("cls_%s", kind); - rtnl_lock(); - tp->ops = tcf_proto_lookup_ops(kind); - /* We dropped the RTNL semaphore in order to perform - * the module load. So, even if we succeeded in loading - * the module we have to replay the request. We indicate - * this using -EAGAIN. - */ - if (tp->ops) { - module_put(tp->ops->owner); - err = -EAGAIN; - } else { - NL_SET_ERR_MSG(extack, "TC classifier not found"); - err = -ENOENT; - } -#endif + tp->ops = tcf_proto_lookup_ops(kind, extack); + if (IS_ERR(tp->ops)) { + err = PTR_ERR(tp->ops); goto errout; } tp->classify = tp->ops->classify; @@ -195,11 +204,12 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; - INIT_LIST_HEAD(&chain->filter_chain_list); list_add_tail(&chain->list, &block->chain_list); chain->block = block; chain->index = chain_index; chain->refcnt = 1; + if (!chain->index) + block->chain0.chain = chain; return chain; } @@ -209,35 +219,28 @@ static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item, if (item->chain_head_change) item->chain_head_change(tp_head, item->chain_head_change_priv); } -static void tcf_chain_head_change(struct tcf_chain *chain, - struct tcf_proto *tp_head) + +static void tcf_chain0_head_change(struct tcf_chain *chain, + struct tcf_proto *tp_head) { struct tcf_filter_chain_list_item *item; + struct tcf_block *block = chain->block; - list_for_each_entry(item, &chain->filter_chain_list, list) + if (chain->index) + return; + list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); } -static void tcf_chain_flush(struct tcf_chain *chain) -{ - struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); - - tcf_chain_head_change(chain, NULL); - while (tp) { - RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp, NULL); - tp = rtnl_dereference(chain->filter_chain); - tcf_chain_put(chain); - } -} - static void tcf_chain_destroy(struct tcf_chain *chain) { struct tcf_block *block = chain->block; list_del(&chain->list); + if (!chain->index) + block->chain0.chain = NULL; kfree(chain); - if (list_empty(&block->chain_list)) + if (list_empty(&block->chain_list) && block->refcnt == 0) kfree(block); } @@ -246,28 +249,119 @@ static void tcf_chain_hold(struct tcf_chain *chain) ++chain->refcnt; } -struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, - bool create) +static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) +{ + /* In case all the references are action references, this + * chain should not be shown to the user. + */ + return chain->refcnt == chain->action_refcnt; +} + +static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, + u32 chain_index) { struct tcf_chain *chain; list_for_each_entry(chain, &block->chain_list, list) { - if (chain->index == chain_index) { - tcf_chain_hold(chain); + if (chain->index == chain_index) return chain; - } } + return NULL; +} - return create ? tcf_chain_create(block, chain_index) : NULL; +static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, + u32 seq, u16 flags, int event, bool unicast); + +static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, + u32 chain_index, bool create, + bool by_act) +{ + struct tcf_chain *chain = tcf_chain_lookup(block, chain_index); + + if (chain) { + tcf_chain_hold(chain); + } else { + if (!create) + return NULL; + chain = tcf_chain_create(block, chain_index); + if (!chain) + return NULL; + } + + if (by_act) + ++chain->action_refcnt; + + /* Send notification only in case we got the first + * non-action reference. Until then, the chain acts only as + * a placeholder for actions pointing to it and user ought + * not know about them. + */ + if (chain->refcnt - chain->action_refcnt == 1 && !by_act) + tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, + RTM_NEWCHAIN, false); + + return chain; +} + +static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, + bool create) +{ + return __tcf_chain_get(block, chain_index, create, false); } -EXPORT_SYMBOL(tcf_chain_get); -void tcf_chain_put(struct tcf_chain *chain) +struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) +{ + return __tcf_chain_get(block, chain_index, true, true); +} +EXPORT_SYMBOL(tcf_chain_get_by_act); + +static void tc_chain_tmplt_del(struct tcf_chain *chain); + +static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) { - if (--chain->refcnt == 0) + if (by_act) + chain->action_refcnt--; + chain->refcnt--; + + /* The last dropped non-action reference will trigger notification. */ + if (chain->refcnt - chain->action_refcnt == 0 && !by_act) + tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); + + if (chain->refcnt == 0) { + tc_chain_tmplt_del(chain); tcf_chain_destroy(chain); + } +} + +static void tcf_chain_put(struct tcf_chain *chain) +{ + __tcf_chain_put(chain, false); +} + +void tcf_chain_put_by_act(struct tcf_chain *chain) +{ + __tcf_chain_put(chain, true); +} +EXPORT_SYMBOL(tcf_chain_put_by_act); + +static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) +{ + if (chain->explicitly_created) + tcf_chain_put(chain); +} + +static void tcf_chain_flush(struct tcf_chain *chain) +{ + struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); + + tcf_chain0_head_change(chain, NULL); + while (tp) { + RCU_INIT_POINTER(chain->filter_chain, tp->next); + tcf_proto_destroy(tp, NULL); + tp = rtnl_dereference(chain->filter_chain); + tcf_chain_put(chain); + } } -EXPORT_SYMBOL(tcf_chain_put); static bool tcf_block_offload_in_use(struct tcf_block *block) { @@ -337,10 +431,11 @@ no_offload_dev_dec: } static int -tcf_chain_head_change_cb_add(struct tcf_chain *chain, - struct tcf_block_ext_info *ei, - struct netlink_ext_ack *extack) +tcf_chain0_head_change_cb_add(struct tcf_block *block, + struct tcf_block_ext_info *ei, + struct netlink_ext_ack *extack) { + struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; item = kmalloc(sizeof(*item), GFP_KERNEL); @@ -350,23 +445,25 @@ tcf_chain_head_change_cb_add(struct tcf_chain *chain, } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; - if (chain->filter_chain) - tcf_chain_head_change_item(item, chain->filter_chain); - list_add(&item->list, &chain->filter_chain_list); + if (chain0 && chain0->filter_chain) + tcf_chain_head_change_item(item, chain0->filter_chain); + list_add(&item->list, &block->chain0.filter_chain_list); return 0; } static void -tcf_chain_head_change_cb_del(struct tcf_chain *chain, - struct tcf_block_ext_info *ei) +tcf_chain0_head_change_cb_del(struct tcf_block *block, + struct tcf_block_ext_info *ei) { + struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; - list_for_each_entry(item, &chain->filter_chain_list, list) { + list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { - tcf_chain_head_change_item(item, NULL); + if (chain0) + tcf_chain_head_change_item(item, NULL); list_del(&item->list); kfree(item); return; @@ -402,8 +499,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, struct netlink_ext_ack *extack) { struct tcf_block *block; - struct tcf_chain *chain; - int err; block = kzalloc(sizeof(*block), GFP_KERNEL); if (!block) { @@ -413,14 +508,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->cb_list); INIT_LIST_HEAD(&block->owner_list); + INIT_LIST_HEAD(&block->chain0.filter_chain_list); - /* Create chain 0 by default, it has to be always present. */ - chain = tcf_chain_create(block, 0); - if (!chain) { - NL_SET_ERR_MSG(extack, "Failed to create new tcf chain"); - err = -ENOMEM; - goto err_chain_create; - } block->refcnt = 1; block->net = net; block->index = block_index; @@ -429,10 +518,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, if (!tcf_block_shared(block)) block->q = q; return block; - -err_chain_create: - kfree(block); - return ERR_PTR(err); } static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) @@ -514,11 +599,6 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, return block; } -static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block) -{ - return list_first_entry(&block->chain_list, struct tcf_chain, list); -} - struct tcf_block_owner_item { struct list_head list; struct Qdisc *q; @@ -612,10 +692,9 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, tcf_block_owner_netif_keep_dst(block, q, ei->binder_type); - err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block), - ei, extack); + err = tcf_chain0_head_change_cb_add(block, ei, extack); if (err) - goto err_chain_head_change_cb_add; + goto err_chain0_head_change_cb_add; err = tcf_block_offload_bind(block, q, ei, extack); if (err) @@ -625,15 +704,14 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, return 0; err_block_offload_bind: - tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei); -err_chain_head_change_cb_add: + tcf_chain0_head_change_cb_del(block, ei); +err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: if (created) { if (tcf_block_shared(block)) tcf_block_remove(block, net); err_block_insert: - kfree(tcf_block_chain_zero(block)); kfree(block); } else { block->refcnt--; @@ -673,10 +751,10 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, if (!block) return; - tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei); + tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - if (--block->refcnt == 0) { + if (block->refcnt == 1) { if (tcf_block_shared(block)) tcf_block_remove(block, block->net); @@ -692,13 +770,16 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, tcf_block_offload_unbind(block, q, ei); - if (block->refcnt == 0) { + if (block->refcnt == 1) { /* At this point, all the chains should have refcnt >= 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + tcf_chain_put_explicitly_created(chain); tcf_chain_put(chain); + } - /* Finally, put chain 0 and allow block to be freed. */ - tcf_chain_put(tcf_block_chain_zero(block)); + block->refcnt--; + if (list_empty(&block->chain_list)) + kfree(block); } } EXPORT_SYMBOL(tcf_block_put_ext); @@ -818,7 +899,7 @@ int tcf_block_cb_register(struct tcf_block *block, block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv, extack); - return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0; + return PTR_ERR_OR_ZERO(block_cb); } EXPORT_SYMBOL(tcf_block_cb_register); @@ -938,7 +1019,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_proto *tp) { if (*chain_info->pprev == chain->filter_chain) - tcf_chain_head_change(chain, tp); + tcf_chain0_head_change(chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); tcf_chain_hold(chain); @@ -951,7 +1032,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_proto *next = rtnl_dereference(chain_info->next); if (tp == chain->filter_chain) - tcf_chain_head_change(chain, next); + tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); tcf_chain_put(chain); } @@ -1098,7 +1179,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (tp = rtnl_dereference(chain->filter_chain); tp; tp = rtnl_dereference(tp->next)) tfilter_notify(net, oskb, n, tp, block, - q, parent, 0, event, false); + q, parent, NULL, event, false); } static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1227,6 +1308,12 @@ replay: goto errout; } + if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { + NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); + err = -EINVAL; + goto errout; + } + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, extack); @@ -1302,6 +1389,13 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, } chain = tcf_chain_get(block, chain_index, false); if (!chain) { + /* User requested flush on non-existent chain. Nothing to do, + * so just return success. + */ + if (prio == 0) { + err = 0; + goto errout; + } NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; goto errout; @@ -1489,7 +1583,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, block, q, parent, 0, + if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) @@ -1508,7 +1602,9 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, arg.w.stop = 0; arg.w.skip = cb->args[1] - 1; arg.w.count = 0; + arg.w.cookie = cb->args[2]; tp->ops->walk(tp, &arg.w); + cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) return false; @@ -1606,6 +1702,330 @@ out: return skb->len; } +static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, + struct sk_buff *skb, struct tcf_block *block, + u32 portid, u32 seq, u16 flags, int event) +{ + unsigned char *b = skb_tail_pointer(skb); + const struct tcf_proto_ops *ops; + struct nlmsghdr *nlh; + struct tcmsg *tcm; + void *priv; + + ops = chain->tmplt_ops; + priv = chain->tmplt_priv; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_handle = 0; + if (block->q) { + tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; + tcm->tcm_parent = block->q->handle; + } else { + tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; + tcm->tcm_block_index = block->index; + } + + if (nla_put_u32(skb, TCA_CHAIN, chain->index)) + goto nla_put_failure; + + if (ops) { + if (nla_put_string(skb, TCA_KIND, ops->kind)) + goto nla_put_failure; + if (ops->tmplt_dump(skb, net, priv) < 0) + goto nla_put_failure; + } + + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, + u32 seq, u16 flags, int event, bool unicast) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct tcf_block *block = chain->block; + struct net *net = block->net; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_chain_fill_node(chain, net, skb, block, portid, + seq, flags, event) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); +} + +static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, + struct nlattr **tca, + struct netlink_ext_ack *extack) +{ + const struct tcf_proto_ops *ops; + void *tmplt_priv; + + /* If kind is not set, user did not specify template. */ + if (!tca[TCA_KIND]) + return 0; + + ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack); + if (IS_ERR(ops)) + return PTR_ERR(ops); + if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { + NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); + return -EOPNOTSUPP; + } + + tmplt_priv = ops->tmplt_create(net, chain, tca, extack); + if (IS_ERR(tmplt_priv)) { + module_put(ops->owner); + return PTR_ERR(tmplt_priv); + } + chain->tmplt_ops = ops; + chain->tmplt_priv = tmplt_priv; + return 0; +} + +static void tc_chain_tmplt_del(struct tcf_chain *chain) +{ + const struct tcf_proto_ops *ops = chain->tmplt_ops; + + /* If template ops are set, no work to do for us. */ + if (!ops) + return; + + ops->tmplt_destroy(chain->tmplt_priv); + module_put(ops->owner); +} + +/* Add/delete/get a chain */ + +static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct tcmsg *t; + u32 parent; + u32 chain_index; + struct Qdisc *q = NULL; + struct tcf_chain *chain = NULL; + struct tcf_block *block; + unsigned long cl; + int err; + + if (n->nlmsg_type != RTM_GETCHAIN && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +replay: + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + if (err < 0) + return err; + + t = nlmsg_data(n); + parent = t->tcm_parent; + cl = 0; + + block = tcf_block_find(net, &q, &parent, &cl, + t->tcm_ifindex, t->tcm_block_index, extack); + if (IS_ERR(block)) + return PTR_ERR(block); + + chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; + if (chain_index > TC_ACT_EXT_VAL_MASK) { + NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); + return -EINVAL; + } + chain = tcf_chain_lookup(block, chain_index); + if (n->nlmsg_type == RTM_NEWCHAIN) { + if (chain) { + if (tcf_chain_held_by_acts_only(chain)) { + /* The chain exists only because there is + * some action referencing it. + */ + tcf_chain_hold(chain); + } else { + NL_SET_ERR_MSG(extack, "Filter chain already exists"); + return -EEXIST; + } + } else { + if (!(n->nlmsg_flags & NLM_F_CREATE)) { + NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); + return -ENOENT; + } + chain = tcf_chain_create(block, chain_index); + if (!chain) { + NL_SET_ERR_MSG(extack, "Failed to create filter chain"); + return -ENOMEM; + } + } + } else { + if (!chain || tcf_chain_held_by_acts_only(chain)) { + NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); + return -EINVAL; + } + tcf_chain_hold(chain); + } + + switch (n->nlmsg_type) { + case RTM_NEWCHAIN: + err = tc_chain_tmplt_add(chain, net, tca, extack); + if (err) + goto errout; + /* In case the chain was successfully added, take a reference + * to the chain. This ensures that an empty chain + * does not disappear at the end of this function. + */ + tcf_chain_hold(chain); + chain->explicitly_created = true; + tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, + RTM_NEWCHAIN, false); + break; + case RTM_DELCHAIN: + /* Flush the chain first as the user requested chain removal. */ + tcf_chain_flush(chain); + /* In case the chain was successfully deleted, put a reference + * to the chain previously taken during addition. + */ + tcf_chain_put_explicitly_created(chain); + chain->explicitly_created = false; + break; + case RTM_GETCHAIN: + err = tc_chain_notify(chain, skb, n->nlmsg_seq, + n->nlmsg_seq, n->nlmsg_type, true); + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); + break; + default: + err = -EOPNOTSUPP; + NL_SET_ERR_MSG(extack, "Unsupported message type"); + goto errout; + } + +errout: + tcf_chain_put(chain); + if (err == -EAGAIN) + /* Replay the request. */ + goto replay; + return err; +} + +/* called with RTNL */ +static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct Qdisc *q = NULL; + struct tcf_block *block; + struct tcf_chain *chain; + struct tcmsg *tcm = nlmsg_data(cb->nlh); + long index_start; + long index; + u32 parent; + int err; + + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) + return skb->len; + + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + if (err) + return err; + + if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_lookup(net, tcm->tcm_block_index); + if (!block) + goto out; + /* If we work with block index, q is NULL and parent value + * will never be used in the following code. The check + * in tcf_fill_node prevents it. However, compiler does not + * see that far, so set parent to zero to silence the warning + * about parent being uninitialized. + */ + parent = 0; + } else { + const struct Qdisc_class_ops *cops; + struct net_device *dev; + unsigned long cl = 0; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return skb->len; + + parent = tcm->tcm_parent; + if (!parent) { + q = dev->qdisc; + parent = q->handle; + } else { + q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); + } + if (!q) + goto out; + cops = q->ops->cl_ops; + if (!cops) + goto out; + if (!cops->tcf_block) + goto out; + if (TC_H_MIN(tcm->tcm_parent)) { + cl = cops->find(q, tcm->tcm_parent); + if (cl == 0) + goto out; + } + block = cops->tcf_block(q, cl, NULL); + if (!block) + goto out; + if (tcf_block_shared(block)) + q = NULL; + } + + index_start = cb->args[0]; + index = 0; + + list_for_each_entry(chain, &block->chain_list, list) { + if ((tca[TCA_CHAIN] && + nla_get_u32(tca[TCA_CHAIN]) != chain->index)) + continue; + if (index < index_start) { + index++; + continue; + } + if (tcf_chain_held_by_acts_only(chain)) + continue; + err = tc_chain_fill_node(chain, net, skb, block, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWCHAIN); + if (err <= 0) + break; + index++; + } + + cb->args[0] = index; + +out: + /* If we did no progress, the error (EMSGSIZE) is real */ + if (skb->len == 0 && err) + return err; + return skb->len; +} + void tcf_exts_destroy(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT @@ -1822,6 +2242,10 @@ static int __init tc_filter_init(void) rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, tc_dump_tfilter, 0); + rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, + tc_dump_chain, 0); return 0; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 95367f37098d..6a5dce8baf19 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -324,4 +324,3 @@ static void __exit exit_basic(void) module_init(init_basic) module_exit(exit_basic) MODULE_LICENSE("GPL"); - diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 66e0ac9811f9..fa6fe2fe0f32 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -349,12 +349,10 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) if (bpf_size != nla_len(tb[TCA_BPF_OPS])) return -EINVAL; - bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + bpf_ops = kmemdup(nla_data(tb[TCA_BPF_OPS]), bpf_size, GFP_KERNEL); if (bpf_ops == NULL) return -ENOMEM; - memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - fprog_tmp.len = bpf_num_ops; fprog_tmp.filter = bpf_ops; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8b2474293db1..a3b69bb6f4b0 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -52,6 +52,7 @@ struct fl_flow_key { struct flow_dissector_key_mpls mpls; struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ip ip; + struct flow_dissector_key_ip enc_ip; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -71,6 +72,13 @@ struct fl_flow_mask { struct list_head list; }; +struct fl_flow_tmplt { + struct fl_flow_key dummy_key; + struct fl_flow_key mask; + struct flow_dissector dissector; + struct tcf_chain *chain; +}; + struct cls_fl_head { struct rhashtable ht; struct list_head masks; @@ -146,6 +154,23 @@ static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key, *lmkey++ = *lkey++ & *lmask++; } +static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt, + struct fl_flow_mask *mask) +{ + const long *lmask = fl_key_get_start(&mask->key, mask); + const long *ltmplt; + int i; + + if (!tmplt) + return true; + ltmplt = fl_key_get_start(&tmplt->mask, mask); + for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) { + if (~*ltmplt++ & *lmask++) + return false; + } + return true; +} + static void fl_clear_masked_range(struct fl_flow_key *key, struct fl_flow_mask *mask) { @@ -453,6 +478,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_CVLAN_ID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CVLAN_PRIO] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_CVLAN_ETH_TYPE] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_IP_TOS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -561,17 +590,17 @@ static int fl_set_key_flags(struct nlattr **tb, return 0; } -static void fl_set_key_ip(struct nlattr **tb, +static void fl_set_key_ip(struct nlattr **tb, bool encap, struct flow_dissector_key_ip *key, struct flow_dissector_key_ip *mask) { - fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS, - &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK, - sizeof(key->tos)); + int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS; + int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL; + int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK; + int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK; - fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, - &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK, - sizeof(key->ttl)); + fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)); + fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)); } static int fl_set_key(struct net *net, struct nlattr **tb, @@ -633,7 +662,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)); - fl_set_key_ip(tb, &key->ip, &mask->ip); + fl_set_key_ip(tb, false, &key->ip, &mask->ip); } if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) { @@ -768,6 +797,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, sizeof(key->enc_tp.dst)); + fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); + if (tb[TCA_FLOWER_KEY_FLAGS]) ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); @@ -819,49 +850,52 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask) FL_KEY_SET(keys, cnt, id, member); \ } while(0); -static void fl_init_dissector(struct fl_flow_mask *mask) +static void fl_init_dissector(struct flow_dissector *dissector, + struct fl_flow_key *mask) { struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; size_t cnt = 0; FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_IP, ip); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_TCP, tcp); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ICMP, icmp); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ARP, arp); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_MPLS, mpls); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CVLAN, cvlan); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6); - if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) || - FL_KEY_IS_MASKED(&mask->key, enc_ipv6)) + if (FL_KEY_IS_MASKED(mask, enc_ipv4) || + FL_KEY_IS_MASKED(mask, enc_ipv6)) FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); - FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IP, enc_ip); - skb_flow_dissector_init(&mask->dissector, keys, cnt); + skb_flow_dissector_init(dissector, keys, cnt); } static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, @@ -880,7 +914,7 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, if (err) goto errout_free; - fl_init_dissector(newmask); + fl_init_dissector(&newmask->dissector, &newmask->key); INIT_LIST_HEAD_RCU(&newmask->filters); @@ -929,6 +963,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr, + struct fl_flow_tmplt *tmplt, struct netlink_ext_ack *extack) { int err; @@ -949,6 +984,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, fl_mask_update_range(mask); fl_set_masked_key(&f->mkey, &f->key, mask); + if (!fl_mask_fits_tmplt(tmplt, mask)) { + NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template"); + return -EINVAL; + } + return 0; } @@ -1014,7 +1054,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, - extack); + tp->chain->tmplt_priv, extack); if (err) goto errout_idr; @@ -1099,19 +1139,17 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; - struct fl_flow_mask *mask; - list_for_each_entry_rcu(mask, &head->masks, list) { - list_for_each_entry_rcu(f, &mask->filters, list) { - if (arg->count < arg->skip) - goto skip; - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; - break; - } -skip: - arg->count++; + arg->count = arg->skip; + + while ((f = idr_get_next_ul(&head->handle_idr, + &arg->cookie)) != NULL) { + if (arg->fn(tp, f, arg) < 0) { + arg->stop = 1; + break; } + arg->cookie = f->handle + 1; + arg->count++; } } @@ -1156,6 +1194,93 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; } +static void fl_hw_create_tmplt(struct tcf_chain *chain, + struct fl_flow_tmplt *tmplt) +{ + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = chain->block; + struct tcf_exts dummy_exts = { 0, }; + + cls_flower.common.chain_index = chain->index; + cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE; + cls_flower.cookie = (unsigned long) tmplt; + cls_flower.dissector = &tmplt->dissector; + cls_flower.mask = &tmplt->mask; + cls_flower.key = &tmplt->dummy_key; + cls_flower.exts = &dummy_exts; + + /* We don't care if driver (any of them) fails to handle this + * call. It serves just as a hint for it. + */ + tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER, + &cls_flower, false); +} + +static void fl_hw_destroy_tmplt(struct tcf_chain *chain, + struct fl_flow_tmplt *tmplt) +{ + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = chain->block; + + cls_flower.common.chain_index = chain->index; + cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY; + cls_flower.cookie = (unsigned long) tmplt; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER, + &cls_flower, false); +} + +static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, + struct nlattr **tca, + struct netlink_ext_ack *extack) +{ + struct fl_flow_tmplt *tmplt; + struct nlattr **tb; + int err; + + if (!tca[TCA_OPTIONS]) + return ERR_PTR(-EINVAL); + + tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); + if (!tb) + return ERR_PTR(-ENOBUFS); + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], + fl_policy, NULL); + if (err) + goto errout_tb; + + tmplt = kzalloc(sizeof(*tmplt), GFP_KERNEL); + if (!tmplt) { + err = -ENOMEM; + goto errout_tb; + } + tmplt->chain = chain; + err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack); + if (err) + goto errout_tmplt; + kfree(tb); + + fl_init_dissector(&tmplt->dissector, &tmplt->mask); + + fl_hw_create_tmplt(chain, tmplt); + + return tmplt; + +errout_tmplt: + kfree(tmplt); +errout_tb: + kfree(tb); + return ERR_PTR(err); +} + +static void fl_tmplt_destroy(void *tmplt_priv) +{ + struct fl_flow_tmplt *tmplt = tmplt_priv; + + fl_hw_destroy_tmplt(tmplt->chain, tmplt); + kfree(tmplt); +} + static int fl_dump_key_val(struct sk_buff *skb, void *val, int val_type, void *mask, int mask_type, int len) @@ -1210,14 +1335,17 @@ static int fl_dump_key_mpls(struct sk_buff *skb, return 0; } -static int fl_dump_key_ip(struct sk_buff *skb, +static int fl_dump_key_ip(struct sk_buff *skb, bool encap, struct flow_dissector_key_ip *key, struct flow_dissector_key_ip *mask) { - if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos, - TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) || - fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl, - TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl))) + int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS; + int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL; + int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK; + int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK; + + if (fl_dump_key_val(skb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)) || + fl_dump_key_val(skb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl))) return -1; return 0; @@ -1286,29 +1414,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } -static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) +static int fl_dump_key(struct sk_buff *skb, struct net *net, + struct fl_flow_key *key, struct fl_flow_key *mask) { - struct cls_fl_filter *f = fh; - struct nlattr *nest; - struct fl_flow_key *key, *mask; - - if (!f) - return skb->len; - - t->tcm_handle = f->handle; - - nest = nla_nest_start(skb, TCA_OPTIONS); - if (!nest) - goto nla_put_failure; - - if (f->res.classid && - nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) - goto nla_put_failure; - - key = &f->key; - mask = &f->mask->key; - if (mask->indev_ifindex) { struct net_device *dev; @@ -1317,9 +1425,6 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, goto nla_put_failure; } - if (!tc_skip_hw(f->flags)) - fl_hw_update_stats(tp, f); - if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, sizeof(key->eth.dst)) || @@ -1342,8 +1447,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, TCA_FLOWER_KEY_CVLAN_PRIO, &key->cvlan, &mask->cvlan) || (mask->cvlan.vlan_tpid && - nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->cvlan.vlan_tpid))) + nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + key->cvlan.vlan_tpid))) goto nla_put_failure; if (mask->basic.n_proto) { @@ -1363,7 +1468,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.ip_proto)) || - fl_dump_key_ip(skb, &key->ip, &mask->ip))) + fl_dump_key_ip(skb, false, &key->ip, &mask->ip))) goto nla_put_failure; if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && @@ -1488,12 +1593,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, - sizeof(key->enc_tp.dst))) + sizeof(key->enc_tp.dst)) || + fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip)) goto nla_put_failure; if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_fl_filter *f = fh; + struct nlattr *nest; + struct fl_flow_key *key, *mask; + + if (!f) + return skb->len; + + t->tcm_handle = f->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + if (f->res.classid && + nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) + goto nla_put_failure; + + key = &f->key; + mask = &f->mask->key; + + if (fl_dump_key(skb, net, key, mask)) + goto nla_put_failure; + + if (!tc_skip_hw(f->flags)) + fl_hw_update_stats(tp, f); + if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) goto nla_put_failure; @@ -1512,6 +1653,31 @@ nla_put_failure: return -1; } +static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv) +{ + struct fl_flow_tmplt *tmplt = tmplt_priv; + struct fl_flow_key *key, *mask; + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + key = &tmplt->dummy_key; + mask = &tmplt->mask; + + if (fl_dump_key(skb, net, key, mask)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static void fl_bind_class(void *fh, u32 classid, unsigned long cl) { struct cls_fl_filter *f = fh; @@ -1532,6 +1698,9 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .reoffload = fl_reoffload, .dump = fl_dump, .bind_class = fl_bind_class, + .tmplt_create = fl_tmplt_create, + .tmplt_destroy = fl_tmplt_destroy, + .tmplt_dump = fl_tmplt_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 30695691e9ff..35fc7252187c 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -80,7 +80,6 @@ #define CAKE_QUEUES (1024) #define CAKE_FLOW_MASK 63 #define CAKE_FLOW_NAT_FLAG 64 -#define CAKE_SPLIT_GSO_THRESHOLD (125000000) /* 1Gbps */ /* struct cobalt_params - contains codel and blue parameters * @interval: codel initial drop rate @@ -1546,7 +1545,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && TC_H_MIN(skb->priority) <= q->tin_cnt) { - tin = TC_H_MIN(skb->priority) - 1; + tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; if (q->rate_flags & CAKE_FLAG_WASH) cake_wash_diffserv(skb); @@ -2569,10 +2568,12 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_CAKE_MEMORY]) q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]); - if (q->rate_bps && q->rate_bps <= CAKE_SPLIT_GSO_THRESHOLD) - q->rate_flags |= CAKE_FLAG_SPLIT_GSO; - else - q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; + if (tb[TCA_CAKE_SPLIT_GSO]) { + if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO])) + q->rate_flags |= CAKE_FLAG_SPLIT_GSO; + else + q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; + } if (q->tins) { sch_tree_lock(sch); @@ -2608,7 +2609,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->target = 5000; /* 5ms: codel RFC argues * for 5 to 10% of interval */ - + q->rate_flags |= CAKE_FLAG_SPLIT_GSO; q->cur_tin = 0; q->cur_flow = 0; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index cdd96b9a27bc..e26a24017faa 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -78,18 +78,42 @@ struct cbs_sched_data { s64 sendslope; /* in bytes/s */ s64 idleslope; /* in bytes/s */ struct qdisc_watchdog watchdog; - int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); + struct Qdisc *qdisc; }; -static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch) +static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct Qdisc *child, + struct sk_buff **to_free) { - return qdisc_enqueue_tail(skb, sch); + int err; + + err = child->ops->enqueue(skb, child, to_free); + if (err != NET_XMIT_SUCCESS) + return err; + + qdisc_qstats_backlog_inc(sch, skb); + sch->q.qlen++; + + return NET_XMIT_SUCCESS; } -static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc = q->qdisc; + + return cbs_child_enqueue(skb, sch, qdisc, to_free); +} + +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc = q->qdisc; if (sch->q.qlen == 0 && q->credits > 0) { /* We need to stop accumulating credits when there's @@ -99,7 +123,7 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) q->last = ktime_get_ns(); } - return qdisc_enqueue_tail(skb, sch); + return cbs_child_enqueue(skb, sch, qdisc, to_free); } static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -107,7 +131,7 @@ static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct cbs_sched_data *q = qdisc_priv(sch); - return q->enqueue(skb, sch); + return q->enqueue(skb, sch, to_free); } /* timediff is in ns, slope is in bytes/s */ @@ -132,9 +156,25 @@ static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) return div64_s64(len * slope, port_rate); } +static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child) +{ + struct sk_buff *skb; + + skb = child->ops->dequeue(child); + if (!skb) + return NULL; + + qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + + return skb; +} + static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc = q->qdisc; s64 now = ktime_get_ns(); struct sk_buff *skb; s64 credits; @@ -157,8 +197,7 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) return NULL; } } - - skb = qdisc_dequeue_head(sch); + skb = cbs_child_dequeue(sch, qdisc); if (!skb) return NULL; @@ -178,7 +217,10 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) { - return qdisc_dequeue_head(sch); + struct cbs_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc = q->qdisc; + + return cbs_child_dequeue(sch, qdisc); } static struct sk_buff *cbs_dequeue(struct Qdisc *sch) @@ -310,6 +352,13 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } + q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + sch->handle, extack); + if (!q->qdisc) + return -ENOMEM; + + qdisc_hash_add(q->qdisc, false); + q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); q->enqueue = cbs_enqueue_soft; @@ -328,6 +377,9 @@ static void cbs_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); cbs_disable_offload(dev, q); + + if (q->qdisc) + qdisc_destroy(q->qdisc); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -356,8 +408,72 @@ nla_put_failure: return -1; } +static int cbs_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + if (cl != 1 || !q->qdisc) /* only one class */ + return -ENOENT; + + tcm->tcm_handle |= TC_H_MIN(1); + tcm->tcm_info = q->qdisc->handle; + + return 0; +} + +static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old, struct netlink_ext_ack *extack) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + if (!new) { + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + sch->handle, NULL); + if (!new) + new = &noop_qdisc; + } + + *old = qdisc_replace(sch, new, &q->qdisc); + return 0; +} + +static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->qdisc; +} + +static unsigned long cbs_find(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) { + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + } + walker->count++; + } +} + +static const struct Qdisc_class_ops cbs_class_ops = { + .graft = cbs_graft, + .leaf = cbs_leaf, + .find = cbs_find, + .walk = cbs_walk, + .dump = cbs_dump_class, +}; + static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .id = "cbs", + .cl_ops = &cbs_class_ops, .priv_size = sizeof(struct cbs_sched_data), .enqueue = cbs_enqueue, .dequeue = cbs_dequeue, diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index cd2e0e342fb6..6c0a9d5dbf94 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -479,24 +479,28 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, q->cparams.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { - int err = fq_codel_change(sch, opt, extack); + err = fq_codel_change(sch, opt, extack); if (err) - return err; + goto init_failure; } err = tcf_block_get(&q->block, &q->filter_list, sch, extack); if (err) - return err; + goto init_failure; if (!q->flows) { q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_codel_flow), GFP_KERNEL); - if (!q->flows) - return -ENOMEM; + if (!q->flows) { + err = -ENOMEM; + goto init_failure; + } q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL); - if (!q->backlogs) - return -ENOMEM; + if (!q->backlogs) { + err = -ENOMEM; + goto alloc_failure; + } for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; @@ -509,6 +513,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt, else sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; + +alloc_failure: + kvfree(q->flows); + q->flows = NULL; +init_failure: + q->flows_cnt = 0; + return err; } static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c new file mode 100644 index 000000000000..52c0b6d8f1d7 --- /dev/null +++ b/net/sched/sch_skbprio.c @@ -0,0 +1,320 @@ +/* + * net/sched/sch_skbprio.c SKB Priority Queue. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Nishanth Devarajan, <ndev2021@gmail.com> + * Cody Doucette, <doucette@bu.edu> + * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu + */ + +#include <linux/string.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/pkt_sched.h> +#include <net/sch_generic.h> +#include <net/inet_ecn.h> + +/* SKB Priority Queue + * ================================= + * + * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes + * packets according to their skb->priority field. Under congestion, + * Skbprio drops already-enqueued lower priority packets to make space + * available for higher priority packets; it was conceived as a solution + * for denial-of-service defenses that need to route packets with different + * priorities as a mean to overcome DoS attacks. + */ + +struct skbprio_sched_data { + /* Queue state. */ + struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY]; + struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY]; + u16 highest_prio; + u16 lowest_prio; +}; + +static u16 calc_new_high_prio(const struct skbprio_sched_data *q) +{ + int prio; + + for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) { + if (!skb_queue_empty(&q->qdiscs[prio])) + return prio; + } + + /* SKB queue is empty, return 0 (default highest priority setting). */ + return 0; +} + +static u16 calc_new_low_prio(const struct skbprio_sched_data *q) +{ + int prio; + + for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) { + if (!skb_queue_empty(&q->qdiscs[prio])) + return prio; + } + + /* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1 + * (default lowest priority setting). + */ + return SKBPRIO_MAX_PRIORITY - 1; +} + +static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1; + struct skbprio_sched_data *q = qdisc_priv(sch); + struct sk_buff_head *qdisc; + struct sk_buff_head *lp_qdisc; + struct sk_buff *to_drop; + u16 prio, lp; + + /* Obtain the priority of @skb. */ + prio = min(skb->priority, max_priority); + + qdisc = &q->qdiscs[prio]; + if (sch->q.qlen < sch->limit) { + __skb_queue_tail(qdisc, skb); + qdisc_qstats_backlog_inc(sch, skb); + q->qstats[prio].backlog += qdisc_pkt_len(skb); + + /* Check to update highest and lowest priorities. */ + if (prio > q->highest_prio) + q->highest_prio = prio; + + if (prio < q->lowest_prio) + q->lowest_prio = prio; + + sch->q.qlen++; + return NET_XMIT_SUCCESS; + } + + /* If this packet has the lowest priority, drop it. */ + lp = q->lowest_prio; + if (prio <= lp) { + q->qstats[prio].drops++; + q->qstats[prio].overlimits++; + return qdisc_drop(skb, sch, to_free); + } + + __skb_queue_tail(qdisc, skb); + qdisc_qstats_backlog_inc(sch, skb); + q->qstats[prio].backlog += qdisc_pkt_len(skb); + + /* Drop the packet at the tail of the lowest priority qdisc. */ + lp_qdisc = &q->qdiscs[lp]; + to_drop = __skb_dequeue_tail(lp_qdisc); + BUG_ON(!to_drop); + qdisc_qstats_backlog_dec(sch, to_drop); + qdisc_drop(to_drop, sch, to_free); + + q->qstats[lp].backlog -= qdisc_pkt_len(to_drop); + q->qstats[lp].drops++; + q->qstats[lp].overlimits++; + + /* Check to update highest and lowest priorities. */ + if (skb_queue_empty(lp_qdisc)) { + if (q->lowest_prio == q->highest_prio) { + /* The incoming packet is the only packet in queue. */ + BUG_ON(sch->q.qlen != 1); + q->lowest_prio = prio; + q->highest_prio = prio; + } else { + q->lowest_prio = calc_new_low_prio(q); + } + } + + if (prio > q->highest_prio) + q->highest_prio = prio; + + return NET_XMIT_CN; +} + +static struct sk_buff *skbprio_dequeue(struct Qdisc *sch) +{ + struct skbprio_sched_data *q = qdisc_priv(sch); + struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio]; + struct sk_buff *skb = __skb_dequeue(hpq); + + if (unlikely(!skb)) + return NULL; + + sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + + q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb); + + /* Update highest priority field. */ + if (skb_queue_empty(hpq)) { + if (q->lowest_prio == q->highest_prio) { + BUG_ON(sch->q.qlen); + q->highest_prio = 0; + q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; + } else { + q->highest_prio = calc_new_high_prio(q); + } + } + return skb; +} + +static int skbprio_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct tc_skbprio_qopt *ctl = nla_data(opt); + + sch->limit = ctl->limit; + return 0; +} + +static int skbprio_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct skbprio_sched_data *q = qdisc_priv(sch); + int prio; + + /* Initialise all queues, one for each possible priority. */ + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) + __skb_queue_head_init(&q->qdiscs[prio]); + + memset(&q->qstats, 0, sizeof(q->qstats)); + q->highest_prio = 0; + q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; + sch->limit = 64; + if (!opt) + return 0; + + return skbprio_change(sch, opt, extack); +} + +static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct tc_skbprio_qopt opt; + + opt.limit = sch->limit; + + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) + return -1; + + return skb->len; +} + +static void skbprio_reset(struct Qdisc *sch) +{ + struct skbprio_sched_data *q = qdisc_priv(sch); + int prio; + + sch->qstats.backlog = 0; + sch->q.qlen = 0; + + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) + __skb_queue_purge(&q->qdiscs[prio]); + + memset(&q->qstats, 0, sizeof(q->qstats)); + q->highest_prio = 0; + q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; +} + +static void skbprio_destroy(struct Qdisc *sch) +{ + struct skbprio_sched_data *q = qdisc_priv(sch); + int prio; + + for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) + __skb_queue_purge(&q->qdiscs[prio]); +} + +static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + +static unsigned long skbprio_find(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct skbprio_sched_data *q = qdisc_priv(sch); + if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1], + q->qstats[cl - 1].qlen) < 0) + return -1; + return 0; +} + +static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, i + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops skbprio_class_ops = { + .leaf = skbprio_leaf, + .find = skbprio_find, + .dump = skbprio_dump_class, + .dump_stats = skbprio_dump_class_stats, + .walk = skbprio_walk, +}; + +static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = { + .cl_ops = &skbprio_class_ops, + .id = "skbprio", + .priv_size = sizeof(struct skbprio_sched_data), + .enqueue = skbprio_enqueue, + .dequeue = skbprio_dequeue, + .peek = qdisc_peek_dequeued, + .init = skbprio_init, + .reset = skbprio_reset, + .change = skbprio_change, + .dump = skbprio_dump, + .destroy = skbprio_destroy, + .owner = THIS_MODULE, +}; + +static int __init skbprio_module_init(void) +{ + return register_qdisc(&skbprio_qdisc_ops); +} + +static void __exit skbprio_module_exit(void) +{ + unregister_qdisc(&skbprio_qdisc_ops); +} + +module_init(skbprio_module_init) +module_exit(skbprio_module_exit) + +MODULE_LICENSE("GPL"); |