diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/act_ct.c | 1 | ||||
-rw-r--r-- | net/sched/act_mpls.c | 1 | ||||
-rw-r--r-- | net/sched/act_pedit.c | 12 | ||||
-rw-r--r-- | net/sched/act_tunnel_key.c | 207 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 254 | ||||
-rw-r--r-- | net/sched/sch_pie.c | 120 | ||||
-rw-r--r-- | net/sched/sch_taprio.c | 28 |
7 files changed, 590 insertions, 33 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 68d6af56b243..c13638aeef46 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -474,7 +474,6 @@ drop: } static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { - [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 }, [TCA_CT_ACTION] = { .type = NLA_U16 }, [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, [TCA_CT_ZONE] = { .type = NLA_U16 }, diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 4d8c822b6aca..c7d5e12ee919 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -119,7 +119,6 @@ static int valid_label(const struct nlattr *attr, } static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { - [TCA_MPLS_UNSPEC] = { .strict_start_type = TCA_MPLS_UNSPEC + 1 }, [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), [TCA_MPLS_PROTO] = { .type = NLA_U16 }, [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index d5eff6ac17a9..3ad718576304 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -43,7 +43,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, int err = -EINVAL; int rem; - if (!nla || !n) + if (!nla) return NULL; keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); @@ -171,6 +171,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } parm = nla_data(pattr); + if (!parm->nkeys) { + NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed"); + return -EINVAL; + } ksize = parm->nkeys * sizeof(struct tc_pedit_key); if (nla_len(pattr) < sizeof(*parm) + ksize) { NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid"); @@ -184,12 +188,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - if (!parm->nkeys) { - tcf_idr_cleanup(tn, index); - NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed"); - ret = -EINVAL; - goto out_free; - } ret = tcf_idr_create(tn, index, est, a, &act_pedit_ops, bind, false, 0); if (ret) { diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index cb34e5d57aaa..6379f9568ab8 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -10,6 +10,8 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/geneve.h> +#include <net/vxlan.h> +#include <net/erspan.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -53,7 +55,11 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, static const struct nla_policy enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = { + [TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC] = { + .strict_start_type = TCA_TUNNEL_KEY_ENC_OPTS_VXLAN }, [TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, + [TCA_TUNNEL_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED }, + [TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -64,6 +70,19 @@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = { .len = 128 }, }; +static const struct nla_policy +vxlan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1] = { + [TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +erspan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1] = { + [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 }, + [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, + [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, +}; + static int tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, struct netlink_ext_ack *extack) @@ -116,10 +135,89 @@ tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, return opt_len; } +static int +tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1]; + int err; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX, nla, + vxlan_opt_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp"); + return -EINVAL; + } + + if (dst) { + struct vxlan_metadata *md = dst; + + md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]); + } + + return sizeof(struct vxlan_metadata); +} + +static int +tunnel_key_copy_erspan_opt(const struct nlattr *nla, void *dst, int dst_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1]; + int err; + u8 ver; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX, nla, + erspan_opt_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver"); + return -EINVAL; + } + + ver = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]); + if (ver == 1) { + if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index"); + return -EINVAL; + } + } else if (ver == 2) { + if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] || + !tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid"); + return -EINVAL; + } + } else { + NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect"); + return -EINVAL; + } + + if (dst) { + struct erspan_metadata *md = dst; + + md->version = ver; + if (ver == 1) { + nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]; + md->u.index = nla_get_be32(nla); + } else { + nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR]; + md->u.md2.dir = nla_get_u8(nla); + nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]; + set_hwid(&md->u.md2, nla_get_u8(nla)); + } + } + + return sizeof(struct erspan_metadata); +} + static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, int dst_len, struct netlink_ext_ack *extack) { - int err, rem, opt_len, len = nla_len(nla), opts_len = 0; + int err, rem, opt_len, len = nla_len(nla), opts_len = 0, type = 0; const struct nlattr *attr, *head = nla_data(nla); err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, @@ -130,15 +228,48 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, nla_for_each_attr(attr, head, len, rem) { switch (nla_type(attr)) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: + if (type && type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); + return -EINVAL; + } opt_len = tunnel_key_copy_geneve_opt(attr, dst, dst_len, extack); if (opt_len < 0) return opt_len; opts_len += opt_len; + if (opts_len > IP_TUNNEL_OPTS_MAX) { + NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); + return -EINVAL; + } if (dst) { dst_len -= opt_len; dst += opt_len; } + type = TUNNEL_GENEVE_OPT; + break; + case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: + if (type) { + NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options"); + return -EINVAL; + } + opt_len = tunnel_key_copy_vxlan_opt(attr, dst, + dst_len, extack); + if (opt_len < 0) + return opt_len; + opts_len += opt_len; + type = TUNNEL_VXLAN_OPT; + break; + case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: + if (type) { + NL_SET_ERR_MSG(extack, "Duplicate type for erspan options"); + return -EINVAL; + } + opt_len = tunnel_key_copy_erspan_opt(attr, dst, + dst_len, extack); + if (opt_len < 0) + return opt_len; + opts_len += opt_len; + type = TUNNEL_ERSPAN_OPT; break; } } @@ -175,6 +306,22 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #else return -EAFNOSUPPORT; #endif + case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: +#if IS_ENABLED(CONFIG_INET) + info->key.tun_flags |= TUNNEL_VXLAN_OPT; + return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), + opts_len, extack); +#else + return -EAFNOSUPPORT; +#endif + case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: +#if IS_ENABLED(CONFIG_INET) + info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), + opts_len, extack); +#else + return -EAFNOSUPPORT; +#endif default: NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type"); return -EINVAL; @@ -451,6 +598,56 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, return 0; } +static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1); + struct nlattr *start; + + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN); + if (!start) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP, md->gbp)) { + nla_nest_cancel(skb, start); + return -EMSGSIZE; + } + + nla_nest_end(skb, start); + return 0; +} + +static int tunnel_key_erspan_opts_dump(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + struct erspan_metadata *md = (struct erspan_metadata *)(info + 1); + struct nlattr *start; + + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN); + if (!start) + return -EMSGSIZE; + + if (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER, md->version)) + goto err; + + if (md->version == 1 && + nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index)) + goto err; + + if (md->version == 2 && + (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR, + md->u.md2.dir) || + nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID, + get_hwid(&md->u.md2)))) + goto err; + + nla_nest_end(skb, start); + return 0; +err: + nla_nest_cancel(skb, start); + return -EMSGSIZE; +} + static int tunnel_key_opts_dump(struct sk_buff *skb, const struct ip_tunnel_info *info) { @@ -468,6 +665,14 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, err = tunnel_key_geneve_opts_dump(skb, info); if (err) goto err_out; + } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + err = tunnel_key_vxlan_opts_dump(skb, info); + if (err) + goto err_out; + } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + err = tunnel_key_erspan_opts_dump(skb, info); + if (err) + goto err_out; } else { err_out: nla_nest_cancel(skb, start); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 74221e3351c3..c307ee1d6ca6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -22,6 +22,8 @@ #include <net/ip.h> #include <net/flow_dissector.h> #include <net/geneve.h> +#include <net/vxlan.h> +#include <net/erspan.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -688,7 +690,11 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { static const struct nla_policy enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPTS_UNSPEC] = { + .strict_start_type = TCA_FLOWER_KEY_ENC_OPTS_VXLAN }, [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -699,6 +705,19 @@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { .len = 128 }, }; +static const struct nla_policy +vxlan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, +}; + static void fl_set_key_val(struct nlattr **tb, void *val, int val_type, void *mask, int mask_type, int len) @@ -928,6 +947,105 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, return sizeof(struct geneve_opt) + data_len; } +static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1]; + struct vxlan_metadata *md; + int err; + + md = (struct vxlan_metadata *)&key->enc_opts.data[key->enc_opts.len]; + memset(md, 0xff, sizeof(*md)); + + if (!depth) + return sizeof(*md); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_VXLAN) { + NL_SET_ERR_MSG(extack, "Non-vxlan option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX, nla, + vxlan_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) + md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]); + + return sizeof(*md); +} + +static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1]; + struct erspan_metadata *md; + int err; + + md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len]; + memset(md, 0xff, sizeof(*md)); + md->version = 1; + + if (!depth) + return sizeof(*md); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_ERSPAN) { + NL_SET_ERR_MSG(extack, "Non-erspan option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX, nla, + erspan_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) + md->version = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]); + + if (md->version == 1) { + if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index"); + return -EINVAL; + } + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { + nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]; + md->u.index = nla_get_be32(nla); + } + } else if (md->version == 2) { + if (!option_len && (!tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] || + !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID])) { + NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid"); + return -EINVAL; + } + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) { + nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]; + md->u.md2.dir = nla_get_u8(nla); + } + if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) { + nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]; + set_hwid(&md->u.md2, nla_get_u8(nla)); + } + } else { + NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect"); + return -EINVAL; + } + + return sizeof(*md); +} + static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -958,6 +1076,11 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { switch (nla_type(nla_opt_key)) { case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: + if (key->enc_opts.dst_opt_type && + key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); + return -EINVAL; + } option_len = 0; key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; option_len = fl_set_geneve_opt(nla_opt_key, key, @@ -986,6 +1109,72 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, if (msk_depth) nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; + case TCA_FLOWER_KEY_ENC_OPTS_VXLAN: + if (key->enc_opts.dst_opt_type) { + NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options"); + return -EINVAL; + } + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + option_len = fl_set_vxlan_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. + */ + mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + option_len = fl_set_vxlan_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); + return -EINVAL; + } + + if (msk_depth) + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); + break; + case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN: + if (key->enc_opts.dst_opt_type) { + NL_SET_ERR_MSG(extack, "Duplicate type for erspan options"); + return -EINVAL; + } + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + option_len = fl_set_erspan_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. + */ + mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + option_len = fl_set_erspan_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); + return -EINVAL; + } + + if (msk_depth) + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); + break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); return -EINVAL; @@ -2135,6 +2324,61 @@ nla_put_failure: return -EMSGSIZE; } +static int fl_dump_key_vxlan_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct vxlan_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_VXLAN); + if (!nest) + goto nla_put_failure; + + md = (struct vxlan_metadata *)&enc_opts->data[0]; + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, md->gbp)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_erspan_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct erspan_metadata *md; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_ERSPAN); + if (!nest) + goto nla_put_failure; + + md = (struct erspan_metadata *)&enc_opts->data[0]; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, md->version)) + goto nla_put_failure; + + if (md->version == 1 && + nla_put_be32(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index)) + goto nla_put_failure; + + if (md->version == 2 && + (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR, + md->u.md2.dir) || + nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID, + get_hwid(&md->u.md2)))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fl_dump_key_ct(struct sk_buff *skb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask) @@ -2188,6 +2432,16 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, if (err) goto nla_put_failure; break; + case TUNNEL_VXLAN_OPT: + err = fl_dump_key_vxlan_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; + case TUNNEL_ERSPAN_OPT: + err = fl_dump_key_erspan_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; default: goto nla_put_failure; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index df98a887eb89..b0b0dc46af61 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -22,6 +22,7 @@ #define QUEUE_THRESHOLD 16384 #define DQCOUNT_INVALID -1 +#define DTIME_INVALID 0xffffffffffffffff #define MAX_PROB 0xffffffffffffffff #define PIE_SCALE 8 @@ -34,6 +35,7 @@ struct pie_params { u32 beta; /* and are used for shift relative to 1 */ bool ecn; /* true if ecn is enabled */ bool bytemode; /* to scale drop early prob based on pkt size */ + u8 dq_rate_estimator; /* to calculate delay using Little's law */ }; /* variables used */ @@ -77,11 +79,34 @@ static void pie_params_init(struct pie_params *params) params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */ params->ecn = false; params->bytemode = false; + params->dq_rate_estimator = false; +} + +/* private skb vars */ +struct pie_skb_cb { + psched_time_t enqueue_time; +}; + +static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb)); + return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data; +} + +static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb) +{ + return get_pie_cb(skb)->enqueue_time; +} + +static void pie_set_enqueue_time(struct sk_buff *skb) +{ + get_pie_cb(skb)->enqueue_time = psched_get_time(); } static void pie_vars_init(struct pie_vars *vars) { vars->dq_count = DQCOUNT_INVALID; + vars->dq_tstamp = DTIME_INVALID; vars->accu_prob = 0; vars->avg_dq_rate = 0; /* default of 150 ms in pschedtime */ @@ -172,6 +197,10 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* we can enqueue the packet */ if (enqueue) { + /* Set enqueue time only when dq_rate_estimator is disabled. */ + if (!q->params.dq_rate_estimator) + pie_set_enqueue_time(skb); + q->stats.packets_in++; if (qdisc_qlen(sch) > q->stats.maxq) q->stats.maxq = qdisc_qlen(sch); @@ -194,6 +223,7 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { [TCA_PIE_BETA] = {.type = NLA_U32}, [TCA_PIE_ECN] = {.type = NLA_U32}, [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, + [TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32}, }; static int pie_change(struct Qdisc *sch, struct nlattr *opt, @@ -247,6 +277,10 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_PIE_BYTEMODE]) q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]); + if (tb[TCA_PIE_DQ_RATE_ESTIMATOR]) + q->params.dq_rate_estimator = + nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]); + /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { @@ -266,6 +300,28 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) { struct pie_sched_data *q = qdisc_priv(sch); int qlen = sch->qstats.backlog; /* current queue size in bytes */ + psched_time_t now = psched_get_time(); + u32 dtime = 0; + + /* If dq_rate_estimator is disabled, calculate qdelay using the + * packet timestamp. + */ + if (!q->params.dq_rate_estimator) { + q->vars.qdelay = now - pie_get_enqueue_time(skb); + + if (q->vars.dq_tstamp != DTIME_INVALID) + dtime = now - q->vars.dq_tstamp; + + q->vars.dq_tstamp = now; + + if (qlen == 0) + q->vars.qdelay = 0; + + if (dtime == 0) + return; + + goto burst_allowance_reduction; + } /* If current queue is about 10 packets or more and dq_count is unset * we have enough packets to calculate the drain rate. Save @@ -289,10 +345,10 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) q->vars.dq_count += skb->len; if (q->vars.dq_count >= QUEUE_THRESHOLD) { - psched_time_t now = psched_get_time(); - u32 dtime = now - q->vars.dq_tstamp; u32 count = q->vars.dq_count << PIE_SCALE; + dtime = now - q->vars.dq_tstamp; + if (dtime == 0) return; @@ -317,14 +373,19 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) q->vars.dq_tstamp = psched_get_time(); } - if (q->vars.burst_time > 0) { - if (q->vars.burst_time > dtime) - q->vars.burst_time -= dtime; - else - q->vars.burst_time = 0; - } + goto burst_allowance_reduction; } } + + return; + +burst_allowance_reduction: + if (q->vars.burst_time > 0) { + if (q->vars.burst_time > dtime) + q->vars.burst_time -= dtime; + else + q->vars.burst_time = 0; + } } static void calculate_probability(struct Qdisc *sch) @@ -332,19 +393,25 @@ static void calculate_probability(struct Qdisc *sch) struct pie_sched_data *q = qdisc_priv(sch); u32 qlen = sch->qstats.backlog; /* queue size in bytes */ psched_time_t qdelay = 0; /* in pschedtime */ - psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */ + psched_time_t qdelay_old = 0; /* in pschedtime */ s64 delta = 0; /* determines the change in probability */ u64 oldprob; u64 alpha, beta; u32 power; bool update_prob = true; - q->vars.qdelay_old = q->vars.qdelay; + if (q->params.dq_rate_estimator) { + qdelay_old = q->vars.qdelay; + q->vars.qdelay_old = q->vars.qdelay; - if (q->vars.avg_dq_rate > 0) - qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate; - else - qdelay = 0; + if (q->vars.avg_dq_rate > 0) + qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate; + else + qdelay = 0; + } else { + qdelay = q->vars.qdelay; + qdelay_old = q->vars.qdelay_old; + } /* If qdelay is zero and qlen is not, it means qlen is very small, less * than dequeue_rate, so we do not update probabilty in this round @@ -430,14 +497,18 @@ static void calculate_probability(struct Qdisc *sch) /* We restart the measurement cycle if the following conditions are met * 1. If the delay has been low for 2 consecutive Tupdate periods * 2. Calculated drop probability is zero - * 3. We have atleast one estimate for the avg_dq_rate ie., - * is a non-zero value + * 3. If average dq_rate_estimator is enabled, we have atleast one + * estimate for the avg_dq_rate ie., is a non-zero value */ if ((q->vars.qdelay < q->params.target / 2) && (q->vars.qdelay_old < q->params.target / 2) && q->vars.prob == 0 && - q->vars.avg_dq_rate > 0) + (!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) { pie_vars_init(&q->vars); + } + + if (!q->params.dq_rate_estimator) + q->vars.qdelay_old = qdelay; } static void pie_timer(struct timer_list *t) @@ -497,7 +568,9 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || - nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode)) + nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) || + nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR, + q->params.dq_rate_estimator)) goto nla_put_failure; return nla_nest_end(skb, opts); @@ -514,9 +587,6 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .prob = q->vars.prob, .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / NSEC_PER_USEC, - /* unscale and return dq_rate in bytes per sec */ - .avg_dq_rate = q->vars.avg_dq_rate * - (PSCHED_TICKS_PER_SEC) >> PIE_SCALE, .packets_in = q->stats.packets_in, .overlimit = q->stats.overlimit, .maxq = q->stats.maxq, @@ -524,6 +594,14 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .ecn_mark = q->stats.ecn_mark, }; + /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ + st.dq_rate_estimating = q->params.dq_rate_estimator; + + /* unscale and return dq_rate in bytes per sec */ + if (q->params.dq_rate_estimator) + st.avg_dq_rate = q->vars.avg_dq_rate * + (PSCHED_TICKS_PER_SEC) >> PIE_SCALE; + return gnet_stats_copy_app(d, &st, sizeof(st)); } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 7cd68628c637..c609373c8661 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -922,7 +922,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, } /* Verify priority mapping uses valid tcs */ - for (i = 0; i < TC_BITMASK + 1; i++) { + for (i = 0; i <= TC_BITMASK; i++) { if (qopt->prio_tc_map[i] >= qopt->num_tc) { NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); return -EINVAL; @@ -1347,6 +1347,26 @@ out: return err; } +static int taprio_mqprio_cmp(const struct net_device *dev, + const struct tc_mqprio_qopt *mqprio) +{ + int i; + + if (!mqprio || mqprio->num_tc != dev->num_tc) + return -1; + + for (i = 0; i < mqprio->num_tc; i++) + if (dev->tc_to_txq[i].count != mqprio->count[i] || + dev->tc_to_txq[i].offset != mqprio->offset[i]) + return -1; + + for (i = 0; i <= TC_BITMASK; i++) + if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i]) + return -1; + + return 0; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1398,6 +1418,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, admin = rcu_dereference(q->admin_sched); rcu_read_unlock(); + /* no changes - no new mqprio settings */ + if (!taprio_mqprio_cmp(dev, mqprio)) + mqprio = NULL; + if (mqprio && (oper || admin)) { NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported"); err = -ENOTSUPP; @@ -1455,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->offset[i]); /* Always use supplied priority mappings */ - for (i = 0; i < TC_BITMASK + 1; i++) + for (i = 0; i <= TC_BITMASK; i++) netdev_set_prio_tc_map(dev, i, mqprio->prio_tc_map[i]); } |