summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2020-06-30 12:07:51 +0200
committerThomas Gleixner <tglx@linutronix.de>2020-06-30 12:07:51 +0200
commit98817a84ff1c755c347ac633ff017a623a631fad (patch)
tree93bd22485a697d1dbba97b2829b82e03c08ae57f /net/sched
parent0e698dfa282211e414076f9dc7e83c1c288314fd (diff)
parent005c34ae4b44f085120d7f371121ec7ded677761 (diff)
downloadlinux-98817a84ff1c755c347ac633ff017a623a631fad.tar.bz2
Merge tag 'irqchip-fixes-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/urgent
Pull irqchip fixes from Marc Zyngier: - Fix atomicity of affinity update in the GIC driver - Don't sleep in atomic when waiting for a GICv4.1 RD to respond - Fix a couple of typos in user-visible messages
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig134
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c43
-rw-r--r--net/sched/act_ct.c5
-rw-r--r--net/sched/act_gate.c639
-rw-r--r--net/sched/cls_api.c282
-rw-r--r--net/sched/cls_flower.c350
-rw-r--r--net/sched/em_ipt.c2
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_cake.c65
-rw-r--r--net/sched/sch_choke.c9
-rw-r--r--net/sched/sch_fq.c143
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_fq_pie.c4
-rw-r--r--net/sched/sch_generic.c110
-rw-r--r--net/sched/sch_red.c9
-rw-r--r--net/sched/sch_sfq.c9
-rw-r--r--net/sched/sch_skbprio.c3
18 files changed, 1459 insertions, 354 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index bfbefb7bff9d..84badf00647e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -6,7 +6,7 @@
menuconfig NET_SCHED
bool "QoS and/or fair queueing"
select NET_SCH_FIFO
- ---help---
+ help
When the kernel has several packets to send out over a network
device, it has to decide which ones to send first, which ones to
delay, and which ones to drop. This is the job of the queueing
@@ -47,7 +47,7 @@ comment "Queueing/Scheduling"
config NET_SCH_CBQ
tristate "Class Based Queueing (CBQ)"
- ---help---
+ help
Say Y here if you want to use the Class-Based Queueing (CBQ) packet
scheduling algorithm. This algorithm classifies the waiting packets
into a tree-like hierarchy of classes; the leaves of this tree are
@@ -64,7 +64,7 @@ config NET_SCH_CBQ
config NET_SCH_HTB
tristate "Hierarchical Token Bucket (HTB)"
- ---help---
+ help
Say Y here if you want to use the Hierarchical Token Buckets (HTB)
packet scheduling algorithm. See
<http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
@@ -78,7 +78,7 @@ config NET_SCH_HTB
config NET_SCH_HFSC
tristate "Hierarchical Fair Service Curve (HFSC)"
- ---help---
+ help
Say Y here if you want to use the Hierarchical Fair Service Curve
(HFSC) packet scheduling algorithm.
@@ -88,7 +88,7 @@ config NET_SCH_HFSC
config NET_SCH_ATM
tristate "ATM Virtual Circuits (ATM)"
depends on ATM
- ---help---
+ help
Say Y here if you want to use the ATM pseudo-scheduler. This
provides a framework for invoking classifiers, which in turn
select classes of this queuing discipline. Each class maps
@@ -101,7 +101,7 @@ config NET_SCH_ATM
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
- ---help---
+ help
Say Y here if you want to use an n-band priority queue packet
scheduler.
@@ -110,7 +110,7 @@ config NET_SCH_PRIO
config NET_SCH_MULTIQ
tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
- ---help---
+ help
Say Y here if you want to use an n-band queue packet scheduler
to support devices that have multiple hardware transmit queues.
@@ -119,7 +119,7 @@ config NET_SCH_MULTIQ
config NET_SCH_RED
tristate "Random Early Detection (RED)"
- ---help---
+ help
Say Y here if you want to use the Random Early Detection (RED)
packet scheduling algorithm.
@@ -130,7 +130,7 @@ config NET_SCH_RED
config NET_SCH_SFB
tristate "Stochastic Fair Blue (SFB)"
- ---help---
+ help
Say Y here if you want to use the Stochastic Fair Blue (SFB)
packet scheduling algorithm.
@@ -141,7 +141,7 @@ config NET_SCH_SFB
config NET_SCH_SFQ
tristate "Stochastic Fairness Queueing (SFQ)"
- ---help---
+ help
Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
packet scheduling algorithm.
@@ -152,7 +152,7 @@ config NET_SCH_SFQ
config NET_SCH_TEQL
tristate "True Link Equalizer (TEQL)"
- ---help---
+ help
Say Y here if you want to use the True Link Equalizer (TLE) packet
scheduling algorithm. This queueing discipline allows the combination
of several physical devices into one virtual device.
@@ -164,7 +164,7 @@ config NET_SCH_TEQL
config NET_SCH_TBF
tristate "Token Bucket Filter (TBF)"
- ---help---
+ help
Say Y here if you want to use the Token Bucket Filter (TBF) packet
scheduling algorithm.
@@ -175,7 +175,7 @@ config NET_SCH_TBF
config NET_SCH_CBS
tristate "Credit Based Shaper (CBS)"
- ---help---
+ help
Say Y here if you want to use the Credit Based Shaper (CBS) packet
scheduling algorithm.
@@ -208,7 +208,7 @@ config NET_SCH_TAPRIO
config NET_SCH_GRED
tristate "Generic Random Early Detection (GRED)"
- ---help---
+ help
Say Y here if you want to use the Generic Random Early Detection
(GRED) packet scheduling algorithm for some of your network devices
(see the top of <file:net/sched/sch_red.c> for details and
@@ -219,7 +219,7 @@ config NET_SCH_GRED
config NET_SCH_DSMARK
tristate "Differentiated Services marker (DSMARK)"
- ---help---
+ help
Say Y if you want to schedule packets according to the
Differentiated Services architecture proposed in RFC 2475.
Technical information on this method, with pointers to associated
@@ -230,7 +230,7 @@ config NET_SCH_DSMARK
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
- ---help---
+ help
Say Y if you want to emulate network delay, loss, and packet
re-ordering. This is often useful to simulate networks when
testing applications or protocols.
@@ -384,7 +384,7 @@ config NET_SCH_INGRESS
depends on NET_CLS_ACT
select NET_INGRESS
select NET_EGRESS
- ---help---
+ help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
which can also have actions attached to them. In case of outgoing packets,
@@ -398,7 +398,7 @@ config NET_SCH_INGRESS
config NET_SCH_PLUG
tristate "Plug network traffic until release (PLUG)"
- ---help---
+ help
This queuing discipline allows userspace to plug/unplug a network
output queue, using the netlink interface. When it receives an
@@ -441,7 +441,7 @@ config NET_SCH_ETS
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
- ---help---
+ help
Support for selection of default queuing discipline.
Nearly all users can safely say no here, and the default
@@ -492,7 +492,7 @@ config NET_CLS
config NET_CLS_BASIC
tristate "Elementary classification (BASIC)"
select NET_CLS
- ---help---
+ help
Say Y here if you want to be able to classify packets using
only extended matches and actions.
@@ -502,7 +502,7 @@ config NET_CLS_BASIC
config NET_CLS_TCINDEX
tristate "Traffic-Control Index (TCINDEX)"
select NET_CLS
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
traffic control indices. You will want this feature if you want
to implement Differentiated Services together with DSMARK.
@@ -515,7 +515,7 @@ config NET_CLS_ROUTE4
depends on INET
select IP_ROUTE_CLASSID
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets
according to the route table entry they matched.
@@ -525,7 +525,7 @@ config NET_CLS_ROUTE4
config NET_CLS_FW
tristate "Netfilter mark (FW)"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets
according to netfilter/firewall marks.
@@ -535,7 +535,7 @@ config NET_CLS_FW
config NET_CLS_U32
tristate "Universal 32bit comparisons w/ hashing (U32)"
select NET_CLS
- ---help---
+ help
Say Y here to be able to classify packets using a universal
32bit pieces based comparison scheme.
@@ -545,20 +545,20 @@ config NET_CLS_U32
config CLS_U32_PERF
bool "Performance counters support"
depends on NET_CLS_U32
- ---help---
+ help
Say Y here to make u32 gather additional statistics useful for
fine tuning u32 classifiers.
config CLS_U32_MARK
bool "Netfilter marks support"
depends on NET_CLS_U32
- ---help---
+ help
Say Y here to be able to use netfilter marks as u32 key.
config NET_CLS_RSVP
tristate "IPv4 Resource Reservation Protocol (RSVP)"
select NET_CLS
- ---help---
+ help
The Resource Reservation Protocol (RSVP) permits end systems to
request a minimum and maximum data flow rate for a connection; this
is important for real time data such as streaming sound or video.
@@ -572,7 +572,7 @@ config NET_CLS_RSVP
config NET_CLS_RSVP6
tristate "IPv6 Resource Reservation Protocol (RSVP6)"
select NET_CLS
- ---help---
+ help
The Resource Reservation Protocol (RSVP) permits end systems to
request a minimum and maximum data flow rate for a connection; this
is important for real time data such as streaming sound or video.
@@ -586,7 +586,7 @@ config NET_CLS_RSVP6
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys. This is mostly useful
in combination with SFQ.
@@ -599,7 +599,7 @@ config NET_CLS_CGROUP
select NET_CLS
select CGROUP_NET_CLASSID
depends on CGROUPS
- ---help---
+ help
Say Y here if you want to classify packets based on the control
cgroup of their process.
@@ -609,7 +609,7 @@ config NET_CLS_CGROUP
config NET_CLS_BPF
tristate "BPF-based classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
programmable BPF (JIT'ed) filters as an alternative to ematches.
@@ -619,7 +619,7 @@ config NET_CLS_BPF
config NET_CLS_FLOWER
tristate "Flower classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys and masks.
@@ -629,7 +629,7 @@ config NET_CLS_FLOWER
config NET_CLS_MATCHALL
tristate "Match-all classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
nothing. Every packet will match.
@@ -639,7 +639,7 @@ config NET_CLS_MATCHALL
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
- ---help---
+ help
Say Y here if you want to use extended matches on top of classifiers
and select the extended matches below.
@@ -653,7 +653,7 @@ config NET_EMATCH_STACK
int "Stack size"
depends on NET_EMATCH
default "32"
- ---help---
+ help
Size of the local stack variable used while evaluating the tree of
ematches. Limits the depth of the tree, i.e. the number of
encapsulated precedences. Every level requires 4 bytes of additional
@@ -662,7 +662,7 @@ config NET_EMATCH_STACK
config NET_EMATCH_CMP
tristate "Simple packet data comparison"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
simple packet data comparisons for 8, 16, and 32bit values.
@@ -672,7 +672,7 @@ config NET_EMATCH_CMP
config NET_EMATCH_NBYTE
tristate "Multi byte comparison"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
multiple byte comparisons mainly useful for IPv6 address comparisons.
@@ -682,7 +682,7 @@ config NET_EMATCH_NBYTE
config NET_EMATCH_U32
tristate "U32 key"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets using
the famous u32 key in combination with logic relations.
@@ -692,7 +692,7 @@ config NET_EMATCH_U32
config NET_EMATCH_META
tristate "Metadata"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
metadata such as load average, netfilter attributes, socket
attributes and routing decisions.
@@ -707,7 +707,7 @@ config NET_EMATCH_TEXT
select TEXTSEARCH_KMP
select TEXTSEARCH_BM
select TEXTSEARCH_FSM
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
textsearch comparisons.
@@ -717,7 +717,7 @@ config NET_EMATCH_TEXT
config NET_EMATCH_CANID
tristate "CAN Identifier"
depends on NET_EMATCH && (CAN=y || CAN=m)
- ---help---
+ help
Say Y here if you want to be able to classify CAN frames based
on CAN Identifier.
@@ -727,7 +727,7 @@ config NET_EMATCH_CANID
config NET_EMATCH_IPSET
tristate "IPset"
depends on NET_EMATCH && IP_SET
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
ipset membership.
@@ -737,7 +737,7 @@ config NET_EMATCH_IPSET
config NET_EMATCH_IPT
tristate "IPtables Matches"
depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
- ---help---
+ help
Say Y here to be able to classify packets based on iptables
matches.
Current supported match is "policy" which allows packet classification
@@ -749,7 +749,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
- ---help---
+ help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
classification. They are used to overwrite the classification
@@ -761,7 +761,7 @@ config NET_CLS_ACT
config NET_ACT_POLICE
tristate "Traffic Policing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here if you want to do traffic policing, i.e. strict
bandwidth limiting. This action replaces the existing policing
module.
@@ -772,7 +772,7 @@ config NET_ACT_POLICE
config NET_ACT_GACT
tristate "Generic actions"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to take generic actions such as dropping and
accepting packets.
@@ -782,13 +782,13 @@ config NET_ACT_GACT
config GACT_PROB
bool "Probability support"
depends on NET_ACT_GACT
- ---help---
+ help
Say Y here to use the generic action randomly or deterministically.
config NET_ACT_MIRRED
tristate "Redirecting and Mirroring"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to allow packets to be mirrored or redirected to
other devices.
@@ -799,7 +799,7 @@ config NET_ACT_SAMPLE
tristate "Traffic Sampling"
depends on NET_CLS_ACT
select PSAMPLE
- ---help---
+ help
Say Y here to allow packet sampling tc action. The packet sample
action consists of statistically choosing packets and sampling
them using the psample module.
@@ -810,7 +810,7 @@ config NET_ACT_SAMPLE
config NET_ACT_IPT
tristate "IPtables targets"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- ---help---
+ help
Say Y here to be able to invoke iptables targets after successful
classification.
@@ -820,7 +820,7 @@ config NET_ACT_IPT
config NET_ACT_NAT
tristate "Stateless NAT"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to do stateless NAT on IPv4 packets. You should use
netfilter for NAT unless you know what you are doing.
@@ -830,7 +830,7 @@ config NET_ACT_NAT
config NET_ACT_PEDIT
tristate "Packet Editing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here if you want to mangle the content of packets.
To compile this code as a module, choose M here: the
@@ -839,7 +839,7 @@ config NET_ACT_PEDIT
config NET_ACT_SIMP
tristate "Simple Example (Debug)"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to add a simple action for demonstration purposes.
It is meant as an example and for debugging purposes. It will
print a configured policy string followed by the packet count
@@ -853,7 +853,7 @@ config NET_ACT_SIMP
config NET_ACT_SKBEDIT
tristate "SKB Editing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to change skb priority or queue_mapping settings.
If unsure, say N.
@@ -865,7 +865,7 @@ config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
select LIBCRC32C
- ---help---
+ help
Say Y here to update some common checksum after some direct
packet alterations.
@@ -886,7 +886,7 @@ config NET_ACT_MPLS
config NET_ACT_VLAN
tristate "Vlan manipulation"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to push or pop vlan headers.
If unsure, say N.
@@ -897,7 +897,7 @@ config NET_ACT_VLAN
config NET_ACT_BPF
tristate "BPF based action"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to execute BPF code on packets. The BPF code will decide
if the packet should be dropped or not.
@@ -910,7 +910,7 @@ config NET_ACT_CONNMARK
tristate "Netfilter Connection Mark Retriever"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
depends on NF_CONNTRACK && NF_CONNTRACK_MARK
- ---help---
+ help
Say Y here to allow retrieving of conn mark
If unsure, say N.
@@ -938,7 +938,7 @@ config NET_ACT_CTINFO
config NET_ACT_SKBMOD
tristate "skb data modification action"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to allow modification of skb data
If unsure, say N.
@@ -950,7 +950,7 @@ config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
select NET_IFE
- ---help---
+ help
Say Y here to allow for sourcing and terminating metadata
For details refer to netdev01 paper:
"Distributing Linux Traffic Control Classifier-Action Subsystem"
@@ -962,7 +962,7 @@ config NET_ACT_IFE
config NET_ACT_TUNNEL_KEY
tristate "IP tunnel metadata manipulation"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to set/release ip tunnel metadata.
If unsure, say N.
@@ -981,6 +981,18 @@ config NET_ACT_CT
To compile this code as a module, choose M here: the
module will be called act_ct.
+config NET_ACT_GATE
+ tristate "Frame gate entry list control tc action"
+ depends on NET_CLS_ACT
+ help
+ Say Y here to allow to control the ingress flow to be passed at
+ specific time slot and be dropped at other specific time slot by
+ the gate entry list.
+
+ If unsure, say N.
+ To compile this code as a module, choose M here: the
+ module will be called act_gate.
+
config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 31c367a6cd09..66bbf9a98f9e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
+obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index df4560909157..8ac7eb0a8309 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -766,12 +766,10 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return a->ops->dump(skb, a, bind, ref);
}
-int
-tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int
+tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a)
{
- int err = -EINVAL;
unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
struct tc_cookie *cookie;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
@@ -789,6 +787,23 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
}
rcu_read_unlock();
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ int err = -EINVAL;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ if (tcf_action_dump_terse(skb, a))
+ goto nla_put_failure;
+
if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
a->hw_stats, TCA_ACT_HW_STATS_ANY))
@@ -820,7 +835,7 @@ nla_put_failure:
EXPORT_SYMBOL(tcf_action_dump_1);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
- int bind, int ref)
+ int bind, int ref, bool terse)
{
struct tc_action *a;
int err = -EINVAL, i;
@@ -831,7 +846,8 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
nest = nla_nest_start_noflag(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
- err = tcf_action_dump_1(skb, a, bind, ref);
+ err = terse ? tcf_action_dump_terse(skb, a) :
+ tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
nla_nest_end(skb, nest);
@@ -876,19 +892,14 @@ static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr)
return hw_stats_bf.value;
}
-static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
-static const u32 tca_act_hw_stats_allowed = TCA_ACT_HW_STATS_ANY;
-
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
- [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &tca_act_flags_allowed },
- [TCA_ACT_HW_STATS] = { .type = NLA_BITFIELD32,
- .validation_data = &tca_act_hw_stats_allowed },
+ [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS),
+ [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -1138,7 +1149,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
if (!nest)
goto out_nlmsg_trim;
- if (tcf_action_dump(skb, actions, bind, ref) < 0)
+ if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
@@ -1454,10 +1465,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return ret;
}
-static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
- [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &tcaa_root_flags_allowed },
+ [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON),
[TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
};
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 1a766393be62..e29f0f45d688 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -30,6 +30,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <uapi/linux/netfilter/nf_nat.h>
@@ -199,6 +200,9 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
struct nf_conntrack_tuple target;
+ if (!(ct->status & IPS_NAT_MASK))
+ return 0;
+
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
switch (tuple->src.l3num) {
@@ -536,6 +540,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
+ nf_ct_acct_update(ct, dir, skb->len);
return true;
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
new file mode 100644
index 000000000000..9c628591f452
--- /dev/null
+++ b/net/sched/act_gate.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright 2020 NXP */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gate.h>
+
+static unsigned int gate_net_id;
+static struct tc_action_ops act_gate_ops;
+
+static ktime_t gate_get_time(struct tcf_gate *gact)
+{
+ ktime_t mono = ktime_get();
+
+ switch (gact->tk_offset) {
+ case TK_OFFS_MAX:
+ return mono;
+ default:
+ return ktime_mono_to_any(mono, gact->tk_offset);
+ }
+
+ return KTIME_MAX;
+}
+
+static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+{
+ struct tcf_gate_params *param = &gact->param;
+ ktime_t now, base, cycle;
+ u64 n;
+
+ base = ns_to_ktime(param->tcfg_basetime);
+ now = gate_get_time(gact);
+
+ if (ktime_after(base, now)) {
+ *start = base;
+ return 0;
+ }
+
+ cycle = param->tcfg_cycletime;
+
+ /* cycle time should not be zero */
+ if (!cycle)
+ return -EFAULT;
+
+ n = div64_u64(ktime_sub_ns(now, base), cycle);
+ *start = ktime_add_ns(base, (n + 1) * cycle);
+ return 0;
+}
+
+static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
+{
+ ktime_t expires;
+
+ expires = hrtimer_get_expires(&gact->hitimer);
+ if (expires == 0)
+ expires = KTIME_MAX;
+
+ start = min_t(ktime_t, start, expires);
+
+ hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT);
+}
+
+static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
+{
+ struct tcf_gate *gact = container_of(timer, struct tcf_gate,
+ hitimer);
+ struct tcf_gate_params *p = &gact->param;
+ struct tcfg_gate_entry *next;
+ ktime_t close_time, now;
+
+ spin_lock(&gact->tcf_lock);
+
+ next = gact->next_entry;
+
+ /* cycle start, clear pending bit, clear total octets */
+ gact->current_gate_status = next->gate_state ? GATE_ACT_GATE_OPEN : 0;
+ gact->current_entry_octets = 0;
+ gact->current_max_octets = next->maxoctets;
+
+ gact->current_close_time = ktime_add_ns(gact->current_close_time,
+ next->interval);
+
+ close_time = gact->current_close_time;
+
+ if (list_is_last(&next->list, &p->entries))
+ next = list_first_entry(&p->entries,
+ struct tcfg_gate_entry, list);
+ else
+ next = list_next_entry(next, list);
+
+ now = gate_get_time(gact);
+
+ if (ktime_after(now, close_time)) {
+ ktime_t cycle, base;
+ u64 n;
+
+ cycle = p->tcfg_cycletime;
+ base = ns_to_ktime(p->tcfg_basetime);
+ n = div64_u64(ktime_sub_ns(now, base), cycle);
+ close_time = ktime_add_ns(base, (n + 1) * cycle);
+ }
+
+ gact->next_entry = next;
+
+ hrtimer_set_expires(&gact->hitimer, close_time);
+
+ spin_unlock(&gact->tcf_lock);
+
+ return HRTIMER_RESTART;
+}
+
+static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_gate *gact = to_gate(a);
+
+ spin_lock(&gact->tcf_lock);
+
+ tcf_lastuse_update(&gact->tcf_tm);
+ bstats_update(&gact->tcf_bstats, skb);
+
+ if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
+ spin_unlock(&gact->tcf_lock);
+ return gact->tcf_action;
+ }
+
+ if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
+ goto drop;
+
+ if (gact->current_max_octets >= 0) {
+ gact->current_entry_octets += qdisc_pkt_len(skb);
+ if (gact->current_entry_octets > gact->current_max_octets) {
+ gact->tcf_qstats.overlimits++;
+ goto drop;
+ }
+ }
+
+ spin_unlock(&gact->tcf_lock);
+
+ return gact->tcf_action;
+drop:
+ gact->tcf_qstats.drops++;
+ spin_unlock(&gact->tcf_lock);
+
+ return TC_ACT_SHOT;
+}
+
+static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = {
+ [TCA_GATE_ENTRY_INDEX] = { .type = NLA_U32 },
+ [TCA_GATE_ENTRY_GATE] = { .type = NLA_FLAG },
+ [TCA_GATE_ENTRY_INTERVAL] = { .type = NLA_U32 },
+ [TCA_GATE_ENTRY_IPV] = { .type = NLA_S32 },
+ [TCA_GATE_ENTRY_MAX_OCTETS] = { .type = NLA_S32 },
+};
+
+static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = {
+ [TCA_GATE_PARMS] = { .len = sizeof(struct tc_gate),
+ .type = NLA_EXACT_LEN },
+ [TCA_GATE_PRIORITY] = { .type = NLA_S32 },
+ [TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED },
+ [TCA_GATE_BASE_TIME] = { .type = NLA_U64 },
+ [TCA_GATE_CYCLE_TIME] = { .type = NLA_U64 },
+ [TCA_GATE_CYCLE_TIME_EXT] = { .type = NLA_U64 },
+ [TCA_GATE_FLAGS] = { .type = NLA_U32 },
+ [TCA_GATE_CLOCKID] = { .type = NLA_S32 },
+};
+
+static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ u32 interval = 0;
+
+ entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]);
+
+ if (tb[TCA_GATE_ENTRY_INTERVAL])
+ interval = nla_get_u32(tb[TCA_GATE_ENTRY_INTERVAL]);
+
+ if (interval == 0) {
+ NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+ return -EINVAL;
+ }
+
+ entry->interval = interval;
+
+ if (tb[TCA_GATE_ENTRY_IPV])
+ entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]);
+ else
+ entry->ipv = -1;
+
+ if (tb[TCA_GATE_ENTRY_MAX_OCTETS])
+ entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]);
+ else
+ entry->maxoctets = -1;
+
+ return 0;
+}
+
+static int parse_gate_entry(struct nlattr *n, struct tcfg_gate_entry *entry,
+ int index, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_GATE_ENTRY_MAX + 1] = { };
+ int err;
+
+ err = nla_parse_nested(tb, TCA_GATE_ENTRY_MAX, n, entry_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ entry->index = index;
+
+ return fill_gate_entry(tb, entry, extack);
+}
+
+static void release_entry_list(struct list_head *entries)
+{
+ struct tcfg_gate_entry *entry, *e;
+
+ list_for_each_entry_safe(entry, e, entries, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+}
+
+static int parse_gate_list(struct nlattr *list_attr,
+ struct tcf_gate_params *sched,
+ struct netlink_ext_ack *extack)
+{
+ struct tcfg_gate_entry *entry;
+ struct nlattr *n;
+ int err, rem;
+ int i = 0;
+
+ if (!list_attr)
+ return -EINVAL;
+
+ nla_for_each_nested(n, list_attr, rem) {
+ if (nla_type(n) != TCA_GATE_ONE_ENTRY) {
+ NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'");
+ continue;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ err = -ENOMEM;
+ goto release_list;
+ }
+
+ err = parse_gate_entry(n, entry, i, extack);
+ if (err < 0) {
+ kfree(entry);
+ goto release_list;
+ }
+
+ list_add_tail(&entry->list, &sched->entries);
+ i++;
+ }
+
+ sched->num_entries = i;
+
+ return i;
+
+release_list:
+ release_entry_list(&sched->entries);
+
+ return err;
+}
+
+static int tcf_gate_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+ enum tk_offsets tk_offset = TK_OFFS_TAI;
+ struct nlattr *tb[TCA_GATE_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
+ struct tcf_gate_params *p;
+ s32 clockid = CLOCK_TAI;
+ struct tcf_gate *gact;
+ struct tc_gate *parm;
+ int ret = 0, err;
+ u64 basetime = 0;
+ u32 gflags = 0;
+ s32 prio = -1;
+ ktime_t start;
+ u32 index;
+
+ if (!nla)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_GATE_PARMS])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_GATE_PARMS]);
+ index = parm->index;
+
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (err < 0)
+ return err;
+
+ if (err && bind)
+ return 0;
+
+ if (!err) {
+ ret = tcf_idr_create(tn, index, est, a,
+ &act_gate_ops, bind, false, 0);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+ }
+
+ ret = ACT_P_CREATED;
+ } else if (!ovr) {
+ tcf_idr_release(*a, bind);
+ return -EEXIST;
+ }
+ if (ret == ACT_P_CREATED) {
+ to_gate(*a)->param.tcfg_clockid = -1;
+ INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
+ }
+
+ if (tb[TCA_GATE_PRIORITY])
+ prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
+
+ if (tb[TCA_GATE_BASE_TIME])
+ basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
+
+ if (tb[TCA_GATE_FLAGS])
+ gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
+
+ if (tb[TCA_GATE_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ goto release_idr;
+ }
+ }
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ gact = to_gate(*a);
+
+ spin_lock_bh(&gact->tcf_lock);
+ p = &gact->param;
+
+ if (tb[TCA_GATE_CYCLE_TIME]) {
+ p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+ if (!p->tcfg_cycletime_ext)
+ goto chain_put;
+ }
+
+ if (tb[TCA_GATE_ENTRY_LIST]) {
+ err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+ if (err < 0)
+ goto chain_put;
+ }
+
+ if (!p->tcfg_cycletime) {
+ struct tcfg_gate_entry *entry;
+ ktime_t cycle = 0;
+
+ list_for_each_entry(entry, &p->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+ p->tcfg_cycletime = cycle;
+ }
+
+ if (tb[TCA_GATE_CYCLE_TIME_EXT])
+ p->tcfg_cycletime_ext =
+ nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+
+ p->tcfg_priority = prio;
+ p->tcfg_basetime = basetime;
+ p->tcfg_clockid = clockid;
+ p->tcfg_flags = gflags;
+
+ gact->tk_offset = tk_offset;
+ hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+ gact->hitimer.function = gate_timer_func;
+
+ err = gate_get_start_time(gact, &start);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Internal error: failed get start time");
+ release_entry_list(&p->entries);
+ goto chain_put;
+ }
+
+ gact->current_close_time = start;
+ gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
+
+ gact->next_entry = list_first_entry(&p->entries,
+ struct tcfg_gate_entry, list);
+
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+
+ gate_start_timer(gact, start);
+
+ spin_unlock_bh(&gact->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ if (ret == ACT_P_CREATED)
+ tcf_idr_insert(tn, *a);
+
+ return ret;
+
+chain_put:
+ spin_unlock_bh(&gact->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
+}
+
+static void tcf_gate_cleanup(struct tc_action *a)
+{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
+
+ p = &gact->param;
+ if (p->tcfg_clockid != -1)
+ hrtimer_cancel(&gact->hitimer);
+
+ release_entry_list(&p->entries);
+}
+
+static int dumping_entry(struct sk_buff *skb,
+ struct tcfg_gate_entry *entry)
+{
+ struct nlattr *item;
+
+ item = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY);
+ if (!item)
+ return -ENOSPC;
+
+ if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index))
+ goto nla_put_failure;
+
+ if (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, item);
+
+nla_put_failure:
+ nla_nest_cancel(skb, item);
+ return -1;
+}
+
+static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_gate *gact = to_gate(a);
+ struct tc_gate opt = {
+ .index = gact->tcf_index,
+ .refcnt = refcount_read(&gact->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
+ };
+ struct tcfg_gate_entry *entry;
+ struct tcf_gate_params *p;
+ struct nlattr *entry_list;
+ struct tcf_t t;
+
+ spin_lock_bh(&gact->tcf_lock);
+ opt.action = gact->tcf_action;
+
+ p = &gact->param;
+
+ if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME,
+ p->tcfg_basetime, TCA_GATE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME,
+ p->tcfg_cycletime, TCA_GATE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT,
+ p->tcfg_cycletime_ext, TCA_GATE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority))
+ goto nla_put_failure;
+
+ entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST);
+ if (!entry_list)
+ goto nla_put_failure;
+
+ list_for_each_entry(entry, &p->entries, list) {
+ if (dumping_entry(skb, entry) < 0)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, entry_list);
+
+ tcf_tm_dump(&t, &gact->tcf_tm);
+ if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
+ goto nla_put_failure;
+ spin_unlock_bh(&gact->tcf_lock);
+
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(&gact->tcf_lock);
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+ u64 lastuse, bool hw)
+{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_t *tm = &gact->tcf_tm;
+
+ tcf_action_update_stats(a, bytes, packets, false, hw);
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
+static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+
+ return tcf_idr_search(tn, a, index);
+}
+
+static size_t tcf_gate_get_fill_size(const struct tc_action *act)
+{
+ return nla_total_size(sizeof(struct tc_gate));
+}
+
+static struct tc_action_ops act_gate_ops = {
+ .kind = "gate",
+ .id = TCA_ID_GATE,
+ .owner = THIS_MODULE,
+ .act = tcf_gate_act,
+ .dump = tcf_gate_dump,
+ .init = tcf_gate_init,
+ .cleanup = tcf_gate_cleanup,
+ .walk = tcf_gate_walker,
+ .stats_update = tcf_gate_stats_update,
+ .get_fill_size = tcf_gate_get_fill_size,
+ .lookup = tcf_gate_search,
+ .size = sizeof(struct tcf_gate),
+};
+
+static __net_init int gate_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+
+ return tc_action_net_init(net, tn, &act_gate_ops);
+}
+
+static void __net_exit gate_exit_net(struct list_head *net_list)
+{
+ tc_action_net_exit(net_list, gate_net_id);
+}
+
+static struct pernet_operations gate_net_ops = {
+ .init = gate_init_net,
+ .exit_batch = gate_exit_net,
+ .id = &gate_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+static int __init gate_init_module(void)
+{
+ return tcf_register_action(&act_gate_ops, &gate_net_ops);
+}
+
+static void __exit gate_cleanup_module(void)
+{
+ tcf_unregister_action(&act_gate_ops, &gate_net_ops);
+}
+
+module_init(gate_init_module);
+module_exit(gate_cleanup_module);
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 55bd1429678f..a00a203b2ef5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,6 +39,7 @@
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
+#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -620,96 +621,42 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
-static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block,
- flow_indr_block_bind_cb_t *cb, void *cb_priv,
- enum flow_block_command command, bool ingress)
-{
- struct flow_block_offload bo = {
- .command = command,
- .binder_type = ingress ?
- FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS :
- FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
- .net = dev_net(dev),
- .block_shared = tcf_block_non_null_shared(block),
- };
- INIT_LIST_HEAD(&bo.cb_list);
-
- if (!block)
- return;
-
- bo.block = &block->flow_block;
-
- down_write(&block->cb_lock);
- cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
-
- tcf_block_setup(block, &bo);
- up_write(&block->cb_lock);
-}
-
-static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress)
+static void tcf_block_offload_init(struct flow_block_offload *bo,
+ struct net_device *dev,
+ enum flow_block_command command,
+ enum flow_block_binder_type binder_type,
+ struct flow_block *flow_block,
+ bool shared, struct netlink_ext_ack *extack)
{
- const struct Qdisc_class_ops *cops;
- const struct Qdisc_ops *ops;
- struct Qdisc *qdisc;
-
- if (!dev_ingress_queue(dev))
- return NULL;
-
- qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
- if (!qdisc)
- return NULL;
-
- ops = qdisc->ops;
- if (!ops)
- return NULL;
-
- if (!ingress && !strcmp("ingress", ops->id))
- return NULL;
-
- cops = ops->cl_ops;
- if (!cops)
- return NULL;
-
- if (!cops->tcf_block)
- return NULL;
-
- return cops->tcf_block(qdisc,
- ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS,
- NULL);
+ bo->net = dev_net(dev);
+ bo->command = command;
+ bo->binder_type = binder_type;
+ bo->block = flow_block;
+ bo->block_shared = shared;
+ bo->extack = extack;
+ INIT_LIST_HEAD(&bo->cb_list);
}
-static void tc_indr_block_get_and_cmd(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb,
- void *cb_priv,
- enum flow_block_command command)
-{
- struct tcf_block *block;
-
- block = tc_dev_block(dev, true);
- tc_indr_block_cmd(dev, block, cb, cb_priv, command, true);
-
- block = tc_dev_block(dev, false);
- tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
-}
+static void tcf_block_unbind(struct tcf_block *block,
+ struct flow_block_offload *bo);
-static void tc_indr_block_call(struct tcf_block *block,
- struct net_device *dev,
- struct tcf_block_ext_info *ei,
- enum flow_block_command command,
- struct netlink_ext_ack *extack)
+static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
{
- struct flow_block_offload bo = {
- .command = command,
- .binder_type = ei->binder_type,
- .net = dev_net(dev),
- .block = &block->flow_block,
- .block_shared = tcf_block_shared(block),
- .extack = extack,
- };
- INIT_LIST_HEAD(&bo.cb_list);
+ struct tcf_block *block = block_cb->indr.data;
+ struct net_device *dev = block_cb->indr.dev;
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
- flow_indr_block_call(dev, &bo, command, TC_SETUP_BLOCK);
- tcf_block_setup(block, &bo);
+ tcf_block_offload_init(&bo, dev, FLOW_BLOCK_UNBIND,
+ block_cb->indr.binder_type,
+ &block->flow_block, tcf_block_shared(block),
+ &extack);
+ down_write(&block->cb_lock);
+ list_move(&block_cb->list, &bo.cb_list);
+ up_write(&block->cb_lock);
+ rtnl_lock();
+ tcf_block_unbind(block, &bo);
+ rtnl_unlock();
}
static bool tcf_block_offload_in_use(struct tcf_block *block)
@@ -726,17 +673,21 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
struct flow_block_offload bo = {};
int err;
- bo.net = dev_net(dev);
- bo.command = command;
- bo.binder_type = ei->binder_type;
- bo.block = &block->flow_block;
- bo.block_shared = tcf_block_shared(block);
- bo.extack = extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ tcf_block_offload_init(&bo, dev, command, ei->binder_type,
+ &block->flow_block, tcf_block_shared(block),
+ extack);
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
- if (err < 0)
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ else
+ err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block,
+ &bo, tc_block_indr_cleanup);
+
+ if (err < 0) {
+ if (err != -EOPNOTSUPP)
+ NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
return err;
+ }
return tcf_block_setup(block, &bo);
}
@@ -749,13 +700,13 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
int err;
down_write(&block->cb_lock);
- if (!dev->netdev_ops->ndo_setup_tc)
- goto no_offload_dev_inc;
/* If tc offload feature is disabled and the block we try to bind
* to already has some offloaded filters, forbid to bind.
*/
- if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+ if (dev->netdev_ops->ndo_setup_tc &&
+ !tc_can_offload(dev) &&
+ tcf_block_offload_in_use(block)) {
NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
err = -EOPNOTSUPP;
goto err_unlock;
@@ -767,18 +718,15 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
if (err)
goto err_unlock;
- tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
up_write(&block->cb_lock);
return 0;
no_offload_dev_inc:
- if (tcf_block_offload_in_use(block)) {
- err = -EOPNOTSUPP;
+ if (tcf_block_offload_in_use(block))
goto err_unlock;
- }
+
err = 0;
block->nooffloaddevcnt++;
- tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
err_unlock:
up_write(&block->cb_lock);
return err;
@@ -791,10 +739,6 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
int err;
down_write(&block->cb_lock);
- tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
-
- if (!dev->netdev_ops->ndo_setup_tc)
- goto no_offload_dev_dec;
err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
if (err == -EOPNOTSUPP)
goto no_offload_dev_dec;
@@ -1847,7 +1791,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
u32 portid, u32 seq, u16 flags, int event,
- bool rtnl_held)
+ bool terse_dump, bool rtnl_held)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1874,6 +1818,14 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (!fh) {
tcm->tcm_handle = 0;
+ } else if (terse_dump) {
+ if (tp->ops->terse_dump) {
+ if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
+ rtnl_held) < 0)
+ goto nla_put_failure;
+ } else {
+ goto cls_op_not_supp;
+ }
} else {
if (tp->ops->dump &&
tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
@@ -1884,6 +1836,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
out_nlmsg_trim:
nla_put_failure:
+cls_op_not_supp:
nlmsg_trim(skb, b);
return -1;
}
@@ -1904,7 +1857,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, event,
- rtnl_held) <= 0) {
+ false, rtnl_held) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1936,7 +1889,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- rtnl_held) <= 0) {
+ false, rtnl_held) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
@@ -2070,6 +2023,7 @@ replay:
err = PTR_ERR(block);
goto errout;
}
+ block->classid = parent;
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
@@ -2496,6 +2450,7 @@ struct tcf_dump_args {
struct tcf_block *block;
struct Qdisc *q;
u32 parent;
+ bool terse_dump;
};
static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -2506,12 +2461,12 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, true);
+ RTM_NEWTFILTER, a->terse_dump, true);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
struct sk_buff *skb, struct netlink_callback *cb,
- long index_start, long *p_index)
+ long index_start, long *p_index, bool terse)
{
struct net *net = sock_net(skb->sk);
struct tcf_block *block = chain->block;
@@ -2540,7 +2495,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, true) <= 0)
+ RTM_NEWTFILTER, false, true) <= 0)
goto errout;
cb->args[1] = 1;
}
@@ -2556,6 +2511,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
arg.w.cookie = cb->args[2];
+ arg.terse_dump = terse;
tp->ops->walk(tp, &arg.w, true);
cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
@@ -2569,6 +2525,10 @@ errout:
return false;
}
+static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
+ [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
+};
+
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -2578,6 +2538,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
struct Qdisc *q = NULL;
struct tcf_block *block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ bool terse_dump = false;
long index_start;
long index;
u32 parent;
@@ -2587,10 +2548,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
- NULL, cb->extack);
+ tcf_tfilter_dump_policy, cb->extack);
if (err)
return err;
+ if (tca[TCA_DUMP_FLAGS]) {
+ struct nla_bitfield32 flags =
+ nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
+
+ terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
+ }
+
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
if (!block)
@@ -2612,12 +2580,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
parent = tcm->tcm_parent;
- if (!parent) {
+ if (!parent)
q = dev->qdisc;
- parent = q->handle;
- } else {
+ else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
- }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -2633,6 +2599,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
block = cops->tcf_block(q, cl, NULL);
if (!block)
goto out;
+ parent = block->classid;
if (tcf_block_shared(block))
q = NULL;
}
@@ -2649,7 +2616,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
- index_start, &index)) {
+ index_start, &index, terse_dump)) {
tcf_chain_put(chain);
err = -EMSGSIZE;
break;
@@ -3152,7 +3119,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
+ if (tcf_action_dump(skb, exts->actions, 0, 0, false)
+ < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
} else if (exts->police) {
@@ -3176,6 +3144,31 @@ nla_put_failure:
}
EXPORT_SYMBOL(tcf_exts_dump);
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ struct nlattr *nest;
+
+ if (!exts->action || !tcf_exts_has_actions(exts))
+ return 0;
+
+ nest = nla_nest_start_noflag(skb, exts->action);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+#else
+ return 0;
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_terse_dump);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
@@ -3523,6 +3516,37 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
#endif
}
+static void tcf_gate_entry_destructor(void *priv)
+{
+ struct action_gate_entry *oe = priv;
+
+ kfree(oe);
+}
+
+static int tcf_gate_get_entries(struct flow_action_entry *entry,
+ const struct tc_action *act)
+{
+ entry->gate.entries = tcf_gate_get_list(act);
+
+ if (!entry->gate.entries)
+ return -EINVAL;
+
+ entry->destructor = tcf_gate_entry_destructor;
+ entry->destructor_priv = entry->gate.entries;
+
+ return 0;
+}
+
+static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
+{
+ if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
+ return FLOW_ACTION_HW_STATS_DONT_CARE;
+ else if (!hw_stats)
+ return FLOW_ACTION_HW_STATS_DISABLED;
+
+ return hw_stats;
+}
+
int tc_setup_flow_action(struct flow_action *flow_action,
const struct tcf_exts *exts)
{
@@ -3546,7 +3570,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = act->hw_stats;
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
if (is_tcf_gact_ok(act)) {
entry->id = FLOW_ACTION_ACCEPT;
@@ -3614,7 +3638,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->mangle.mask = tcf_pedit_mask(act, k);
entry->mangle.val = tcf_pedit_val(act, k);
entry->mangle.offset = tcf_pedit_offset(act, k);
- entry->hw_stats = act->hw_stats;
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
entry = &flow_action->entries[++j];
}
} else if (is_tcf_csum(act)) {
@@ -3669,6 +3693,17 @@ int tc_setup_flow_action(struct flow_action *flow_action,
} else if (is_tcf_skbedit_priority(act)) {
entry->id = FLOW_ACTION_PRIORITY;
entry->priority = tcf_skbedit_priority(act);
+ } else if (is_tcf_gate(act)) {
+ entry->id = FLOW_ACTION_GATE;
+ entry->gate.index = tcf_gate_index(act);
+ entry->gate.prio = tcf_gate_prio(act);
+ entry->gate.basetime = tcf_gate_basetime(act);
+ entry->gate.cycletime = tcf_gate_cycletime(act);
+ entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
+ entry->gate.num_entries = tcf_gate_num_entries(act);
+ err = tcf_gate_get_entries(entry, act);
+ if (err)
+ goto err_out;
} else {
err = -EOPNOTSUPP;
goto err_out_locked;
@@ -3729,11 +3764,6 @@ static struct pernet_operations tcf_net_ops = {
.size = sizeof(struct tcf_net),
};
-static struct flow_indr_block_entry block_entry = {
- .cb = tc_indr_block_get_and_cmd,
- .list = LIST_HEAD_INIT(block_entry.list),
-};
-
static int __init tc_filter_init(void)
{
int err;
@@ -3746,8 +3776,6 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
- flow_indr_add_block_cb(&block_entry);
-
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 74a0febcafb8..b2da37286082 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -272,14 +272,16 @@ static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
return NULL;
}
-static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
- struct fl_flow_key *mkey,
- struct fl_flow_key *key)
+static noinline_for_stack
+struct cls_fl_filter *fl_mask_lookup(struct fl_flow_mask *mask, struct fl_flow_key *key)
{
+ struct fl_flow_key mkey;
+
+ fl_set_masked_key(&mkey, key, mask);
if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
- return fl_lookup_range(mask, mkey, key);
+ return fl_lookup_range(mask, &mkey, key);
- return __fl_lookup(mask, mkey);
+ return __fl_lookup(mask, &mkey);
}
static u16 fl_ct_info_to_flower_map[] = {
@@ -299,7 +301,6 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
- struct fl_flow_key skb_mkey;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -319,9 +320,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
ARRAY_SIZE(fl_ct_info_to_flower_map));
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
- fl_set_masked_key(&skb_mkey, &skb_key, mask);
-
- f = fl_lookup(mask, &skb_mkey, &skb_key);
+ f = fl_mask_lookup(mask, &skb_key);
if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
@@ -668,6 +667,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_MPLS_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 },
@@ -726,6 +726,15 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
};
+static const struct nla_policy
+mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_TC] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 },
+};
+
static void fl_set_key_val(struct nlattr **tb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -776,14 +785,157 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
return 0;
}
+static int fl_set_key_mpls_lse(const struct nlattr *nla_lse,
+ struct flow_dissector_key_mpls *key_val,
+ struct flow_dissector_key_mpls *key_mask,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1];
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_val;
+ u8 lse_index;
+ u8 depth;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, nla_lse,
+ mpls_stack_entry_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]) {
+ NL_SET_ERR_MSG(extack, "Missing MPLS option \"depth\"");
+ return -EINVAL;
+ }
+
+ depth = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]);
+
+ /* LSE depth starts at 1, for consistency with terminology used by
+ * RFC 3031 (section 3.9), where depth 0 refers to unlabeled packets.
+ */
+ if (depth < 1 || depth > FLOW_DIS_MPLS_MAX) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH],
+ "Invalid MPLS depth");
+ return -EINVAL;
+ }
+ lse_index = depth - 1;
+
+ dissector_set_mpls_lse(key_val, lse_index);
+ dissector_set_mpls_lse(key_mask, lse_index);
+
+ lse_val = &key_val->ls[lse_index];
+ lse_mask = &key_mask->ls[lse_index];
+
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]) {
+ lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]);
+ lse_mask->mpls_ttl = MPLS_TTL_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]) {
+ u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]);
+
+ if (bos & ~MPLS_BOS_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS],
+ "Bottom Of Stack (BOS) must be 0 or 1");
+ return -EINVAL;
+ }
+ lse_val->mpls_bos = bos;
+ lse_mask->mpls_bos = MPLS_BOS_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]) {
+ u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]);
+
+ if (tc & ~MPLS_TC_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC],
+ "Traffic Class (TC) must be between 0 and 7");
+ return -EINVAL;
+ }
+ lse_val->mpls_tc = tc;
+ lse_mask->mpls_tc = MPLS_TC_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]) {
+ u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]);
+
+ if (label & ~MPLS_LABEL_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL],
+ "Label must be between 0 and 1048575");
+ return -EINVAL;
+ }
+ lse_val->mpls_label = label;
+ lse_mask->mpls_label = MPLS_LABEL_MASK;
+ }
+
+ return 0;
+}
+
+static int fl_set_key_mpls_opts(const struct nlattr *nla_mpls_opts,
+ struct flow_dissector_key_mpls *key_val,
+ struct flow_dissector_key_mpls *key_mask,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *nla_lse;
+ int rem;
+ int err;
+
+ if (!(nla_mpls_opts->nla_type & NLA_F_NESTED)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_mpls_opts,
+ "NLA_F_NESTED is missing");
+ return -EINVAL;
+ }
+
+ nla_for_each_nested(nla_lse, nla_mpls_opts, rem) {
+ if (nla_type(nla_lse) != TCA_FLOWER_KEY_MPLS_OPTS_LSE) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_lse,
+ "Invalid MPLS option type");
+ return -EINVAL;
+ }
+
+ err = fl_set_key_mpls_lse(nla_lse, key_val, key_mask, extack);
+ if (err < 0)
+ return err;
+ }
+ if (rem) {
+ NL_SET_ERR_MSG(extack,
+ "Bytes leftover after parsing MPLS options");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int fl_set_key_mpls(struct nlattr **tb,
struct flow_dissector_key_mpls *key_val,
struct flow_dissector_key_mpls *key_mask,
struct netlink_ext_ack *extack)
{
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_val;
+
+ if (tb[TCA_FLOWER_KEY_MPLS_OPTS]) {
+ if (tb[TCA_FLOWER_KEY_MPLS_TTL] ||
+ tb[TCA_FLOWER_KEY_MPLS_BOS] ||
+ tb[TCA_FLOWER_KEY_MPLS_TC] ||
+ tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPTS],
+ "MPLS label, Traffic Class, Bottom Of Stack and Time To Live must be encapsulated in the MPLS options attribute");
+ return -EBADMSG;
+ }
+
+ return fl_set_key_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS],
+ key_val, key_mask, extack);
+ }
+
+ lse_val = &key_val->ls[0];
+ lse_mask = &key_mask->ls[0];
+
if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
- key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
- key_mask->mpls_ttl = MPLS_TTL_MASK;
+ lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+ lse_mask->mpls_ttl = MPLS_TTL_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
@@ -794,8 +946,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Bottom Of Stack (BOS) must be 0 or 1");
return -EINVAL;
}
- key_val->mpls_bos = bos;
- key_mask->mpls_bos = MPLS_BOS_MASK;
+ lse_val->mpls_bos = bos;
+ lse_mask->mpls_bos = MPLS_BOS_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
@@ -806,8 +960,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Traffic Class (TC) must be between 0 and 7");
return -EINVAL;
}
- key_val->mpls_tc = tc;
- key_mask->mpls_tc = MPLS_TC_MASK;
+ lse_val->mpls_tc = tc;
+ lse_mask->mpls_tc = MPLS_TC_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
@@ -818,8 +974,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Label must be between 0 and 1048575");
return -EINVAL;
}
- key_val->mpls_label = label;
- key_mask->mpls_label = MPLS_LABEL_MASK;
+ lse_val->mpls_label = label;
+ lse_mask->mpls_label = MPLS_LABEL_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
return 0;
}
@@ -2218,35 +2376,132 @@ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
return 0;
}
+static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb,
+ struct flow_dissector_key_mpls *mpls_key,
+ struct flow_dissector_key_mpls *mpls_mask,
+ u8 lse_index)
+{
+ struct flow_dissector_mpls_lse *lse_mask = &mpls_mask->ls[lse_index];
+ struct flow_dissector_mpls_lse *lse_key = &mpls_key->ls[lse_index];
+ int err;
+
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+ lse_index + 1);
+ if (err)
+ return err;
+
+ if (lse_mask->mpls_ttl) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+ lse_key->mpls_ttl);
+ if (err)
+ return err;
+ }
+ if (lse_mask->mpls_bos) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+ lse_key->mpls_bos);
+ if (err)
+ return err;
+ }
+ if (lse_mask->mpls_tc) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+ lse_key->mpls_tc);
+ if (err)
+ return err;
+ }
+ if (lse_mask->mpls_label) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+ lse_key->mpls_label);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int fl_dump_key_mpls_opts(struct sk_buff *skb,
+ struct flow_dissector_key_mpls *mpls_key,
+ struct flow_dissector_key_mpls *mpls_mask)
+{
+ struct nlattr *opts;
+ struct nlattr *lse;
+ u8 lse_index;
+ int err;
+
+ opts = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS);
+ if (!opts)
+ return -EMSGSIZE;
+
+ for (lse_index = 0; lse_index < FLOW_DIS_MPLS_MAX; lse_index++) {
+ if (!(mpls_mask->used_lses & 1 << lse_index))
+ continue;
+
+ lse = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS_LSE);
+ if (!lse) {
+ err = -EMSGSIZE;
+ goto err_opts;
+ }
+
+ err = fl_dump_key_mpls_opt_lse(skb, mpls_key, mpls_mask,
+ lse_index);
+ if (err)
+ goto err_opts_lse;
+ nla_nest_end(skb, lse);
+ }
+ nla_nest_end(skb, opts);
+
+ return 0;
+
+err_opts_lse:
+ nla_nest_cancel(skb, lse);
+err_opts:
+ nla_nest_cancel(skb, opts);
+
+ return err;
+}
+
static int fl_dump_key_mpls(struct sk_buff *skb,
struct flow_dissector_key_mpls *mpls_key,
struct flow_dissector_key_mpls *mpls_mask)
{
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_key;
int err;
- if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+ if (!mpls_mask->used_lses)
return 0;
- if (mpls_mask->mpls_ttl) {
+
+ lse_mask = &mpls_mask->ls[0];
+ lse_key = &mpls_key->ls[0];
+
+ /* For backward compatibility, don't use the MPLS nested attributes if
+ * the rule can be expressed using the old attributes.
+ */
+ if (mpls_mask->used_lses & ~1 ||
+ (!lse_mask->mpls_ttl && !lse_mask->mpls_bos &&
+ !lse_mask->mpls_tc && !lse_mask->mpls_label))
+ return fl_dump_key_mpls_opts(skb, mpls_key, mpls_mask);
+
+ if (lse_mask->mpls_ttl) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
- mpls_key->mpls_ttl);
+ lse_key->mpls_ttl);
if (err)
return err;
}
- if (mpls_mask->mpls_tc) {
+ if (lse_mask->mpls_tc) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
- mpls_key->mpls_tc);
+ lse_key->mpls_tc);
if (err)
return err;
}
- if (mpls_mask->mpls_label) {
+ if (lse_mask->mpls_label) {
err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
- mpls_key->mpls_label);
+ lse_key->mpls_label);
if (err)
return err;
}
- if (mpls_mask->mpls_bos) {
+ if (lse_mask->mpls_bos) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
- mpls_key->mpls_bos);
+ lse_key->mpls_bos);
if (err)
return err;
}
@@ -2768,6 +3023,48 @@ nla_put_failure:
return -1;
}
+static int fl_terse_dump(struct net *net, struct tcf_proto *tp, void *fh,
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
+{
+ struct cls_fl_filter *f = fh;
+ struct nlattr *nest;
+ bool skip_hw;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ spin_lock(&tp->lock);
+
+ skip_hw = tc_skip_hw(f->flags);
+
+ if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+ goto nla_put_failure_locked;
+
+ spin_unlock(&tp->lock);
+
+ if (!skip_hw)
+ fl_hw_update_stats(tp, f, rtnl_held);
+
+ if (tcf_exts_terse_dump(skb, &f->exts))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return skb->len;
+
+nla_put_failure_locked:
+ spin_unlock(&tp->lock);
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
{
struct fl_flow_tmplt *tmplt = tmplt_priv;
@@ -2832,6 +3129,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.hw_add = fl_hw_add,
.hw_del = fl_hw_del,
.dump = fl_dump,
+ .terse_dump = fl_terse_dump,
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index eecfe072c508..18755d29fd15 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -199,7 +199,7 @@ static void em_ipt_destroy(struct tcf_ematch *em)
im->match->destroy(&par);
}
module_put(im->match->me);
- kfree((void *)im);
+ kfree(im);
}
static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0d99df1e764d..9a3449b56bd6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -32,6 +32,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <trace/events/qdisc.h>
+
/*
Short review.
@@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
qdisc_hash_add(sch, false);
+ trace_qdisc_create(ops, dev, parent);
return sch;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 1496e87cd07b..60f8ae578819 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
return drop;
}
-static void cake_update_flowkeys(struct flow_keys *keys,
+static bool cake_update_flowkeys(struct flow_keys *keys,
const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct nf_conntrack_tuple tuple = {};
- bool rev = !skb->_nfct;
+ bool rev = !skb->_nfct, upd = false;
+ __be32 ip;
if (tc_skb_protocol(skb) != htons(ETH_P_IP))
- return;
+ return false;
if (!nf_ct_get_tuple_skb(&tuple, skb))
- return;
+ return false;
- keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
- keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+ ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+ if (ip != keys->addrs.v4addrs.src) {
+ keys->addrs.v4addrs.src = ip;
+ upd = true;
+ }
+ ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+ if (ip != keys->addrs.v4addrs.dst) {
+ keys->addrs.v4addrs.dst = ip;
+ upd = true;
+ }
if (keys->ports.ports) {
- keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
- keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+ __be16 port;
+
+ port = rev ? tuple.dst.u.all : tuple.src.u.all;
+ if (port != keys->ports.src) {
+ keys->ports.src = port;
+ upd = true;
+ }
+ port = rev ? tuple.src.u.all : tuple.dst.u.all;
+ if (port != keys->ports.dst) {
+ port = keys->ports.dst;
+ upd = true;
+ }
}
+ return upd;
+#else
+ return false;
#endif
}
@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
int flow_mode, u16 flow_override, u16 host_override)
{
+ bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
+ bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
+ bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
u16 reduced_hash, srchost_idx, dsthost_idx;
struct flow_keys keys, host_keys;
+ bool use_skbhash = skb->l4_hash;
if (unlikely(flow_mode == CAKE_FLOW_NONE))
return 0;
- /* If both overrides are set we can skip packet dissection entirely */
- if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
- (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
+ /* If both overrides are set, or we can use the SKB hash and nat mode is
+ * disabled, we can skip packet dissection entirely. If nat mode is
+ * enabled there's another check below after doing the conntrack lookup.
+ */
+ if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
goto skip_hash;
skb_flow_dissect_flow_keys(skb, &keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
- if (flow_mode & CAKE_FLOW_NAT_FLAG)
- cake_update_flowkeys(&keys, skb);
+ /* Don't use the SKB hash if we change the lookup keys from conntrack */
+ if (nat_enabled && cake_update_flowkeys(&keys, skb))
+ use_skbhash = false;
+
+ /* If we can still use the SKB hash and don't need the host hash, we can
+ * skip the rest of the hashing procedure
+ */
+ if (use_skbhash && !hash_hosts)
+ goto skip_hash;
/* flow_hash_from_keys() sorts the addresses by value, so we have
* to preserve their order in a separate data structure to treat
@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
/* This *must* be after the above switch, since as a
* side-effect it sorts the src and dst addresses.
*/
- if (flow_mode & CAKE_FLOW_FLOWS)
+ if (hash_flows && !use_skbhash)
flow_hash = flow_hash_from_keys(&keys);
skip_hash:
if (flow_override)
flow_hash = flow_override - 1;
+ else if (use_skbhash)
+ flow_hash = skb->hash;
if (host_override) {
dsthost_hash = host_override - 1;
srchost_hash = host_override - 1;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index a36974e9c601..bd618b00d319 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -131,7 +131,6 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
}
struct choke_skb_cb {
- u16 classid;
u8 keys_valid;
struct flow_keys_digest keys;
};
@@ -142,11 +141,6 @@ static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}
-static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
-{
- choke_skb_cb(skb)->classid = classid;
-}
-
/*
* Compare flow of two packets
* Returns true only if source and destination address and port match.
@@ -323,7 +317,8 @@ static void choke_reset(struct Qdisc *sch)
sch->q.qlen = 0;
sch->qstats.backlog = 0;
- memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4c060134c736..8f06a808c59a 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -66,22 +66,27 @@ static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
* in linear list (head,tail), otherwise are placed in a rbtree (t_root).
*/
struct fq_flow {
+/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */
struct rb_root t_root;
struct sk_buff *head; /* list of skbs for this flow : first skb */
union {
struct sk_buff *tail; /* last skb in the list */
- unsigned long age; /* jiffies when flow was emptied, for gc */
+ unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */
};
struct rb_node fq_node; /* anchor in fq_root[] trees */
struct sock *sk;
+ u32 socket_hash; /* sk_hash */
int qlen; /* number of packets in flow queue */
+
+/* Second cache line, used in fq_dequeue() */
int credit;
- u32 socket_hash; /* sk_hash */
- struct fq_flow *next; /* next pointer in RR lists, or &detached */
+ /* 32bit hole on 64bit arches */
+
+ struct fq_flow *next; /* next pointer in RR lists */
struct rb_node rate_node; /* anchor in q->delayed tree */
u64 time_next_packet;
-};
+} ____cacheline_aligned_in_smp;
struct fq_flow_head {
struct fq_flow *first;
@@ -95,6 +100,7 @@ struct fq_sched_data {
struct rb_root delayed; /* for rate limited flows */
u64 time_next_delayed_flow;
+ u64 ktime_cache; /* copy of last ktime_get_ns() */
unsigned long unthrottle_latency_ns;
struct fq_flow internal; /* for non classified or high prio packets */
@@ -104,12 +110,13 @@ struct fq_sched_data {
u32 flow_plimit; /* max packets per flow */
unsigned long flow_max_rate; /* optional max rate per flow */
u64 ce_threshold;
+ u64 horizon; /* horizon in ns */
u32 orphan_mask; /* mask for orphaned skb */
u32 low_rate_threshold;
struct rb_root *fq_root;
u8 rate_enable;
u8 fq_trees_log;
-
+ u8 horizon_drop;
u32 flows;
u32 inactive_flows;
u32 throttled_flows;
@@ -118,6 +125,8 @@ struct fq_sched_data {
u64 stat_internal_packets;
u64 stat_throttled;
u64 stat_ce_mark;
+ u64 stat_horizon_drops;
+ u64 stat_horizon_caps;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
@@ -126,20 +135,25 @@ struct fq_sched_data {
struct qdisc_watchdog watchdog;
};
-/* special value to mark a detached flow (not on old/new list) */
-static struct fq_flow detached, throttled;
-
+/*
+ * f->tail and f->age share the same location.
+ * We can use the low order bit to differentiate if this location points
+ * to a sk_buff or contains a jiffies value, if we force this value to be odd.
+ * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
+ */
static void fq_flow_set_detached(struct fq_flow *f)
{
- f->next = &detached;
- f->age = jiffies;
+ f->age = jiffies | 1UL;
}
static bool fq_flow_is_detached(const struct fq_flow *f)
{
- return f->next == &detached;
+ return !!(f->age & 1UL);
}
+/* special value to mark a throttled flow (not on old/new list) */
+static struct fq_flow throttled;
+
static bool fq_flow_is_throttled(const struct fq_flow *f)
{
return f->next == &throttled;
@@ -204,9 +218,10 @@ static void fq_gc(struct fq_sched_data *q,
struct rb_root *root,
struct sock *sk)
{
- struct fq_flow *f, *tofree[FQ_GC_MAX];
struct rb_node **p, *parent;
- int fcnt = 0;
+ void *tofree[FQ_GC_MAX];
+ struct fq_flow *f;
+ int i, fcnt = 0;
p = &root->rb_node;
parent = NULL;
@@ -229,15 +244,18 @@ static void fq_gc(struct fq_sched_data *q,
p = &parent->rb_left;
}
+ if (!fcnt)
+ return;
+
+ for (i = fcnt; i > 0; ) {
+ f = tofree[--i];
+ rb_erase(&f->fq_node, root);
+ }
q->flows -= fcnt;
q->inactive_flows -= fcnt;
q->stat_gc_flows += fcnt;
- while (fcnt) {
- struct fq_flow *f = tofree[--fcnt];
- rb_erase(&f->fq_node, root);
- kmem_cache_free(fq_flow_cachep, f);
- }
+ kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
}
static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
@@ -370,19 +388,17 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
}
}
-/* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
+/* Remove one skb from flow queue.
+ * This skb must be the return value of prior fq_peek().
+ */
+static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
+ struct sk_buff *skb)
{
- struct sk_buff *skb = fq_peek(flow);
-
- if (skb) {
- fq_erase_head(sch, flow, skb);
- skb_mark_not_on_list(skb);
- flow->qlen--;
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
- }
- return skb;
+ fq_erase_head(sch, flow, skb);
+ skb_mark_not_on_list(skb);
+ flow->qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
}
static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
@@ -390,8 +406,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
struct rb_node **p, *parent;
struct sk_buff *head, *aux;
- fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
-
head = flow->head;
if (!head ||
fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
@@ -419,6 +433,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
rb_insert_color(&skb->rbnode, &flow->t_root);
}
+static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
+ const struct fq_sched_data *q)
+{
+ return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
+}
+
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -428,6 +448,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free);
+ if (!skb->tstamp) {
+ fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+ } else {
+ /* Check if packet timestamp is too far in the future.
+ * Try first if our cached value, to avoid ktime_get_ns()
+ * cost in most cases.
+ */
+ if (fq_packet_beyond_horizon(skb, q)) {
+ /* Refresh our cache and check another time */
+ q->ktime_cache = ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb, q)) {
+ if (q->horizon_drop) {
+ q->stat_horizon_drops++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+ q->stat_horizon_caps++;
+ skb->tstamp = q->ktime_cache + q->horizon;
+ }
+ }
+ fq_skb_cb(skb)->time_to_send = skb->tstamp;
+ }
+
f = fq_classify(skb, q);
if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
q->stat_flows_plimit++;
@@ -494,11 +536,13 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
if (!sch->q.qlen)
return NULL;
- skb = fq_dequeue_head(sch, &q->internal);
- if (skb)
+ skb = fq_peek(&q->internal);
+ if (unlikely(skb)) {
+ fq_dequeue_skb(sch, &q->internal, skb);
goto out;
+ }
- now = ktime_get_ns();
+ q->ktime_cache = now = ktime_get_ns();
fq_check_throttled(q, now);
begin:
head = &q->new_flows;
@@ -532,14 +576,13 @@ begin:
fq_flow_set_throttled(q, f);
goto begin;
}
+ prefetch(&skb->end);
if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
- }
-
- skb = fq_dequeue_head(sch, f);
- if (!skb) {
+ fq_dequeue_skb(sch, f, skb);
+ } else {
head->first = f->next;
/* force a pass through old_flows to prevent starvation */
if ((head == &q->new_flows) && q->old_flows.first) {
@@ -550,7 +593,6 @@ begin:
}
goto begin;
}
- prefetch(&skb->end);
plen = qdisc_pkt_len(skb);
f->credit -= plen;
@@ -753,6 +795,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
+ [TCA_FQ_HORIZON] = { .type = NLA_U32 },
+ [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -842,7 +886,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_TIMER_SLACK])
q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+ if (tb[TCA_FQ_HORIZON])
+ q->horizon = (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_HORIZON]);
+
+ if (tb[TCA_FQ_HORIZON_DROP])
+ q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+
if (!err) {
+
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
sch_tree_lock(sch);
@@ -895,6 +947,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
+ q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */
+ q->horizon_drop = 1; /* by default, drop packets beyond horizon */
+
/* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
@@ -912,6 +967,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
u64 ce_threshold = q->ce_threshold;
+ u64 horizon = q->horizon;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -921,6 +977,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
do_div(ce_threshold, NSEC_PER_USEC);
+ do_div(horizon, NSEC_PER_USEC);
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
@@ -936,7 +993,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
- nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
+ nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+ nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
+ nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -967,6 +1026,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.unthrottle_latency_ns = min_t(unsigned long,
q->unthrottle_latency_ns, ~0U);
st.ce_mark = q->stat_ce_mark;
+ st.horizon_drops = q->stat_horizon_drops;
+ st.horizon_caps = q->stat_horizon_caps;
sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 968519ff36e9..436160be9c18 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -416,7 +416,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
- q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+ q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index a9da8776bf5b..fb760cee824e 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -297,9 +297,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
goto flow_error;
}
q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
- if (!q->flows_cnt || q->flows_cnt > 65536) {
+ if (!q->flows_cnt || q->flows_cnt >= 65536) {
NL_SET_ERR_MSG_MOD(extack,
- "Number of flows must be < 65536");
+ "Number of flows must range in [1..65535]");
goto flow_error;
}
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2efd5b61acef..265a61d011df 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -464,6 +464,7 @@ void __netdev_watchdog_up(struct net_device *dev)
dev_hold(dev);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
@@ -794,6 +795,9 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
};
EXPORT_SYMBOL(pfifo_fast_ops);
+static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
+
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
struct netlink_ext_ack *extack)
@@ -846,9 +850,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
spin_lock_init(&sch->busylock);
+ lockdep_set_class(&sch->busylock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
+ lockdep_set_class(&sch->busylock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
seqcount_init(&sch->running);
+ lockdep_set_class(&sch->running,
+ dev->qdisc_running_key ?: &qdisc_running_key);
sch->ops = ops;
sch->flags = ops->static_flags;
@@ -859,12 +871,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
- if (sch != &noop_qdisc) {
- lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
- lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
- lockdep_set_class(&sch->running, &dev->qdisc_running_key);
- }
-
return sch;
errout1:
kfree(p);
@@ -891,8 +897,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;
- if (!ops->init || ops->init(sch, NULL, extack) == 0)
+ if (!ops->init || ops->init(sch, NULL, extack) == 0) {
+ trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
+ }
qdisc_put(sch);
return NULL;
@@ -906,6 +914,8 @@ void qdisc_reset(struct Qdisc *qdisc)
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
+ trace_qdisc_reset(qdisc);
+
if (ops->reset)
ops->reset(qdisc);
@@ -944,7 +954,6 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -952,23 +961,16 @@ static void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
- if (ops->reset)
- ops->reset(qdisc);
+
+ qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
-
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ trace_qdisc_destroy(qdisc);
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
@@ -1037,10 +1039,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
ops = &pfifo_fast_ops;
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
- if (!qdisc) {
- netdev_info(dev, "activation failed\n");
+ if (!qdisc)
return;
- }
+
if (!netif_is_multiqueue(dev))
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
@@ -1065,6 +1066,18 @@ static void attach_default_qdiscs(struct net_device *dev)
qdisc->ops->attach(qdisc);
}
}
+
+ /* Detect default qdisc setup/init failed and fallback to "noqueue" */
+ if (dev->qdisc == &noop_qdisc) {
+ netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
+ default_qdisc_ops->id, noqueue_qdisc_ops.id);
+ dev->priv_flags |= IFF_NO_QUEUE;
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+ dev->qdisc = txq->qdisc_sleeping;
+ qdisc_refcount_inc(dev->qdisc);
+ dev->priv_flags ^= IFF_NO_QUEUE;
+ }
+
#ifdef CONFIG_NET_SCHED
if (dev->qdisc != &noop_qdisc)
qdisc_hash_add(dev->qdisc, false);
@@ -1116,6 +1129,28 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);
+static void qdisc_deactivate(struct Qdisc *qdisc)
+{
+ bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+ if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
+ return;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock)
+ spin_unlock_bh(&qdisc->seqlock);
+}
+
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
@@ -1125,21 +1160,8 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
- bool nolock = qdisc->flags & TCQ_F_NOLOCK;
-
- if (nolock)
- spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
-
- if (!(qdisc->flags & TCQ_F_BUILTIN))
- set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
-
+ qdisc_deactivate(qdisc);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- qdisc_reset(qdisc);
-
- spin_unlock_bh(qdisc_lock(qdisc));
- if (nolock)
- spin_unlock_bh(&qdisc->seqlock);
}
}
@@ -1170,16 +1192,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
-static void dev_qdisc_reset(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *none)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
-
- if (qdisc)
- qdisc_reset(qdisc);
-}
-
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@@ -1216,12 +1228,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
- /* The new qdisc is assigned at this point so we can safely
- * unwind stale skb lists and qdisc statistics
- */
- netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
- if (dev_ingress_queue(dev))
- dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index c7de47c942e3..555a1b9e467f 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -48,7 +48,7 @@ struct red_sched_data {
struct Qdisc *qdisc;
};
-static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
+#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
static inline int red_use_ecn(struct red_sched_data *q)
{
@@ -212,8 +212,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
- [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &red_supported_flags },
+ [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -248,7 +247,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
- tb[TCA_RED_FLAGS], red_supported_flags,
+ tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
&flags_bf, &userbits, extack);
if (err)
return err;
@@ -372,7 +371,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
nla_put_bitfield32(skb, TCA_RED_FLAGS,
- q->flags, red_supported_flags))
+ q->flags, TC_RED_SUPPORTED_FLAGS))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c787d4d46017..5a6def5e4e6d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -637,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
+
+ /* slot->allot is a short, make sure quantum is not too big. */
+ if (ctl->quantum) {
+ unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+ if (scaled <= 0 || scaled > SHRT_MAX)
+ return -EINVAL;
+ }
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog))
return -EINVAL;
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 0fb10abf7579..7a5e4c454715 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
{
struct tc_skbprio_qopt *ctl = nla_data(opt);
+ if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
sch->limit = ctl->limit;
return 0;
}