diff options
author | David S. Miller <davem@davemloft.net> | 2018-08-01 09:52:31 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-08-01 09:52:31 -0700 |
commit | 53dd9652b5f530da2739298025c8324b6a4d0f21 (patch) | |
tree | 26082aae613a8b6b81729f9b721dc3661f7785c8 /net/ipv4 | |
parent | 83ba4645152d1177c161750e1064e3a8e7cee19b (diff) | |
parent | 9bae0451b7dcca54db376d2ea2cb8d9fc763a683 (diff) | |
download | linux-53dd9652b5f530da2739298025c8324b6a4d0f21.tar.bz2 |
Merge branch 'ipv4-Control-SKB-reprioritization-after-forwarding'
Petr Machata says:
====================
ipv4: Control SKB reprioritization after forwarding
After IPv4 packets are forwarded, the priority of the corresponding SKB
is updated according to the TOS field of IPv4 header. This overrides any
prioritization done earlier by e.g. an skbedit action or ingress-qos-map
defined at a vlan device.
Such overriding may not always be desirable. Even if the packet ends up
being routed, which implies this is an L3 network node, an administrator
may wish to preserve whatever prioritization was done earlier on in the
pipeline.
Therefore this patch set introduces a sysctl that controls this
behavior, net.ipv4.ip_forward_update_priority. Its value is 1 by
default to preserve the current behavior.
All of the above is implemented in patch #1.
Value changes prompt a new NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE
notification, so that the drivers can hook up whatever logic may depend
on this value. That is implemented in patch #2.
In patches #3 and #4, mlxsw is adapted to recognize the sysctl. On
initialization, the RGCR register that handles router configuration is
set in accordance with the sysctl. The new notification is listened to
and RGCR is reconfigured as necessary.
In patches #5 to #7, a selftest is added to verify that mlxsw reflects
the sysctl value as necessary. The test is expressed in terms of the
recently-introduced ieee_setapp support, and works by observing how DSCP
value gets rewritten depending on packet priority. For this reason, the
test is added to the subdirectory drivers/net/mlxsw. Even though it's
not particularly specific to mlxsw, it's not suitable for running on
soft devices (which don't support the ieee_setapp et al.).
Changes from v1 to v2:
- In patch #1, init sysctl_ip_fwd_update_priority to 1 instead of true.
Changes from RFC to v1:
- Fix wrong sysctl name in ip-sysctl.txt
- Add notifications
- Add mlxsw support
- Add self test
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 3 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 26 |
3 files changed, 29 insertions, 1 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ee707b91d1a7..20fda8fb8ffd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1801,6 +1801,7 @@ static __net_init int inet_init_net(struct net *net) * We set them here, in case sysctl is not compiled. */ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; + net->ipv4.sysctl_ip_fwd_update_priority = 1; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; net->ipv4.sysctl_udp_early_demux = 1; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index b54b948b0596..32662e9e5d21 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -143,7 +143,8 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - skb->priority = rt_tos2priority(iph->tos); + if (net->ipv4.sysctl_ip_fwd_update_priority) + skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, net, NULL, skb, skb->dev, rt->dst.dev, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5fa335fd3852..b92f422f2fa8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -201,6 +201,23 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, return ret; } +static int ipv4_fwd_update_priority(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct net *net; + int ret; + + net = container_of(table->data, struct net, + ipv4.sysctl_ip_fwd_update_priority); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, + net); + + return ret; +} + static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -664,6 +681,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "ip_forward_update_priority", + .data = &init_net.ipv4.sysctl_ip_fwd_update_priority, + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = ipv4_fwd_update_priority, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "ip_nonlocal_bind", .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), |