summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt11
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/protocol.h7
-rw-r--r--include/net/udp.h1
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c67
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/protocol.c2
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/udp.c3
12 files changed, 103 insertions, 14 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index eaee2c8d4c00..b1c6500e7a8d 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -856,12 +856,21 @@ ip_dynaddr - BOOLEAN
ip_early_demux - BOOLEAN
Optimize input packet processing down to one demux for
certain kinds of local sockets. Currently we only do this
- for established TCP sockets.
+ for established TCP and connected UDP sockets.
It may add an additional cost for pure routing workloads that
reduces overall throughput, in such case you should disable it.
Default: 1
+tcp_early_demux - BOOLEAN
+ Enable early demux for established TCP sockets.
+ Default: 1
+
+udp_early_demux - BOOLEAN
+ Enable early demux for connected UDP sockets. Disable this if
+ your system could experience more unconnected load.
+ Default: 1
+
icmp_echo_ignore_all - BOOLEAN
If set non-zero, then the kernel will ignore all ICMP ECHO
requests sent to it.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a0e89190a3e9..cd686c4fb32d 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -95,6 +95,8 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+ int sysctl_tcp_early_demux;
+ int sysctl_udp_early_demux;
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index bf36ca34af7a..65ba335b0e7e 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -40,6 +40,7 @@
/* This is used to register protocols. */
struct net_protocol {
void (*early_demux)(struct sk_buff *skb);
+ void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
@@ -54,7 +55,7 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
void (*early_demux)(struct sk_buff *skb);
-
+ void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb,
@@ -92,12 +93,12 @@ struct inet_protosw {
#define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */
#define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */
-extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
+extern struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
extern const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS];
extern const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS];
#if IS_ENABLED(CONFIG_IPV6)
-extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
+extern struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
#endif
int inet_add_protocol(const struct net_protocol *prot, unsigned char num);
diff --git a/include/net/udp.h b/include/net/udp.h
index c9d8b8e848e0..3391dbd73959 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -372,4 +372,5 @@ void udp_encap_enable(void);
#if IS_ENABLED(CONFIG_IPV6)
void udpv6_encap_enable(void);
#endif
+
#endif /* _UDP_H */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6b1fc6e4278e..d1a11707a126 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
+ .early_demux_handler = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
+ .early_demux_handler = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6feabb03516..fa2dc8f692c6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct net_device *dev = skb->dev;
+ void (*edemux)(struct sk_buff *skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+ edemux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 4b7c0ec65251..32a691b7ce2c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 711c3e2e17b1..6fb25693c00b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
+#include <net/protocol.h>
static int zero;
static int one = 1;
@@ -294,6 +295,58 @@ bad_key:
return ret;
}
+static void proc_configure_early_demux(int enabled, int protocol)
+{
+ struct net_protocol *ipprot;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol *ip6prot;
+#endif
+
+ ipprot = rcu_dereference(inet_protos[protocol]);
+ if (ipprot)
+ ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+ NULL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6prot = rcu_dereference(inet6_protos[protocol]);
+ if (ip6prot)
+ ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+ NULL;
+#endif
+}
+
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_TCP);
+ }
+
+ return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_UDP);
+ }
+
+ return ret;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -750,6 +803,20 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_udp_early_demux
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_early_demux
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aacfb4bce153..b04539dd4629 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,8 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ void (*edemux)(struct sk_buff *skb);
+
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && ipprot->early_demux)
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+ edemux(skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e3770abe688a..b5d54d4f995c 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -26,7 +26,7 @@
#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
-const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f08d718a002..031a8c019f7a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1925,8 +1925,9 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
+ .early_demux_handler = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b793ed1d2a36..fd4b1c98a472 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1436,8 +1436,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static const struct inet6_protocol udpv6_protocol = {
+static struct inet6_protocol udpv6_protocol = {
.early_demux = udp_v6_early_demux,
+ .early_demux_handler = udp_v6_early_demux,
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,