summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c4
-rw-r--r--net/netfilter/ipset/ip_set_core.c7
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c17
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c15
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c23
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c40
-rw-r--r--net/netfilter/nf_conntrack_proto.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c170
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c15
-rw-r--r--net/netfilter/nf_conntrack_standalone.c16
-rw-r--r--net/netfilter/nf_tables_api.c261
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c332
19 files changed, 562 insertions, 432 deletions
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index a8ce04a4bb72..e4fa00abde6a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
- hosts = 2 << (32 - netmask - 1);
- elements = 2 << (netmask - mask_bits - 1);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
}
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e7ba5b6dd2b7..46ebee9400da 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
retried = true;
- } while (ret == -EAGAIN &&
- set->variant->resize &&
- (ret = set->variant->resize(set, retried)) == 0);
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e30513cefd90..c9f4e3859663 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -100,11 +100,11 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip4 *h = set->data;
+ struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, hosts;
+ u32 ip = 0, ip_to = 0, hosts, i = 0;
int ret = 0;
if (tb[IPSET_ATTR_LINENO])
@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- /* 64bit division is not allowed on 32bit */
- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to;) {
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 153de3457423..a22ec1a6f6ec 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -97,11 +97,11 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark4 *h = set->data;
+ struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0;
+ u32 ip, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to; ip++, i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipmark4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 2ffbd0b78a8c..e977b5a9c48d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -112,11 +112,11 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport4 *h = set->data;
+ struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 334fb1ad0e86..39a01934b153 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -108,11 +108,11 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip4 *h = set->data;
+ struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7df94f437f60..5c6de605a9fb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -160,12 +160,12 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet4 *h = set->data;
+ struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip2 = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1422739d9aa2..ce0a9ce5a91f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -136,11 +136,11 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net4 *h = set->data;
+ struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_net4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 9810f5bf63f5..031073286236 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netiface4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index cdfb78c6e0d3..8fbe649c9dd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -166,13 +166,12 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netnet4 *h = set->data;
+ struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
- u64 n = 0, m = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
do {
+ i++;
e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netnet4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09cf72eb37f8..d1a0628df4ef 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -154,12 +154,11 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport4 *h = set->data;
+ struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
- u64 n = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 19bcdb3141f6..005a7ce87217 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
+static u32
+hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ if (from == 0 && to == UINT_MAX) {
+ *cidr = 0;
+ return to;
+ }
+ return ip_set_range_to_cidr(from, to, cidr);
+}
+
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet4 *h = set->data;
+ struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
- u64 n = 0, m = 0;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
int ret;
@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
do {
e.ip[0] = htonl(ip);
- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip[1] = htonl(ip2);
- ip2 = ip_set_range_to_cidr(ip2, ip2_to,
- &e.cidr[1]);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ip2 = hash_netportnet4_range_to_cidr(ip2,
+ ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 99323fb12d0f..ccef340be575 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv,
struct nf_conn *ct;
bool seqadj_needed;
__be16 frag_off;
+ int start;
u8 pnum;
ct = nf_ct_get(skb, &ctinfo);
@@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv,
break;
case NFPROTO_IPV6:
pnum = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
- if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
+ if (start < 0 || (frag_off & htons(~0x7)) != 0)
return nf_conntrack_confirm(skb);
+
+ protoff = start;
break;
default:
return nf_conntrack_confirm(skb);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d88b92a8ffca..945dd40e7077 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -27,22 +27,16 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
static const char *const sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
- "HEARTBEAT_SENT",
- "HEARTBEAT_ACKED",
+ [SCTP_CONNTRACK_NONE] = "NONE",
+ [SCTP_CONNTRACK_CLOSED] = "CLOSED",
+ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT",
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED",
+ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED",
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD",
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT",
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT",
};
#define SECS * HZ
@@ -54,13 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_CLOSED] = 10 SECS,
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
- [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
- [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
- [SCTP_CONNTRACK_DATA_SENT] = 30 SECS,
};
#define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1
@@ -74,8 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
-#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
-#define sDS SCTP_CONNTRACK_DATA_SENT
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -98,10 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
-HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK/DATA/SACK in the direction
- opposite to that of the HEARTBEAT/DATA chunk. Secondary connection
- is established.
-DATA_SENT - We have seen a DATA/SACK in a new flow.
*/
/* TODO
@@ -115,38 +101,36 @@ cookie echoed to closed.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][12][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA, sCW},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS, sCL},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA, sCL},
-/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* data/sack */ {sDS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL, sIV},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR, sIV},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA, sIV},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA, sIV},
-/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sHA},
-/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
-/* data/sack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES},
}
};
@@ -160,8 +144,8 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
- (offset) < (skb)->len && \
- ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
+ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))) && \
+ (sch)->length; \
(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
/* Some validity checks to make sure the chunks are fine */
@@ -258,11 +242,6 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
pr_debug("SCTP_CID_HEARTBEAT_ACK");
i = 10;
break;
- case SCTP_CID_DATA:
- case SCTP_CID_SACK:
- pr_debug("SCTP_CID_DATA/SACK");
- i = 11;
- break;
default:
/* Other chunks like DATA or SACK do not change the state */
pr_debug("Unknown chunk type, Will stay in %s\n",
@@ -316,9 +295,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
ih->init_tag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
- } else if (sch->type == SCTP_CID_HEARTBEAT ||
- sch->type == SCTP_CID_DATA ||
- sch->type == SCTP_CID_SACK) {
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
@@ -404,19 +381,19 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
if (!sctp_new(ct, skb, sh, dataoff))
return -NF_ACCEPT;
- } else {
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
- !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
- !test_bit(SCTP_CID_ABORT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
- !test_bit(SCTP_CID_HEARTBEAT, map) &&
- !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
- sh->vtag != ct->proto.sctp.vtag[dir]) {
- pr_debug("Verification tag check failed\n");
- goto out;
- }
+ }
+
+ /* Check the verification tag (Sec 8.5) */
+ if (!test_bit(SCTP_CID_INIT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+ !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+ !test_bit(SCTP_CID_ABORT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out;
}
old_state = new_state = SCTP_CONNTRACK_NONE;
@@ -424,22 +401,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
+ /* (A) vtag MUST be zero */
if (sh->vtag != 0)
goto out_unlock;
} else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir])
+ /* (B) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir] &&
- sch->flags & SCTP_CHUNK_FLAG_T)
+ /* (C) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
+ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
@@ -476,11 +460,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
} else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) {
ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
}
- } else if (sch->type == SCTP_CID_DATA || sch->type == SCTP_CID_SACK) {
- if (ct->proto.sctp.vtag[dir] == 0) {
- pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir);
- ct->proto.sctp.vtag[dir] = sh->vtag;
- }
}
old_state = ct->proto.sctp.state;
@@ -518,8 +497,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}
ct->proto.sctp.state = new_state;
- if (old_state != new_state)
+ if (old_state != new_state) {
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ if (new_state == SCTP_CONNTRACK_ESTABLISHED &&
+ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ }
}
spin_unlock_bh(&ct->lock);
@@ -533,14 +516,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
- if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
- dir == IP_CT_DIR_REPLY &&
- new_state == SCTP_CONNTRACK_ESTABLISHED) {
- pr_debug("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_ASSURED, ct);
- }
-
return NF_ACCEPT;
out_unlock:
@@ -701,7 +676,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
- [CTA_TIMEOUT_SCTP_DATA_SENT] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 656631083177..3ac1af6f59fc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1068,6 +1068,13 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags |=
IP_CT_EXP_CHALLENGE_ACK;
}
+
+ /* possible challenge ack reply to syn */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET &&
+ dir == IP_CT_DIR_REPLY)
+ ct->proto.tcp.last_ack = ntohl(th->ack_seq);
+
spin_unlock_bh(&ct->lock);
nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
@@ -1193,6 +1200,14 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
* segments we ignored. */
goto in_window;
}
+
+ /* Reset in response to a challenge-ack we let through earlier */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_REPLY &&
+ ntohl(th->seq) == ct->proto.tcp.last_ack)
+ goto in_window;
+
break;
default:
/* Keep compilers happy. */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0250725e38a4..460294bd4b60 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -601,8 +601,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT,
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
@@ -887,18 +885,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT] = {
- .procname = "nf_conntrack_sctp_timeout_data_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
@@ -1042,8 +1028,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
XASSIGN(SHUTDOWN_RECD, sn);
XASSIGN(SHUTDOWN_ACK_SENT, sn);
XASSIGN(HEARTBEAT_SENT, sn);
- XASSIGN(HEARTBEAT_ACKED, sn);
- XASSIGN(DATA_SENT, sn);
#undef XASSIGN
#endif
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 832b881f7c17..8c09e4d12ac1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
- struct nft_set *set)
+static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set,
+ const struct nft_set_desc *desc)
{
struct nft_trans *trans;
@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
if (trans == NULL)
return -ENOMEM;
- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
+ if (desc) {
+ nft_trans_set_update(trans) = true;
+ nft_trans_set_gc_int(trans) = desc->gc_int;
+ nft_trans_set_timeout(trans) = desc->timeout;
+ }
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ return __nft_trans_set_add(ctx, msg_type, set, NULL);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -3780,8 +3792,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
static const struct nft_set_ops *
nft_select_set_ops(const struct nft_ctx *ctx,
const struct nlattr * const nla[],
- const struct nft_set_desc *desc,
- enum nft_set_policies policy)
+ const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
@@ -3810,7 +3821,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
if (!ops->estimate(desc, flags, &est))
continue;
- switch (policy) {
+ switch (desc->policy) {
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
@@ -4045,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
- struct nlmsghdr *nlh;
+ u64 timeout = READ_ONCE(set->timeout);
+ u32 gc_int = READ_ONCE(set->gc_int);
u32 portid = ctx->portid;
+ struct nlmsghdr *nlh;
struct nlattr *nest;
u32 seq = ctx->seq;
int i;
@@ -4082,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
goto nla_put_failure;
- if (set->timeout &&
+ if (timeout &&
nla_put_be64(skb, NFTA_SET_TIMEOUT,
- nf_jiffies64_to_msecs(set->timeout),
+ nf_jiffies64_to_msecs(timeout),
NFTA_SET_PAD))
goto nla_put_failure;
- if (set->gc_int &&
- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ if (gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
goto nla_put_failure;
if (set->policy != NFT_SET_POL_PERFORMANCE) {
@@ -4389,15 +4402,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
return err;
}
+static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr * const *nla,
+ struct nft_expr **exprs, int *num_exprs,
+ u32 flags)
+{
+ struct nft_expr *expr;
+ int err, i;
+
+ if (nla[NFTA_SET_EXPR]) {
+ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[0] = expr;
+ (*num_exprs)++;
+ } else if (nla[NFTA_SET_EXPRESSIONS]) {
+ struct nlattr *tmp;
+ int left;
+
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+ goto err_set_expr_alloc;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[i++] = expr;
+ (*num_exprs)++;
+ }
+ }
+
+ return 0;
+
+err_set_expr_alloc:
+ for (i = 0; i < *num_exprs; i++)
+ nft_expr_destroy(ctx, exprs[i]);
+
+ return err;
+}
+
+static bool nft_set_is_same(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ struct nft_expr *exprs[], u32 num_exprs, u32 flags)
+{
+ int i;
+
+ if (set->ktype != desc->ktype ||
+ set->dtype != desc->dtype ||
+ set->flags != flags ||
+ set->klen != desc->klen ||
+ set->dlen != desc->dlen ||
+ set->field_count != desc->field_count ||
+ set->num_exprs != num_exprs)
+ return false;
+
+ for (i = 0; i < desc->field_count; i++) {
+ if (set->field_len[i] != desc->field_len[i])
+ return false;
+ }
+
+ for (i = 0; i < num_exprs; i++) {
+ if (set->exprs[i]->ops != exprs[i]->ops)
+ return false;
+ }
+
+ return true;
+}
+
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
- struct nft_expr *expr = NULL;
struct net *net = info->net;
struct nft_set_desc desc;
struct nft_table *table;
@@ -4405,10 +4497,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
struct nft_set *set;
struct nft_ctx ctx;
size_t alloc_size;
- u64 timeout;
+ int num_exprs = 0;
char *name;
int err, i;
u16 udlen;
+ u32 flags;
u64 size;
if (nla[NFTA_SET_TABLE] == NULL ||
@@ -4419,10 +4512,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
memset(&desc, 0, sizeof(desc));
- ktype = NFT_DATA_VALUE;
+ desc.ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
return -EINVAL;
}
@@ -4447,17 +4540,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
}
- dtype = 0;
+ desc.dtype = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
- dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
- dtype != NFT_DATA_VERDICT)
+ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+ desc.dtype != NFT_DATA_VERDICT)
return -EINVAL;
- if (dtype != NFT_DATA_VERDICT) {
+ if (desc.dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
@@ -4472,34 +4565,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_OBJECT))
return -EINVAL;
- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
- if (objtype == NFT_OBJECT_UNSPEC ||
- objtype > NFT_OBJECT_MAX)
+ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+ if (desc.objtype == NFT_OBJECT_UNSPEC ||
+ desc.objtype > NFT_OBJECT_MAX)
return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
- objtype = NFT_OBJECT_UNSPEC;
+ desc.objtype = NFT_OBJECT_UNSPEC;
- timeout = 0;
+ desc.timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
}
- gc_int = 0;
+ desc.gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
- policy = NFT_SET_POL_PERFORMANCE;
+ desc.policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
@@ -4531,6 +4624,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(set);
}
} else {
+ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
@@ -4538,13 +4633,29 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return 0;
+ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
+ if (err < 0)
+ return err;
+
+ err = 0;
+ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ err = -EEXIST;
+ }
+
+ for (i = 0; i < num_exprs; i++)
+ nft_expr_destroy(&ctx, exprs[i]);
+
+ if (err < 0)
+ return err;
+
+ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
}
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc, policy);
+ ops = nft_select_set_ops(&ctx, nla, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -4584,18 +4695,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
- set->ktype = ktype;
+ set->ktype = desc.ktype;
set->klen = desc.klen;
- set->dtype = dtype;
- set->objtype = objtype;
+ set->dtype = desc.dtype;
+ set->objtype = desc.objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
- set->policy = policy;
+ set->policy = desc.policy;
set->udlen = udlen;
set->udata = udata;
- set->timeout = timeout;
- set->gc_int = gc_int;
+ set->timeout = desc.timeout;
+ set->gc_int = desc.gc_int;
set->field_count = desc.field_count;
for (i = 0; i < desc.field_count; i++)
@@ -4605,43 +4716,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_set_init;
- if (nla[NFTA_SET_EXPR]) {
- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[0] = expr;
- set->num_exprs++;
- } else if (nla[NFTA_SET_EXPRESSIONS]) {
- struct nft_expr *expr;
- struct nlattr *tmp;
- int left;
-
- if (!(flags & NFT_SET_EXPR)) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- i = 0;
- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
- if (i == NFT_SET_EXPR_MAX) {
- err = -E2BIG;
- goto err_set_expr_alloc;
- }
- if (nla_type(tmp) != NFTA_LIST_ELEM) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[i++] = expr;
- set->num_exprs++;
- }
- }
+ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
+ if (err < 0)
+ goto err_set_destroy;
+ set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
@@ -4655,7 +4734,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
-
+err_set_destroy:
ops->destroy(set);
err_set_init:
kfree(set->name);
@@ -6008,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = set->timeout;
+ timeout = READ_ONCE(set->timeout);
}
expiration = 0;
@@ -6109,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key_end;
- if (timeout != set->timeout) {
+ if (timeout != READ_ONCE(set->timeout)) {
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
@@ -9031,14 +9110,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_clear(net, nft_trans_set(trans));
- /* This avoids hitting -EBUSY when deleting the table
- * from the transaction.
- */
- if (nft_set_is_anonymous(nft_trans_set(trans)) &&
- !list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ if (nft_trans_set_update(trans)) {
+ struct nft_set *set = nft_trans_set(trans);
+ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
+ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+ } else {
+ nft_clear(net, nft_trans_set(trans));
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+ }
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
@@ -9260,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ if (nft_trans_set_update(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.table->use--;
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 17b418a5a593..3a3c7746e88f 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -63,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
return false;
if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen;
+ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7325bee7d144..19ea4d3c3553 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
return !nft_rbtree_interval_end(rbe);
}
-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
- const struct nft_rbtree_elem *interval)
+static int nft_rbtree_cmp(const struct nft_set *set,
+ const struct nft_rbtree_elem *e1,
+ const struct nft_rbtree_elem *e2)
{
- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
+ set->klen);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
- const void *this;
int d;
parent = rcu_dereference_raw(priv->root.rb_node);
@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- this = nft_set_ext_key(&rbe->ext);
- d = memcmp(this, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
- nft_rbtree_equal(set, this, interval) &&
+ !nft_rbtree_cmp(set, rbe, interval) &&
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
@@ -215,154 +215,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_set_gc_batch *gcb;
+
+ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+ if (!gcb)
+ return -ENOMEM;
+
+ /* search for expired end interval coming before this element. */
+ do {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_end(rbe_prev))
+ break;
+
+ prev = rb_prev(prev);
+ } while (prev != NULL);
+
+ rb_erase(&rbe_prev->node, &priv->root);
+ rb_erase(&rbe->node, &priv->root);
+ atomic_sub(2, &set->nelems);
+
+ nft_set_gc_batch_add(gcb, rbe);
+ nft_set_gc_batch_complete(gcb);
+
+ return 0;
+}
+
+static bool nft_rbtree_update_first(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe,
+ struct rb_node *first)
+{
+ struct nft_rbtree_elem *first_elem;
+
+ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
+ /* this element is closest to where the new element is to be inserted:
+ * update the first element for the node list path.
+ */
+ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
+ return true;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
{
- bool overlap = false, dup_end_left = false, dup_end_right = false;
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_rbtree_elem *rbe;
- struct rb_node *parent, **p;
- int d;
+ int d, err;
- /* Detect overlaps as we descend the tree. Set the flag in these cases:
- *
- * a1. _ _ __>| ?_ _ __| (insert end before existing end)
- * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
- * a3. _ _ ___? >|_ _ __| (insert start before existing end)
- *
- * and clear it later on, as we eventually reach the points indicated by
- * '?' above, in the cases described below. We'll always meet these
- * later, locally, due to tree ordering, and overlaps for the intervals
- * that are the closest together are always evaluated last.
- *
- * b1. _ _ __>| !_ _ __| (insert end before existing start)
- * b2. _ _ ___| !_ _ _>| (insert end after existing start)
- * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf)
- * '--' no nodes falling in this range
- * b4. >|_ _ ! (insert start before existing start)
- *
- * Case a3. resolves to b3.:
- * - if the inserted start element is the leftmost, because the '0'
- * element in the tree serves as end element
- * - otherwise, if an existing end is found immediately to the left. If
- * there are existing nodes in between, we need to further descend the
- * tree before we can conclude the new start isn't causing an overlap
- *
- * or to b4., which, preceded by a3., means we already traversed one or
- * more existing intervals entirely, from the right.
- *
- * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
- * in that order.
- *
- * The flag is also cleared in two special cases:
- *
- * b5. |__ _ _!|<_ _ _ (insert start right before existing end)
- * b6. |__ _ >|!__ _ _ (insert end right after existing start)
- *
- * which always happen as last step and imply that no further
- * overlapping is possible.
- *
- * Another special case comes from the fact that start elements matching
- * an already existing start element are allowed: insertion is not
- * performed but we return -EEXIST in that case, and the error will be
- * cleared by the caller if NLM_F_EXCL is not present in the request.
- * This way, request for insertion of an exact overlap isn't reported as
- * error to userspace if not desired.
- *
- * However, if the existing start matches a pre-existing start, but the
- * end element doesn't match the corresponding pre-existing end element,
- * we need to report a partial overlap. This is a local condition that
- * can be noticed without need for a tracking flag, by checking for a
- * local duplicated end for a corresponding start, from left and right,
- * separately.
+ /* Descend the tree to search for an existing element greater than the
+ * key value to insert that is greater than the new element. This is the
+ * first element to walk the ordered elements to find possible overlap.
*/
-
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext),
- nft_set_ext_key(&new->ext),
- set->klen);
+ d = nft_rbtree_cmp(set, rbe, new);
+
if (d < 0) {
p = &parent->rb_left;
-
- if (nft_rbtree_interval_start(new)) {
- if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext) && !*p)
- overlap = false;
- } else {
- if (dup_end_left && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_right = true;
- continue;
- }
- }
} else if (d > 0) {
- p = &parent->rb_right;
+ if (!first ||
+ nft_rbtree_update_first(set, rbe, first))
+ first = &rbe->node;
- if (nft_rbtree_interval_end(new)) {
- if (dup_end_right && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_left = true;
- continue;
- }
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- overlap = nft_rbtree_interval_end(rbe);
- }
+ p = &parent->rb_right;
} else {
- if (nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_start(new)) {
+ if (nft_rbtree_interval_end(rbe))
p = &parent->rb_left;
-
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_rbtree_interval_start(rbe) &&
- nft_rbtree_interval_end(new)) {
+ else
p = &parent->rb_right;
+ }
+ }
+
+ if (!first)
+ first = rb_first(&priv->root);
+
+ /* Detect overlap by going through the list of valid tree nodes.
+ * Values stored in the tree are in reversed order, starting from
+ * highest to lowest value.
+ */
+ for (node = first; node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- *ext = &rbe->ext;
- return -EEXIST;
- } else {
- overlap = false;
- if (nft_rbtree_interval_end(rbe))
- p = &parent->rb_left;
- else
- p = &parent->rb_right;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* perform garbage collection to avoid bogus overlap reports. */
+ if (nft_set_elem_expired(&rbe->ext)) {
+ err = nft_rbtree_gc_elem(set, priv, rbe);
+ if (err < 0)
+ return err;
+
+ continue;
+ }
+
+ d = nft_rbtree_cmp(set, rbe, new);
+ if (d == 0) {
+ /* Matching end element: no need to look for an
+ * overlapping greater or equal element.
+ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_le = rbe;
+ break;
+ }
+
+ /* first element that is greater or equal to key value. */
+ if (!rbe_ge) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* this is a closer more or equal element, update it. */
+ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* element is equal to key value, make sure flags are
+ * the same, an existing more or equal start element
+ * must not be replaced by more or equal end element.
+ */
+ if ((nft_rbtree_interval_start(new) &&
+ nft_rbtree_interval_start(rbe_ge)) ||
+ (nft_rbtree_interval_end(new) &&
+ nft_rbtree_interval_end(rbe_ge))) {
+ rbe_ge = rbe;
+ continue;
}
+ } else if (d > 0) {
+ /* annotate element greater than the new element. */
+ rbe_ge = rbe;
+ continue;
+ } else if (d < 0) {
+ /* annotate element less than the new element. */
+ rbe_le = rbe;
+ break;
}
+ }
- dup_end_left = dup_end_right = false;
+ /* - new start element matching existing start element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
+ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
+ *ext = &rbe_ge->ext;
+ return -EEXIST;
}
- if (overlap)
+ /* - new end element matching existing end element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
+ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ *ext = &rbe_le->ext;
+ return -EEXIST;
+ }
+
+ /* - new start element with existing closest, less or equal key value
+ * being a start element: partial overlap, reported as -ENOTEMPTY.
+ * Anonymous sets allow for two consecutive start element since they
+ * are constant, skip them to avoid bogus overlap reports.
+ */
+ if (!nft_set_is_anonymous(set) && rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, less or equal key value
+ * being a end element: partial overlap, reported as -ENOTEMPTY.
+ */
+ if (rbe_le &&
+ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
return -ENOTEMPTY;
+ /* - new end element with existing closest, greater or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY
+ */
+ if (rbe_ge &&
+ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* Accepted element: pick insertion point depending on key value */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@@ -501,23 +563,37 @@ static void nft_rbtree_gc(struct work_struct *work)
struct nft_rbtree *priv;
struct rb_node *node;
struct nft_set *set;
+ struct net *net;
+ u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ genmask = nft_genmask_cur(net);
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* elements are reversed in the rbtree for historical reasons,
+ * from highest to lowest value, that is why end element is
+ * always visited before the start element.
+ */
if (nft_rbtree_interval_end(rbe)) {
rbe_end = rbe;
continue;
}
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext))
+
+ if (nft_set_elem_mark_busy(&rbe->ext)) {
+ rbe_end = NULL;
continue;
+ }
if (rbe_prev) {
rb_erase(&rbe_prev->node, &priv->root);