summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-11-29 13:04:52 -0800
committerJakub Kicinski <kuba@kernel.org>2022-11-29 13:04:52 -0800
commitf2bb566f5c977ff010baaa9e5e14d9a75b06e5f2 (patch)
tree6359cc9169bd06bfb8b757a534c82886df605b71 /net/ipv4
parent7a945ce0c19bbdf821d5f7ce1515e7fb8e444465 (diff)
parent01f856ae6d0ca5ad0505b79bf2d22d7ca439b2a1 (diff)
downloadlinux-f2bb566f5c977ff010baaa9e5e14d9a75b06e5f2.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
tools/lib/bpf/ringbuf.c 927cbb478adf ("libbpf: Handle size overflow for ringbuf mmap") b486d19a0ab0 ("libbpf: checkpatch: Fixed code alignments in ringbuf.c") https://lore.kernel.org/all/20221121122707.44d1446a@canb.auug.org.au/ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/fib_trie.c6
-rw-r--r--net/ipv4/inet_hashtables.c84
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c4
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_ipv4.c21
9 files changed, 98 insertions, 47 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5b4d86701822..ab4a06be489b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw);
static int inet_sk_reselect_saddr(struct sock *sk)
{
- struct inet_bind_hashbucket *prev_addr_hashbucket;
struct inet_sock *inet = inet_sk(sk);
__be32 old_saddr = inet->inet_saddr;
__be32 daddr = inet->inet_daddr;
@@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
return 0;
}
- prev_addr_hashbucket =
- inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk,
- sock_net(sk), inet->inet_num);
-
- inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
-
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET);
if (err) {
- inet->inet_saddr = old_saddr;
- inet->inet_rcv_saddr = old_saddr;
ip_rt_put(rt);
return err;
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 170152772d33..3969fa805679 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -314,6 +314,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ if (xo->seq.low < seq)
+ xo->seq.hi++;
+
esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
ip_hdr(skb)->tot_len = htons(skb->len);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f721c308248b..19a662003eef 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
return 1;
}
- /* cannot match on nexthop object attributes */
- if (fi->nh)
- return 1;
+ if (fi->nh) {
+ if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp)
+ return 1;
+ return 0;
+ }
if (cfg->fc_oif || cfg->fc_gw_family) {
struct fib_nh *nh;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 452ff177e4da..74d403dbd2b4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct key_vector {
/* This list pointer if valid if (pos | bits) == 0 (LEAF) */
struct hlist_head leaf;
/* This array is valid if (pos | bits) > 0 (TNODE) */
- struct key_vector __rcu *tnode[0];
+ DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode);
};
};
@@ -1381,8 +1381,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
/* The alias was already inserted, so the node must exist. */
l = l ? l : fib_find_node(t, &tp, key);
- if (WARN_ON_ONCE(!l))
+ if (WARN_ON_ONCE(!l)) {
+ err = -ENOENT;
goto out_free_new_fa;
+ }
if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
new_fa) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 033bf3c2538f..3cec471a2cd2 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -858,34 +858,80 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in
return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}
-int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk)
+static void inet_update_saddr(struct sock *sk, void *saddr, int family)
+{
+ if (family == AF_INET) {
+ inet_sk(sk)->inet_saddr = *(__be32 *)saddr;
+ sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else {
+ sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr;
+ }
+#endif
+}
+
+static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset)
{
struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_bind_hashbucket *head, *head2;
struct inet_bind2_bucket *tb2, *new_tb2;
int l3mdev = inet_sk_bound_l3mdev(sk);
- struct inet_bind_hashbucket *head2;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
+ int bhash;
+
+ if (!inet_csk(sk)->icsk_bind2_hash) {
+ /* Not bind()ed before. */
+ if (reset)
+ inet_reset_saddr(sk);
+ else
+ inet_update_saddr(sk, saddr, family);
+
+ return 0;
+ }
/* Allocate a bind2 bucket ahead of time to avoid permanently putting
* the bhash2 table in an inconsistent state if a new tb2 bucket
* allocation fails.
*/
new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
- if (!new_tb2)
+ if (!new_tb2) {
+ if (reset) {
+ /* The (INADDR_ANY, port) bucket might have already
+ * been freed, then we cannot fixup icsk_bind2_hash,
+ * so we give up and unlink sk from bhash/bhash2 not
+ * to leave inconsistency in bhash2.
+ */
+ inet_put_port(sk);
+ inet_reset_saddr(sk);
+ }
+
return -ENOMEM;
+ }
+ bhash = inet_bhashfn(net, port, hinfo->bhash_size);
+ head = &hinfo->bhash[bhash];
head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
- if (prev_saddr) {
- spin_lock_bh(&prev_saddr->lock);
- __sk_del_bind2_node(sk);
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- inet_csk(sk)->icsk_bind2_hash);
- spin_unlock_bh(&prev_saddr->lock);
- }
+ /* If we change saddr locklessly, another thread
+ * iterating over bhash might see corrupted address.
+ */
+ spin_lock_bh(&head->lock);
- spin_lock_bh(&head2->lock);
+ spin_lock(&head2->lock);
+ __sk_del_bind2_node(sk);
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash);
+ spin_unlock(&head2->lock);
+
+ if (reset)
+ inet_reset_saddr(sk);
+ else
+ inet_update_saddr(sk, saddr, family);
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+
+ spin_lock(&head2->lock);
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
if (!tb2) {
tb2 = new_tb2;
@@ -893,15 +939,29 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc
}
sk_add_bind2_node(sk, &tb2->owners);
inet_csk(sk)->icsk_bind2_hash = tb2;
- spin_unlock_bh(&head2->lock);
+ spin_unlock(&head2->lock);
+
+ spin_unlock_bh(&head->lock);
if (tb2 != new_tb2)
kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
return 0;
}
+
+int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family)
+{
+ return __inet_bhash2_update_saddr(sk, saddr, family, false);
+}
EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
+void inet_bhash2_reset_saddr(struct sock *sk)
+{
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ __inet_bhash2_update_saddr(sk, NULL, 0, true);
+}
+EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr);
+
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1b512390b3cf..e880ce77322a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -366,6 +366,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
iph->tos, dev);
if (unlikely(err))
goto drop_error;
+ } else {
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY))
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f8e176c77d1c..b3cc416ed292 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
switch (ctinfo) {
case IP_CT_NEW:
- ct->mark = hash;
+ WRITE_ONCE(ct->mark, hash);
break;
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
@@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef DEBUG
nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
+ pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark));
if (!clusterip_responsible(cipinfo->config, hash)) {
pr_debug("not responsible\n");
return NF_DROP;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a69c5fcfedc..24602a5184b0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
- inet_reset_saddr(sk);
+ inet_bhash2_reset_saddr(sk);
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f0343538d1f8..1215fa4c1b9f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_timewait_death_row *tcp_death_row;
- __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct ip_options_rcu *inet_opt;
struct net *net = sock_net(sk);
__be16 orig_sport, orig_dport;
+ __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
@@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (!inet->inet_saddr) {
- if (inet_csk(sk)->icsk_bind2_hash) {
- prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
- sk, net, inet->inet_num);
- prev_sk_rcv_saddr = sk->sk_rcv_saddr;
- }
- inet->inet_saddr = fl4->saddr;
- }
-
- sk_rcv_saddr_set(sk, inet->inet_saddr);
-
- if (prev_addr_hashbucket) {
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET);
if (err) {
- inet->inet_saddr = 0;
- sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
ip_rt_put(rt);
return err;
}
+ } else {
+ sk_rcv_saddr_set(sk, inet->inet_saddr);
}
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -343,6 +331,7 @@ failure:
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
+ inet_bhash2_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;