summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/inet_hashtables.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r-- net/ipv4/inet_hashtables.c 128
1 files changed, 86 insertions, 42 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 9111a4e22155..0cb9165421d4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -18,15 +18,16 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
+#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
-static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
{
static u32 inet_ehash_secret __read_mostly;
@@ -36,17 +37,21 @@ static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
inet_ehash_secret + net_hash_mix(net));
}
-
-static unsigned int inet_sk_ehashfn(const struct sock *sk)
+/* Return the ehash value for @sk. Handles inet_sock, but also timewait and
+ * request sockets, for IPv4/IPv6.
+ */
+u32 sk_ehashfn(const struct sock *sk)
{
- const struct inet_sock *inet = inet_sk(sk);
- const __be32 laddr = inet->inet_rcv_saddr;
- const __u16 lport = inet->inet_num;
- const __be32 faddr = inet->inet_daddr;
- const __be16 fport = inet->inet_dport;
- struct net *net = sock_net(sk);
-
- return inet_ehashfn(net, laddr, lport, faddr, fport);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) /* a v4-mapped peer address is hashed by the IPv4 call below instead */
+ return inet6_ehashfn(sock_net(sk),
+ &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ return inet_ehashfn(sock_net(sk), /* reached for non-AF_INET6 sockets, v4-mapped peers, or when CONFIG_IPV6 is off */
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
}
/*
@@ -60,8 +65,8 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
{
struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
- if (tb != NULL) {
- write_pnet(&tb->ib_net, hold_net(net));
+ if (tb) {
+ write_pnet(&tb->ib_net, net);
tb->port = snum;
tb->fastreuse = 0;
tb->fastreuseport = 0;
@@ -79,7 +84,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
{
if (hlist_empty(&tb->owners)) {
__hlist_del(&tb->node);
- release_net(ib_net(tb));
kmem_cache_free(cachep, tb);
}
}
@@ -87,10 +91,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
- atomic_inc(&hashinfo->bsockets);
-
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
tb->num_owners++;
@@ -108,8 +108,6 @@ static void __inet_put_port(struct sock *sk)
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
- atomic_dec(&hashinfo->bsockets);
-
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk);
@@ -263,11 +261,19 @@ void sock_gen_put(struct sock *sk)
if (sk->sk_state == TCP_TIME_WAIT)
inet_twsk_free(inet_twsk(sk));
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ reqsk_free(inet_reqsk(sk));
else
sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);
+void sock_edemux(struct sk_buff *skb) /* passes the socket attached to @skb (skb->sk) to sock_gen_put(); presumably installed as an skb destructor — TODO confirm at the callers */
+{
+ sock_gen_put(skb->sk); /* sock_gen_put() chooses the free routine from sk->sk_state */
+}
+EXPORT_SYMBOL(sock_edemux);
+
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
@@ -377,7 +383,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
@@ -388,9 +394,10 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static inline u32 inet_sk_port_offset(const struct sock *sk)
+static u32 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
+
return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
inet->inet_daddr,
inet->inet_dport);
@@ -400,13 +407,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
- spinlock_t *lock;
struct inet_ehash_bucket *head;
+ spinlock_t *lock;
int twrefcnt = 0;
WARN_ON(!sk_unhashed(sk));
- sk->sk_hash = inet_sk_ehashfn(sk);
+ sk->sk_hash = sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
list = &head->chain;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
@@ -423,15 +430,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
-static void __inet_hash(struct sock *sk)
+int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
- if (sk->sk_state != TCP_LISTEN) {
- __inet_hash_nolisten(sk, NULL);
- return;
- }
+ if (sk->sk_state != TCP_LISTEN)
+ return __inet_hash_nolisten(sk, tw);
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -440,13 +445,15 @@ static void __inet_hash(struct sock *sk)
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock);
+ return 0;
}
+EXPORT_SYMBOL(__inet_hash);
void inet_hash(struct sock *sk) /* hashes @sk via __inet_hash() with bottom halves disabled; does nothing when sk_state is TCP_CLOSE */
{
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk);
+ __inet_hash(sk, NULL); /* NULL: no timewait socket is supplied; the int return value is ignored here */
local_bh_enable();
}
}
@@ -477,8 +484,7 @@ EXPORT_SYMBOL_GPL(inet_unhash);
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
- struct sock *, __u16, struct inet_timewait_sock **),
- int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
+ struct sock *, __u16, struct inet_timewait_sock **))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
const unsigned short snum = inet_sk(sk)->inet_num;
@@ -497,8 +503,14 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
+ /* By starting with offset being an even number,
+ * we tend to leave about 50% of ports for other uses,
+ * like bind(0).
+ */
+ offset &= ~1;
+
local_bh_disable();
- for (i = 1; i <= remaining; i++) {
+ for (i = 0; i < remaining; i++) {
port = low + (i + offset) % remaining;
if (inet_is_local_reserved_port(net, port))
continue;
@@ -542,20 +554,20 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return -EADDRNOTAVAIL;
ok:
- hint += i;
+ hint += (i + 2) & ~1;
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- twrefcnt += hash(sk, tw);
+ twrefcnt += __inet_hash_nolisten(sk, tw);
}
if (tw)
twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
spin_unlock(&head->lock);
if (tw) {
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
while (twrefcnt) {
twrefcnt--;
inet_twsk_put(tw);
@@ -570,7 +582,7 @@ ok:
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- hash(sk, NULL);
+ __inet_hash_nolisten(sk, NULL);
spin_unlock_bh(&head->lock);
return 0;
} else {
@@ -589,8 +601,12 @@ out:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk) /* IPv4 entry point: forwards to __inet_hash_connect() with __inet_check_established as the uniqueness check and returns its result */
{
- return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
- __inet_check_established, __inet_hash_nolisten);
+ u32 port_offset = 0; /* stays 0 when inet_num is already non-zero */
+
+ if (!inet_sk(sk)->inet_num) /* inet_num == 0: compute the offset via inet_sk_port_offset(), which uses secure_ipv4_port_ephemeral() */
+ port_offset = inet_sk_port_offset(sk);
+ return __inet_hash_connect(death_row, sk, port_offset,
+ __inet_check_established); /* the former hash callback argument is gone; __inet_hash_connect() now calls __inet_hash_nolisten() itself */
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
@@ -598,7 +614,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
{
int i;
- atomic_set(&h->bsockets, 0);
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
@@ -606,3 +621,32 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
}
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
+
+int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) /* allocates and initialises hashinfo->ehash_locks and sets ehash_locks_mask; returns 0, or -ENOMEM if both allocators fail */
+{
+ unsigned int locksz = sizeof(spinlock_t);
+ unsigned int i, nblocks = 1; /* nblocks stays 1 when the allocation branch below is skipped */
+
+ if (locksz != 0) { /* skipped when spinlock_t has zero size: nothing to allocate or initialise */
+ /* allocate 2 cache lines or at least one spinlock per cpu */
+ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
+ nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
+
+ /* no more locks than number of hash buckets */
+ nblocks = min(nblocks, hashinfo->ehash_mask + 1);
+
+ hashinfo->ehash_locks = kmalloc_array(nblocks, locksz,
+ GFP_KERNEL | __GFP_NOWARN); /* presumably __GFP_NOWARN is used because a failure here is handled by the vmalloc() fallback — TODO confirm */
+ if (!hashinfo->ehash_locks) /* kmalloc_array() failed: retry with vmalloc() */
+ hashinfo->ehash_locks = vmalloc(nblocks * locksz); /* NOTE(review): the matching free (not visible here) must cope with memory from either allocator */
+
+ if (!hashinfo->ehash_locks)
+ return -ENOMEM; /* ehash_locks_mask is left unset on this path */
+
+ for (i = 0; i < nblocks; i++)
+ spin_lock_init(&hashinfo->ehash_locks[i]);
+ }
+ hashinfo->ehash_locks_mask = nblocks - 1; /* usable as an index mask only if nblocks is a power of two — assumes ehash_mask + 1 is one; TODO confirm */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);