diff options
author | David S. Miller <davem@davemloft.net> | 2015-01-13 14:01:06 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-01-13 14:01:06 -0500 |
commit | 52e3ad9f011fe72620b2f7050227cd48fd295ad5 (patch) | |
tree | 97def40b133732cc55d2f15bfa8e6f652528dc4e | |
parent | d2c60b1350c9a3eb7ed407c18f50306762365646 (diff) | |
parent | 6f73d3b13dc5e16ae06025cd1b12a36b2857caa2 (diff) | |
download | linux-52e3ad9f011fe72620b2f7050227cd48fd295ad5.tar.bz2 |
Merge branch 'rhashtable-next'
Ying Xue says:
====================
remove nl_sk_hash_lock from netlink socket
After tipc socket successfully avoids the involvement of an extra lock
with rhashtable_lookup_insert(), it's possible for netlink socket to
remove its hash socket lock now. But as netlink socket needs a compare
function to look for an object, we first introduce a new function
called rhashtable_lookup_compare_insert() in commit #1 which is
implemented based on original rhashtable_lookup_insert(). We
subsequently remove nl_sk_hash_lock from netlink socket with the new
introduced function in commit #2. Lastly, as Thomas requested, we add
commit #3 to indicate the implementation of what the grow and shrink
decision function must enforce min/max shift.
v2:
As Thomas pointed out, there was a race between checking portid and
then setting it in commit #2. Now use socket lock to make the process
of both checking and setting portid atomic, and then eliminate the
race.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/rhashtable.h | 9 | ||||
-rw-r--r-- | lib/rhashtable.c | 42 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 33 | ||||
-rw-r--r-- | net/netlink/af_netlink.h | 1 | ||||
-rw-r--r-- | net/netlink/diag.c | 10 |
5 files changed, 74 insertions, 21 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 326acd8c2e9f..9570832ab07c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -79,6 +79,10 @@ struct rhashtable; * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink + * + * Note: when implementing the grow and shrink decision function, min/max + * shift must be enforced, otherwise, resizing watermarks they set may be + * useless. */ struct rhashtable_params { size_t nelem_hint; @@ -168,7 +172,12 @@ int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); + bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg); void rhashtable_destroy(struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8023b554905c..ed6ae1ad304c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -727,6 +727,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); */ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) { + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = rht_obj(ht, obj) + ht->p.key_offset, + }; + + BUG_ON(!ht->p.key_len); + + return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, + &arg); +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); + +/** + * rhashtable_lookup_compare_insert - search and insert object to hash table + * with compare function + * @ht: hash table + * @obj: pointer to hash head inside object + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg) +{ struct bucket_table *new_tbl, *old_tbl; spinlock_t *new_bucket_lock, *old_bucket_lock; u32 new_hash, old_hash; @@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) if (unlikely(old_tbl != new_tbl)) spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); - if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) { + if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, + compare, arg)) { success = false; goto exit; } @@ -763,7 +801,7 @@ exit: return success; } -EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); static size_t rounded_hashtable_size(struct rhashtable_params *params) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 298e1df7132a..01b702d63457 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion - * and removal are protected with nl_sk_hash_lock while using RCU list + * and removal are protected with per bucket lock while using RCU list * modification primitives and may run in parallel to RCU protected lookups. * Destruction of the Netlink socket may only occur *after* nl_table_lock has * been acquired * either during or after the socket has been removed from @@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk, + struct net *net) +{ + struct netlink_compare_arg arg = { + .net = net, + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; @@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct netlink_table *table = &nl_table[sk->sk_protocol]; int err = -EADDRINUSE; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) @@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); - err = 0; + if (__netlink_insert(table, sk, net)) + err = 0; + else + sock_put(sk); err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); if (nlk_sk(sk)->subscriptions) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index fd96fa76202a..7518375782f5 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -74,6 +74,5 @@ struct netlink_table { extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index fcca36d81a62..bb59a7ed0859 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, for (i = 0; i < htbl->size; i++) { struct rhash_head *pos; - rht_for_each_entry(nlsk, pos, htbl, i, node) { + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) @@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; } @@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } |