author     Linus Torvalds <torvalds@linux-foundation.org>  2017-02-22 10:15:09 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-02-22 10:15:09 -0800
commit     3051bf36c25d5153051704291782f8d44e744d36 (patch)
tree       72dfc8a1d12675c6f2981d13102df954b678f11b /lib/rhashtable.c
parent     1e74a2eb1f5cc7f2f2b5aa9c9eeecbcf352220a3 (diff)
parent     005c3490e9db23738d91e02788606c0fe4734723 (diff)
download   linux-3051bf36c25d5153051704291782f8d44e744d36.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini
Varadhan.
2) Simplify classifier state on sk_buff in order to shrink it a bit.
From Willem de Bruijn.
3) Introduce SIPHASH and its use for secure sequence numbers and
   syncookies. From Jason A. Donenfeld. (A minimal usage sketch follows
   the quoted message.)
4) Reduce CPU usage for ICMP replies we are going to limit or
suppress, from Jesper Dangaard Brouer.
5) Introduce Shared Memory Communications socket layer, from Ursula
Braun.
6) Add RACK loss detection and allow it to actually trigger fast
recovery instead of just assisting after other algorithms have
triggered it. From Yuchung Cheng.
7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot.
8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert.
9) Export MPLS packet stats via netlink, from Robert Shearman.
10) Significantly improve inet port bind conflict handling, especially
when an application is restarted and changes its setting of
reuseport. From Josef Bacik.
11) Implement TX batching in vhost_net, from Jason Wang.
12) Extend the dummy device so that VF (virtual function) features,
such as configuration, can be more easily tested. From Phil
Sutter.
13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric
Dumazet.
14) Add new bpf MAP, implementing a longest prefix match trie. From
Daniel Mack.
15) Packet sample offloading support in mlxsw driver, from Yotam Gigi.
16) Add new aquantia driver, from David VomLehn.
17) Add bpf tracepoints, from Daniel Borkmann.
18) Add support for port mirroring to b53 and bcm_sf2 drivers, from
Florian Fainelli.
19) Remove custom busy polling from many drivers; it has been handled by
    the core networking stack since 4.5. From Eric Dumazet.
20) Support XDP adjust_head in virtio_net, from John Fastabend.
21) Fix several major holes in neighbour entry confirmation, from
Julian Anastasov.
22) Add XDP support to bnxt_en driver, from Michael Chan.
23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan.
24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi.
25) Support GRO in IPSEC protocols, from Steffen Klassert"
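
For item 3 in the list above, the new keyed hash lives in <linux/siphash.h>. The fragment below is a minimal sketch of how a caller might use it, not code from the series: the demo_tuple struct and all demo_* names are hypothetical, while siphash_key_t, siphash() and get_random_bytes() are the actual kernel interfaces involved.

#include <linux/random.h>
#include <linux/siphash.h>
#include <linux/types.h>

/* Hypothetical 4-tuple payload; only the siphash API itself is real. */
struct demo_tuple {
        __be32 saddr;
        __be32 daddr;
        __be16 sport;
        __be16 dport;
};

static siphash_key_t demo_key;          /* 128-bit secret key */

static void demo_init(void)
{
        /* Seed the key once, e.g. at module init time. */
        get_random_bytes(&demo_key, sizeof(demo_key));
}

static u64 demo_hash(const struct demo_tuple *t)
{
        /* 64-bit keyed PRF over the tuple; unpredictable without the key. */
        return siphash(t, sizeof(*t), &demo_key);
}

SipHash is a keyed pseudorandom function, so unlike an unkeyed hash its outputs cannot be reconstructed by an off-path observer without the 128-bit secret key, which is the property the sequence-number and syncookie paths need.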
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits)
Revert "ath10k: Search SMBIOS for OEM board file extension"
net: socket: fix recvmmsg not returning error from sock_error
bnxt_en: use eth_hw_addr_random()
bpf: fix unlocking of jited image when module ronx not set
arch: add ARCH_HAS_SET_MEMORY config
net: napi_watchdog() can use napi_schedule_irqoff()
tcp: Revert "tcp: tcp_probe: use spin_lock_bh()"
net/hsr: use eth_hw_addr_random()
net: mvpp2: enable building on 64-bit platforms
net: mvpp2: switch to build_skb() in the RX path
net: mvpp2: simplify MVPP2_PRS_RI_* definitions
net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT
net: mvpp2: remove unused register definitions
net: mvpp2: simplify mvpp2_bm_bufs_add()
net: mvpp2: drop useless fields in mvpp2_bm_pool and related code
net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue'
net: mvpp2: release reference to txq_cpu[] entry after unmapping
net: mvpp2: handle too large value in mvpp2_rx_time_coal_set()
net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set()
net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set
...
Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r--   lib/rhashtable.c   270
1 file changed, 220 insertions(+), 50 deletions(-)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 32d0ad058380..172454e6b979 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -32,6 +32,11 @@
 #define HASH_MIN_SIZE           4U
 #define BUCKET_LOCKS_PER_CPU    32UL
 
+union nested_table {
+        union nested_table __rcu *table;
+        struct rhash_head __rcu *bucket;
+};
+
 static u32 head_hashfn(struct rhashtable *ht,
                        const struct bucket_table *tbl,
                        const struct rhash_head *he)
@@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
         /* Never allocate more than 0.5 locks per bucket */
         size = min_t(unsigned int, size, tbl->size >> 1);
 
+        if (tbl->nest)
+                size = min(size, 1U << tbl->nest);
+
         if (sizeof(spinlock_t) != 0) {
                 tbl->locks = NULL;
 #ifdef CONFIG_NUMA
@@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
         return 0;
 }
 
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+        const unsigned int len = 1 << shift;
+        unsigned int i;
+
+        ntbl = rcu_dereference_raw(ntbl->table);
+        if (!ntbl)
+                return;
+
+        if (size > len) {
+                size >>= shift;
+                for (i = 0; i < len; i++)
+                        nested_table_free(ntbl + i, size);
+        }
+
+        kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+        unsigned int size = tbl->size >> tbl->nest;
+        unsigned int len = 1 << tbl->nest;
+        union nested_table *ntbl;
+        unsigned int i;
+
+        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+
+        for (i = 0; i < len; i++)
+                nested_table_free(ntbl + i, size);
+
+        kfree(ntbl);
+}
+
 static void bucket_table_free(const struct bucket_table *tbl)
 {
+        if (tbl->nest)
+                nested_bucket_table_free(tbl);
+
         if (tbl)
                 kvfree(tbl->locks);
 
@@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head)
         bucket_table_free(container_of(head, struct bucket_table, rcu));
 }
 
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+                                              union nested_table __rcu **prev,
+                                              unsigned int shifted,
+                                              unsigned int nhash)
+{
+        union nested_table *ntbl;
+        int i;
+
+        ntbl = rcu_dereference(*prev);
+        if (ntbl)
+                return ntbl;
+
+        ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+        if (ntbl && shifted) {
+                for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
+                        INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
+                                            (i << shifted) | nhash);
+        }
+
+        rcu_assign_pointer(*prev, ntbl);
+
+        return ntbl;
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+                                                      size_t nbuckets,
+                                                      gfp_t gfp)
+{
+        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+        struct bucket_table *tbl;
+        size_t size;
+
+        if (nbuckets < (1 << (shift + 1)))
+                return NULL;
+
+        size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+        tbl = kzalloc(size, gfp);
+        if (!tbl)
+                return NULL;
+
+        if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+                                0, 0)) {
+                kfree(tbl);
+                return NULL;
+        }
+
+        tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+        return tbl;
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
                                                size_t nbuckets,
                                                gfp_t gfp)
@@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
         tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
         if (tbl == NULL && gfp == GFP_KERNEL)
                 tbl = vzalloc(size);
+
+        size = nbuckets;
+
+        if (tbl == NULL && gfp != GFP_KERNEL) {
+                tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+                nbuckets = 0;
+        }
         if (tbl == NULL)
                 return NULL;
 
-        tbl->size = nbuckets;
+        tbl->size = size;
 
         if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
                 bucket_table_free(tbl);
@@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
         struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
         struct bucket_table *new_tbl = rhashtable_last_table(ht,
                 rht_dereference_rcu(old_tbl->future_tbl, ht));
-        struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
-        int err = -ENOENT;
+        struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
+        int err = -EAGAIN;
         struct rhash_head *head, *next, *entry;
         spinlock_t *new_bucket_lock;
         unsigned int new_hash;
 
+        if (new_tbl->nest)
+                goto out;
+
+        err = -ENOENT;
+
         rht_for_each(entry, old_tbl, old_hash) {
                 err = 0;
                 next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -202,19 +312,26 @@ out:
         return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht,
+static int rhashtable_rehash_chain(struct rhashtable *ht,
                                     unsigned int old_hash)
 {
         struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
         spinlock_t *old_bucket_lock;
+        int err;
 
         old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
         spin_lock_bh(old_bucket_lock);
-        while (!rhashtable_rehash_one(ht, old_hash))
+        while (!(err = rhashtable_rehash_one(ht, old_hash)))
                 ;
-        old_tbl->rehash++;
+
+        if (err == -ENOENT) {
+                old_tbl->rehash++;
+                err = 0;
+        }
+
         spin_unlock_bh(old_bucket_lock);
+
+        return err;
 }
 
 static int rhashtable_rehash_attach(struct rhashtable *ht,
@@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
         struct bucket_table *new_tbl;
         struct rhashtable_walker *walker;
         unsigned int old_hash;
+        int err;
 
         new_tbl = rht_dereference(old_tbl->future_tbl, ht);
         if (!new_tbl)
                 return 0;
 
-        for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
-                rhashtable_rehash_chain(ht, old_hash);
+        for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+                err = rhashtable_rehash_chain(ht, old_hash);
+                if (err)
+                        return err;
+        }
 
         /* Publish the new table pointer. */
         rcu_assign_pointer(ht->tbl, new_tbl);
@@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
         return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
-/**
- * rhashtable_expand - Expand hash table while allowing concurrent lookups
- * @ht:		the hash table to expand
- *
- * A secondary bucket array is allocated and the hash entries are migrated.
- *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
-static int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+                                   struct bucket_table *old_tbl,
+                                   unsigned int size)
 {
-        struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+        struct bucket_table *new_tbl;
         int err;
 
         ASSERT_RHT_MUTEX(ht);
 
-        old_tbl = rhashtable_last_table(ht, old_tbl);
-
-        new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
+        new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
         if (new_tbl == NULL)
                 return -ENOMEM;
@@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht)
  */
 static int rhashtable_shrink(struct rhashtable *ht)
 {
-        struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+        struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
         unsigned int nelems = atomic_read(&ht->nelems);
         unsigned int size = 0;
-        int err;
-
-        ASSERT_RHT_MUTEX(ht);
 
         if (nelems)
                 size = roundup_pow_of_two(nelems * 3 / 2);
@@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
         if (rht_dereference(old_tbl->future_tbl, ht))
                 return -EEXIST;
 
-        new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
-        if (new_tbl == NULL)
-                return -ENOMEM;
-
-        err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
-        if (err)
-                bucket_table_free(new_tbl);
-
-        return err;
+        return rhashtable_rehash_alloc(ht, old_tbl, size);
 }
 
 static void rht_deferred_worker(struct work_struct *work)
@@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work)
         tbl = rhashtable_last_table(ht, tbl);
 
         if (rht_grow_above_75(ht, tbl))
-                rhashtable_expand(ht);
+                err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
         else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
-                rhashtable_shrink(ht);
+                err = rhashtable_shrink(ht);
+        else if (tbl->nest)
+                err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
 
-        err = rhashtable_rehash_table(ht);
+        if (!err)
+                err = rhashtable_rehash_table(ht);
 
         mutex_unlock(&ht->mutex);
@@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
         int elasticity;
 
         elasticity = ht->elasticity;
-        pprev = &tbl->buckets[hash];
-        rht_for_each(head, tbl, hash) {
+        pprev = rht_bucket_var(tbl, hash);
+        rht_for_each_continue(head, *pprev, tbl, hash) {
                 struct rhlist_head *list;
                 struct rhlist_head *plist;
@@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
                                                   struct rhash_head *obj,
                                                   void *data)
 {
+        struct rhash_head __rcu **pprev;
         struct bucket_table *new_tbl;
         struct rhash_head *head;
@@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
         if (unlikely(rht_grow_above_100(ht, tbl)))
                 return ERR_PTR(-EAGAIN);
 
-        head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+        pprev = rht_bucket_insert(ht, tbl, hash);
+        if (!pprev)
+                return ERR_PTR(-ENOMEM);
+
+        head = rht_dereference_bucket(*pprev, tbl, hash);
 
         RCU_INIT_POINTER(obj->next, head);
         if (ht->rhlist) {
@@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
                 RCU_INIT_POINTER(list->next, NULL);
         }
 
-        rcu_assign_pointer(tbl->buckets[hash], obj);
+        rcu_assign_pointer(*pprev, obj);
 
         atomic_inc(&ht->nelems);
         if (rht_grow_above_75(ht, tbl))
@@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
                               void (*free_fn)(void *ptr, void *arg),
                               void *arg)
 {
-        const struct bucket_table *tbl;
+        struct bucket_table *tbl;
         unsigned int i;
 
         cancel_work_sync(&ht->run_work);
@@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
                 for (i = 0; i < tbl->size; i++) {
                         struct rhash_head *pos, *next;
 
-                        for (pos = rht_dereference(tbl->buckets[i], ht),
+                        for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
                              next = !rht_is_a_nulls(pos) ?
                                         rht_dereference(pos->next, ht) : NULL;
                              !rht_is_a_nulls(pos);
@@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht)
         return rhashtable_free_and_destroy(ht, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+                                            unsigned int hash)
+{
+        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+        static struct rhash_head __rcu *rhnull =
+                (struct rhash_head __rcu *)NULLS_MARKER(0);
+        unsigned int index = hash & ((1 << tbl->nest) - 1);
+        unsigned int size = tbl->size >> tbl->nest;
+        unsigned int subhash = hash;
+        union nested_table *ntbl;
+
+        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+        ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+        subhash >>= tbl->nest;
+
+        while (ntbl && size > (1 << shift)) {
+                index = subhash & ((1 << shift) - 1);
+                ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+                size >>= shift;
+                subhash >>= shift;
+        }
+
+        if (!ntbl)
+                return &rhnull;
+
+        return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+                                                   struct bucket_table *tbl,
+                                                   unsigned int hash)
+{
+        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+        unsigned int index = hash & ((1 << tbl->nest) - 1);
+        unsigned int size = tbl->size >> tbl->nest;
+        union nested_table *ntbl;
+        unsigned int shifted;
+        unsigned int nhash;
+
+        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+        hash >>= tbl->nest;
+        nhash = index;
+        shifted = tbl->nest;
+        ntbl = nested_table_alloc(ht, &ntbl[index].table,
+                                  size <= (1 << shift) ? shifted : 0, nhash);
+
+        while (ntbl && size > (1 << shift)) {
+                index = hash & ((1 << shift) - 1);
+                size >>= shift;
+                hash >>= shift;
+                nhash |= index << shifted;
+                shifted += shift;
+                ntbl = nested_table_alloc(ht, &ntbl[index].table,
+                                          size <= (1 << shift) ? shifted : 0,
+                                          nhash);
+        }
+
+        if (!ntbl)
+                return NULL;
+
+        return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
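
The most intricate part of the rhashtable change above is the nested-bucket lookup in rht_bucket_nested(): the low tbl->nest bits of the (already table-sized) bucket hash index the top-level page, and each further level consumes PAGE_SHIFT - ilog2(sizeof(void *)) bits until the remainder fits in a single page-sized leaf. The standalone userspace sketch below only mirrors that index decomposition; the DEMO_* constants are assumed stand-ins for 4 KiB pages and 8-byte pointers, it is not kernel code and allocates nothing.

#include <stdio.h>

/* Assumed stand-ins: 4 KiB pages, 8-byte pointers (shift = 12 - 3 = 9). */
#define DEMO_PAGE_SHIFT 12
#define DEMO_PTR_SHIFT  3
#define DEMO_SHIFT      (DEMO_PAGE_SHIFT - DEMO_PTR_SHIFT)

/*
 * Mirror of the index walk in rht_bucket_nested(): "nest" low bits pick a
 * slot in the top-level page, then each nested level consumes DEMO_SHIFT
 * bits until the remaining sub-table fits in a single page.
 */
static void demo_walk(unsigned int hash, unsigned int size, unsigned int nest)
{
        unsigned int index = hash & ((1u << nest) - 1);
        unsigned int subhash = hash >> nest;

        printf("hash %#x: top-level index %u\n", hash, index);
        size >>= nest;

        while (size > (1u << DEMO_SHIFT)) {
                index = subhash & ((1u << DEMO_SHIFT) - 1);
                printf("  next-level index %u\n", index);
                size >>= DEMO_SHIFT;
                subhash >>= DEMO_SHIFT;
        }
        printf("  leaf bucket slot %u\n", subhash);
}

int main(void)
{
        /* 2^20 buckets; tbl->nest would be (20 - 1) % 9 + 1 = 2 here. */
        unsigned int nbuckets = 1u << 20;

        demo_walk(0xdeadbeef & (nbuckets - 1), nbuckets, 2);
        return 0;
}

The kernel code performs the same walk but dereferences (and, on the insert path, allocates with GFP_ATOMIC) a page-sized union nested_table at each level, which is what lets bucket_table_alloc() fall back to a nested layout when a contiguous bucket array cannot be allocated in atomic context.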