From b7f5e5c7f8cedf6b69c9702d448cdf78ffc45c7b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 20 Feb 2015 21:14:21 +0100
Subject: rhashtable: don't allocate ht structure on stack in test_rht_init

With object runtime debugging enabled, the rhashtable test suite will
rightfully throw a warning "ODEBUG: object is on stack, but not
annotated" from rhashtable_init().

This is because run_work is (correctly) being initialized via
INIT_WORK() and not annotated by INIT_WORK_ONSTACK(). Meaning,
rhashtable_init() is okay as is; we just need to move ht, e.g., into
global scope.

It never triggered any real problem, since test_rhashtable is a rather
controlled environment that effectively runs to completion, so the
stack memory does not vanish underneath us. We shouldn't confuse any
testers with the warning, though.

Fixes: 7e1e77636e36 ("lib: Resizable, Scalable, Concurrent Hash Table")
Signed-off-by: Daniel Borkmann
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/test_rhashtable.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 1dfeba73fc74..f6ce291b68e7 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -191,9 +191,10 @@ error:
     return err;
 }
 
+static struct rhashtable ht;
+
 static int __init test_rht_init(void)
 {
-    struct rhashtable ht;
     struct rhashtable_params params = {
         .nelem_hint = TEST_HT_SIZE,
         .head_offset = offsetof(struct test_obj, node),
--
cgit v1.2.3
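
For reference, work items that genuinely must live on the stack are handled with the _ONSTACK variants of the init helpers, which carry the debugobjects annotation. A minimal sketch of that pattern, with a hypothetical handler demo_fn that is not part of the patch; it does not apply here because rhashtable_init() calls plain INIT_WORK() internally on ht->run_work, which is exactly why the fix moves ht into global scope instead:

#include <linux/workqueue.h>

static void demo_fn(struct work_struct *work)
{
    /* the deferred work would run here */
}

static void demo_run_onstack_work(void)
{
    struct work_struct w;

    INIT_WORK_ONSTACK(&w, demo_fn); /* annotates 'w' for debugobjects */
    schedule_work(&w);
    flush_work(&w);                 /* 'w' must outlive the work's execution */
    destroy_work_on_stack(&w);      /* pairs with INIT_WORK_ONSTACK() */
}
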
From 342100d937ed6e5faf1e7ee7dcd7b3935fec8877 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 20 Feb 2015 00:53:37 +0100
Subject: rhashtable: don't test for shrink on insert, expansion on delete

Restore pre 54c5b7d311c8 behaviour and only probe for expansions on
inserts and for shrinks on deletes. Currently, on initial inserts into
a sparse hash table, we may shrink it first simply because it's not
fully populated yet, only to later realize that we need to grow again.
This is counterintuitive: an initial default size of 64 elements is
already small enough, and in case an element size hint is given to the
hash table by a user, we should avoid unnecessary expansion steps, so
a shrink is clearly unintended here.

Fixes: 54c5b7d311c8 ("rhashtable: introduce rhashtable_wakeup_worker helper function")
Signed-off-by: Daniel Borkmann
Cc: Ying Xue
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9cc4c4a90d00..38f7879df0d8 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -537,16 +537,25 @@ unlock:
     mutex_unlock(&ht->mutex);
 }
 
-static void rhashtable_wakeup_worker(struct rhashtable *ht)
+static void rhashtable_probe_expand(struct rhashtable *ht)
 {
-    struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-    struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-    size_t size = tbl->size;
+    const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+    const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
 
     /* Only adjust the table if no resizing is currently in progress. */
-    if (tbl == new_tbl &&
-        ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) ||
-         (ht->p.shrink_decision && ht->p.shrink_decision(ht, size))))
+    if (tbl == new_tbl && ht->p.grow_decision &&
+        ht->p.grow_decision(ht, tbl->size))
+        schedule_work(&ht->run_work);
+}
+
+static void rhashtable_probe_shrink(struct rhashtable *ht)
+{
+    const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
+    const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+
+    /* Only adjust the table if no resizing is currently in progress. */
+    if (tbl == new_tbl && ht->p.shrink_decision &&
+        ht->p.shrink_decision(ht, tbl->size))
         schedule_work(&ht->run_work);
 }
 
@@ -569,7 +578,7 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 
     atomic_inc(&ht->nelems);
 
-    rhashtable_wakeup_worker(ht);
+    rhashtable_probe_expand(ht);
 }
 
 /**
@@ -682,7 +691,7 @@ found:
 
     if (ret) {
         atomic_dec(&ht->nelems);
-        rhashtable_wakeup_worker(ht);
+        rhashtable_probe_shrink(ht);
     }
 
     rcu_read_unlock();
--
cgit v1.2.3
From eb6d1abf1bd8bf1beb45b5401c8324bdb8f893c4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 20 Feb 2015 00:53:38 +0100
Subject: rhashtable: better high order allocation attempts

When trying to allocate future tables via bucket_table_alloc(), it
seems overkill on large table shifts to unconditionally probe with
kzalloc() first, as it's likely to fail. Only probe with kzalloc()
for more reasonable table sizes, and use vzalloc() either as a
fallback on failure or directly in case of large table sizes.

Fixes: 7e1e77636e36 ("lib: Resizable, Scalable, Concurrent Hash Table")
Signed-off-by: Daniel Borkmann
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 38f7879df0d8..b41a5c09832a 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -217,15 +217,15 @@ static void bucket_table_free(const struct bucket_table *tbl)
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
                                                size_t nbuckets)
 {
-    struct bucket_table *tbl;
+    struct bucket_table *tbl = NULL;
     size_t size;
     int i;
 
     size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-    tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+    if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+        tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
     if (tbl == NULL)
         tbl = vzalloc(size);
-
     if (tbl == NULL)
         return NULL;
 
--
cgit v1.2.3
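
The costly-order cutoff generalizes beyond rhashtable. A minimal sketch of the same strategy, assuming a hypothetical helper demo_alloc_table() that is not part of the patch: try the physically contiguous allocator only while the request stays within PAGE_ALLOC_COSTLY_ORDER pages, without aggressive retries or warnings, and fall back to vmalloc space otherwise.

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *demo_alloc_table(size_t size)
{
    void *p = NULL;

    /* High-order contiguous allocations are likely to fail under
     * fragmentation; don't retry hard and don't warn on failure.
     */
    if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
        p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
    if (!p)
        p = vzalloc(size); /* virtually contiguous fallback */

    return p;
}

Note that the matching free path then has to cope with both cases, e.g. by checking is_vmalloc_addr(), which is what kvfree() encapsulates.
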
From 6dd0c1655be26345960a6bae574c7dc4648611d3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 20 Feb 2015 00:53:39 +0100
Subject: rhashtable: allow to unload test module

There's no good reason to disallow unloading of the rhashtable test
case module. Commit 9d6dbe1bbaf8 moved the code from a boot test into
a stand-alone module, but only converted the subsys_initcall() handler
into a module_init() function without a related exit handler, thus
preventing the test module from being unloaded.

Fixes: 9d6dbe1bbaf8 ("rhashtable: Make selftest modular")
Signed-off-by: Daniel Borkmann
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/test_rhashtable.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'lib')

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f6ce291b68e7..58b995323c44 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -223,6 +223,11 @@ static int __init test_rht_init(void)
     return err;
 }
 
+static void __exit test_rht_exit(void)
+{
+}
+
 module_init(test_rht_init);
+module_exit(test_rht_exit);
 
 MODULE_LICENSE("GPL v2");
--
cgit v1.2.3
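
A brief usage note: with the (deliberately empty) exit handler registered, the selftest can now be cycled without rebooting, e.g. modprobe test_rhashtable followed by rmmod test_rhashtable. A module that lacks module_exit() is treated as permanent by the kernel and refuses to unload.
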
From 71bb0012c38fbd090a56b3cb96e9f626c415d264 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Mon, 23 Feb 2015 04:35:06 -0500
Subject: rhashtable: initialize all rhashtable walker members

Commit f2dba9c6ff ("rhashtable: Introduce rhashtable_walk_*") forgot
to initialize the members of struct rhashtable_walker after
allocating it, leaving 'resize', which is used later on, with an
undefined value.

Fixes: f2dba9c6ff ("rhashtable: Introduce rhashtable_walk_*")
Signed-off-by: Sasha Levin
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index b41a5c09832a..e3a04e4b3ec5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -903,6 +903,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
     if (!iter->walker)
         return -ENOMEM;
 
+    INIT_LIST_HEAD(&iter->walker->list);
+    iter->walker->resize = false;
+
     mutex_lock(&ht->mutex);
     list_add(&iter->walker->list, &ht->walkers);
     mutex_unlock(&ht->mutex);
--
cgit v1.2.3
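
An alternative that would avoid this bug class wholesale is to hand out the walker zero-initialized, so that members added by future patches also start out in a defined state. A rough sketch of what that could look like inside rhashtable_walk_init() (an assumption for illustration, not the merged fix, which keeps the existing allocation and initializes the two members explicitly):

    iter->walker = kzalloc(sizeof(*iter->walker), GFP_KERNEL);
    if (!iter->walker)
        return -ENOMEM;

    /* Zeroing covers 'resize'; the list head still wants a real init. */
    INIT_LIST_HEAD(&iter->walker->list);
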
From 8331de75cb13fc907ceba78e698c42150e61dda9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 25 Feb 2015 16:31:53 +0100
Subject: rhashtable: unconditionally grow when max_shift is not specified

While commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for
worker queue") rightfully moved part of the decision making of
whether we should expand or shrink from the expand/shrink functions
themselves into the insert/delete functions in order to avoid
unnecessary worker wake-ups, it however introduced a regression by
doing so.

Before that change, if no max_shift was specified (= 0) on rhashtable
initialization, rhashtable_expand() would just grow unconditionally
and let the available memory be the limiting factor. After that
change, if no max_shift was specified, there would be _no_ expansion
step at all.

Given that netlink and tipc have a max_shift specified, it was not
visible there, but Josh Hunt reported that nft, which starts out with
a default element hint of 3 if not otherwise provided, would slow
down inserts tremendously, as the table cannot grow larger to relax
occupancy.

Given that the test case verifies shrinks/expands manually, we must
also remove the pointers to the helper functions to explicitly avoid
parallel resizing on insertions/deletions. test_bucket_stats() and
test_rht_lookup() could also be wrapped in the rhashtable mutex to
explicitly synchronize a walk against resizing, but I think that
defeats the actual test case, which intended to have explicit test
steps, i.e. 1) inserts, 2) expands, 3) shrinks, 4) deletions, with
object verification after each stage.

Reported-by: Josh Hunt
Fixes: c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue")
Signed-off-by: Daniel Borkmann
Cc: Ying Xue
Cc: Josh Hunt
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/rhashtable.c      | 2 +-
 lib/test_rhashtable.c | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e3a04e4b3ec5..bcf119bfdef4 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -251,7 +251,7 @@ bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
 {
     /* Expand table when exceeding 75% load */
     return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
-           (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift);
+           (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
 }
 EXPORT_SYMBOL_GPL(rht_grow_above_75);
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 58b995323c44..f9e9d734446a 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -202,8 +202,6 @@ static int __init test_rht_init(void)
         .key_len = sizeof(int),
         .hashfn = jhash,
         .nulls_base = (3U << RHT_BASE_SHIFT),
-        .grow_decision = rht_grow_above_75,
-        .shrink_decision = rht_shrink_below_30,
     };
     int err;
 
--
cgit v1.2.3
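
To make the semantics of the fixed predicate explicit: max_shift == 0 now reads as "no upper bound" instead of "never grow". A standalone sketch with hypothetical parameter names, not taken from the patch:

static bool demo_may_grow(size_t nelems, size_t new_size,
                          unsigned int shift, unsigned int max_shift)
{
    /* over 75% load, and either unbounded (max_shift == 0) ... */
    return nelems > (new_size / 4 * 3) &&
           /* ... or still below the configured cap */
           (!max_shift || shift < max_shift);
}

Before the fix, the second clause read (max_shift && shift < max_shift), which evaluates to false whenever max_shift is 0 and thereby vetoes every expansion.
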
From 4c4b52d9b2df45e8216d3e30b5452e4a364d2cac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 25 Feb 2015 16:31:54 +0100
Subject: rhashtable: remove indirection for grow/shrink decision functions

Currently, all real users of rhashtable default their grow and shrink
decision functions to rht_grow_above_75() and rht_shrink_below_30(),
so there's no need to have this explicitly selectable. It can/should
be generic and private inside rhashtable until a real use case pops
up. Since we can make this private, we save ourselves this additional
indirection layer and can improve insertion/deletion time as well.

Reference: http://patchwork.ozlabs.org/patch/443040/
Suggested-by: David S. Miller
Signed-off-by: Daniel Borkmann
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 13 -----------
 lib/rhashtable.c           | 56 ++++++++++++++--------------------------------
 lib/test_rhashtable.c      |  1 +
 net/netfilter/nft_hash.c   |  2 --
 net/netlink/af_netlink.c   |  2 --
 net/tipc/socket.c          |  2 --
 6 files changed, 18 insertions(+), 58 deletions(-)

(limited to 'lib')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index cb2104be2135..d438eeb08bff 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,12 +79,6 @@ struct rhashtable;
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
- * @grow_decision: If defined, may return true if table should expand
- * @shrink_decision: If defined, may return true if table should shrink
- *
- * Note: when implementing the grow and shrink decision function, min/max
- * shift must be enforced, otherwise, resizing watermarks they set may be
- * useless.
  */
 struct rhashtable_params {
     size_t          nelem_hint;
@@ -98,10 +92,6 @@ struct rhashtable_params {
     size_t          locks_mul;
     rht_hashfn_t    hashfn;
     rht_obj_hashfn_t obj_hashfn;
-    bool            (*grow_decision)(const struct rhashtable *ht,
-                                     size_t new_size);
-    bool            (*shrink_decision)(const struct rhashtable *ht,
-                                       size_t new_size);
 };
 
 /**
@@ -193,9 +183,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
-
 int rhashtable_expand(struct rhashtable *ht);
 int rhashtable_shrink(struct rhashtable *ht);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index bcf119bfdef4..090641db4c0d 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -247,26 +247,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
  * @ht:       hash table
  * @new_size: new table size
  */
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
 {
     /* Expand table when exceeding 75% load */
     return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
            (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
 }
-EXPORT_SYMBOL_GPL(rht_grow_above_75);
 
 /**
  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
  * @ht:       hash table
  * @new_size: new table size
  */
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
+static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 {
     /* Shrink table beneath 30% load */
     return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
            (atomic_read(&ht->shift) > ht->p.min_shift);
 }
-EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
 static void lock_buckets(struct bucket_table *new_tbl,
                          struct bucket_table *old_tbl, unsigned int hash)
@@ -528,40 +526,19 @@ static void rht_deferred_worker(struct work_struct *work)
     list_for_each_entry(walker, &ht->walkers, list)
         walker->resize = true;
 
-    if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
+    if (rht_grow_above_75(ht, tbl->size))
         rhashtable_expand(ht);
-    else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
+    else if (rht_shrink_below_30(ht, tbl->size))
         rhashtable_shrink(ht);
-
 unlock:
     mutex_unlock(&ht->mutex);
 }
 
-static void rhashtable_probe_expand(struct rhashtable *ht)
-{
-    const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-    const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-    /* Only adjust the table if no resizing is currently in progress. */
-    if (tbl == new_tbl && ht->p.grow_decision &&
-        ht->p.grow_decision(ht, tbl->size))
-        schedule_work(&ht->run_work);
-}
-
-static void rhashtable_probe_shrink(struct rhashtable *ht)
-{
-    const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-    const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-    /* Only adjust the table if no resizing is currently in progress. */
-    if (tbl == new_tbl && ht->p.shrink_decision &&
-        ht->p.shrink_decision(ht, tbl->size))
-        schedule_work(&ht->run_work);
-}
-
 static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-                                struct bucket_table *tbl, u32 hash)
+                                struct bucket_table *tbl,
+                                const struct bucket_table *old_tbl, u32 hash)
 {
+    bool no_resize_running = tbl == old_tbl;
     struct rhash_head *head;
 
     hash = rht_bucket_index(tbl, hash);
@@ -577,8 +554,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
     rcu_assign_pointer(tbl->buckets[hash], obj);
 
     atomic_inc(&ht->nelems);
-
-    rhashtable_probe_expand(ht);
+    if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+        schedule_work(&ht->run_work);
 }
 
 /**
@@ -608,7 +585,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
     hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
 
     lock_buckets(tbl, old_tbl, hash);
-    __rhashtable_insert(ht, obj, tbl, hash);
+    __rhashtable_insert(ht, obj, tbl, old_tbl, hash);
     unlock_buckets(tbl, old_tbl, hash);
 
     rcu_read_unlock();
@@ -690,8 +667,11 @@ found:
     unlock_buckets(new_tbl, old_tbl, new_hash);
 
     if (ret) {
+        bool no_resize_running = new_tbl == old_tbl;
+
         atomic_dec(&ht->nelems);
-        rhashtable_probe_shrink(ht);
+        if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
+            schedule_work(&ht->run_work);
     }
 
     rcu_read_unlock();
@@ -861,7 +841,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
         goto exit;
     }
 
-    __rhashtable_insert(ht, obj, new_tbl, new_hash);
+    __rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash);
 
 exit:
     unlock_buckets(new_tbl, old_tbl, new_hash);
@@ -1123,8 +1103,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
     if (!ht->p.hash_rnd)
         get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
 
-    if (ht->p.grow_decision || ht->p.shrink_decision)
-        INIT_WORK(&ht->run_work, rht_deferred_worker);
+    INIT_WORK(&ht->run_work, rht_deferred_worker);
 
     return 0;
 }
@@ -1142,8 +1121,7 @@ void rhashtable_destroy(struct rhashtable *ht)
 {
     ht->being_destroyed = true;
 
-    if (ht->p.grow_decision || ht->p.shrink_decision)
-        cancel_work_sync(&ht->run_work);
+    cancel_work_sync(&ht->run_work);
 
     mutex_lock(&ht->mutex);
     bucket_table_free(rht_dereference(ht->tbl, ht));
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f9e9d734446a..67c7593d1dd6 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -201,6 +201,7 @@ static int __init test_rht_init(void)
         .key_offset = offsetof(struct test_obj, value),
         .key_len = sizeof(int),
         .hashfn = jhash,
+        .max_shift = 1, /* we expand/shrink manually here */
         .nulls_base = (3U << RHT_BASE_SHIFT),
     };
     int err;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 61e6c407476a..c82df0a48fcd 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -192,8 +192,6 @@ static int nft_hash_init(const struct nft_set *set,
         .key_offset = offsetof(struct nft_hash_elem, key),
         .key_len = set->klen,
         .hashfn = jhash,
-        .grow_decision = rht_grow_above_75,
-        .shrink_decision = rht_shrink_below_30,
     };
 
     return rhashtable_init(priv, &params);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2702673f0f23..05919bf3f670 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3126,8 +3126,6 @@ static int __init netlink_proto_init(void)
         .key_len = sizeof(u32), /* portid */
         .hashfn = jhash,
         .max_shift = 16, /* 64K */
-        .grow_decision = rht_grow_above_75,
-        .shrink_decision = rht_shrink_below_30,
     };
 
     if (err != 0)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f73e975af80b..b4d4467d0bb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2364,8 +2364,6 @@ int tipc_sk_rht_init(struct net *net)
         .hashfn = jhash,
         .max_shift = 20, /* 1M */
         .min_shift = 8,  /* 256 */
-        .grow_decision = rht_grow_above_75,
-        .shrink_decision = rht_shrink_below_30,
     };
 
     return rhashtable_init(&tn->sk_rht, &rht_params);
--
cgit v1.2.3
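
A note on the design choice: with the decision functions static in lib/rhashtable.c, the watermark checks on the insert/delete fast paths become direct calls the compiler can inline, rather than two loads plus an indirect call through rhashtable_params. For callers, only the size bounds remain configurable. A rough sketch of caller-side setup after this change, using an assumed example object demo_obj that is not from the patch:

#include <linux/jhash.h>
#include <linux/rhashtable.h>

struct demo_obj {
    u32              key;
    struct rhash_head node;
};

static struct rhashtable_params demo_params = {
    .nelem_hint  = 1024,
    .head_offset = offsetof(struct demo_obj, node),
    .key_offset  = offsetof(struct demo_obj, key),
    .key_len     = sizeof(u32),
    .hashfn      = jhash,
    .max_shift   = 16, /* optional cap; 0 would mean unbounded growth */
};
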
From 5beb5c90c1f54d745da040aa05634a5830ba4a4c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 26 Feb 2015 07:20:34 -0800
Subject: rhashtable: use cond_resched()

If a hash table has 128 slots and 16384 elems, expanding to 256 slots
takes more than one second. For larger sets, a soft lockup is
detected.

Holding the CPU for that long, even in a work queue, is a show
stopper for non-preemptible kernels.

Add cond_resched() at strategic points to allow the process scheduler
to reschedule us.

Signed-off-by: Eric Dumazet
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 090641db4c0d..b5344ef4c684 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/log2.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
@@ -412,6 +413,7 @@ int rhashtable_expand(struct rhashtable *ht)
             }
         }
         unlock_buckets(new_tbl, old_tbl, new_hash);
+        cond_resched();
     }
 
     /* Unzip interleaved hash chains */
@@ -435,6 +437,7 @@ int rhashtable_expand(struct rhashtable *ht)
                 complete = false;
 
             unlock_buckets(new_tbl, old_tbl, old_hash);
+            cond_resched();
         }
     }
 
@@ -493,6 +496,7 @@ int rhashtable_shrink(struct rhashtable *ht)
                    tbl->buckets[new_hash + new_tbl->size]);
 
         unlock_buckets(new_tbl, tbl, new_hash);
+        cond_resched();
     }
 
     /* Publish the new, valid hash table */
--
cgit v1.2.3
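
The pattern generalizes to any long-running loop in process context. A minimal, self-contained sketch (the demo_* names are assumptions, not from the patch): do one bucket's worth of work under the lock, drop the lock, then give the scheduler a chance to preempt before the next iteration.

#include <linux/sched.h>
#include <linux/spinlock.h>

static void demo_process_table(spinlock_t *locks, size_t nbuckets)
{
    size_t i;

    for (i = 0; i < nbuckets; i++) {
        spin_lock(&locks[i]);
        /* ... relink/rehash the entries of bucket i ... */
        spin_unlock(&locks[i]);
        cond_resched(); /* must not be called with a spinlock held */
    }
}
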