Diffstat (limited to 'lib')
-rw-r--r--	lib/Kconfig           |  17
-rw-r--r--	lib/Kconfig.debug     |   8
-rw-r--r--	lib/Makefile          |   1
-rw-r--r--	lib/nlattr.c          | 200
-rw-r--r--	lib/packing.c         | 213
-rw-r--r--	lib/rhashtable.c      | 210
-rw-r--r--	lib/test_rhashtable.c |   2
7 files changed, 458 insertions(+), 193 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index e86975bfca6a..f323b85ad11c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -18,6 +18,23 @@ config RAID6_PQ_BENCHMARK
 	  Benchmark all available RAID6 PQ functions on init and choose the
 	  fastest one.
 
+config PACKING
+	bool "Generic bitfield packing and unpacking"
+	default n
+	help
+	  This option provides the packing() helper function, which permits
+	  converting bitfields between a CPU-usable representation and a
+	  memory representation that can have any combination of these quirks:
+	    - Is little endian (bytes are reversed within a 32-bit group)
+	    - The least-significant 32-bit word comes first (within a 64-bit
+	      group)
+	    - The most significant bit of a byte is at its right (bit 0 of a
+	      register description is numerically 2^7).
+	  Drivers may use these helpers to match the bit indices as described
+	  in the data sheets of the peripherals they are in control of.
+
+	  When in doubt, say N.
+
 config BITREVERSE
 	tristate
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 971c6c70891e..d695ec1477f3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -219,6 +219,14 @@ config DEBUG_INFO_DWARF4
 	  But it significantly improves the success of resolving
 	  variables in gdb on optimized code.
 
+config DEBUG_INFO_BTF
+	bool "Generate BTF typeinfo"
+	depends on DEBUG_INFO
+	help
+	  Generate deduplicated BTF type information from DWARF debug info.
+	  Turning this on expects presence of pahole tool, which will convert
+	  DWARF type info into equivalent deduplicated BTF type info.
+
 config GDB_SCRIPTS
 	bool "Provide GDB scripts for kernel debugging"
 	depends on DEBUG_INFO
diff --git a/lib/Makefile b/lib/Makefile
index 07506e3891a0..83d7df2661ff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -120,6 +120,7 @@ obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_PACKING) += packing.o
 obj-$(CONFIG_RATIONAL) += rational.o
 obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
 obj-$(CONFIG_CRC16) += crc16.o
diff --git a/lib/nlattr.c b/lib/nlattr.c
index d26de6156b97..cace9b307781 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -69,7 +69,8 @@ static int validate_nla_bitfield32(const struct nlattr *nla,
 
 static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			      const struct nla_policy *policy,
-			      struct netlink_ext_ack *extack)
+			      struct netlink_ext_ack *extack,
+			      unsigned int validate)
 {
 	const struct nlattr *entry;
 	int rem;
@@ -86,8 +87,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			return -ERANGE;
 		}
 
-		ret = nla_validate(nla_data(entry), nla_len(entry),
-				   maxtype, policy, extack);
+		ret = __nla_validate(nla_data(entry), nla_len(entry),
+				     maxtype, policy, validate, extack);
 		if (ret < 0)
 			return ret;
 	}
@@ -154,13 +155,17 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 }
 
 static int validate_nla(const struct nlattr *nla, int maxtype,
-			const struct nla_policy *policy,
+			const struct nla_policy *policy, unsigned int validate,
 			struct netlink_ext_ack *extack)
 {
+	u16 strict_start_type = policy[0].strict_start_type;
 	const struct nla_policy *pt;
 	int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla);
 	int err = -ERANGE;
 
+	if (strict_start_type && type >= strict_start_type)
+		validate |= NL_VALIDATE_STRICT;
+
 	if (type <= 0 || type > maxtype)
 		return 0;
 
@@ -172,6 +177,26 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	    (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) {
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, type);
+		if (validate & NL_VALIDATE_STRICT_ATTRS) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "invalid attribute length");
+			return -EINVAL;
+		}
+	}
+
+	if (validate & NL_VALIDATE_NESTED) {
+		if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) &&
+		    !(nla->nla_type & NLA_F_NESTED)) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "NLA_F_NESTED is missing");
+			return -EINVAL;
+		}
+		if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY &&
+		    pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "NLA_F_NESTED not expected");
+			return -EINVAL;
+		}
 	}
 
 	switch (pt->type) {
@@ -244,8 +269,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		if (attrlen < NLA_HDRLEN)
 			goto out_err;
 		if (pt->validation_data) {
-			err = nla_validate(nla_data(nla), nla_len(nla), pt->len,
-					   pt->validation_data, extack);
+			err = __nla_validate(nla_data(nla), nla_len(nla), pt->len,
+					     pt->validation_data, validate,
+					     extack);
 			if (err < 0) {
 				/*
 				 * return directly to preserve the inner
@@ -268,7 +294,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 			err = nla_validate_array(nla_data(nla), nla_len(nla),
 						 pt->len, pt->validation_data,
-						 extack);
+						 extack, validate);
 			if (err < 0) {
 				/*
 				 * return directly to preserve the inner
@@ -278,10 +304,23 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 			}
 		}
 		break;
+
+	case NLA_UNSPEC:
+		if (validate & NL_VALIDATE_UNSPEC) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "Unsupported attribute");
+			return -EINVAL;
+		}
+		/* fall through */
+	case NLA_MIN_LEN:
+		if (attrlen < pt->len)
+			goto out_err;
+		break;
+
 	default:
 		if (pt->len)
 			minlen = pt->len;
-		else if (pt->type != NLA_UNSPEC)
+		else
 			minlen = nla_attr_minlen[pt->type];
 
 		if (attrlen < minlen)
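The strict_start_type field read at the top of validate_nla() is what lets a policy opt only its newer attributes into strict validation. A minimal sketch of such a policy follows; the MYDRV_* names are hypothetical and only illustrate the pattern:

	/* Attributes numbered >= MYDRV_ATTR_NEW are validated with
	 * NL_VALIDATE_STRICT even when the caller requested lenient
	 * parsing; older attributes keep their legacy behaviour.
	 */
	static const struct nla_policy mydrv_policy[MYDRV_ATTR_MAX + 1] = {
		[0]                  = { .strict_start_type = MYDRV_ATTR_NEW },
		[MYDRV_ATTR_IFINDEX] = { .type = NLA_U32 },
		[MYDRV_ATTR_NEW]     = { .type = NLA_U8 },
	};

Since validate_nla() ORs NL_VALIDATE_STRICT into the flags for any type at or above strict_start_type, new attributes are checked strictly no matter which parsing entry point the caller used.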
@@ -315,37 +354,76 @@ out_err:
 	return err;
 }
 
+static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
+				const struct nla_policy *policy,
+				unsigned int validate,
+				struct netlink_ext_ack *extack,
+				struct nlattr **tb)
+{
+	const struct nlattr *nla;
+	int rem;
+
+	if (tb)
+		memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+	nla_for_each_attr(nla, head, len, rem) {
+		u16 type = nla_type(nla);
+
+		if (type == 0 || type > maxtype) {
+			if (validate & NL_VALIDATE_MAXTYPE) {
+				NL_SET_ERR_MSG_ATTR(extack, nla,
+						    "Unknown attribute type");
+				return -EINVAL;
+			}
+			continue;
+		}
+		if (policy) {
+			int err = validate_nla(nla, maxtype, policy,
+					       validate, extack);
+
+			if (err < 0)
+				return err;
+		}
+
+		if (tb)
+			tb[type] = (struct nlattr *)nla;
+	}
+
+	if (unlikely(rem > 0)) {
+		pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n",
+				    rem, current->comm);
+		NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes");
+		if (validate & NL_VALIDATE_TRAILING)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
- * nla_validate - Validate a stream of attributes
+ * __nla_validate - Validate a stream of attributes
  * @head: head of attribute stream
  * @len: length of attribute stream
  * @maxtype: maximum attribute type to be expected
  * @policy: validation policy
+ * @validate: validation strictness
  * @extack: extended ACK report struct
  *
  * Validates all attributes in the specified attribute stream against the
- * specified policy. Attributes with a type exceeding maxtype will be
- * ignored. See documenation of struct nla_policy for more details.
+ * specified policy. Validation depends on the validate flags passed, see
+ * &enum netlink_validation for more details on that.
+ * See documentation of struct nla_policy for more details.
  *
  * Returns 0 on success or a negative error code.
  */
-int nla_validate(const struct nlattr *head, int len, int maxtype,
-		 const struct nla_policy *policy,
-		 struct netlink_ext_ack *extack)
+int __nla_validate(const struct nlattr *head, int len, int maxtype,
+		   const struct nla_policy *policy, unsigned int validate,
+		   struct netlink_ext_ack *extack)
 {
-	const struct nlattr *nla;
-	int rem;
-
-	nla_for_each_attr(nla, head, len, rem) {
-		int err = validate_nla(nla, maxtype, policy, extack);
-
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
+	return __nla_validate_parse(head, len, maxtype, policy, validate,
+				    extack, NULL);
 }
-EXPORT_SYMBOL(nla_validate);
+EXPORT_SYMBOL(__nla_validate);
 
 /**
  * nla_policy_len - Determin the max. length of a policy
@@ -377,76 +455,30 @@ nla_policy_len(const struct nla_policy *p, int n)
 EXPORT_SYMBOL(nla_policy_len);
 
 /**
- * nla_parse - Parse a stream of attributes into a tb buffer
+ * __nla_parse - Parse a stream of attributes into a tb buffer
  * @tb: destination array with maxtype+1 elements
  * @maxtype: maximum attribute type to be expected
  * @head: head of attribute stream
  * @len: length of attribute stream
  * @policy: validation policy
+ * @validate: validation strictness
+ * @extack: extended ACK pointer
  *
  * Parses a stream of attributes and stores a pointer to each attribute in
- * the tb array accessible via the attribute type. Attributes with a type
- * exceeding maxtype will be silently ignored for backwards compatibility
- * reasons. policy may be set to NULL if no validation is required.
+ * the tb array accessible via the attribute type.
+ * Validation is controlled by the @validate parameter.
  *
  * Returns 0 on success or a negative error code.
  */
-static int __nla_parse(struct nlattr **tb, int maxtype,
-		       const struct nlattr *head, int len,
-		       bool strict, const struct nla_policy *policy,
-		       struct netlink_ext_ack *extack)
-{
-	const struct nlattr *nla;
-	int rem;
-
-	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
-
-	nla_for_each_attr(nla, head, len, rem) {
-		u16 type = nla_type(nla);
-
-		if (type == 0 || type > maxtype) {
-			if (strict) {
-				NL_SET_ERR_MSG(extack, "Unknown attribute type");
-				return -EINVAL;
-			}
-			continue;
-		}
-		if (policy) {
-			int err = validate_nla(nla, maxtype, policy, extack);
-
-			if (err < 0)
-				return err;
-		}
-
-		tb[type] = (struct nlattr *)nla;
-	}
-
-	if (unlikely(rem > 0)) {
-		pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n",
-				    rem, current->comm);
-		NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes");
-		if (strict)
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
-	      int len, const struct nla_policy *policy,
-	      struct netlink_ext_ack *extack)
-{
-	return __nla_parse(tb, maxtype, head, len, false, policy, extack);
-}
-EXPORT_SYMBOL(nla_parse);
-
-int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head,
-		     int len, const struct nla_policy *policy,
-		     struct netlink_ext_ack *extack)
+int __nla_parse(struct nlattr **tb, int maxtype,
+		const struct nlattr *head, int len,
+		const struct nla_policy *policy, unsigned int validate,
+		struct netlink_ext_ack *extack)
 {
-	return __nla_parse(tb, maxtype, head, len, true, policy, extack);
+	return __nla_validate_parse(head, len, maxtype, policy, validate,
+				    extack, tb);
 }
-EXPORT_SYMBOL(nla_parse_strict);
+EXPORT_SYMBOL(__nla_parse);
 
 /**
  * nla_find - Find a specific attribute in a stream of attributes
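With parsing and validation unified in __nla_validate_parse(), strictness is chosen per call through the validate bitmask. A sketch of a fully strict caller, reusing the hypothetical MYDRV_* names from the example above (NL_VALIDATE_STRICT is the strict flag set referenced by the validate_nla() changes):

	static int mydrv_parse_attrs(const struct nlattr *head, int len,
				     struct netlink_ext_ack *extack)
	{
		struct nlattr *tb[MYDRV_ATTR_MAX + 1];
		int err;

		/* Unknown types, bad lengths and trailing bytes now all
		 * fail with -EINVAL and set an extack message.
		 */
		err = __nla_parse(tb, MYDRV_ATTR_MAX, head, len,
				  mydrv_policy, NL_VALIDATE_STRICT, extack);
		if (err)
			return err;

		if (tb[MYDRV_ATTR_IFINDEX]) {
			u32 ifindex = nla_get_u32(tb[MYDRV_ATTR_IFINDEX]);

			/* ... act on ifindex ... */
		}
		return 0;
	}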
diff --git a/lib/packing.c b/lib/packing.c
new file mode 100644
index 000000000000..50d1e9f2f5a7
--- /dev/null
+++ b/lib/packing.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018, NXP Semiconductors
+ * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
+ */
+#include <linux/packing.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+
+static int get_le_offset(int offset)
+{
+	int closest_multiple_of_4;
+
+	closest_multiple_of_4 = (offset / 4) * 4;
+	offset -= closest_multiple_of_4;
+	return closest_multiple_of_4 + (3 - offset);
+}
+
+static int get_reverse_lsw32_offset(int offset, size_t len)
+{
+	int closest_multiple_of_4;
+	int word_index;
+
+	word_index = offset / 4;
+	closest_multiple_of_4 = word_index * 4;
+	offset -= closest_multiple_of_4;
+	word_index = (len / 4) - word_index - 1;
+	return word_index * 4 + offset;
+}
+
+static u64 bit_reverse(u64 val, unsigned int width)
+{
+	u64 new_val = 0;
+	unsigned int bit;
+	unsigned int i;
+
+	for (i = 0; i < width; i++) {
+		bit = (val & (1 << i)) != 0;
+		new_val |= (bit << (width - i - 1));
+	}
+	return new_val;
+}
+
+static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit,
+				       int *box_end_bit, u8 *box_mask)
+{
+	int box_bit_width = *box_start_bit - *box_end_bit + 1;
+	int new_box_start_bit, new_box_end_bit;
+
+	*to_write >>= *box_end_bit;
+	*to_write = bit_reverse(*to_write, box_bit_width);
+	*to_write <<= *box_end_bit;
+
+	new_box_end_bit   = box_bit_width - *box_start_bit - 1;
+	new_box_start_bit = box_bit_width - *box_end_bit - 1;
+	*box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit);
+	*box_start_bit = new_box_start_bit;
+	*box_end_bit   = new_box_end_bit;
+}
+
+/**
+ * packing - Convert numbers (currently u64) between a packed and an unpacked
+ *	     format. Unpacked means laid out in memory in the CPU's native
+ *	     understanding of integers, while packed means anything else that
+ *	     requires translation.
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @uval: Pointer to an u64 holding the unpacked value.
+ * @startbit: The index (in logical notation, compensated for quirks) where
+ *	      the packed value starts within pbuf. Must be larger than, or
+ *	      equal to, endbit.
+ * @endbit: The index (in logical notation, compensated for quirks) where
+ *	    the packed value ends within pbuf. Must be smaller than, or equal
+ *	    to, startbit.
+ * @op: If PACK, then uval will be treated as const pointer and copied (packed)
+ *	into pbuf, between startbit and endbit.
+ *	If UNPACK, then pbuf will be treated as const pointer and the logical
+ *	value between startbit and endbit will be copied (unpacked) to uval.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ *	    QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming
+ *	   correct usage, return code may be discarded.
+ *	   If op is PACK, pbuf is modified.
+ *	   If op is UNPACK, uval is modified.
+ */
+int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
+	    enum packing_op op, u8 quirks)
+{
+	/* Number of bits for storing "uval"
+	 * also width of the field to access in the pbuf
+	 */
+	u64 value_width;
+	/* Logical byte indices corresponding to the
+	 * start and end of the field.
+	 */
+	int plogical_first_u8, plogical_last_u8, box;
+
+	/* startbit is expected to be larger than endbit */
+	if (startbit < endbit)
+		/* Invalid function call */
+		return -EINVAL;
+
+	value_width = startbit - endbit + 1;
+	if (value_width > 64)
+		return -ERANGE;
+
+	/* Check if "uval" fits in "value_width" bits.
+	 * If value_width is 64, the check will fail, but any
+	 * 64-bit uval will surely fit.
+	 */
+	if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width)))
+		/* Cannot store "uval" inside "value_width" bits.
+		 * Truncating "uval" is most certainly not desirable,
+		 * so simply erroring out is appropriate.
+		 */
+		return -ERANGE;
+
+	/* Initialize parameter */
+	if (op == UNPACK)
+		*uval = 0;
+
+	/* Iterate through an idealistic view of the pbuf as an u64 with
+	 * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low
+	 * logical bit significance. "box" denotes the current logical u8.
+	 */
+	plogical_first_u8 = startbit / 8;
+	plogical_last_u8  = endbit / 8;
+
+	for (box = plogical_first_u8; box >= plogical_last_u8; box--) {
+		/* Bit indices into the currently accessed 8-bit box */
+		int box_start_bit, box_end_bit, box_addr;
+		u8  box_mask;
+		/* Corresponding bits from the unpacked u64 parameter */
+		int proj_start_bit, proj_end_bit;
+		u64 proj_mask;
+
+		/* This u8 may need to be accessed in its entirety
+		 * (from bit 7 to bit 0), or not, depending on the
+		 * input arguments startbit and endbit.
+		 */
+		if (box == plogical_first_u8)
+			box_start_bit = startbit % 8;
+		else
+			box_start_bit = 7;
+		if (box == plogical_last_u8)
+			box_end_bit = endbit % 8;
+		else
+			box_end_bit = 0;
+
+		/* We have determined the box bit start and end.
+		 * Now we calculate where this (masked) u8 box would fit
+		 * in the unpacked (CPU-readable) u64 - the u8 box's
+		 * projection onto the unpacked u64. Though the
+		 * box is u8, the projection is u64 because it may fall
+		 * anywhere within the unpacked u64.
+		 */
+		proj_start_bit = ((box * 8) + box_start_bit) - endbit;
+		proj_end_bit   = ((box * 8) + box_end_bit) - endbit;
+		proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit);
+		box_mask  = GENMASK_ULL(box_start_bit, box_end_bit);
+
+		/* Determine the offset of the u8 box inside the pbuf,
+		 * adjusted for quirks. The adjusted box_addr will be used for
+		 * effective addressing inside the pbuf (so it's not
+		 * logical any longer).
+		 */
+		box_addr = pbuflen - box - 1;
+		if (quirks & QUIRK_LITTLE_ENDIAN)
+			box_addr = get_le_offset(box_addr);
+		if (quirks & QUIRK_LSW32_IS_FIRST)
+			box_addr = get_reverse_lsw32_offset(box_addr,
+							    pbuflen);
+
+		if (op == UNPACK) {
+			u64 pval;
+
+			/* Read from pbuf, write to uval */
+			pval = ((u8 *)pbuf)[box_addr] & box_mask;
+			if (quirks & QUIRK_MSB_ON_THE_RIGHT)
+				adjust_for_msb_right_quirk(&pval,
+							   &box_start_bit,
+							   &box_end_bit,
+							   &box_mask);
+
+			pval >>= box_end_bit;
+			pval <<= proj_end_bit;
+			*uval &= ~proj_mask;
+			*uval |= pval;
+		} else {
+			u64 pval;
+
+			/* Write to pbuf, read from uval */
+			pval = (*uval) & proj_mask;
+			pval >>= proj_end_bit;
+			if (quirks & QUIRK_MSB_ON_THE_RIGHT)
+				adjust_for_msb_right_quirk(&pval,
+							   &box_start_bit,
+							   &box_end_bit,
+							   &box_mask);
+
+			pval <<= box_end_bit;
+			((u8 *)pbuf)[box_addr] &= ~box_mask;
+			((u8 *)pbuf)[box_addr] |= pval;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(packing);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Generic bitfield packing and unpacking");
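To make the calling convention concrete, here is a sketch of how a driver might use the helper; the buffer size, field position and quirk mask are invented for illustration. Per the kernel-doc above, startbit and endbit are logical bit indices, with startbit >= endbit:

	u8 regbuf[8] = {};
	u64 val = 0x9AB;	/* a 12-bit value */
	int err;

	/* Write val into bits 43..32 of the 8-byte register image */
	err = packing(regbuf, &val, 43, 32, sizeof(regbuf), PACK,
		      QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);
	if (err)
		return err;

	/* Read the same field back; val becomes 0x9AB again */
	val = 0;
	err = packing(regbuf, &val, 43, 32, sizeof(regbuf), UNPACK,
		      QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST);

Passing 0 for quirks describes a plain big-endian layout; the mask above matches a little-endian, least-significant-word-first peripheral.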
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 97f59abc3e92..6529fe1b45c1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -31,11 +31,10 @@
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
-#define BUCKET_LOCKS_PER_CPU	32UL
 
 union nested_table {
 	union nested_table __rcu *table;
-	struct rhash_head __rcu *bucket;
+	struct rhash_lock_head __rcu *bucket;
 };
 
 static u32 head_hashfn(struct rhashtable *ht,
@@ -56,9 +55,11 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
 
 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
 {
-	spinlock_t *lock = rht_bucket_lock(tbl, hash);
-
-	return (debug_locks) ? lockdep_is_held(lock) : 1;
+	if (!debug_locks)
+		return 1;
+	if (unlikely(tbl->nest))
+		return 1;
+	return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
 }
 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #else
@@ -104,7 +105,6 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	if (tbl->nest)
 		nested_bucket_table_free(tbl);
 
-	free_bucket_spinlocks(tbl->locks);
 	kvfree(tbl);
 }
 
@@ -131,9 +131,11 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
 			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
 	}
 
-	rcu_assign_pointer(*prev, ntbl);
-
-	return ntbl;
+	if (cmpxchg(prev, NULL, ntbl) == NULL)
+		return ntbl;
+	/* Raced with another thread. */
+	kfree(ntbl);
+	return rcu_dereference(*prev);
 }
 
 static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
@@ -169,11 +171,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       gfp_t gfp)
 {
 	struct bucket_table *tbl = NULL;
-	size_t size, max_locks;
+	size_t size;
 	int i;
+	static struct lock_class_key __key;
 
-	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	tbl = kvzalloc(size, gfp);
+	tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
 
 	size = nbuckets;
 
@@ -185,18 +187,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	if (tbl == NULL)
 		return NULL;
 
-	tbl->size = size;
-
-	max_locks = size >> 1;
-	if (tbl->nest)
-		max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
+	lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
 
-	if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
-				   ht->p.locks_mul, gfp) < 0) {
-		bucket_table_free(tbl);
-		return NULL;
-	}
+	tbl->size = size;
 
+	rcu_head_init(&tbl->rcu);
 	INIT_LIST_HEAD(&tbl->walkers);
 
 	tbl->hash_rnd = get_random_u32();
@@ -220,14 +215,15 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 	return new_tbl;
 }
 
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht,
+				 struct rhash_lock_head __rcu **bkt,
+				 unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
-	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
 	int err = -EAGAIN;
 	struct rhash_head *head, *next, *entry;
-	spinlock_t *new_bucket_lock;
+	struct rhash_head __rcu **pprev = NULL;
 	unsigned int new_hash;
 
 	if (new_tbl->nest)
@@ -235,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 
 	err = -ENOENT;
 
-	rht_for_each(entry, old_tbl, old_hash) {
+	rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+			  old_tbl, old_hash) {
 		err = 0;
 		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
 
@@ -250,18 +247,19 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 
 	new_hash = head_hashfn(ht, new_tbl, entry);
 
-	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+	rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
 
-	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
-	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
-				      new_tbl, new_hash);
+	head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
 
 	RCU_INIT_POINTER(entry->next, head);
 
-	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
-	spin_unlock(new_bucket_lock);
+	rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
 
-	rcu_assign_pointer(*pprev, next);
+	if (pprev)
+		rcu_assign_pointer(*pprev, next);
+	else
+		/* Need to preserve the bit lock. */
+		rht_assign_locked(bkt, next);
 
 out:
 	return err;
@@ -271,20 +269,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht,
 				   unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	spinlock_t *old_bucket_lock;
+	struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
 	int err;
 
-	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
+	if (!bkt)
+		return 0;
+	rht_lock(old_tbl, bkt);
 
-	spin_lock_bh(old_bucket_lock);
-	while (!(err = rhashtable_rehash_one(ht, old_hash)))
+	while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
 		;
 
-	if (err == -ENOENT) {
-		old_tbl->rehash++;
+	if (err == -ENOENT)
 		err = 0;
-	}
-	spin_unlock_bh(old_bucket_lock);
+	rht_unlock(old_tbl, bkt);
 
 	return err;
 }
@@ -330,13 +327,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	spin_lock(&ht->lock);
 	list_for_each_entry(walker, &old_tbl->walkers, list)
 		walker->tbl = NULL;
-	spin_unlock(&ht->lock);
 
 	/* Wait for readers. All new readers will see the new
 	 * table, and thus no references to the old table will
 	 * remain.
+	 * We do this inside the locked region so that
+	 * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
+	 * to check if it should not re-link the table.
 	 */
 	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+	spin_unlock(&ht->lock);
 
 	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
@@ -478,6 +478,7 @@ fail:
 }
 
 static void *rhashtable_lookup_one(struct rhashtable *ht,
+				   struct rhash_lock_head __rcu **bkt,
 				   struct bucket_table *tbl, unsigned int hash,
 				   const void *key, struct rhash_head *obj)
 {
@@ -485,13 +486,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 		.ht = ht,
 		.key = key,
 	};
-	struct rhash_head __rcu **pprev;
+	struct rhash_head __rcu **pprev = NULL;
 	struct rhash_head *head;
 	int elasticity;
 
 	elasticity = RHT_ELASTICITY;
-	pprev = rht_bucket_var(tbl, hash);
-	rht_for_each_continue(head, *pprev, tbl, hash) {
+	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
 		struct rhlist_head *list;
 		struct rhlist_head *plist;
 
@@ -513,7 +513,11 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 		RCU_INIT_POINTER(list->next, plist);
 		head = rht_dereference_bucket(head->next, tbl, hash);
 		RCU_INIT_POINTER(list->rhead.next, head);
-		rcu_assign_pointer(*pprev, obj);
+		if (pprev)
+			rcu_assign_pointer(*pprev, obj);
+		else
+			/* Need to preserve the bit lock */
+			rht_assign_locked(bkt, obj);
 
 		return NULL;
 	}
@@ -525,12 +529,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 }
 
 static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+						  struct rhash_lock_head __rcu **bkt,
 						  struct bucket_table *tbl,
 						  unsigned int hash,
 						  struct rhash_head *obj,
 						  void *data)
 {
-	struct rhash_head __rcu **pprev;
 	struct bucket_table *new_tbl;
 	struct rhash_head *head;
 
@@ -553,11 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		return ERR_PTR(-EAGAIN);
 
-	pprev = rht_bucket_insert(ht, tbl, hash);
-	if (!pprev)
-		return ERR_PTR(-ENOMEM);
-
-	head = rht_dereference_bucket(*pprev, tbl, hash);
+	head = rht_ptr(bkt, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (ht->rhlist) {
@@ -567,7 +567,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(*pprev, obj);
+	/* bkt is always the head of the list, so it holds
+	 * the lock, which we need to preserve
+	 */
+	rht_assign_locked(bkt, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
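The rht_lock()/rht_assign_unlock() calls above are the heart of this rework: the per-bucket spinlock array is gone, and bit 0 of each bucket head pointer now serves as that bucket's lock. The primitive underneath is bit_spin_lock() from <linux/bit_spinlock.h>; the stripped-down sketch below shows only the underlying technique, not the actual rhashtable helpers, which additionally handle lockdep tracking and RCU publication:

	#include <linux/bit_spinlock.h>

	static void demo_chain_update(unsigned long *bucket_head)
	{
		/* Spin until bit 0 is clear, then set it: the chain
		 * hanging off this bucket may now be rewritten.
		 */
		bit_spin_lock(0, bucket_head);

		/* ... modify the bucket's chain ... */

		bit_spin_unlock(0, bucket_head);	/* clear bit 0 */
	}

This works because struct rhash_head pointers are at least 4-byte aligned, leaving bit 0 free, and it is why readers must go through rht_ptr() to strip the lock bit before dereferencing a bucket head.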
@@ -581,47 +584,35 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
 {
 	struct bucket_table *new_tbl;
 	struct bucket_table *tbl;
+	struct rhash_lock_head __rcu **bkt;
 	unsigned int hash;
-	spinlock_t *lock;
 	void *data;
 
-	tbl = rcu_dereference(ht->tbl);
-
-	/* All insertions must grab the oldest table containing
-	 * the hashed bucket that is yet to be rehashed.
-	 */
-	for (;;) {
-		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
-		lock = rht_bucket_lock(tbl, hash);
-		spin_lock_bh(lock);
-
-		if (tbl->rehash <= hash)
-			break;
-
-		spin_unlock_bh(lock);
-		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-	}
-
-	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
-	new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
-	if (PTR_ERR(new_tbl) != -EEXIST)
-		data = ERR_CAST(new_tbl);
+	new_tbl = rcu_dereference(ht->tbl);
 
-	while (!IS_ERR_OR_NULL(new_tbl)) {
+	do {
 		tbl = new_tbl;
 		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
-		spin_lock_nested(rht_bucket_lock(tbl, hash),
-				 SINGLE_DEPTH_NESTING);
-
-		data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
-		new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
-		if (PTR_ERR(new_tbl) != -EEXIST)
-			data = ERR_CAST(new_tbl);
-
-		spin_unlock(rht_bucket_lock(tbl, hash));
-	}
-
-	spin_unlock_bh(lock);
+		if (rcu_access_pointer(tbl->future_tbl))
+			/* Failure is OK */
+			bkt = rht_bucket_var(tbl, hash);
+		else
+			bkt = rht_bucket_insert(ht, tbl, hash);
+		if (bkt == NULL) {
+			new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+			data = ERR_PTR(-EAGAIN);
+		} else {
+			rht_lock(tbl, bkt);
+			data = rhashtable_lookup_one(ht, bkt, tbl,
+						     hash, key, obj);
+			new_tbl = rhashtable_insert_one(ht, bkt, tbl,
+							hash, obj, data);
+			if (PTR_ERR(new_tbl) != -EEXIST)
+				data = ERR_CAST(new_tbl);
+
+			rht_unlock(tbl, bkt);
+		}
+	} while (!IS_ERR_OR_NULL(new_tbl));
 
 	if (PTR_ERR(data) == -EAGAIN)
 		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
@@ -943,10 +934,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 	ht = iter->ht;
 
 	spin_lock(&ht->lock);
-	if (tbl->rehash < tbl->size)
-		list_add(&iter->walker.list, &tbl->walkers);
-	else
+	if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
+		/* This bucket table is being freed, don't re-link it. */
 		iter->walker.tbl = NULL;
+	else
+		list_add(&iter->walker.list, &tbl->walkers);
 	spin_unlock(&ht->lock);
 
 out:
@@ -1046,11 +1038,6 @@ int rhashtable_init(struct rhashtable *ht,
 
 	size = rounded_hashtable_size(&ht->p);
 
-	if (params->locks_mul)
-		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
-	else
-		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
-
 	ht->key_len = ht->p.key_len;
 	if (!params->hashfn) {
 		ht->p.hashfn = jhash;
@@ -1152,7 +1139,7 @@ restart:
 		struct rhash_head *pos, *next;
 
 		cond_resched();
-		for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
+		for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
 		     next = !rht_is_a_nulls(pos) ?
 				rht_dereference(pos->next, ht) : NULL;
 		     !rht_is_a_nulls(pos);
@@ -1179,11 +1166,10 @@ void rhashtable_destroy(struct rhashtable *ht)
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
 
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-					    unsigned int hash)
+struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
+						   unsigned int hash)
 {
 	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	static struct rhash_head __rcu *rhnull;
 	unsigned int index = hash & ((1 << tbl->nest) - 1);
 	unsigned int size = tbl->size >> tbl->nest;
 	unsigned int subhash = hash;
@@ -1201,20 +1187,28 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
 		subhash >>= shift;
 	}
 
-	if (!ntbl) {
-		if (!rhnull)
-			INIT_RHT_NULLS_HEAD(rhnull);
-		return &rhnull;
-	}
+	if (!ntbl)
+		return NULL;
 
 	return &ntbl[subhash].bucket;
 }
+EXPORT_SYMBOL_GPL(__rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+						 unsigned int hash)
+{
+	static struct rhash_lock_head __rcu *rhnull;
+
+	if (!rhnull)
+		INIT_RHT_NULLS_HEAD(rhnull);
+	return __rht_bucket_nested(tbl, hash) ?: &rhnull;
+}
 EXPORT_SYMBOL_GPL(rht_bucket_nested);
 
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-						   struct bucket_table *tbl,
-						   unsigned int hash)
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+							struct bucket_table *tbl,
+							unsigned int hash)
 {
 	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
 	unsigned int index = hash & ((1 << tbl->nest) - 1);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 3bd2e91bfc29..084fe5a6ac57 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 		struct rhash_head *pos, *next;
 		struct test_obj_rhl *p;
 
-		pos = rht_dereference(tbl->buckets[i], ht);
+		pos = rht_ptr_exclusive(tbl->buckets + i);
 		next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
 
 		if (!rht_is_a_nulls(pos)) {
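The test module follows the same rule: the bucket head is no longer a plain struct rhash_head pointer, so it must be read through rht_ptr_exclusive(), which masks off the lock bit when the caller already has exclusive access. A sketch of the resulting traversal pattern, using the same variable names as print_ht() above for illustration:

	struct rhash_head *pos;

	for (pos = rht_ptr_exclusive(tbl->buckets + i);
	     !rht_is_a_nulls(pos);
	     pos = rht_dereference(pos->next, ht)) {
		/* ... inspect the entry containing pos ... */
	}

Dereferencing the raw bucket slot, as the removed line did, would misinterpret a set lock bit as part of the pointer.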