summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Kconfig.debug73
-rw-r--r--lib/Makefile10
-rw-r--r--lib/debugobjects.c58
-rw-r--r--lib/halfmd4.c67
-rw-r--r--lib/ioremap.c1
-rw-r--r--lib/iov_iter.c54
-rw-r--r--lib/parman.c376
-rw-r--r--lib/radix-tree.c11
-rw-r--r--lib/rhashtable.c270
-rw-r--r--lib/sbitmap.c139
-rw-r--r--lib/siphash.c551
-rw-r--r--lib/swiotlb.c58
-rw-r--r--lib/test_firmware.c92
-rw-r--r--lib/test_parman.c395
-rw-r--r--lib/test_siphash.c223
-rw-r--r--lib/test_user_copy.c117
-rw-r--r--lib/timerqueue.c3
18 files changed, 2261 insertions, 240 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 260a80e313b9..5d644f180fe5 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -550,4 +550,7 @@ config STACKDEPOT
config SBITMAP
bool
+config PARMAN
+ tristate "parman"
+
endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b06848a104e6..66fb4389f05c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -164,7 +164,7 @@ config DEBUG_INFO_REDUCED
config DEBUG_INFO_SPLIT
bool "Produce split debuginfo in .dwo files"
- depends on DEBUG_INFO
+ depends on DEBUG_INFO && !FRV
help
Generate debug info into separate .dwo files. This significantly
reduces the build directory size for builds with DEBUG_INFO,
@@ -416,6 +416,16 @@ config MAGIC_SYSRQ_DEFAULT_ENABLE
This may be set to 1 or 0 to enable or disable them all, or
to a bitmask as described in Documentation/sysrq.txt.
+config MAGIC_SYSRQ_SERIAL
+ bool "Enable magic SysRq key over serial"
+ depends on MAGIC_SYSRQ
+ default y
+ help
+ Many embedded boards have a disconnected TTL level serial which can
+ generate some garbage that can lead to spurious false sysrq detects.
+ This option allows you to decide whether you want to enable the
+ magic SysRq key.
+
config DEBUG_KERNEL
bool "Kernel debugging"
help
@@ -622,9 +632,12 @@ config DEBUG_VM_PGFLAGS
If unsure, say N.
+config ARCH_HAS_DEBUG_VIRTUAL
+ bool
+
config DEBUG_VIRTUAL
bool "Debug VM translations"
- depends on DEBUG_KERNEL && X86
+ depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL
help
Enable some costly sanity checks in virtual to page code. This can
catch mistakes with virt_to_page() and friends.
@@ -716,6 +729,19 @@ source "lib/Kconfig.kmemcheck"
source "lib/Kconfig.kasan"
+config DEBUG_REFCOUNT
+ bool "Verbose refcount checks"
+ help
+ Say Y here if you want reference counters (refcount_t and kref) to
+ generate WARNs on dubious usage. Without this refcount_t will still
+ be a saturating counter and avoid Use-After-Free by turning it into
+ a resource leak Denial-Of-Service.
+
+ Use of this option will increase kernel text size but will alert the
+ admin of potential abuse.
+
+ If in doubt, say "N".
+
endmenu # "Memory Debugging"
config ARCH_HAS_KCOV
@@ -980,20 +1006,6 @@ config DEBUG_TIMEKEEPING
If unsure, say N.
-config TIMER_STATS
- bool "Collect kernel timers statistics"
- depends on DEBUG_KERNEL && PROC_FS
- help
- If you say Y here, additional code will be inserted into the
- timer routines to collect statistics about kernel timers being
- reprogrammed. The statistics can be read from /proc/timer_stats.
- The statistics collection is started by writing 1 to /proc/timer_stats,
- writing 0 stops it. This feature is useful to collect information
- about timer usage patterns in kernel and userspace. This feature
- is lightweight if enabled in the kernel config but not activated
- (it defaults to deactivated on bootup and will only be activated
- if some application like powertop activates it explicitly).
-
config DEBUG_PREEMPT
bool "Debug preemptible kernel"
depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
@@ -1180,6 +1192,18 @@ config LOCK_TORTURE_TEST
Say M if you want these torture tests to build as a module.
Say N if you are unsure.
+config WW_MUTEX_SELFTEST
+ tristate "Wait/wound mutex selftests"
+ help
+ This option provides a kernel module that runs tests on the
+ on the struct ww_mutex locking API.
+
+ It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction
+ with this test harness.
+
+ Say M if you want these self tests to build as a module.
+ Say N if you are unsure.
+
endmenu # lock debugging
config TRACE_IRQFLAGS
@@ -1450,6 +1474,7 @@ config RCU_CPU_STALL_TIMEOUT
config RCU_TRACE
bool "Enable tracing for RCU"
depends on DEBUG_KERNEL
+ default y if TREE_RCU
select TRACE_CLOCK
help
This option provides tracing in RCU which presents stats
@@ -1819,13 +1844,23 @@ config TEST_HASH
tristate "Perform selftest on hash functions"
default n
help
- Enable this option to test the kernel's integer (<linux/hash,h>)
- and string (<linux/stringhash.h>) hash functions on boot
- (or module load).
+ Enable this option to test the kernel's integer (<linux/hash.h>),
+ string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
+ hash functions on boot (or module load).
This is intended to help people writing architecture-specific
optimized versions. If unsure, say N.
+config TEST_PARMAN
+ tristate "Perform selftest on priority array manager"
+ default n
+ depends on PARMAN
+ help
+ Enable this option to test priority array manager on boot
+ (or module load).
+
+ If unsure, say N.
+
endmenu # runtime tests
config PROVIDE_OHCI1394_DMA_INIT
diff --git a/lib/Makefile b/lib/Makefile
index bc4073a8cd08..6b768b58a38d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
+ earlycpio.o seq_buf.o siphash.o \
+ nmi_backtrace.o nodemask.o win_minmax.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
@@ -31,7 +32,7 @@ lib-$(CONFIG_HAS_DMA) += dma-noop.o
lib-y += kobject.o klist.o
obj-y += lockref.o
-obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
@@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LKM) += test_module.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_PARMAN) += test_parman.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
@@ -229,3 +231,5 @@ obj-$(CONFIG_UBSAN) += ubsan.o
UBSAN_SANITIZE_ubsan.o := n
obj-$(CONFIG_SBITMAP) += sbitmap.o
+
+obj-$(CONFIG_PARMAN) += parman.o
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 04c1ef717fe0..8c28cbd7e104 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -52,9 +52,18 @@ static int debug_objects_fixups __read_mostly;
static int debug_objects_warnings __read_mostly;
static int debug_objects_enabled __read_mostly
= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-
+static int debug_objects_pool_size __read_mostly
+ = ODEBUG_POOL_SIZE;
+static int debug_objects_pool_min_level __read_mostly
+ = ODEBUG_POOL_MIN_LEVEL;
static struct debug_obj_descr *descr_test __read_mostly;
+/*
+ * Track numbers of kmem_cache_alloc()/free() calls done.
+ */
+static int debug_objects_allocated;
+static int debug_objects_freed;
+
static void free_obj_work(struct work_struct *work);
static DECLARE_WORK(debug_obj_work, free_obj_work);
@@ -88,13 +97,13 @@ static void fill_pool(void)
struct debug_obj *new;
unsigned long flags;
- if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL))
+ if (likely(obj_pool_free >= debug_objects_pool_min_level))
return;
if (unlikely(!obj_cache))
return;
- while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) {
+ while (obj_pool_free < debug_objects_pool_min_level) {
new = kmem_cache_zalloc(obj_cache, gfp);
if (!new)
@@ -102,6 +111,7 @@ static void fill_pool(void)
raw_spin_lock_irqsave(&pool_lock, flags);
hlist_add_head(&new->node, &obj_pool);
+ debug_objects_allocated++;
obj_pool_free++;
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
@@ -162,24 +172,39 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
/*
* workqueue function to free objects.
+ *
+ * To reduce contention on the global pool_lock, the actual freeing of
+ * debug objects will be delayed if the pool_lock is busy. We also free
+ * the objects in a batch of 4 for each lock/unlock cycle.
*/
+#define ODEBUG_FREE_BATCH 4
+
static void free_obj_work(struct work_struct *work)
{
- struct debug_obj *obj;
+ struct debug_obj *objs[ODEBUG_FREE_BATCH];
unsigned long flags;
+ int i;
- raw_spin_lock_irqsave(&pool_lock, flags);
- while (obj_pool_free > ODEBUG_POOL_SIZE) {
- obj = hlist_entry(obj_pool.first, typeof(*obj), node);
- hlist_del(&obj->node);
- obj_pool_free--;
+ if (!raw_spin_trylock_irqsave(&pool_lock, flags))
+ return;
+ while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
+ for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
+ objs[i] = hlist_entry(obj_pool.first,
+ typeof(*objs[0]), node);
+ hlist_del(&objs[i]->node);
+ }
+
+ obj_pool_free -= ODEBUG_FREE_BATCH;
+ debug_objects_freed += ODEBUG_FREE_BATCH;
/*
* We release pool_lock across kmem_cache_free() to
* avoid contention on pool_lock.
*/
raw_spin_unlock_irqrestore(&pool_lock, flags);
- kmem_cache_free(obj_cache, obj);
- raw_spin_lock_irqsave(&pool_lock, flags);
+ for (i = 0; i < ODEBUG_FREE_BATCH; i++)
+ kmem_cache_free(obj_cache, objs[i]);
+ if (!raw_spin_trylock_irqsave(&pool_lock, flags))
+ return;
}
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
@@ -198,7 +223,7 @@ static void free_object(struct debug_obj *obj)
* schedule work when the pool is filled and the cache is
* initialized:
*/
- if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
+ if (obj_pool_free > debug_objects_pool_size && obj_cache)
sched = 1;
hlist_add_head(&obj->node, &obj_pool);
obj_pool_free++;
@@ -758,6 +783,8 @@ static int debug_stats_show(struct seq_file *m, void *v)
seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
seq_printf(m, "pool_used :%d\n", obj_pool_used);
seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+ seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
+ seq_printf(m, "objs_freed :%d\n", debug_objects_freed);
return 0;
}
@@ -1116,4 +1143,11 @@ void __init debug_objects_mem_init(void)
pr_warn("out of memory.\n");
} else
debug_objects_selftest();
+
+ /*
+ * Increase the thresholds for allocating and freeing objects
+ * according to the number of possible CPUs available in the system.
+ */
+ debug_objects_pool_size += num_possible_cpus() * 32;
+ debug_objects_pool_min_level += num_possible_cpus() * 4;
}
diff --git a/lib/halfmd4.c b/lib/halfmd4.c
deleted file mode 100644
index 137e861d9690..000000000000
--- a/lib/halfmd4.c
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <linux/compiler.h>
-#include <linux/export.h>
-#include <linux/cryptohash.h>
-#include <linux/bitops.h>
-
-/* F, G and H are basic MD4 functions: selection, majority, parity */
-#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
-#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-
-/*
- * The generic round function. The application is so specific that
- * we don't bother protecting all the arguments with parens, as is generally
- * good macro practice, in favor of extra legibility.
- * Rotation is separate from addition to prevent recomputation
- */
-#define ROUND(f, a, b, c, d, x, s) \
- (a += f(b, c, d) + x, a = rol32(a, s))
-#define K1 0
-#define K2 013240474631UL
-#define K3 015666365641UL
-
-/*
- * Basic cut-down MD4 transform. Returns only 32 bits of result.
- */
-__u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
-{
- __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
-
- /* Round 1 */
- ROUND(F, a, b, c, d, in[0] + K1, 3);
- ROUND(F, d, a, b, c, in[1] + K1, 7);
- ROUND(F, c, d, a, b, in[2] + K1, 11);
- ROUND(F, b, c, d, a, in[3] + K1, 19);
- ROUND(F, a, b, c, d, in[4] + K1, 3);
- ROUND(F, d, a, b, c, in[5] + K1, 7);
- ROUND(F, c, d, a, b, in[6] + K1, 11);
- ROUND(F, b, c, d, a, in[7] + K1, 19);
-
- /* Round 2 */
- ROUND(G, a, b, c, d, in[1] + K2, 3);
- ROUND(G, d, a, b, c, in[3] + K2, 5);
- ROUND(G, c, d, a, b, in[5] + K2, 9);
- ROUND(G, b, c, d, a, in[7] + K2, 13);
- ROUND(G, a, b, c, d, in[0] + K2, 3);
- ROUND(G, d, a, b, c, in[2] + K2, 5);
- ROUND(G, c, d, a, b, in[4] + K2, 9);
- ROUND(G, b, c, d, a, in[6] + K2, 13);
-
- /* Round 3 */
- ROUND(H, a, b, c, d, in[3] + K3, 3);
- ROUND(H, d, a, b, c, in[7] + K3, 9);
- ROUND(H, c, d, a, b, in[2] + K3, 11);
- ROUND(H, b, c, d, a, in[6] + K3, 15);
- ROUND(H, a, b, c, d, in[1] + K3, 3);
- ROUND(H, d, a, b, c, in[5] + K3, 9);
- ROUND(H, c, d, a, b, in[0] + K3, 11);
- ROUND(H, b, c, d, a, in[4] + K3, 15);
-
- buf[0] += a;
- buf[1] += b;
- buf[2] += c;
- buf[3] += d;
-
- return buf[1]; /* "most hashed" word */
-}
-EXPORT_SYMBOL(half_md4_transform);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 86c8911b0e3a..a3e14ce92a56 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr,
return err;
}
-EXPORT_SYMBOL_GPL(ioremap_page_range);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 25f572303801..e68604ae3ced 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -730,43 +730,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+static inline void pipe_truncate(struct iov_iter *i)
+{
+ struct pipe_inode_info *pipe = i->pipe;
+ if (pipe->nrbufs) {
+ size_t off = i->iov_offset;
+ int idx = i->idx;
+ int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
+ if (off) {
+ pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
+ idx = next_idx(idx, pipe);
+ nrbufs++;
+ }
+ while (pipe->nrbufs > nrbufs) {
+ pipe_buf_release(pipe, &pipe->bufs[idx]);
+ idx = next_idx(idx, pipe);
+ pipe->nrbufs--;
+ }
+ }
+}
+
static void pipe_advance(struct iov_iter *i, size_t size)
{
struct pipe_inode_info *pipe = i->pipe;
- struct pipe_buffer *buf;
- int idx = i->idx;
- size_t off = i->iov_offset, orig_sz;
-
if (unlikely(i->count < size))
size = i->count;
- orig_sz = size;
-
if (size) {
+ struct pipe_buffer *buf;
+ size_t off = i->iov_offset, left = size;
+ int idx = i->idx;
if (off) /* make it relative to the beginning of buffer */
- size += off - pipe->bufs[idx].offset;
+ left += off - pipe->bufs[idx].offset;
while (1) {
buf = &pipe->bufs[idx];
- if (size <= buf->len)
+ if (left <= buf->len)
break;
- size -= buf->len;
+ left -= buf->len;
idx = next_idx(idx, pipe);
}
- buf->len = size;
i->idx = idx;
- off = i->iov_offset = buf->offset + size;
- }
- if (off)
- idx = next_idx(idx, pipe);
- if (pipe->nrbufs) {
- int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- /* [curbuf,unused) is in use. Free [idx,unused) */
- while (idx != unused) {
- pipe_buf_release(pipe, &pipe->bufs[idx]);
- idx = next_idx(idx, pipe);
- pipe->nrbufs--;
- }
+ i->iov_offset = buf->offset + left;
}
- i->count -= orig_sz;
+ i->count -= size;
+ /* ... and discard everything past that point */
+ pipe_truncate(i);
}
void iov_iter_advance(struct iov_iter *i, size_t size)
@@ -826,6 +833,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
size_t count)
{
BUG_ON(direction != ITER_PIPE);
+ WARN_ON(pipe->nrbufs == pipe->buffers);
i->type = direction;
i->pipe = pipe;
i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
diff --git a/lib/parman.c b/lib/parman.c
new file mode 100644
index 000000000000..c6e42a8db824
--- /dev/null
+++ b/lib/parman.c
@@ -0,0 +1,376 @@
+/*
+ * lib/parman.c - Manager for linear priority array areas
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/parman.h>
+
+struct parman_algo {
+ int (*item_add)(struct parman *parman, struct parman_prio *prio,
+ struct parman_item *item);
+ void (*item_remove)(struct parman *parman, struct parman_prio *prio,
+ struct parman_item *item);
+};
+
+struct parman {
+ const struct parman_ops *ops;
+ void *priv;
+ const struct parman_algo *algo;
+ unsigned long count;
+ unsigned long limit_count;
+ struct list_head prio_list;
+};
+
+static int parman_enlarge(struct parman *parman)
+{
+ unsigned long new_count = parman->limit_count +
+ parman->ops->resize_step;
+ int err;
+
+ err = parman->ops->resize(parman->priv, new_count);
+ if (err)
+ return err;
+ parman->limit_count = new_count;
+ return 0;
+}
+
+static int parman_shrink(struct parman *parman)
+{
+ unsigned long new_count = parman->limit_count -
+ parman->ops->resize_step;
+ int err;
+
+ if (new_count < parman->ops->base_count)
+ return 0;
+ err = parman->ops->resize(parman->priv, new_count);
+ if (err)
+ return err;
+ parman->limit_count = new_count;
+ return 0;
+}
+
+static bool parman_prio_used(struct parman_prio *prio)
+
+{
+ return !list_empty(&prio->item_list);
+}
+
+static struct parman_item *parman_prio_first_item(struct parman_prio *prio)
+{
+ return list_first_entry(&prio->item_list,
+ typeof(struct parman_item), list);
+}
+
+static unsigned long parman_prio_first_index(struct parman_prio *prio)
+{
+ return parman_prio_first_item(prio)->index;
+}
+
+static struct parman_item *parman_prio_last_item(struct parman_prio *prio)
+{
+ return list_last_entry(&prio->item_list,
+ typeof(struct parman_item), list);
+}
+
+static unsigned long parman_prio_last_index(struct parman_prio *prio)
+{
+ return parman_prio_last_item(prio)->index;
+}
+
+static unsigned long parman_lsort_new_index_find(struct parman *parman,
+ struct parman_prio *prio)
+{
+ list_for_each_entry_from_reverse(prio, &parman->prio_list, list) {
+ if (!parman_prio_used(prio))
+ continue;
+ return parman_prio_last_index(prio) + 1;
+ }
+ return 0;
+}
+
+static void __parman_prio_move(struct parman *parman, struct parman_prio *prio,
+ struct parman_item *item, unsigned long to_index,
+ unsigned long count)
+{
+ parman->ops->move(parman->priv, item->index, to_index, count);
+}
+
+static void parman_prio_shift_down(struct parman *parman,
+ struct parman_prio *prio)
+{
+ struct parman_item *item;
+ unsigned long to_index;
+
+ if (!parman_prio_used(prio))
+ return;
+ item = parman_prio_first_item(prio);
+ to_index = parman_prio_last_index(prio) + 1;
+ __parman_prio_move(parman, prio, item, to_index, 1);
+ list_move_tail(&item->list, &prio->item_list);
+ item->index = to_index;
+}
+
+static void parman_prio_shift_up(struct parman *parman,
+ struct parman_prio *prio)
+{
+ struct parman_item *item;
+ unsigned long to_index;
+
+ if (!parman_prio_used(prio))
+ return;
+ item = parman_prio_last_item(prio);
+ to_index = parman_prio_first_index(prio) - 1;
+ __parman_prio_move(parman, prio, item, to_index, 1);
+ list_move(&item->list, &prio->item_list);
+ item->index = to_index;
+}
+
+static void parman_prio_item_remove(struct parman *parman,
+ struct parman_prio *prio,
+ struct parman_item *item)
+{
+ struct parman_item *last_item;
+ unsigned long to_index;
+
+ last_item = parman_prio_last_item(prio);
+ if (last_item == item) {
+ list_del(&item->list);
+ return;
+ }
+ to_index = item->index;
+ __parman_prio_move(parman, prio, last_item, to_index, 1);
+ list_del(&last_item->list);
+ list_replace(&item->list, &last_item->list);
+ last_item->index = to_index;
+}
+
+static int parman_lsort_item_add(struct parman *parman,
+ struct parman_prio *prio,
+ struct parman_item *item)
+{
+ struct parman_prio *prio2;
+ unsigned long new_index;
+ int err;
+
+ if (parman->count + 1 > parman->limit_count) {
+ err = parman_enlarge(parman);
+ if (err)
+ return err;
+ }
+
+ new_index = parman_lsort_new_index_find(parman, prio);
+ list_for_each_entry_reverse(prio2, &parman->prio_list, list) {
+ if (prio2 == prio)
+ break;
+ parman_prio_shift_down(parman, prio2);
+ }
+ item->index = new_index;
+ list_add_tail(&item->list, &prio->item_list);
+ parman->count++;
+ return 0;
+}
+
+static void parman_lsort_item_remove(struct parman *parman,
+ struct parman_prio *prio,
+ struct parman_item *item)
+{
+ parman_prio_item_remove(parman, prio, item);
+ list_for_each_entry_continue(prio, &parman->prio_list, list)
+ parman_prio_shift_up(parman, prio);
+ parman->count--;
+ if (parman->limit_count - parman->count >= parman->ops->resize_step)
+ parman_shrink(parman);
+}
+
+static const struct parman_algo parman_lsort = {
+ .item_add = parman_lsort_item_add,
+ .item_remove = parman_lsort_item_remove,
+};
+
+static const struct parman_algo *parman_algos[] = {
+ &parman_lsort,
+};
+
+/**
+ * parman_create - creates a new parman instance
+ * @ops: caller-specific callbacks
+ * @priv: pointer to a private data passed to the ops
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * Each parman instance manages an array area with chunks of entries
+ * with the same priority. Consider following example:
+ *
+ * item 1 with prio 10
+ * item 2 with prio 10
+ * item 3 with prio 10
+ * item 4 with prio 20
+ * item 5 with prio 20
+ * item 6 with prio 30
+ * item 7 with prio 30
+ * item 8 with prio 30
+ *
+ * In this example, there are 3 priority chunks. The order of the priorities
+ * matters, however the order of items within a single priority chunk does not
+ * matter. So the same array could be ordered as follows:
+ *
+ * item 2 with prio 10
+ * item 3 with prio 10
+ * item 1 with prio 10
+ * item 5 with prio 20
+ * item 4 with prio 20
+ * item 7 with prio 30
+ * item 8 with prio 30
+ * item 6 with prio 30
+ *
+ * The goal of parman is to maintain the priority ordering. The caller
+ * provides @ops with callbacks parman uses to move the items
+ * and resize the array area.
+ *
+ * Returns a pointer to newly created parman instance in case of success,
+ * otherwise it returns NULL.
+ */
+struct parman *parman_create(const struct parman_ops *ops, void *priv)
+{
+ struct parman *parman;
+
+ parman = kzalloc(sizeof(*parman), GFP_KERNEL);
+ if (!parman)
+ return NULL;
+ INIT_LIST_HEAD(&parman->prio_list);
+ parman->ops = ops;
+ parman->priv = priv;
+ parman->limit_count = ops->base_count;
+ parman->algo = parman_algos[ops->algo];
+ return parman;
+}
+EXPORT_SYMBOL(parman_create);
+
+/**
+ * parman_destroy - destroys existing parman instance
+ * @parman: parman instance
+ *
+ * Note: all locking must be provided by the caller.
+ */
+void parman_destroy(struct parman *parman)
+{
+ WARN_ON(!list_empty(&parman->prio_list));
+ kfree(parman);
+}
+EXPORT_SYMBOL(parman_destroy);
+
+/**
+ * parman_prio_init - initializes a parman priority chunk
+ * @parman: parman instance
+ * @prio: parman prio structure to be initialized
+ * @prority: desired priority of the chunk
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * Before caller could add an item with certain priority, he has to
+ * initialize a priority chunk for it using this function.
+ */
+void parman_prio_init(struct parman *parman, struct parman_prio *prio,
+ unsigned long priority)
+{
+ struct parman_prio *prio2;
+ struct list_head *pos;
+
+ INIT_LIST_HEAD(&prio->item_list);
+ prio->priority = priority;
+
+ /* Position inside the list according to priority */
+ list_for_each(pos, &parman->prio_list) {
+ prio2 = list_entry(pos, typeof(*prio2), list);
+ if (prio2->priority > prio->priority)
+ break;
+ }
+ list_add_tail(&prio->list, pos);
+}
+EXPORT_SYMBOL(parman_prio_init);
+
+/**
+ * parman_prio_fini - finalizes use of parman priority chunk
+ * @prio: parman prio structure
+ *
+ * Note: all locking must be provided by the caller.
+ */
+void parman_prio_fini(struct parman_prio *prio)
+{
+ WARN_ON(parman_prio_used(prio));
+ list_del(&prio->list);
+}
+EXPORT_SYMBOL(parman_prio_fini);
+
+/**
+ * parman_item_add - adds a parman item under defined priority
+ * @parman: parman instance
+ * @prio: parman prio instance to add the item to
+ * @item: parman item instance
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * Adds item to a array managed by parman instance under the specified priority.
+ *
+ * Returns 0 in case of success, negative number to indicate an error.
+ */
+int parman_item_add(struct parman *parman, struct parman_prio *prio,
+ struct parman_item *item)
+{
+ return parman->algo->item_add(parman, prio, item);
+}
+EXPORT_SYMBOL(parman_item_add);
+
+/**
+ * parman_item_del - deletes parman item
+ * @parman: parman instance
+ * @prio: parman prio instance to delete the item from
+ * @item: parman item instance
+ *
+ * Note: all locking must be provided by the caller.
+ */
+void parman_item_remove(struct parman *parman, struct parman_prio *prio,
+ struct parman_item *item)
+{
+ parman->algo->item_remove(parman, prio, item);
+}
+EXPORT_SYMBOL(parman_item_remove);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Priority-based array manager");
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 6f382e07de77..84812a9fb16f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -640,6 +640,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root,
update_node(node, private);
}
+ WARN_ON_ONCE(!list_empty(&node->private_list));
radix_tree_node_free(node);
}
}
@@ -666,6 +667,7 @@ static void delete_node(struct radix_tree_root *root,
root->rnode = NULL;
}
+ WARN_ON_ONCE(!list_empty(&node->private_list));
radix_tree_node_free(node);
node = parent;
@@ -767,6 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
struct radix_tree_node *old = child;
offset = child->offset + 1;
child = child->parent;
+ WARN_ON_ONCE(!list_empty(&old->private_list));
radix_tree_node_free(old);
if (old == entry_to_node(node))
return;
@@ -1824,15 +1827,19 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
* __radix_tree_delete_node - try to free node after clearing a slot
* @root: radix tree root
* @node: node containing @index
+ * @update_node: callback for changing leaf nodes
+ * @private: private data to pass to @update_node
*
* After clearing the slot at @index in @node from radix tree
* rooted at @root, call this function to attempt freeing the
* node and shrinking the tree.
*/
void __radix_tree_delete_node(struct radix_tree_root *root,
- struct radix_tree_node *node)
+ struct radix_tree_node *node,
+ radix_tree_update_node_t update_node,
+ void *private)
{
- delete_node(root, node, NULL, NULL);
+ delete_node(root, node, update_node, private);
}
/**
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 32d0ad058380..172454e6b979 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -32,6 +32,11 @@
#define HASH_MIN_SIZE 4U
#define BUCKET_LOCKS_PER_CPU 32UL
+union nested_table {
+ union nested_table __rcu *table;
+ struct rhash_head __rcu *bucket;
+};
+
static u32 head_hashfn(struct rhashtable *ht,
const struct bucket_table *tbl,
const struct rhash_head *he)
@@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
/* Never allocate more than 0.5 locks per bucket */
size = min_t(unsigned int, size, tbl->size >> 1);
+ if (tbl->nest)
+ size = min(size, 1U << tbl->nest);
+
if (sizeof(spinlock_t) != 0) {
tbl->locks = NULL;
#ifdef CONFIG_NUMA
@@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
return 0;
}
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ const unsigned int len = 1 << shift;
+ unsigned int i;
+
+ ntbl = rcu_dereference_raw(ntbl->table);
+ if (!ntbl)
+ return;
+
+ if (size > len) {
+ size >>= shift;
+ for (i = 0; i < len; i++)
+ nested_table_free(ntbl + i, size);
+ }
+
+ kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+ unsigned int size = tbl->size >> tbl->nest;
+ unsigned int len = 1 << tbl->nest;
+ union nested_table *ntbl;
+ unsigned int i;
+
+ ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+
+ for (i = 0; i < len; i++)
+ nested_table_free(ntbl + i, size);
+
+ kfree(ntbl);
+}
+
static void bucket_table_free(const struct bucket_table *tbl)
{
+ if (tbl->nest)
+ nested_bucket_table_free(tbl);
+
if (tbl)
kvfree(tbl->locks);
@@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head)
bucket_table_free(container_of(head, struct bucket_table, rcu));
}
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+ union nested_table __rcu **prev,
+ unsigned int shifted,
+ unsigned int nhash)
+{
+ union nested_table *ntbl;
+ int i;
+
+ ntbl = rcu_dereference(*prev);
+ if (ntbl)
+ return ntbl;
+
+ ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+ if (ntbl && shifted) {
+ for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
+ INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
+ (i << shifted) | nhash);
+ }
+
+ rcu_assign_pointer(*prev, ntbl);
+
+ return ntbl;
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+ size_t nbuckets,
+ gfp_t gfp)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ struct bucket_table *tbl;
+ size_t size;
+
+ if (nbuckets < (1 << (shift + 1)))
+ return NULL;
+
+ size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+ tbl = kzalloc(size, gfp);
+ if (!tbl)
+ return NULL;
+
+ if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+ 0, 0)) {
+ kfree(tbl);
+ return NULL;
+ }
+
+ tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+ return tbl;
+}
+
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
size_t nbuckets,
gfp_t gfp)
@@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
if (tbl == NULL && gfp == GFP_KERNEL)
tbl = vzalloc(size);
+
+ size = nbuckets;
+
+ if (tbl == NULL && gfp != GFP_KERNEL) {
+ tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+ nbuckets = 0;
+ }
if (tbl == NULL)
return NULL;
- tbl->size = nbuckets;
+ tbl->size = size;
if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
bucket_table_free(tbl);
@@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
struct bucket_table *new_tbl = rhashtable_last_table(ht,
rht_dereference_rcu(old_tbl->future_tbl, ht));
- struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
- int err = -ENOENT;
+ struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
+ int err = -EAGAIN;
struct rhash_head *head, *next, *entry;
spinlock_t *new_bucket_lock;
unsigned int new_hash;
+ if (new_tbl->nest)
+ goto out;
+
+ err = -ENOENT;
+
rht_for_each(entry, old_tbl, old_hash) {
err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -202,19 +312,26 @@ out:
return err;
}
-static void rhashtable_rehash_chain(struct rhashtable *ht,
+static int rhashtable_rehash_chain(struct rhashtable *ht,
unsigned int old_hash)
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
spinlock_t *old_bucket_lock;
+ int err;
old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
spin_lock_bh(old_bucket_lock);
- while (!rhashtable_rehash_one(ht, old_hash))
+ while (!(err = rhashtable_rehash_one(ht, old_hash)))
;
- old_tbl->rehash++;
+
+ if (err == -ENOENT) {
+ old_tbl->rehash++;
+ err = 0;
+ }
spin_unlock_bh(old_bucket_lock);
+
+ return err;
}
static int rhashtable_rehash_attach(struct rhashtable *ht,
@@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
struct bucket_table *new_tbl;
struct rhashtable_walker *walker;
unsigned int old_hash;
+ int err;
new_tbl = rht_dereference(old_tbl->future_tbl, ht);
if (!new_tbl)
return 0;
- for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
- rhashtable_rehash_chain(ht, old_hash);
+ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+ err = rhashtable_rehash_chain(ht, old_hash);
+ if (err)
+ return err;
+ }
/* Publish the new table pointer. */
rcu_assign_pointer(ht->tbl, new_tbl);
@@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
-/**
- * rhashtable_expand - Expand hash table while allowing concurrent lookups
- * @ht: the hash table to expand
- *
- * A secondary bucket array is allocated and the hash entries are migrated.
- *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
-static int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+ struct bucket_table *old_tbl,
+ unsigned int size)
{
- struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *new_tbl;
int err;
ASSERT_RHT_MUTEX(ht);
- old_tbl = rhashtable_last_table(ht, old_tbl);
-
- new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
+ new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
if (new_tbl == NULL)
return -ENOMEM;
@@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht)
*/
static int rhashtable_shrink(struct rhashtable *ht)
{
- struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
unsigned int nelems = atomic_read(&ht->nelems);
unsigned int size = 0;
- int err;
-
- ASSERT_RHT_MUTEX(ht);
if (nelems)
size = roundup_pow_of_two(nelems * 3 / 2);
@@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
if (rht_dereference(old_tbl->future_tbl, ht))
return -EEXIST;
- new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
- if (new_tbl == NULL)
- return -ENOMEM;
-
- err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
- if (err)
- bucket_table_free(new_tbl);
-
- return err;
+ return rhashtable_rehash_alloc(ht, old_tbl, size);
}
static void rht_deferred_worker(struct work_struct *work)
@@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work)
tbl = rhashtable_last_table(ht, tbl);
if (rht_grow_above_75(ht, tbl))
- rhashtable_expand(ht);
+ err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
- rhashtable_shrink(ht);
+ err = rhashtable_shrink(ht);
+ else if (tbl->nest)
+ err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
- err = rhashtable_rehash_table(ht);
+ if (!err)
+ err = rhashtable_rehash_table(ht);
mutex_unlock(&ht->mutex);
@@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
int elasticity;
elasticity = ht->elasticity;
- pprev = &tbl->buckets[hash];
- rht_for_each(head, tbl, hash) {
+ pprev = rht_bucket_var(tbl, hash);
+ rht_for_each_continue(head, *pprev, tbl, hash) {
struct rhlist_head *list;
struct rhlist_head *plist;
@@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
struct rhash_head *obj,
void *data)
{
+ struct rhash_head __rcu **pprev;
struct bucket_table *new_tbl;
struct rhash_head *head;
@@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN);
- head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+ pprev = rht_bucket_insert(ht, tbl, hash);
+ if (!pprev)
+ return ERR_PTR(-ENOMEM);
+
+ head = rht_dereference_bucket(*pprev, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
RCU_INIT_POINTER(list->next, NULL);
}
- rcu_assign_pointer(tbl->buckets[hash], obj);
+ rcu_assign_pointer(*pprev, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
void (*free_fn)(void *ptr, void *arg),
void *arg)
{
- const struct bucket_table *tbl;
+ struct bucket_table *tbl;
unsigned int i;
cancel_work_sync(&ht->run_work);
@@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
for (i = 0; i < tbl->size; i++) {
struct rhash_head *pos, *next;
- for (pos = rht_dereference(tbl->buckets[i], ht),
+ for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos);
@@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht)
return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+ unsigned int hash)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ static struct rhash_head __rcu *rhnull =
+ (struct rhash_head __rcu *)NULLS_MARKER(0);
+ unsigned int index = hash & ((1 << tbl->nest) - 1);
+ unsigned int size = tbl->size >> tbl->nest;
+ unsigned int subhash = hash;
+ union nested_table *ntbl;
+
+ ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+ ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+ subhash >>= tbl->nest;
+
+ while (ntbl && size > (1 << shift)) {
+ index = subhash & ((1 << shift) - 1);
+ ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+ size >>= shift;
+ subhash >>= shift;
+ }
+
+ if (!ntbl)
+ return &rhnull;
+
+ return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ unsigned int index = hash & ((1 << tbl->nest) - 1);
+ unsigned int size = tbl->size >> tbl->nest;
+ union nested_table *ntbl;
+ unsigned int shifted;
+ unsigned int nhash;
+
+ ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+ hash >>= tbl->nest;
+ nhash = index;
+ shifted = tbl->nest;
+ ntbl = nested_table_alloc(ht, &ntbl[index].table,
+ size <= (1 << shift) ? shifted : 0, nhash);
+
+ while (ntbl && size > (1 << shift)) {
+ index = hash & ((1 << shift) - 1);
+ size >>= shift;
+ hash >>= shift;
+ nhash |= index << shifted;
+ shifted += shift;
+ ntbl = nested_table_alloc(ht, &ntbl[index].table,
+ size <= (1 << shift) ? shifted : 0,
+ nhash);
+ }
+
+ if (!ntbl)
+ return NULL;
+
+ return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2cecf05c82fd..55e11c4b2f3b 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -17,6 +17,7 @@
#include <linux/random.h>
#include <linux/sbitmap.h>
+#include <linux/seq_file.h>
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
gfp_t flags, int node)
@@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb)
}
EXPORT_SYMBOL_GPL(sbitmap_weight);
+void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+ seq_printf(m, "depth=%u\n", sb->depth);
+ seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+ seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
+ seq_printf(m, "map_nr=%u\n", sb->map_nr);
+}
+EXPORT_SYMBOL_GPL(sbitmap_show);
+
+static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
+{
+ if ((offset & 0xf) == 0) {
+ if (offset != 0)
+ seq_putc(m, '\n');
+ seq_printf(m, "%08x:", offset);
+ }
+ if ((offset & 0x1) == 0)
+ seq_putc(m, ' ');
+ seq_printf(m, "%02x", byte);
+}
+
+void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+ u8 byte = 0;
+ unsigned int byte_bits = 0;
+ unsigned int offset = 0;
+ int i;
+
+ for (i = 0; i < sb->map_nr; i++) {
+ unsigned long word = READ_ONCE(sb->map[i].word);
+ unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+
+ while (word_bits > 0) {
+ unsigned int bits = min(8 - byte_bits, word_bits);
+
+ byte |= (word & (BIT(bits) - 1)) << byte_bits;
+ byte_bits += bits;
+ if (byte_bits == 8) {
+ emit_byte(m, offset, byte);
+ byte = 0;
+ byte_bits = 0;
+ offset++;
+ }
+ word >>= bits;
+ word_bits -= bits;
+ }
+ }
+ if (byte_bits) {
+ emit_byte(m, offset, byte);
+ offset++;
+ }
+ if (offset)
+ seq_putc(m, '\n');
+}
+EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
+
static unsigned int sbq_calc_wake_batch(unsigned int depth)
{
unsigned int wake_batch;
@@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
- sbq->wake_batch = sbq_calc_wake_batch(depth);
+ unsigned int wake_batch = sbq_calc_wake_batch(depth);
+ int i;
+
+ if (sbq->wake_batch != wake_batch) {
+ WRITE_ONCE(sbq->wake_batch, wake_batch);
+ /*
+ * Pairs with the memory barrier in sbq_wake_up() to ensure that
+ * the batch size is updated before the wait counts.
+ */
+ smp_mb__before_atomic();
+ for (i = 0; i < SBQ_WAIT_QUEUES; i++)
+ atomic_set(&sbq->ws[i].wait_cnt, 1);
+ }
sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
static void sbq_wake_up(struct sbitmap_queue *sbq)
{
struct sbq_wait_state *ws;
+ unsigned int wake_batch;
int wait_cnt;
- /* Ensure that the wait list checks occur after clear_bit(). */
- smp_mb();
+ /*
+ * Pairs with the memory barrier in set_current_state() to ensure the
+ * proper ordering of clear_bit()/waitqueue_active() in the waker and
+ * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
+ * the comment on waitqueue_active(). This is __after_atomic because we
+ * just did clear_bit() in the caller.
+ */
+ smp_mb__after_atomic();
ws = sbq_wake_ptr(sbq);
if (!ws)
return;
wait_cnt = atomic_dec_return(&ws->wait_cnt);
- if (unlikely(wait_cnt < 0))
- wait_cnt = atomic_inc_return(&ws->wait_cnt);
- if (wait_cnt == 0) {
- atomic_add(sbq->wake_batch, &ws->wait_cnt);
+ if (wait_cnt <= 0) {
+ wake_batch = READ_ONCE(sbq->wake_batch);
+ /*
+ * Pairs with the memory barrier in sbitmap_queue_resize() to
+ * ensure that we see the batch size update before the wait
+ * count is reset.
+ */
+ smp_mb__before_atomic();
+ /*
+ * If there are concurrent callers to sbq_wake_up(), the last
+ * one to decrement the wait count below zero will bump it back
+ * up. If there is a concurrent resize, the count reset will
+ * either cause the cmpxchg to fail or overwrite after the
+ * cmpxchg.
+ */
+ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
sbq_index_atomic_inc(&sbq->wake_index);
wake_up(&ws->wait);
}
@@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
int i, wake_index;
/*
- * Make sure all changes prior to this are visible from other CPUs.
+ * Pairs with the memory barrier in set_current_state() like in
+ * sbq_wake_up().
*/
smp_mb();
wake_index = atomic_read(&sbq->wake_index);
@@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
}
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
+
+void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
+{
+ bool first;
+ int i;
+
+ sbitmap_show(&sbq->sb, m);
+
+ seq_puts(m, "alloc_hint={");
+ first = true;
+ for_each_possible_cpu(i) {
+ if (!first)
+ seq_puts(m, ", ");
+ first = false;
+ seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
+ }
+ seq_puts(m, "}\n");
+
+ seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
+ seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+
+ seq_puts(m, "ws={\n");
+ for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
+ struct sbq_wait_state *ws = &sbq->ws[i];
+
+ seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
+ atomic_read(&ws->wait_cnt),
+ waitqueue_active(&ws->wait) ? "active" : "inactive");
+ }
+ seq_puts(m, "}\n");
+
+ seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_show);
diff --git a/lib/siphash.c b/lib/siphash.c
new file mode 100644
index 000000000000..3ae58b4edad6
--- /dev/null
+++ b/lib/siphash.c
@@ -0,0 +1,551 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#include <linux/siphash.h>
+#include <asm/unaligned.h>
+
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+#include <linux/dcache.h>
+#include <asm/word-at-a-time.h>
+#endif
+
+#define SIPROUND \
+ do { \
+ v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+ v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+ v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+ v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+ } while (0)
+
+#define PREAMBLE(len) \
+ u64 v0 = 0x736f6d6570736575ULL; \
+ u64 v1 = 0x646f72616e646f6dULL; \
+ u64 v2 = 0x6c7967656e657261ULL; \
+ u64 v3 = 0x7465646279746573ULL; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ PREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = le64_to_cpup(data);
+ v3 ^= m;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48;
+ case 6: b |= ((u64)end[5]) << 40;
+ case 5: b |= ((u64)end[4]) << 32;
+ case 4: b |= le32_to_cpup(data); break;
+ case 3: b |= ((u64)end[2]) << 16;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+#endif
+ POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ PREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = get_unaligned_le64(data);
+ v3 ^= m;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48;
+ case 6: b |= ((u64)end[5]) << 40;
+ case 5: b |= ((u64)end[4]) << 32;
+ case 4: b |= get_unaligned_le32(end); break;
+ case 3: b |= ((u64)end[2]) << 16;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+#endif
+ POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_unaligned);
+#endif
+
+/**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+ * @first: first u64
+ * @key: the siphash key
+ */
+u64 siphash_1u64(const u64 first, const siphash_key_t *key)
+{
+ PREAMBLE(8)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u64);
+
+/**
+ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
+ * @first: first u64
+ * @second: second u64
+ * @key: the siphash key
+ */
+u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_2u64);
+
+/**
+ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @key: the siphash key
+ */
+u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
+ const siphash_key_t *key)
+{
+ PREAMBLE(24)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= third;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u64);
+
+/**
+ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @forth: forth u64
+ * @key: the siphash key
+ */
+u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
+ const u64 forth, const siphash_key_t *key)
+{
+ PREAMBLE(32)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= third;
+ v3 ^= forth;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= forth;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_4u64);
+
+u64 siphash_1u32(const u32 first, const siphash_key_t *key)
+{
+ PREAMBLE(4)
+ b |= first;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u32);
+
+u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
+ const siphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ PREAMBLE(12)
+ v3 ^= combined;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= combined;
+ b |= third;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u32);
+
+#if BITS_PER_LONG == 64
+/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
+ * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
+ */
+
+#define HSIPROUND SIPROUND
+#define HPREAMBLE(len) PREAMBLE(len)
+#define HPOSTAMBLE \
+ v3 ^= b; \
+ HSIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ HSIPROUND; \
+ HSIPROUND; \
+ HSIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = le64_to_cpup(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48;
+ case 6: b |= ((u64)end[5]) << 40;
+ case 5: b |= ((u64)end[4]) << 32;
+ case 4: b |= le32_to_cpup(data); break;
+ case 3: b |= ((u64)end[2]) << 16;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+#endif
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = get_unaligned_le64(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48;
+ case 6: b |= ((u64)end[5]) << 40;
+ case 5: b |= ((u64)end[4]) << 32;
+ case 4: b |= get_unaligned_le32(end); break;
+ case 3: b |= ((u64)end[2]) << 16;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+#endif
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+ HPREAMBLE(4)
+ b |= first;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(8)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+ const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(12)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ b |= third;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ const u32 forth, const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(16)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ combined = (u64)forth << 32 | third;
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#else
+#define HSIPROUND \
+ do { \
+ v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
+ v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
+ v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
+ v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
+ } while (0)
+
+#define HPREAMBLE(len) \
+ u32 v0 = 0; \
+ u32 v1 = 0; \
+ u32 v2 = 0x6c796765U; \
+ u32 v3 = 0x74656462U; \
+ u32 b = ((u32)(len)) << 24; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define HPOSTAMBLE \
+ v3 ^= b; \
+ HSIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ HSIPROUND; \
+ HSIPROUND; \
+ HSIPROUND; \
+ return v1 ^ v3;
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u32));
+ const u8 left = len & (sizeof(u32) - 1);
+ u32 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u32)) {
+ m = le32_to_cpup(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+ switch (left) {
+ case 3: b |= ((u32)end[2]) << 16;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u32));
+ const u8 left = len & (sizeof(u32) - 1);
+ u32 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u32)) {
+ m = get_unaligned_le32(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+ switch (left) {
+ case 3: b |= ((u32)end[2]) << 16;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+ HPREAMBLE(4)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+ HPREAMBLE(8)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+ const hsiphash_key_t *key)
+{
+ HPREAMBLE(12)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ HSIPROUND;
+ v0 ^= third;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ const u32 forth, const hsiphash_key_t *key)
+{
+ HPREAMBLE(16)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ HSIPROUND;
+ v0 ^= third;
+ v3 ^= forth;
+ HSIPROUND;
+ v0 ^= forth;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#endif
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index cb1b54ee8527..a8d74a733a38 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -53,7 +53,7 @@
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-int swiotlb_force;
+enum swiotlb_force swiotlb_force;
/*
* Used to do a quick range check in swiotlb_tbl_unmap_single and
@@ -83,6 +83,12 @@ static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;
/*
+ * Max segment that we can provide which (if pages are contingous) will
+ * not be bounced (unless SWIOTLB_FORCE is set).
+ */
+unsigned int max_segment;
+
+/*
* We need to save away the original address corresponding to a mapped entry
* for the sync operations.
*/
@@ -106,8 +112,12 @@ setup_io_tlb_npages(char *str)
}
if (*str == ',')
++str;
- if (!strcmp(str, "force"))
- swiotlb_force = 1;
+ if (!strcmp(str, "force")) {
+ swiotlb_force = SWIOTLB_FORCE;
+ } else if (!strcmp(str, "noforce")) {
+ swiotlb_force = SWIOTLB_NO_FORCE;
+ io_tlb_nslabs = 1;
+ }
return 0;
}
@@ -120,6 +130,20 @@ unsigned long swiotlb_nr_tbl(void)
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
+unsigned int swiotlb_max_segment(void)
+{
+ return max_segment;
+}
+EXPORT_SYMBOL_GPL(swiotlb_max_segment);
+
+void swiotlb_set_max_segment(unsigned int val)
+{
+ if (swiotlb_force == SWIOTLB_FORCE)
+ max_segment = 1;
+ else
+ max_segment = rounddown(val, PAGE_SIZE);
+}
+
/* default to 64MB */
#define IO_TLB_DEFAULT_SIZE (64UL<<20)
unsigned long swiotlb_size_or_default(void)
@@ -201,6 +225,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
if (verbose)
swiotlb_print_info();
+ swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
return 0;
}
@@ -279,6 +304,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
if (rc)
free_pages((unsigned long)vstart, order);
+
return rc;
}
@@ -333,6 +359,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
late_alloc = 1;
+ swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+
return 0;
cleanup4:
@@ -347,6 +375,7 @@ cleanup2:
io_tlb_end = 0;
io_tlb_start = 0;
io_tlb_nslabs = 0;
+ max_segment = 0;
return -ENOMEM;
}
@@ -375,6 +404,7 @@ void __init swiotlb_free(void)
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
io_tlb_nslabs = 0;
+ max_segment = 0;
}
int is_swiotlb_buffer(phys_addr_t paddr)
@@ -453,11 +483,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
: 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
/*
- * For mappings greater than a page, we limit the stride (and
- * hence alignment) to a page size.
+ * For mappings greater than or equal to a page, we limit the stride
+ * (and hence alignment) to a page size.
*/
nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- if (size > PAGE_SIZE)
+ if (size >= PAGE_SIZE)
stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
else
stride = 1;
@@ -543,8 +573,15 @@ static phys_addr_t
map_single(struct device *hwdev, phys_addr_t phys, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
- dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
+ dma_addr_t start_dma_addr;
+ if (swiotlb_force == SWIOTLB_NO_FORCE) {
+ dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
+ &phys);
+ return SWIOTLB_MAP_ERROR;
+ }
+
+ start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
dir, attrs);
}
@@ -721,6 +758,9 @@ static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
int do_panic)
{
+ if (swiotlb_force == SWIOTLB_NO_FORCE)
+ return;
+
/*
* Ran out of IOMMU space for this operation. This is very bad.
* Unfortunately the drivers cannot handle this operation properly.
@@ -763,7 +803,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
+ if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
return dev_addr;
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
@@ -904,7 +944,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
phys_addr_t paddr = sg_phys(sg);
dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
- if (swiotlb_force ||
+ if (swiotlb_force == SWIOTLB_FORCE ||
!dma_capable(hwdev, dev_addr, sg->length)) {
phys_addr_t map = map_single(hwdev, sg_phys(sg),
sg->length, dir, attrs);
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index a3e8ec3fb1c5..09371b0a9baf 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -42,12 +42,6 @@ static const struct file_operations test_fw_fops = {
.read = test_fw_misc_read,
};
-static struct miscdevice test_fw_misc_device = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "test_firmware",
- .fops = &test_fw_fops,
-};
-
static ssize_t trigger_request_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -132,39 +126,81 @@ out:
}
static DEVICE_ATTR_WO(trigger_async_request);
-static int __init test_firmware_init(void)
+static ssize_t trigger_custom_fallback_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
int rc;
+ char *name;
- rc = misc_register(&test_fw_misc_device);
+ name = kstrndup(buf, count, GFP_KERNEL);
+ if (!name)
+ return -ENOSPC;
+
+ pr_info("loading '%s' using custom fallback mechanism\n", name);
+
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
+ test_firmware = NULL;
+ rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOHOTPLUG, name,
+ dev, GFP_KERNEL, NULL,
+ trigger_async_request_cb);
if (rc) {
- pr_err("could not register misc device: %d\n", rc);
- return rc;
+ pr_info("async load of '%s' failed: %d\n", name, rc);
+ kfree(name);
+ goto out;
}
- rc = device_create_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_request);
- if (rc) {
- pr_err("could not create sysfs interface: %d\n", rc);
- goto dereg;
+ /* Free 'name' ASAP, to test for race conditions */
+ kfree(name);
+
+ wait_for_completion(&async_fw_done);
+
+ if (test_firmware) {
+ pr_info("loaded: %zu\n", test_firmware->size);
+ rc = count;
+ } else {
+ pr_err("failed to async load firmware\n");
+ rc = -ENODEV;
}
- rc = device_create_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_async_request);
+out:
+ mutex_unlock(&test_fw_mutex);
+
+ return rc;
+}
+static DEVICE_ATTR_WO(trigger_custom_fallback);
+
+#define TEST_FW_DEV_ATTR(name) &dev_attr_##name.attr
+
+static struct attribute *test_dev_attrs[] = {
+ TEST_FW_DEV_ATTR(trigger_request),
+ TEST_FW_DEV_ATTR(trigger_async_request),
+ TEST_FW_DEV_ATTR(trigger_custom_fallback),
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(test_dev);
+
+static struct miscdevice test_fw_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "test_firmware",
+ .fops = &test_fw_fops,
+ .groups = test_dev_groups,
+};
+
+static int __init test_firmware_init(void)
+{
+ int rc;
+
+ rc = misc_register(&test_fw_misc_device);
if (rc) {
- pr_err("could not create async sysfs interface: %d\n", rc);
- goto remove_file;
+ pr_err("could not register misc device: %d\n", rc);
+ return rc;
}
pr_warn("interface ready\n");
return 0;
-
-remove_file:
- device_remove_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_async_request);
-dereg:
- misc_deregister(&test_fw_misc_device);
- return rc;
}
module_init(test_firmware_init);
@@ -172,10 +208,6 @@ module_init(test_firmware_init);
static void __exit test_firmware_exit(void)
{
release_firmware(test_firmware);
- device_remove_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_async_request);
- device_remove_file(test_fw_misc_device.this_device,
- &dev_attr_trigger_request);
misc_deregister(&test_fw_misc_device);
pr_warn("removed interface\n");
}
diff --git a/lib/test_parman.c b/lib/test_parman.c
new file mode 100644
index 000000000000..fe9f3a785804
--- /dev/null
+++ b/lib/test_parman.c
@@ -0,0 +1,395 @@
+/*
+ * lib/test_parman.c - Test module for parman
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/random.h>
+#include <linux/parman.h>
+
+#define TEST_PARMAN_PRIO_SHIFT 7 /* defines number of prios for testing */
+#define TEST_PARMAN_PRIO_COUNT BIT(TEST_PARMAN_PRIO_SHIFT)
+#define TEST_PARMAN_PRIO_MASK (TEST_PARMAN_PRIO_COUNT - 1)
+
+#define TEST_PARMAN_ITEM_SHIFT 13 /* defines a total number
+ * of items for testing
+ */
+#define TEST_PARMAN_ITEM_COUNT BIT(TEST_PARMAN_ITEM_SHIFT)
+#define TEST_PARMAN_ITEM_MASK (TEST_PARMAN_ITEM_COUNT - 1)
+
+#define TEST_PARMAN_BASE_SHIFT 8
+#define TEST_PARMAN_BASE_COUNT BIT(TEST_PARMAN_BASE_SHIFT)
+#define TEST_PARMAN_RESIZE_STEP_SHIFT 7
+#define TEST_PARMAN_RESIZE_STEP_COUNT BIT(TEST_PARMAN_RESIZE_STEP_SHIFT)
+
+#define TEST_PARMAN_BULK_MAX_SHIFT (2 + TEST_PARMAN_RESIZE_STEP_SHIFT)
+#define TEST_PARMAN_BULK_MAX_COUNT BIT(TEST_PARMAN_BULK_MAX_SHIFT)
+#define TEST_PARMAN_BULK_MAX_MASK (TEST_PARMAN_BULK_MAX_COUNT - 1)
+
+#define TEST_PARMAN_RUN_BUDGET (TEST_PARMAN_ITEM_COUNT * 256)
+
+struct test_parman_prio {
+ struct parman_prio parman_prio;
+ unsigned long priority;
+};
+
+struct test_parman_item {
+ struct parman_item parman_item;
+ struct test_parman_prio *prio;
+ bool used;
+};
+
+struct test_parman {
+ struct parman *parman;
+ struct test_parman_item **prio_array;
+ unsigned long prio_array_limit;
+ struct test_parman_prio prios[TEST_PARMAN_PRIO_COUNT];
+ struct test_parman_item items[TEST_PARMAN_ITEM_COUNT];
+ struct rnd_state rnd;
+ unsigned long run_budget;
+ unsigned long bulk_budget;
+ bool bulk_noop;
+ unsigned int used_items;
+};
+
+#define ITEM_PTRS_SIZE(count) (sizeof(struct test_parman_item *) * (count))
+
+static int test_parman_resize(void *priv, unsigned long new_count)
+{
+ struct test_parman *test_parman = priv;
+ struct test_parman_item **prio_array;
+ unsigned long old_count;
+
+ prio_array = krealloc(test_parman->prio_array,
+ ITEM_PTRS_SIZE(new_count), GFP_KERNEL);
+ if (new_count == 0)
+ return 0;
+ if (!prio_array)
+ return -ENOMEM;
+ old_count = test_parman->prio_array_limit;
+ if (new_count > old_count)
+ memset(&prio_array[old_count], 0,
+ ITEM_PTRS_SIZE(new_count - old_count));
+ test_parman->prio_array = prio_array;
+ test_parman->prio_array_limit = new_count;
+ return 0;
+}
+
+static void test_parman_move(void *priv, unsigned long from_index,
+ unsigned long to_index, unsigned long count)
+{
+ struct test_parman *test_parman = priv;
+ struct test_parman_item **prio_array = test_parman->prio_array;
+
+ memmove(&prio_array[to_index], &prio_array[from_index],
+ ITEM_PTRS_SIZE(count));
+ memset(&prio_array[from_index], 0, ITEM_PTRS_SIZE(count));
+}
+
+static const struct parman_ops test_parman_lsort_ops = {
+ .base_count = TEST_PARMAN_BASE_COUNT,
+ .resize_step = TEST_PARMAN_RESIZE_STEP_COUNT,
+ .resize = test_parman_resize,
+ .move = test_parman_move,
+ .algo = PARMAN_ALGO_TYPE_LSORT,
+};
+
+static void test_parman_rnd_init(struct test_parman *test_parman)
+{
+ prandom_seed_state(&test_parman->rnd, 3141592653589793238ULL);
+}
+
+static u32 test_parman_rnd_get(struct test_parman *test_parman)
+{
+ return prandom_u32_state(&test_parman->rnd);
+}
+
+static unsigned long test_parman_priority_gen(struct test_parman *test_parman)
+{
+ unsigned long priority;
+ int i;
+
+again:
+ priority = test_parman_rnd_get(test_parman);
+ if (priority == 0)
+ goto again;
+
+ for (i = 0; i < TEST_PARMAN_PRIO_COUNT; i++) {
+ struct test_parman_prio *prio = &test_parman->prios[i];
+
+ if (prio->priority == 0)
+ break;
+ if (prio->priority == priority)
+ goto again;
+ }
+ return priority;
+}
+
+static void test_parman_prios_init(struct test_parman *test_parman)
+{
+ int i;
+
+ for (i = 0; i < TEST_PARMAN_PRIO_COUNT; i++) {
+ struct test_parman_prio *prio = &test_parman->prios[i];
+
+ /* Assign random uniqueue priority to each prio structure */
+ prio->priority = test_parman_priority_gen(test_parman);
+ parman_prio_init(test_parman->parman, &prio->parman_prio,
+ prio->priority);
+ }
+}
+
+static void test_parman_prios_fini(struct test_parman *test_parman)
+{
+ int i;
+
+ for (i = 0; i < TEST_PARMAN_PRIO_COUNT; i++) {
+ struct test_parman_prio *prio = &test_parman->prios[i];
+
+ parman_prio_fini(&prio->parman_prio);
+ }
+}
+
+static void test_parman_items_init(struct test_parman *test_parman)
+{
+ int i;
+
+ for (i = 0; i < TEST_PARMAN_ITEM_COUNT; i++) {
+ struct test_parman_item *item = &test_parman->items[i];
+ unsigned int prio_index = test_parman_rnd_get(test_parman) &
+ TEST_PARMAN_PRIO_MASK;
+
+ /* Assign random prio to each item structure */
+ item->prio = &test_parman->prios[prio_index];
+ }
+}
+
+static void test_parman_items_fini(struct test_parman *test_parman)
+{
+ int i;
+
+ for (i = 0; i < TEST_PARMAN_ITEM_COUNT; i++) {
+ struct test_parman_item *item = &test_parman->items[i];
+
+ if (!item->used)
+ continue;
+ parman_item_remove(test_parman->parman,
+ &item->prio->parman_prio,
+ &item->parman_item);
+ }
+}
+
+static struct test_parman *test_parman_create(const struct parman_ops *ops)
+{
+ struct test_parman *test_parman;
+ int err;
+
+ test_parman = kzalloc(sizeof(*test_parman), GFP_KERNEL);
+ if (!test_parman)
+ return ERR_PTR(-ENOMEM);
+ err = test_parman_resize(test_parman, TEST_PARMAN_BASE_COUNT);
+ if (err)
+ goto err_resize;
+ test_parman->parman = parman_create(ops, test_parman);
+ if (!test_parman->parman) {
+ err = -ENOMEM;
+ goto err_parman_create;
+ }
+ test_parman_rnd_init(test_parman);
+ test_parman_prios_init(test_parman);
+ test_parman_items_init(test_parman);
+ test_parman->run_budget = TEST_PARMAN_RUN_BUDGET;
+ return test_parman;
+
+err_parman_create:
+ test_parman_resize(test_parman, 0);
+err_resize:
+ kfree(test_parman);
+ return ERR_PTR(err);
+}
+
+static void test_parman_destroy(struct test_parman *test_parman)
+{
+ test_parman_items_fini(test_parman);
+ test_parman_prios_fini(test_parman);
+ parman_destroy(test_parman->parman);
+ test_parman_resize(test_parman, 0);
+ kfree(test_parman);
+}
+
+static bool test_parman_run_check_budgets(struct test_parman *test_parman)
+{
+ if (test_parman->run_budget-- == 0)
+ return false;
+ if (test_parman->bulk_budget-- != 0)
+ return true;
+
+ test_parman->bulk_budget = test_parman_rnd_get(test_parman) &
+ TEST_PARMAN_BULK_MAX_MASK;
+ test_parman->bulk_noop = test_parman_rnd_get(test_parman) & 1;
+ return true;
+}
+
+static int test_parman_run(struct test_parman *test_parman)
+{
+ unsigned int i = test_parman_rnd_get(test_parman);
+ int err;
+
+ while (test_parman_run_check_budgets(test_parman)) {
+ unsigned int item_index = i++ & TEST_PARMAN_ITEM_MASK;
+ struct test_parman_item *item = &test_parman->items[item_index];
+
+ if (test_parman->bulk_noop)
+ continue;
+
+ if (!item->used) {
+ err = parman_item_add(test_parman->parman,
+ &item->prio->parman_prio,
+ &item->parman_item);
+ if (err)
+ return err;
+ test_parman->prio_array[item->parman_item.index] = item;
+ test_parman->used_items++;
+ } else {
+ test_parman->prio_array[item->parman_item.index] = NULL;
+ parman_item_remove(test_parman->parman,
+ &item->prio->parman_prio,
+ &item->parman_item);
+ test_parman->used_items--;
+ }
+ item->used = !item->used;
+ }
+ return 0;
+}
+
+static int test_parman_check_array(struct test_parman *test_parman,
+ bool gaps_allowed)
+{
+ unsigned int last_unused_items = 0;
+ unsigned long last_priority = 0;
+ unsigned int used_items = 0;
+ int i;
+
+ if (test_parman->prio_array_limit < TEST_PARMAN_BASE_COUNT) {
+ pr_err("Array limit is lower than the base count (%lu < %lu)\n",
+ test_parman->prio_array_limit, TEST_PARMAN_BASE_COUNT);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < test_parman->prio_array_limit; i++) {
+ struct test_parman_item *item = test_parman->prio_array[i];
+
+ if (!item) {
+ last_unused_items++;
+ continue;
+ }
+ if (last_unused_items && !gaps_allowed) {
+ pr_err("Gap found in array even though they are forbidden\n");
+ return -EINVAL;
+ }
+
+ last_unused_items = 0;
+ used_items++;
+
+ if (item->prio->priority < last_priority) {
+ pr_err("Item belongs under higher priority then the last one (current: %lu, previous: %lu)\n",
+ item->prio->priority, last_priority);
+ return -EINVAL;
+ }
+ last_priority = item->prio->priority;
+
+ if (item->parman_item.index != i) {
+ pr_err("Item has different index in compare to where it actualy is (%lu != %d)\n",
+ item->parman_item.index, i);
+ return -EINVAL;
+ }
+ }
+
+ if (used_items != test_parman->used_items) {
+ pr_err("Number of used items in array does not match (%u != %u)\n",
+ used_items, test_parman->used_items);
+ return -EINVAL;
+ }
+
+ if (last_unused_items >= TEST_PARMAN_RESIZE_STEP_COUNT) {
+ pr_err("Number of unused item at the end of array is bigger than resize step (%u >= %lu)\n",
+ last_unused_items, TEST_PARMAN_RESIZE_STEP_COUNT);
+ return -EINVAL;
+ }
+
+ pr_info("Priority array check successful\n");
+
+ return 0;
+}
+
+static int test_parman_lsort(void)
+{
+ struct test_parman *test_parman;
+ int err;
+
+ test_parman = test_parman_create(&test_parman_lsort_ops);
+ if (IS_ERR(test_parman))
+ return PTR_ERR(test_parman);
+
+ err = test_parman_run(test_parman);
+ if (err)
+ goto out;
+
+ err = test_parman_check_array(test_parman, false);
+ if (err)
+ goto out;
+out:
+ test_parman_destroy(test_parman);
+ return err;
+}
+
+static int __init test_parman_init(void)
+{
+ return test_parman_lsort();
+}
+
+static void __exit test_parman_exit(void)
+{
+}
+
+module_init(test_parman_init);
+module_exit(test_parman_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Test module for parman");
diff --git a/lib/test_siphash.c b/lib/test_siphash.c
new file mode 100644
index 000000000000..a6d854d933bf
--- /dev/null
+++ b/lib/test_siphash.c
@@ -0,0 +1,223 @@
+/* Test cases for siphash.c
+ *
+ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/siphash.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+/* Test vectors taken from reference source available at:
+ * https://github.com/veorq/SipHash
+ */
+
+static const siphash_key_t test_key_siphash =
+ {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }};
+
+static const u64 test_vectors_siphash[64] = {
+ 0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL, 0x0d6c8009d9a94f5aULL,
+ 0x85676696d7fb7e2dULL, 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL,
+ 0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL, 0x93f5f5799a932462ULL,
+ 0x9e0082df0ba9e4b0ULL, 0x7a5dbbc594ddb9f3ULL, 0xf4b32f46226bada7ULL,
+ 0x751e8fbc860ee5fbULL, 0x14ea5627c0843d90ULL, 0xf723ca908e7af2eeULL,
+ 0xa129ca6149be45e5ULL, 0x3f2acc7f57c29bdbULL, 0x699ae9f52cbe4794ULL,
+ 0x4bc1b3f0968dd39cULL, 0xbb6dc91da77961bdULL, 0xbed65cf21aa2ee98ULL,
+ 0xd0f2cbb02e3b67c7ULL, 0x93536795e3a33e88ULL, 0xa80c038ccd5ccec8ULL,
+ 0xb8ad50c6f649af94ULL, 0xbce192de8a85b8eaULL, 0x17d835b85bbb15f3ULL,
+ 0x2f2e6163076bcfadULL, 0xde4daaaca71dc9a5ULL, 0xa6a2506687956571ULL,
+ 0xad87a3535c49ef28ULL, 0x32d892fad841c342ULL, 0x7127512f72f27cceULL,
+ 0xa7f32346f95978e3ULL, 0x12e0b01abb051238ULL, 0x15e034d40fa197aeULL,
+ 0x314dffbe0815a3b4ULL, 0x027990f029623981ULL, 0xcadcd4e59ef40c4dULL,
+ 0x9abfd8766a33735cULL, 0x0e3ea96b5304a7d0ULL, 0xad0c42d6fc585992ULL,
+ 0x187306c89bc215a9ULL, 0xd4a60abcf3792b95ULL, 0xf935451de4f21df2ULL,
+ 0xa9538f0419755787ULL, 0xdb9acddff56ca510ULL, 0xd06c98cd5c0975ebULL,
+ 0xe612a3cb9ecba951ULL, 0xc766e62cfcadaf96ULL, 0xee64435a9752fe72ULL,
+ 0xa192d576b245165aULL, 0x0a8787bf8ecb74b2ULL, 0x81b3e73d20b49b6fULL,
+ 0x7fa8220ba3b2eceaULL, 0x245731c13ca42499ULL, 0xb78dbfaf3a8d83bdULL,
+ 0xea1ad565322a1a0bULL, 0x60e61c23a3795013ULL, 0x6606d7e446282b93ULL,
+ 0x6ca4ecb15c5f91e1ULL, 0x9f626da15c9625f3ULL, 0xe51b38608ef25f57ULL,
+ 0x958a324ceb064572ULL
+};
+
+#if BITS_PER_LONG == 64
+static const hsiphash_key_t test_key_hsiphash =
+ {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }};
+
+static const u32 test_vectors_hsiphash[64] = {
+ 0x050fc4dcU, 0x7d57ca93U, 0x4dc7d44dU,
+ 0xe7ddf7fbU, 0x88d38328U, 0x49533b67U,
+ 0xc59f22a7U, 0x9bb11140U, 0x8d299a8eU,
+ 0x6c063de4U, 0x92ff097fU, 0xf94dc352U,
+ 0x57b4d9a2U, 0x1229ffa7U, 0xc0f95d34U,
+ 0x2a519956U, 0x7d908b66U, 0x63dbd80cU,
+ 0xb473e63eU, 0x8d297d1cU, 0xa6cce040U,
+ 0x2b45f844U, 0xa320872eU, 0xdae6c123U,
+ 0x67349c8cU, 0x705b0979U, 0xca9913a5U,
+ 0x4ade3b35U, 0xef6cd00dU, 0x4ab1e1f4U,
+ 0x43c5e663U, 0x8c21d1bcU, 0x16a7b60dU,
+ 0x7a8ff9bfU, 0x1f2a753eU, 0xbf186b91U,
+ 0xada26206U, 0xa3c33057U, 0xae3a36a1U,
+ 0x7b108392U, 0x99e41531U, 0x3f1ad944U,
+ 0xc8138825U, 0xc28949a6U, 0xfaf8876bU,
+ 0x9f042196U, 0x68b1d623U, 0x8b5114fdU,
+ 0xdf074c46U, 0x12cc86b3U, 0x0a52098fU,
+ 0x9d292f9aU, 0xa2f41f12U, 0x43a71ed0U,
+ 0x73f0bce6U, 0x70a7e980U, 0x243c6d75U,
+ 0xfdb71513U, 0xa67d8a08U, 0xb7e8f148U,
+ 0xf7a644eeU, 0x0f1837f2U, 0x4b6694e0U,
+ 0xb7bbb3a8U
+};
+#else
+static const hsiphash_key_t test_key_hsiphash =
+ {{ 0x03020100U, 0x07060504U }};
+
+static const u32 test_vectors_hsiphash[64] = {
+ 0x5814c896U, 0xe7e864caU, 0xbc4b0e30U,
+ 0x01539939U, 0x7e059ea6U, 0x88e3d89bU,
+ 0xa0080b65U, 0x9d38d9d6U, 0x577999b1U,
+ 0xc839caedU, 0xe4fa32cfU, 0x959246eeU,
+ 0x6b28096cU, 0x66dd9cd6U, 0x16658a7cU,
+ 0xd0257b04U, 0x8b31d501U, 0x2b1cd04bU,
+ 0x06712339U, 0x522aca67U, 0x911bb605U,
+ 0x90a65f0eU, 0xf826ef7bU, 0x62512debU,
+ 0x57150ad7U, 0x5d473507U, 0x1ec47442U,
+ 0xab64afd3U, 0x0a4100d0U, 0x6d2ce652U,
+ 0x2331b6a3U, 0x08d8791aU, 0xbc6dda8dU,
+ 0xe0f6c934U, 0xb0652033U, 0x9b9851ccU,
+ 0x7c46fb7fU, 0x732ba8cbU, 0xf142997aU,
+ 0xfcc9aa1bU, 0x05327eb2U, 0xe110131cU,
+ 0xf9e5e7c0U, 0xa7d708a6U, 0x11795ab1U,
+ 0x65671619U, 0x9f5fff91U, 0xd89c5267U,
+ 0x007783ebU, 0x95766243U, 0xab639262U,
+ 0x9c7e1390U, 0xc368dda6U, 0x38ddc455U,
+ 0xfa13d379U, 0x979ea4e8U, 0x53ecd77eU,
+ 0x2ee80657U, 0x33dbb66aU, 0xae3f0577U,
+ 0x88b4c4ccU, 0x3e7f480bU, 0x74c1ebf8U,
+ 0x87178304U
+};
+#endif
+
+static int __init siphash_test_init(void)
+{
+ u8 in[64] __aligned(SIPHASH_ALIGNMENT);
+ u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT);
+ u8 i;
+ int ret = 0;
+
+ for (i = 0; i < 64; ++i) {
+ in[i] = i;
+ in_unaligned[i + 1] = i;
+ if (siphash(in, i, &test_key_siphash) !=
+ test_vectors_siphash[i]) {
+ pr_info("siphash self-test aligned %u: FAIL\n", i + 1);
+ ret = -EINVAL;
+ }
+ if (siphash(in_unaligned + 1, i, &test_key_siphash) !=
+ test_vectors_siphash[i]) {
+ pr_info("siphash self-test unaligned %u: FAIL\n", i + 1);
+ ret = -EINVAL;
+ }
+ if (hsiphash(in, i, &test_key_hsiphash) !=
+ test_vectors_hsiphash[i]) {
+ pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1);
+ ret = -EINVAL;
+ }
+ if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) !=
+ test_vectors_hsiphash[i]) {
+ pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1);
+ ret = -EINVAL;
+ }
+ }
+ if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) !=
+ test_vectors_siphash[8]) {
+ pr_info("siphash self-test 1u64: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+ &test_key_siphash) != test_vectors_siphash[16]) {
+ pr_info("siphash self-test 2u64: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+ 0x1716151413121110ULL, &test_key_siphash) !=
+ test_vectors_siphash[24]) {
+ pr_info("siphash self-test 3u64: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+ 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL,
+ &test_key_siphash) != test_vectors_siphash[32]) {
+ pr_info("siphash self-test 4u64: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_1u32(0x03020100U, &test_key_siphash) !=
+ test_vectors_siphash[4]) {
+ pr_info("siphash self-test 1u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) !=
+ test_vectors_siphash[8]) {
+ pr_info("siphash self-test 2u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_3u32(0x03020100U, 0x07060504U,
+ 0x0b0a0908U, &test_key_siphash) !=
+ test_vectors_siphash[12]) {
+ pr_info("siphash self-test 3u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (siphash_4u32(0x03020100U, 0x07060504U,
+ 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) !=
+ test_vectors_siphash[16]) {
+ pr_info("siphash self-test 4u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) !=
+ test_vectors_hsiphash[4]) {
+ pr_info("hsiphash self-test 1u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) !=
+ test_vectors_hsiphash[8]) {
+ pr_info("hsiphash self-test 2u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (hsiphash_3u32(0x03020100U, 0x07060504U,
+ 0x0b0a0908U, &test_key_hsiphash) !=
+ test_vectors_hsiphash[12]) {
+ pr_info("hsiphash self-test 3u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (hsiphash_4u32(0x03020100U, 0x07060504U,
+ 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash) !=
+ test_vectors_hsiphash[16]) {
+ pr_info("hsiphash self-test 4u32: FAIL\n");
+ ret = -EINVAL;
+ }
+ if (!ret)
+ pr_info("self-tests: pass\n");
+ return ret;
+}
+
+static void __exit siphash_test_exit(void)
+{
+}
+
+module_init(siphash_test_init);
+module_exit(siphash_test_exit);
+
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c
index 0ecef3e4690e..6f335a3d4ae2 100644
--- a/lib/test_user_copy.c
+++ b/lib/test_user_copy.c
@@ -25,6 +25,23 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+/*
+ * Several 32-bit architectures support 64-bit {get,put}_user() calls.
+ * As there doesn't appear to be anything that can safely determine
+ * their capability at compile-time, we just have to opt-out certain archs.
+ */
+#if BITS_PER_LONG == 64 || (!defined(CONFIG_AVR32) && \
+ !defined(CONFIG_BLACKFIN) && \
+ !defined(CONFIG_M32R) && \
+ !defined(CONFIG_M68K) && \
+ !defined(CONFIG_MICROBLAZE) && \
+ !defined(CONFIG_MN10300) && \
+ !defined(CONFIG_NIOS2) && \
+ !defined(CONFIG_PPC32) && \
+ !defined(CONFIG_SUPERH))
+# define TEST_U64
+#endif
+
#define test(condition, msg) \
({ \
int cond = (condition); \
@@ -40,7 +57,12 @@ static int __init test_user_copy_init(void)
char __user *usermem;
char *bad_usermem;
unsigned long user_addr;
- unsigned long value = 0x5A;
+ u8 val_u8;
+ u16 val_u16;
+ u32 val_u32;
+#ifdef TEST_U64
+ u64 val_u64;
+#endif
kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
if (!kmem)
@@ -58,33 +80,100 @@ static int __init test_user_copy_init(void)
usermem = (char __user *)user_addr;
bad_usermem = (char *)user_addr;
- /* Legitimate usage: none of these should fail. */
- ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE),
- "legitimate copy_from_user failed");
+ /*
+ * Legitimate usage: none of these copies should fail.
+ */
+ memset(kmem, 0x3a, PAGE_SIZE * 2);
ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE),
"legitimate copy_to_user failed");
- ret |= test(get_user(value, (unsigned long __user *)usermem),
- "legitimate get_user failed");
- ret |= test(put_user(value, (unsigned long __user *)usermem),
- "legitimate put_user failed");
-
- /* Invalid usage: none of these should succeed. */
+ memset(kmem, 0x0, PAGE_SIZE);
+ ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE),
+ "legitimate copy_from_user failed");
+ ret |= test(memcmp(kmem, kmem + PAGE_SIZE, PAGE_SIZE),
+ "legitimate usercopy failed to copy data");
+
+#define test_legit(size, check) \
+ do { \
+ val_##size = check; \
+ ret |= test(put_user(val_##size, (size __user *)usermem), \
+ "legitimate put_user (" #size ") failed"); \
+ val_##size = 0; \
+ ret |= test(get_user(val_##size, (size __user *)usermem), \
+ "legitimate get_user (" #size ") failed"); \
+ ret |= test(val_##size != check, \
+ "legitimate get_user (" #size ") failed to do copy"); \
+ if (val_##size != check) { \
+ pr_info("0x%llx != 0x%llx\n", \
+ (unsigned long long)val_##size, \
+ (unsigned long long)check); \
+ } \
+ } while (0)
+
+ test_legit(u8, 0x5a);
+ test_legit(u16, 0x5a5b);
+ test_legit(u32, 0x5a5b5c5d);
+#ifdef TEST_U64
+ test_legit(u64, 0x5a5b5c5d6a6b6c6d);
+#endif
+#undef test_legit
+
+ /*
+ * Invalid usage: none of these copies should succeed.
+ */
+
+ /* Prepare kernel memory with check values. */
+ memset(kmem, 0x5a, PAGE_SIZE);
+ memset(kmem + PAGE_SIZE, 0, PAGE_SIZE);
+
+ /* Reject kernel-to-kernel copies through copy_from_user(). */
ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE),
PAGE_SIZE),
"illegal all-kernel copy_from_user passed");
+
+ /* Destination half of buffer should have been zeroed. */
+ ret |= test(memcmp(kmem + PAGE_SIZE, kmem, PAGE_SIZE),
+ "zeroing failure for illegal all-kernel copy_from_user");
+
+#if 0
+ /*
+ * When running with SMAP/PAN/etc, this will Oops the kernel
+ * due to the zeroing of userspace memory on failure. This needs
+ * to be tested in LKDTM instead, since this test module does not
+ * expect to explode.
+ */
ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem,
PAGE_SIZE),
"illegal reversed copy_from_user passed");
+#endif
ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE,
PAGE_SIZE),
"illegal all-kernel copy_to_user passed");
ret |= test(!copy_to_user((char __user *)kmem, bad_usermem,
PAGE_SIZE),
"illegal reversed copy_to_user passed");
- ret |= test(!get_user(value, (unsigned long __user *)kmem),
- "illegal get_user passed");
- ret |= test(!put_user(value, (unsigned long __user *)kmem),
- "illegal put_user passed");
+
+#define test_illegal(size, check) \
+ do { \
+ val_##size = (check); \
+ ret |= test(!get_user(val_##size, (size __user *)kmem), \
+ "illegal get_user (" #size ") passed"); \
+ ret |= test(val_##size != (size)0, \
+ "zeroing failure for illegal get_user (" #size ")"); \
+ if (val_##size != (size)0) { \
+ pr_info("0x%llx != 0\n", \
+ (unsigned long long)val_##size); \
+ } \
+ ret |= test(!put_user(val_##size, (size __user *)kmem), \
+ "illegal put_user (" #size ") passed"); \
+ } while (0)
+
+ test_illegal(u8, 0x5a);
+ test_illegal(u16, 0x5a5b);
+ test_illegal(u32, 0x5a5b5c5d);
+#ifdef TEST_U64
+ test_illegal(u64, 0x5a5b5c5d6a6b6c6d);
+#endif
+#undef test_illegal
vm_munmap(user_addr, PAGE_SIZE * 2);
kfree(kmem);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index adc6ee0a5126..4a720ed4fdaf 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -80,8 +80,7 @@ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
if (head->next == node) {
struct rb_node *rbn = rb_next(&node->node);
- head->next = rbn ?
- rb_entry(rbn, struct timerqueue_node, node) : NULL;
+ head->next = rb_entry_safe(rbn, struct timerqueue_node, node);
}
rb_erase(&node->node, &head->head);
RB_CLEAR_NODE(&node->node);