summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig23
-rw-r--r--lib/Kconfig.debug45
-rw-r--r--lib/Kconfig.kasan10
-rw-r--r--lib/Kconfig.kcsan6
-rw-r--r--lib/Kconfig.kgdb2
-rw-r--r--lib/Kconfig.kmsan63
-rw-r--r--lib/Makefile12
-rw-r--r--lib/bitmap.c68
-rw-r--r--lib/cmdline.c2
-rw-r--r--lib/cmdline_kunit.c4
-rw-r--r--lib/cpumask.c40
-rw-r--r--lib/cpumask_kunit.c19
-rw-r--r--lib/crypto/Kconfig7
-rw-r--r--lib/crypto/Makefile3
-rw-r--r--lib/crypto/memneq.c (renamed from lib/memneq.c)7
-rw-r--r--lib/crypto/utils.c88
-rw-r--r--lib/debugobjects.c4
-rw-r--r--lib/earlycpio.c2
-rw-r--r--lib/fault-inject.c15
-rw-r--r--lib/find_bit.c233
-rw-r--r--lib/find_bit_benchmark.c22
-rw-r--r--lib/iomap.c44
-rw-r--r--lib/iov_iter.c9
-rw-r--r--lib/is_signed_type_kunit.c4
-rw-r--r--lib/kobject.c2
-rw-r--r--lib/kunit/kunit-test.c7
-rw-r--r--lib/kunit/string-stream.c100
-rw-r--r--lib/kunit/string-stream.h3
-rw-r--r--lib/kunit/test.c34
-rw-r--r--lib/llist.c12
-rw-r--r--lib/maple_tree.c7152
-rw-r--r--lib/nlattr.c41
-rw-r--r--lib/overflow_kunit.c47
-rw-r--r--lib/percpu-refcount.c3
-rw-r--r--lib/raid6/s390vx.uc3
-rw-r--r--lib/random32.c4
-rw-r--r--lib/reed_solomon/test_rslib.c12
-rw-r--r--lib/sbitmap.c4
-rw-r--r--lib/sg_pool.c16
-rw-r--r--lib/show_mem.c4
-rw-r--r--lib/slub_kunit.c57
-rw-r--r--lib/stackdepot.c29
-rw-r--r--lib/string.c8
-rw-r--r--lib/test-string_helpers.c2
-rw-r--r--lib/test_bitmap.c291
-rw-r--r--lib/test_fprobe.c2
-rw-r--r--lib/test_hexdump.c10
-rw-r--r--lib/test_hmm.c142
-rw-r--r--lib/test_hmm_uapi.h1
-rw-r--r--lib/test_kasan.c1426
-rw-r--r--lib/test_kasan_module.c141
-rw-r--r--lib/test_kprobes.c2
-rw-r--r--lib/test_list_sort.c2
-rw-r--r--lib/test_maple_tree.c2767
-rw-r--r--lib/test_meminit.c21
-rw-r--r--lib/test_min_heap.c6
-rw-r--r--lib/test_objagg.c2
-rw-r--r--lib/test_printf.c38
-rw-r--r--lib/test_rhashtable.c60
-rw-r--r--lib/test_vmalloc.c19
-rw-r--r--lib/usercopy.c3
-rw-r--r--lib/uuid.c2
-rw-r--r--lib/vdso/Makefile2
-rw-r--r--lib/vsprintf.c2
-rw-r--r--lib/zstd/Makefile16
-rw-r--r--lib/zstd/common/entropy_common.c5
-rw-r--r--lib/zstd/common/zstd_common.c10
67 files changed, 11151 insertions, 2091 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index dc1ab2ed1dc6..9eb514abcdec 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -127,9 +127,6 @@ config TRACE_MMIO_ACCESS
source "lib/crypto/Kconfig"
-config LIB_MEMNEQ
- bool
-
config CRC_CCITT
tristate "CRC-CCITT functions"
help
@@ -343,12 +340,16 @@ config LZ4HC_COMPRESS
config LZ4_DECOMPRESS
tristate
-config ZSTD_COMPRESS
+config ZSTD_COMMON
select XXHASH
tristate
+config ZSTD_COMPRESS
+ select ZSTD_COMMON
+ tristate
+
config ZSTD_DECOMPRESS
- select XXHASH
+ select ZSTD_COMMON
tristate
source "lib/xz/Kconfig"
@@ -527,6 +528,15 @@ config CPUMASK_OFFSTACK
them on the stack. This is a bit more expensive, but avoids
stack overflow.
+config FORCE_NR_CPUS
+ bool "NR_CPUS is set to an actual number of CPUs"
+ depends on SMP
+ help
+ Say Yes if you have NR_CPUS set to an actual number of possible
+ CPUs in your system, not to a default value. This forces the core
+ code to rely on compile-time value and optimize kernel routines
+ better.
+
config CPU_RMAP
bool
depends on SMP
@@ -662,6 +672,9 @@ config ARCH_HAS_PMEM_API
config MEMREGION
bool
+config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+ bool
+
config ARCH_HAS_MEMREMAP_COMPAT_ALIGN
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b620a340d7df..13a1046a7d6f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -231,6 +231,11 @@ config DEBUG_INFO
in the "Debug information" choice below, indicating that debug
information will be generated for build targets.
+# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which
+# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
+config AS_HAS_NON_CONST_LEB128
+ def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:)
+
choice
prompt "Debug information"
depends on DEBUG_KERNEL
@@ -253,6 +258,7 @@ config DEBUG_INFO_NONE
config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
bool "Rely on the toolchain's implicit default DWARF version"
select DEBUG_INFO
+ depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
help
The implicit default version of DWARF debug info produced by a
toolchain changes over time.
@@ -264,7 +270,7 @@ config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
config DEBUG_INFO_DWARF4
bool "Generate DWARF Version 4 debuginfo"
select DEBUG_INFO
- depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502)))
+ depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502)
help
Generate DWARF v4 debug info. This requires gcc 4.5+, binutils 2.35.2
if using clang without clang's integrated assembler, and gdb 7.0+.
@@ -276,7 +282,7 @@ config DEBUG_INFO_DWARF4
config DEBUG_INFO_DWARF5
bool "Generate DWARF Version 5 debuginfo"
select DEBUG_INFO
- depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502)))
+ depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
help
Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
5.0+ accepts the -gdwarf-5 flag but only had partial support for some
@@ -389,13 +395,15 @@ endif # DEBUG_INFO
config FRAME_WARN
int "Warn for stack frames larger than"
range 0 8192
+ default 0 if KMSAN
default 2048 if GCC_PLUGIN_LATENT_ENTROPY
default 2048 if PARISC
default 1536 if (!64BIT && XTENSA)
+ default 1280 if KASAN && !64BIT
default 1024 if !64BIT
default 2048 if 64BIT
help
- Tell gcc to warn at build time for stack frames larger than this.
+ Tell the compiler to warn at build time for stack frames larger than this.
Setting this too low will cause a lot of warnings.
Setting it to 0 disables the warning.
@@ -817,13 +825,12 @@ config DEBUG_VM
If unsure, say N.
-config DEBUG_VM_VMACACHE
- bool "Debug VMA caching"
+config DEBUG_VM_MAPLE_TREE
+ bool "Debug VM maple trees"
depends on DEBUG_VM
+ select DEBUG_MAPLE_TREE
help
- Enable this to turn on VMA caching debug information. Doing so
- can cause significant overhead, so only enable it in non-production
- environments.
+ Enable VM maple tree debugging information and extra validations.
If unsure, say N.
@@ -976,6 +983,7 @@ config DEBUG_STACKOVERFLOW
source "lib/Kconfig.kasan"
source "lib/Kconfig.kfence"
+source "lib/Kconfig.kmsan"
endmenu # "Memory Debugging"
@@ -1640,6 +1648,14 @@ config BUG_ON_DATA_CORRUPTION
If unsure, say N.
+config DEBUG_MAPLE_TREE
+ bool "Debug maple trees"
+ depends on DEBUG_KERNEL
+ help
+ Enable maple tree debugging information and extra validations.
+
+ If unsure, say N.
+
endmenu
config DEBUG_CREDENTIALS
@@ -1869,8 +1885,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT
If unsure, say N.
config FUNCTION_ERROR_INJECTION
- def_bool y
+ bool "Fault-injections of functions"
depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+ help
+ Add fault injections into various functions that are annotated with
+ ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return
+ value of theses functions. This is useful to test error paths of code.
+
+ If unsure, say N
config FAULT_INJECTION
bool "Fault-injection framework"
@@ -2102,6 +2124,7 @@ config KPROBES_SANITY_TEST
depends on DEBUG_KERNEL
depends on KPROBES
depends on KUNIT
+ select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
default KUNIT_ALL_TESTS
help
This option provides for testing basic kprobes functionality on
@@ -2236,6 +2259,10 @@ config TEST_UUID
config TEST_XARRAY
tristate "Test the XArray code at runtime"
+config TEST_MAPLE_TREE
+ select DEBUG_MAPLE_TREE
+ tristate "Test the Maple Tree code at runtime"
+
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
help
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index f0973da583e0..836f70393e22 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
- depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
+ depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
@@ -167,14 +167,6 @@ config KASAN_STACK
as well, as it adds inline-style instrumentation that is run
unconditionally.
-config KASAN_TAGS_IDENTIFY
- bool "Memory corruption type identification"
- depends on KASAN_SW_TAGS || KASAN_HW_TAGS
- help
- Enables best-effort identification of the bug types (use-after-free
- or out-of-bounds) at the cost of increased memory consumption.
- Only applicable for the tag-based KASAN modes.
-
config KASAN_VMALLOC
bool "Check accesses to vmalloc allocations"
depends on HAVE_ARCH_KASAN_VMALLOC
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 47a693c45864..375575a5a0e3 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -125,7 +125,7 @@ config KCSAN_SKIP_WATCH
default 4000
help
The number of per-CPU memory operations to skip, before another
- watchpoint is set up, i.e. one in KCSAN_WATCH_SKIP per-CPU
+ watchpoint is set up, i.e. one in KCSAN_SKIP_WATCH per-CPU
memory operations are used to set up a watchpoint. A smaller value
results in more aggressive race detection, whereas a larger value
improves system performance at the cost of missing some races.
@@ -135,8 +135,8 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
default y
help
If instruction skip count should be randomized, where the maximum is
- KCSAN_WATCH_SKIP. If false, the chosen value is always
- KCSAN_WATCH_SKIP.
+ KCSAN_SKIP_WATCH. If false, the chosen value is always
+ KCSAN_SKIP_WATCH.
config KCSAN_INTERRUPT_WATCHER
bool "Interruptible watchers" if !KCSAN_STRICT
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 05dae05b6cc9..3b9a44008433 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -121,7 +121,7 @@ config KDB_DEFAULT_ENABLE
config KDB_KEYBOARD
bool "KGDB_KDB: keyboard as input device"
- depends on VT && KGDB_KDB
+ depends on VT && KGDB_KDB && !PARISC
default n
help
KDB can use a PS/2 type keyboard for an input device
diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan
new file mode 100644
index 000000000000..ef2c8f256c57
--- /dev/null
+++ b/lib/Kconfig.kmsan
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HAVE_ARCH_KMSAN
+ bool
+
+config HAVE_KMSAN_COMPILER
+ # Clang versions <14.0.0 also support -fsanitize=kernel-memory, but not
+ # all the features necessary to build the kernel with KMSAN.
+ depends on CC_IS_CLANG && CLANG_VERSION >= 140000
+ def_bool $(cc-option,-fsanitize=kernel-memory -mllvm -msan-disable-checks=1)
+
+config KMSAN
+ bool "KMSAN: detector of uninitialized values use"
+ depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
+ depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN
+ depends on !PREEMPT_RT
+ select STACKDEPOT
+ select STACKDEPOT_ALWAYS_INIT
+ help
+ KernelMemorySanitizer (KMSAN) is a dynamic detector of uses of
+ uninitialized values in the kernel. It is based on compiler
+ instrumentation provided by Clang and thus requires Clang to build.
+
+ An important note is that KMSAN is not intended for production use,
+ because it drastically increases kernel memory footprint and slows
+ the whole system down.
+
+ See <file:Documentation/dev-tools/kmsan.rst> for more details.
+
+if KMSAN
+
+config HAVE_KMSAN_PARAM_RETVAL
+ # -fsanitize-memory-param-retval is supported only by Clang >= 14.
+ depends on HAVE_KMSAN_COMPILER
+ def_bool $(cc-option,-fsanitize=kernel-memory -fsanitize-memory-param-retval)
+
+config KMSAN_CHECK_PARAM_RETVAL
+ bool "Check for uninitialized values passed to and returned from functions"
+ default y
+ depends on HAVE_KMSAN_PARAM_RETVAL
+ help
+ If the compiler supports -fsanitize-memory-param-retval, KMSAN will
+ eagerly check every function parameter passed by value and every
+ function return value.
+
+ Disabling KMSAN_CHECK_PARAM_RETVAL will result in tracking shadow for
+ function parameters and return values across function borders. This
+ is a more relaxed mode, but it generates more instrumentation code and
+ may potentially report errors in corner cases when non-instrumented
+ functions call instrumented ones.
+
+config KMSAN_KUNIT_TEST
+ tristate "KMSAN integration test suite" if !KUNIT_ALL_TESTS
+ default KUNIT_ALL_TESTS
+ depends on TRACEPOINTS && KUNIT
+ help
+ Test suite for KMSAN, testing various error detection scenarios,
+ and checking that reports are correctly output to console.
+
+ Say Y here if you want the test to be built into the kernel and run
+ during boot; say M if you want the test to build as a module; say N
+ if you are unsure.
+
+endif
diff --git a/lib/Makefile b/lib/Makefile
index 8f6fc027f605..59bd7c2f793a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -29,7 +29,7 @@ endif
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o timerqueue.o xarray.o \
- idr.o extable.o irq_regs.o argv_split.o \
+ maple_tree.o idr.o extable.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
@@ -65,11 +65,6 @@ obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o
obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
obj-$(CONFIG_TEST_IDA) += test_ida.o
-obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
-CFLAGS_test_kasan.o += -fno-builtin
-CFLAGS_test_kasan.o += $(call cc-disable-warning, vla)
-obj-$(CONFIG_KASAN_MODULE_TEST) += test_kasan_module.o
-CFLAGS_test_kasan_module.o += -fno-builtin
obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
UBSAN_SANITIZE_test_ubsan.o := y
@@ -90,6 +85,7 @@ obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
+obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
@@ -255,7 +251,6 @@ obj-$(CONFIG_DIMLIB) += dim/
obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o
-lib-$(CONFIG_LIB_MEMNEQ) += memneq.o
obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
@@ -276,6 +271,9 @@ obj-$(CONFIG_POLYNOMIAL) += polynomial.o
CFLAGS_stackdepot.o += -fno-builtin
obj-$(CONFIG_STACKDEPOT) += stackdepot.o
KASAN_SANITIZE_stackdepot.o := n
+# In particular, instrumenting stackdepot.c with KMSAN will result in infinite
+# recursion.
+KMSAN_SANITIZE_stackdepot.o := n
KCOV_INSTRUMENT_stackdepot.o := n
obj-$(CONFIG_REF_TRACKER) += ref_tracker.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 488e6c3e5acc..1c81413c51f8 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -333,20 +333,32 @@ bool __bitmap_subset(const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_subset);
+#define BITMAP_WEIGHT(FETCH, bits) \
+({ \
+ unsigned int __bits = (bits), idx, w = 0; \
+ \
+ for (idx = 0; idx < __bits / BITS_PER_LONG; idx++) \
+ w += hweight_long(FETCH); \
+ \
+ if (__bits % BITS_PER_LONG) \
+ w += hweight_long((FETCH) & BITMAP_LAST_WORD_MASK(__bits)); \
+ \
+ w; \
+})
+
unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
{
- unsigned int k, lim = bits/BITS_PER_LONG, w = 0;
-
- for (k = 0; k < lim; k++)
- w += hweight_long(bitmap[k]);
-
- if (bits % BITS_PER_LONG)
- w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
-
- return w;
+ return BITMAP_WEIGHT(bitmap[idx], bits);
}
EXPORT_SYMBOL(__bitmap_weight);
+unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
+{
+ return BITMAP_WEIGHT(bitmap1[idx] & bitmap2[idx], bits);
+}
+EXPORT_SYMBOL(__bitmap_weight_and);
+
void __bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
@@ -953,37 +965,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigne
if (pos >= nbits || !test_bit(pos, buf))
return -1;
- return __bitmap_weight(buf, pos);
-}
-
-/**
- * bitmap_ord_to_pos - find position of n-th set bit in bitmap
- * @buf: pointer to bitmap
- * @ord: ordinal bit position (n-th set bit, n >= 0)
- * @nbits: number of valid bit positions in @buf
- *
- * Map the ordinal offset of bit @ord in @buf to its position in @buf.
- * Value of @ord should be in range 0 <= @ord < weight(buf). If @ord
- * >= weight(buf), returns @nbits.
- *
- * If for example, just bits 4 through 7 are set in @buf, then @ord
- * values 0 through 3 will get mapped to 4 through 7, respectively,
- * and all other @ord values returns @nbits. When @ord value 3
- * gets mapped to (returns) @pos value 7 in this example, that means
- * that the 3rd set bit (starting with 0th) is at position 7 in @buf.
- *
- * The bit positions 0 through @nbits-1 are valid positions in @buf.
- */
-unsigned int bitmap_ord_to_pos(const unsigned long *buf, unsigned int ord, unsigned int nbits)
-{
- unsigned int pos;
-
- for (pos = find_first_bit(buf, nbits);
- pos < nbits && ord;
- pos = find_next_bit(buf, nbits, pos + 1))
- ord--;
-
- return pos;
+ return bitmap_weight(buf, pos);
}
/**
@@ -1035,7 +1017,7 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
if (n < 0 || w == 0)
set_bit(oldbit, dst); /* identity map */
else
- set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst);
+ set_bit(find_nth_bit(new, nbits, n % w), dst);
}
}
EXPORT_SYMBOL(bitmap_remap);
@@ -1074,7 +1056,7 @@ int bitmap_bitremap(int oldbit, const unsigned long *old,
if (n < 0 || w == 0)
return oldbit;
else
- return bitmap_ord_to_pos(new, n % w, bits);
+ return find_nth_bit(new, bits, n % w);
}
EXPORT_SYMBOL(bitmap_bitremap);
@@ -1198,7 +1180,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
* The following code is a more efficient, but less
* obvious, equivalent to the loop:
* for (m = 0; m < bitmap_weight(relmap, bits); m++) {
- * n = bitmap_ord_to_pos(orig, m, bits);
+ * n = find_nth_bit(orig, bits, m);
* if (test_bit(m, orig))
* set_bit(n, dst);
* }
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 5546bf588780..90ed997d9570 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -260,7 +260,7 @@ char *next_arg(char *args, char **param, char **val)
args[i-1] = '\0';
}
}
- if (quoted && args[i-1] == '"')
+ if (quoted && i > 0 && args[i-1] == '"')
args[i-1] = '\0';
if (args[i]) {
diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c
index a72a2c16066e..d4572dbc9145 100644
--- a/lib/cmdline_kunit.c
+++ b/lib/cmdline_kunit.c
@@ -76,7 +76,7 @@ static void cmdline_test_lead_int(struct kunit *test)
int rc = cmdline_test_values[i];
int offset;
- sprintf(in, "%u%s", get_random_int() % 256, str);
+ sprintf(in, "%u%s", get_random_u8(), str);
/* Only first '-' after the number will advance the pointer */
offset = strlen(in) - strlen(str) + !!(rc == 2);
cmdline_do_one_test(test, in, rc, offset);
@@ -94,7 +94,7 @@ static void cmdline_test_tail_int(struct kunit *test)
int rc = strcmp(str, "") ? (strcmp(str, "-") ? 0 : 1) : 1;
int offset;
- sprintf(in, "%s%u", str, get_random_int() % 256);
+ sprintf(in, "%s%u", str, get_random_u8());
/*
* Only first and leading '-' not followed by integer
* will advance the pointer.
diff --git a/lib/cpumask.c b/lib/cpumask.c
index f0ae119be8c4..c7c392514fd3 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -128,23 +128,21 @@ unsigned int cpumask_local_spread(unsigned int i, int node)
i %= num_online_cpus();
if (node == NUMA_NO_NODE) {
- for_each_cpu(cpu, cpu_online_mask)
- if (i-- == 0)
- return cpu;
+ cpu = cpumask_nth(i, cpu_online_mask);
+ if (cpu < nr_cpu_ids)
+ return cpu;
} else {
/* NUMA first. */
- for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
- if (i-- == 0)
- return cpu;
-
- for_each_cpu(cpu, cpu_online_mask) {
- /* Skip NUMA nodes, done above. */
- if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
- continue;
-
- if (i-- == 0)
- return cpu;
- }
+ cpu = cpumask_nth_and(i, cpu_online_mask, cpumask_of_node(node));
+ if (cpu < nr_cpu_ids)
+ return cpu;
+
+ i -= cpumask_weight_and(cpu_online_mask, cpumask_of_node(node));
+
+ /* Skip NUMA nodes, done above. */
+ cpu = cpumask_nth_andnot(i, cpu_online_mask, cpumask_of_node(node));
+ if (cpu < nr_cpu_ids)
+ return cpu;
}
BUG();
}
@@ -168,10 +166,8 @@ unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,
/* NOTE: our first selection will skip 0. */
prev = __this_cpu_read(distribute_cpu_mask_prev);
- next = cpumask_next_and(prev, src1p, src2p);
- if (next >= nr_cpu_ids)
- next = cpumask_first_and(src1p, src2p);
-
+ next = find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p),
+ nr_cpumask_bits, prev + 1);
if (next < nr_cpu_ids)
__this_cpu_write(distribute_cpu_mask_prev, next);
@@ -185,11 +181,7 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp)
/* NOTE: our first selection will skip 0. */
prev = __this_cpu_read(distribute_cpu_mask_prev);
-
- next = cpumask_next(prev, srcp);
- if (next >= nr_cpu_ids)
- next = cpumask_first(srcp);
-
+ next = find_next_bit_wrap(cpumask_bits(srcp), nr_cpumask_bits, prev + 1);
if (next < nr_cpu_ids)
__this_cpu_write(distribute_cpu_mask_prev, next);
diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c
index ecbeec72221e..d1fc6ece21f3 100644
--- a/lib/cpumask_kunit.c
+++ b/lib/cpumask_kunit.c
@@ -33,6 +33,19 @@
KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask)); \
} while (0)
+#define EXPECT_FOR_EACH_CPU_OP_EQ(test, op, mask1, mask2) \
+ do { \
+ const cpumask_t *m1 = (mask1); \
+ const cpumask_t *m2 = (mask2); \
+ int weight; \
+ int cpu, iter = 0; \
+ cpumask_##op(&mask_tmp, m1, m2); \
+ weight = cpumask_weight(&mask_tmp); \
+ for_each_cpu_##op(cpu, mask1, mask2) \
+ iter++; \
+ KUNIT_EXPECT_EQ((test), weight, iter); \
+ } while (0)
+
#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \
do { \
const cpumask_t *m = (mask); \
@@ -54,6 +67,7 @@
static cpumask_t mask_empty;
static cpumask_t mask_all;
+static cpumask_t mask_tmp;
static void test_cpumask_weight(struct kunit *test)
{
@@ -101,10 +115,15 @@ static void test_cpumask_iterators(struct kunit *test)
EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
+ EXPECT_FOR_EACH_CPU_OP_EQ(test, and, &mask_empty, &mask_empty);
+ EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, &mask_empty);
+ EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, &mask_empty, &mask_empty);
EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask);
EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask);
EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask);
+ EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, cpu_possible_mask);
+ EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, cpu_possible_mask, &mask_empty);
}
static void test_cpumask_iterators_builtin(struct kunit *test)
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 47816af9a9d7..7e9683e9f5c6 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -2,6 +2,9 @@
menu "Crypto library routines"
+config CRYPTO_LIB_UTILS
+ tristate
+
config CRYPTO_LIB_AES
tristate
@@ -33,6 +36,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA
config CRYPTO_LIB_CHACHA_GENERIC
tristate
+ select CRYPTO_LIB_UTILS
help
This symbol can be depended upon by arch implementations of the
ChaCha library interface that require the generic code as a
@@ -42,7 +46,6 @@ config CRYPTO_LIB_CHACHA_GENERIC
config CRYPTO_LIB_CHACHA
tristate "ChaCha library interface"
- depends on CRYPTO
depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
help
@@ -70,7 +73,7 @@ config CRYPTO_LIB_CURVE25519
tristate "Curve25519 scalar multiplication library"
depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
- select LIB_MEMNEQ
+ select CRYPTO_LIB_UTILS
help
Enable the Curve25519 library interface. This interface may be
fulfilled by either the generic implementation or an arch-specific
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 919cbb2c220d..c852f067ab06 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o
+libcryptoutils-y := memneq.o utils.o
+
# chacha is used by the /dev/random driver which is always builtin
obj-y += chacha.o
obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o
diff --git a/lib/memneq.c b/lib/crypto/memneq.c
index fb11608b1ec1..243d8677cc51 100644
--- a/lib/memneq.c
+++ b/lib/crypto/memneq.c
@@ -59,10 +59,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <crypto/algapi.h>
#include <asm/unaligned.h>
-
-#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
+#include <crypto/algapi.h>
+#include <linux/module.h>
/* Generic path for arbitrary size */
static inline unsigned long
@@ -172,5 +171,3 @@ noinline unsigned long __crypto_memneq(const void *a, const void *b,
}
}
EXPORT_SYMBOL(__crypto_memneq);
-
-#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
diff --git a/lib/crypto/utils.c b/lib/crypto/utils.c
new file mode 100644
index 000000000000..53230ab1b195
--- /dev/null
+++ b/lib/crypto/utils.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Crypto library utility functions
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+/*
+ * XOR @len bytes from @src1 and @src2 together, writing the result to @dst
+ * (which may alias one of the sources). Don't call this directly; call
+ * crypto_xor() or crypto_xor_cpy() instead.
+ */
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
+{
+ int relalign = 0;
+
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ int size = sizeof(unsigned long);
+ int d = (((unsigned long)dst ^ (unsigned long)src1) |
+ ((unsigned long)dst ^ (unsigned long)src2)) &
+ (size - 1);
+
+ relalign = d ? 1 << __ffs(d) : size;
+
+ /*
+ * If we care about alignment, process as many bytes as
+ * needed to advance dst and src to values whose alignments
+ * equal their relative alignment. This will allow us to
+ * process the remainder of the input using optimal strides.
+ */
+ while (((unsigned long)dst & (relalign - 1)) && len > 0) {
+ *dst++ = *src1++ ^ *src2++;
+ len--;
+ }
+ }
+
+ while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ u64 l = get_unaligned((u64 *)src1) ^
+ get_unaligned((u64 *)src2);
+ put_unaligned(l, (u64 *)dst);
+ } else {
+ *(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
+ }
+ dst += 8;
+ src1 += 8;
+ src2 += 8;
+ len -= 8;
+ }
+
+ while (len >= 4 && !(relalign & 3)) {
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ u32 l = get_unaligned((u32 *)src1) ^
+ get_unaligned((u32 *)src2);
+ put_unaligned(l, (u32 *)dst);
+ } else {
+ *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+ }
+ dst += 4;
+ src1 += 4;
+ src2 += 4;
+ len -= 4;
+ }
+
+ while (len >= 2 && !(relalign & 1)) {
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+ u16 l = get_unaligned((u16 *)src1) ^
+ get_unaligned((u16 *)src2);
+ put_unaligned(l, (u16 *)dst);
+ } else {
+ *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+ }
+ dst += 2;
+ src1 += 2;
+ src2 += 2;
+ len -= 2;
+ }
+
+ while (len--)
+ *dst++ = *src1++ ^ *src2++;
+}
+EXPORT_SYMBOL_GPL(__crypto_xor);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 337d797a7141..4c670d3b6965 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -500,9 +500,9 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
descr->debug_hint(obj->object) : NULL;
limit++;
WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
- "object type: %s hint: %pS\n",
+ "object: %p object type: %s hint: %pS\n",
msg, obj_states[obj->state], obj->astate,
- descr->name, hint);
+ obj->object, descr->name, hint);
}
debug_objects_warnings++;
}
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index 7921193f0424..d2c37d64fd0c 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -126,7 +126,7 @@ struct cpio_data find_cpio_data(const char *path, void *data,
"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
p, MAX_CPIO_FILE_NAME);
}
- strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME);
+ strscpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME);
cd.data = (void *)dptr;
cd.size = ch[C_FILESIZE];
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 423784d9c058..adb2f9355ee6 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr)
{
- if (attr->no_warn)
- return;
-
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
* http://www.nongnu.org/failmalloc/
*/
-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
if (in_task()) {
unsigned int fail_nth = READ_ONCE(current->fail_nth);
@@ -139,20 +136,26 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;
}
- if (attr->probability <= prandom_u32() % 100)
+ if (attr->probability <= prandom_u32_max(100))
return false;
if (!fail_stacktrace(attr))
return false;
fail:
- fail_dump(attr);
+ if (!(flags & FAULT_NOWARN))
+ fail_dump(attr);
if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);
return true;
}
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+ return should_fail_ex(attr, size, 0);
+}
EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 1b8e4b2a9cba..18bc0a7ac8ee 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -19,57 +19,78 @@
#include <linux/minmax.h>
#include <linux/swab.h>
-#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
- !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \
- !defined(find_next_and_bit)
/*
- * This is a common helper function for find_next_bit, find_next_zero_bit, and
- * find_next_and_bit. The differences are:
- * - The "invert" argument, which is XORed with each fetched word before
- * searching it for one bits.
- * - The optional "addr2", which is anded with "addr1" if present.
+ * Common helper for find_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
*/
-unsigned long _find_next_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert, unsigned long le)
-{
- unsigned long tmp, mask;
-
- if (unlikely(start >= nbits))
- return nbits;
-
- tmp = addr1[start / BITS_PER_LONG];
- if (addr2)
- tmp &= addr2[start / BITS_PER_LONG];
- tmp ^= invert;
-
- /* Handle 1st word. */
- mask = BITMAP_FIRST_WORD_MASK(start);
- if (le)
- mask = swab(mask);
-
- tmp &= mask;
-
- start = round_down(start, BITS_PER_LONG);
-
- while (!tmp) {
- start += BITS_PER_LONG;
- if (start >= nbits)
- return nbits;
-
- tmp = addr1[start / BITS_PER_LONG];
- if (addr2)
- tmp &= addr2[start / BITS_PER_LONG];
- tmp ^= invert;
- }
+#define FIND_FIRST_BIT(FETCH, MUNGE, size) \
+({ \
+ unsigned long idx, val, sz = (size); \
+ \
+ for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { \
+ val = (FETCH); \
+ if (val) { \
+ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); \
+ break; \
+ } \
+ } \
+ \
+ sz; \
+})
- if (le)
- tmp = swab(tmp);
-
- return min(start + __ffs(tmp), nbits);
-}
-EXPORT_SYMBOL(_find_next_bit);
-#endif
+/*
+ * Common helper for find_next_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ */
+#define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \
+({ \
+ unsigned long mask, idx, tmp, sz = (size), __start = (start); \
+ \
+ if (unlikely(__start >= sz)) \
+ goto out; \
+ \
+ mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \
+ idx = __start / BITS_PER_LONG; \
+ \
+ for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \
+ if ((idx + 1) * BITS_PER_LONG >= sz) \
+ goto out; \
+ idx++; \
+ } \
+ \
+ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \
+out: \
+ sz; \
+})
+
+#define FIND_NTH_BIT(FETCH, size, num) \
+({ \
+ unsigned long sz = (size), nr = (num), idx, w, tmp; \
+ \
+ for (idx = 0; (idx + 1) * BITS_PER_LONG <= sz; idx++) { \
+ if (idx * BITS_PER_LONG + nr >= sz) \
+ goto out; \
+ \
+ tmp = (FETCH); \
+ w = hweight_long(tmp); \
+ if (w > nr) \
+ goto found; \
+ \
+ nr -= w; \
+ } \
+ \
+ if (sz % BITS_PER_LONG) \
+ tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz); \
+found: \
+ sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz); \
+out: \
+ sz; \
+})
#ifndef find_first_bit
/*
@@ -77,14 +98,7 @@ EXPORT_SYMBOL(_find_next_bit);
*/
unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
{
- unsigned long idx;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx])
- return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
- }
-
- return size;
+ return FIND_FIRST_BIT(addr[idx], /* nop */, size);
}
EXPORT_SYMBOL(_find_first_bit);
#endif
@@ -97,15 +111,7 @@ unsigned long _find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2,
unsigned long size)
{
- unsigned long idx, val;
-
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- val = addr1[idx] & addr2[idx];
- if (val)
- return min(idx * BITS_PER_LONG + __ffs(val), size);
- }
-
- return size;
+ return FIND_FIRST_BIT(addr1[idx] & addr2[idx], /* nop */, size);
}
EXPORT_SYMBOL(_find_first_and_bit);
#endif
@@ -116,16 +122,64 @@ EXPORT_SYMBOL(_find_first_and_bit);
*/
unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
- unsigned long idx;
+ return FIND_FIRST_BIT(~addr[idx], /* nop */, size);
+}
+EXPORT_SYMBOL(_find_first_zero_bit);
+#endif
- for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
- if (addr[idx] != ~0UL)
- return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
- }
+#ifndef find_next_bit
+unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start)
+{
+ return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_bit);
+#endif
- return size;
+unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
+{
+ return FIND_NTH_BIT(addr[idx], size, n);
}
-EXPORT_SYMBOL(_find_first_zero_bit);
+EXPORT_SYMBOL(__find_nth_bit);
+
+unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long n)
+{
+ return FIND_NTH_BIT(addr1[idx] & addr2[idx], size, n);
+}
+EXPORT_SYMBOL(__find_nth_and_bit);
+
+unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long size, unsigned long n)
+{
+ return FIND_NTH_BIT(addr1[idx] & ~addr2[idx], size, n);
+}
+EXPORT_SYMBOL(__find_nth_andnot_bit);
+
+#ifndef find_next_and_bit
+unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start)
+{
+ return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_and_bit);
+#endif
+
+#ifndef find_next_andnot_bit
+unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start)
+{
+ return FIND_NEXT_BIT(addr1[idx] & ~addr2[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_andnot_bit);
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+ unsigned long start)
+{
+ return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_zero_bit);
#endif
#ifndef find_last_bit
@@ -161,3 +215,38 @@ unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
return offset;
}
EXPORT_SYMBOL(find_next_clump8);
+
+#ifdef __BIG_ENDIAN
+
+#ifndef find_first_zero_bit_le
+/*
+ * Find the first cleared bit in an LE memory region.
+ */
+unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size)
+{
+ return FIND_FIRST_BIT(~addr[idx], swab, size);
+}
+EXPORT_SYMBOL(_find_first_zero_bit_le);
+
+#endif
+
+#ifndef find_next_zero_bit_le
+unsigned long _find_next_zero_bit_le(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ return FIND_NEXT_BIT(~addr[idx], swab, size, offset);
+}
+EXPORT_SYMBOL(_find_next_zero_bit_le);
+#endif
+
+#ifndef find_next_bit_le
+unsigned long _find_next_bit_le(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ return FIND_NEXT_BIT(addr[idx], swab, size, offset);
+}
+EXPORT_SYMBOL(_find_next_bit_le);
+
+#endif
+
+#endif /* __BIG_ENDIAN */
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index db904b57d4b8..7c3c011abd29 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -115,6 +115,22 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len)
return 0;
}
+static int __init test_find_nth_bit(const unsigned long *bitmap, unsigned long len)
+{
+ unsigned long l, n, w = bitmap_weight(bitmap, len);
+ ktime_t time;
+
+ time = ktime_get();
+ for (n = 0; n < w; n++) {
+ l = find_nth_bit(bitmap, len, n);
+ WARN_ON(l >= len);
+ }
+ time = ktime_get() - time;
+ pr_err("find_nth_bit: %18llu ns, %6ld iterations\n", time, w);
+
+ return 0;
+}
+
static int __init test_find_next_and_bit(const void *bitmap,
const void *bitmap2, unsigned long len)
{
@@ -142,6 +158,7 @@ static int __init find_bit_test(void)
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
+ test_find_nth_bit(bitmap, BITMAP_LEN / 10);
/*
* test_find_first_bit() may take some time, so
@@ -157,13 +174,14 @@ static int __init find_bit_test(void)
bitmap_zero(bitmap2, BITMAP_LEN);
while (nbits--) {
- __set_bit(prandom_u32() % BITMAP_LEN, bitmap);
- __set_bit(prandom_u32() % BITMAP_LEN, bitmap2);
+ __set_bit(prandom_u32_max(BITMAP_LEN), bitmap);
+ __set_bit(prandom_u32_max(BITMAP_LEN), bitmap2);
}
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
+ test_find_nth_bit(bitmap, BITMAP_LEN);
test_find_first_bit(bitmap, BITMAP_LEN);
test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
diff --git a/lib/iomap.c b/lib/iomap.c
index fbaa3e8f19d6..4f8b31baa575 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -6,6 +6,7 @@
*/
#include <linux/pci.h>
#include <linux/io.h>
+#include <linux/kmsan-checks.h>
#include <linux/export.h>
@@ -70,26 +71,35 @@ static void bad_io_access(unsigned long port, const char *access)
#define mmio_read64be(addr) swab64(readq(addr))
#endif
+/*
+ * Here and below, we apply __no_kmsan_checks to functions reading data from
+ * hardware, to ensure that KMSAN marks their return values as initialized.
+ */
+__no_kmsan_checks
unsigned int ioread8(const void __iomem *addr)
{
IO_COND(addr, return inb(port), return readb(addr));
return 0xff;
}
+__no_kmsan_checks
unsigned int ioread16(const void __iomem *addr)
{
IO_COND(addr, return inw(port), return readw(addr));
return 0xffff;
}
+__no_kmsan_checks
unsigned int ioread16be(const void __iomem *addr)
{
IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr));
return 0xffff;
}
+__no_kmsan_checks
unsigned int ioread32(const void __iomem *addr)
{
IO_COND(addr, return inl(port), return readl(addr));
return 0xffffffff;
}
+__no_kmsan_checks
unsigned int ioread32be(const void __iomem *addr)
{
IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr));
@@ -142,18 +152,21 @@ static u64 pio_read64be_hi_lo(unsigned long port)
return lo | (hi << 32);
}
+__no_kmsan_checks
u64 ioread64_lo_hi(const void __iomem *addr)
{
IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr));
return 0xffffffffffffffffULL;
}
+__no_kmsan_checks
u64 ioread64_hi_lo(const void __iomem *addr)
{
IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr));
return 0xffffffffffffffffULL;
}
+__no_kmsan_checks
u64 ioread64be_lo_hi(const void __iomem *addr)
{
IO_COND(addr, return pio_read64be_lo_hi(port),
@@ -161,6 +174,7 @@ u64 ioread64be_lo_hi(const void __iomem *addr)
return 0xffffffffffffffffULL;
}
+__no_kmsan_checks
u64 ioread64be_hi_lo(const void __iomem *addr)
{
IO_COND(addr, return pio_read64be_hi_lo(port),
@@ -188,22 +202,32 @@ EXPORT_SYMBOL(ioread64be_hi_lo);
void iowrite8(u8 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, outb(val,port), writeb(val, addr));
}
void iowrite16(u16 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, outw(val,port), writew(val, addr));
}
void iowrite16be(u16 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr));
}
void iowrite32(u32 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, outl(val,port), writel(val, addr));
}
void iowrite32be(u32 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr));
}
EXPORT_SYMBOL(iowrite8);
@@ -239,24 +263,32 @@ static void pio_write64be_hi_lo(u64 val, unsigned long port)
void iowrite64_lo_hi(u64 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, pio_write64_lo_hi(val, port),
writeq(val, addr));
}
void iowrite64_hi_lo(u64 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, pio_write64_hi_lo(val, port),
writeq(val, addr));
}
void iowrite64be_lo_hi(u64 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, pio_write64be_lo_hi(val, port),
mmio_write64be(val, addr));
}
void iowrite64be_hi_lo(u64 val, void __iomem *addr)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(&val, sizeof(val));
IO_COND(addr, pio_write64be_hi_lo(val, port),
mmio_write64be(val, addr));
}
@@ -328,14 +360,20 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count)
void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count)
{
IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count));
+ /* KMSAN must treat values read from devices as initialized. */
+ kmsan_unpoison_memory(dst, count);
}
void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count)
{
IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count));
+ /* KMSAN must treat values read from devices as initialized. */
+ kmsan_unpoison_memory(dst, count * 2);
}
void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count)
{
IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count));
+ /* KMSAN must treat values read from devices as initialized. */
+ kmsan_unpoison_memory(dst, count * 4);
}
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
@@ -343,14 +381,20 @@ EXPORT_SYMBOL(ioread32_rep);
void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(src, count);
IO_COND(addr, outsb(port, src, count), mmio_outsb(addr, src, count));
}
void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(src, count * 2);
IO_COND(addr, outsw(port, src, count), mmio_outsw(addr, src, count));
}
void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
{
+ /* Make sure uninitialized memory isn't copied to devices. */
+ kmsan_check_memory(src, count * 4);
IO_COND(addr, outsl(port, src,count), mmio_outsl(addr, src, count));
}
EXPORT_SYMBOL(iowrite8_rep);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 4b7fce72e3e5..c3ca28ca68a6 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -174,13 +174,16 @@ static int copyout(void __user *to, const void *from, size_t n)
static int copyin(void *to, const void __user *from, size_t n)
{
+ size_t res = n;
+
if (should_fail_usercopy())
return n;
if (access_ok(from, n)) {
- instrument_copy_from_user(to, from, n);
- n = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_before(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
}
- return n;
+ return res;
}
static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe,
diff --git a/lib/is_signed_type_kunit.c b/lib/is_signed_type_kunit.c
index 207207522925..0a7f6ae62839 100644
--- a/lib/is_signed_type_kunit.c
+++ b/lib/is_signed_type_kunit.c
@@ -21,11 +21,7 @@ static void is_signed_type_test(struct kunit *test)
KUNIT_EXPECT_EQ(test, is_signed_type(bool), false);
KUNIT_EXPECT_EQ(test, is_signed_type(signed char), true);
KUNIT_EXPECT_EQ(test, is_signed_type(unsigned char), false);
-#ifdef __CHAR_UNSIGNED__
KUNIT_EXPECT_EQ(test, is_signed_type(char), false);
-#else
- KUNIT_EXPECT_EQ(test, is_signed_type(char), true);
-#endif
KUNIT_EXPECT_EQ(test, is_signed_type(int), true);
KUNIT_EXPECT_EQ(test, is_signed_type(unsigned int), false);
KUNIT_EXPECT_EQ(test, is_signed_type(long), true);
diff --git a/lib/kobject.c b/lib/kobject.c
index 5f0e71ab292c..a0b2dbfcfa23 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -694,7 +694,7 @@ static void kobject_release(struct kref *kref)
{
struct kobject *kobj = container_of(kref, struct kobject, kref);
#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
- unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
+ unsigned long delay = HZ + HZ * prandom_u32_max(4);
pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n",
kobject_name(kobj), kobj, __func__, kobj->parent, delay);
INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c
index 13d0bd8b07a9..4df0335d0d06 100644
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -161,6 +161,13 @@ static void kunit_resource_test_alloc_resource(struct kunit *test)
kunit_put_resource(res);
}
+static inline bool kunit_resource_instance_match(struct kunit *test,
+ struct kunit_resource *res,
+ void *match_data)
+{
+ return res->data == match_data;
+}
+
/*
* Note: tests below use kunit_alloc_and_get_resource(), so as a consequence
* they have a reference to the associated resource that they must release
diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c
index 141789ca8949..a608746020a9 100644
--- a/lib/kunit/string-stream.c
+++ b/lib/kunit/string-stream.c
@@ -12,62 +12,29 @@
#include "string-stream.h"
-struct string_stream_fragment_alloc_context {
- struct kunit *test;
- int len;
- gfp_t gfp;
-};
-static int string_stream_fragment_init(struct kunit_resource *res,
- void *context)
+static struct string_stream_fragment *alloc_string_stream_fragment(
+ struct kunit *test, int len, gfp_t gfp)
{
- struct string_stream_fragment_alloc_context *ctx = context;
struct string_stream_fragment *frag;
- frag = kunit_kzalloc(ctx->test, sizeof(*frag), ctx->gfp);
+ frag = kunit_kzalloc(test, sizeof(*frag), gfp);
if (!frag)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- frag->test = ctx->test;
- frag->fragment = kunit_kmalloc(ctx->test, ctx->len, ctx->gfp);
+ frag->fragment = kunit_kmalloc(test, len, gfp);
if (!frag->fragment)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- res->data = frag;
-
- return 0;
+ return frag;
}
-static void string_stream_fragment_free(struct kunit_resource *res)
+static void string_stream_fragment_destroy(struct kunit *test,
+ struct string_stream_fragment *frag)
{
- struct string_stream_fragment *frag = res->data;
-
list_del(&frag->node);
- kunit_kfree(frag->test, frag->fragment);
- kunit_kfree(frag->test, frag);
-}
-
-static struct string_stream_fragment *alloc_string_stream_fragment(
- struct kunit *test, int len, gfp_t gfp)
-{
- struct string_stream_fragment_alloc_context context = {
- .test = test,
- .len = len,
- .gfp = gfp
- };
-
- return kunit_alloc_resource(test,
- string_stream_fragment_init,
- string_stream_fragment_free,
- gfp,
- &context);
-}
-
-static int string_stream_fragment_destroy(struct string_stream_fragment *frag)
-{
- return kunit_destroy_resource(frag->test,
- kunit_resource_instance_match,
- frag);
+ kunit_kfree(test, frag->fragment);
+ kunit_kfree(test, frag);
}
int string_stream_vadd(struct string_stream *stream,
@@ -89,8 +56,8 @@ int string_stream_vadd(struct string_stream *stream,
frag_container = alloc_string_stream_fragment(stream->test,
len,
stream->gfp);
- if (!frag_container)
- return -ENOMEM;
+ if (IS_ERR(frag_container))
+ return PTR_ERR(frag_container);
len = vsnprintf(frag_container->fragment, len, fmt, args);
spin_lock(&stream->lock);
@@ -122,7 +89,7 @@ static void string_stream_clear(struct string_stream *stream)
frag_container_safe,
&stream->fragments,
node) {
- string_stream_fragment_destroy(frag_container);
+ string_stream_fragment_destroy(stream->test, frag_container);
}
stream->length = 0;
spin_unlock(&stream->lock);
@@ -169,48 +136,23 @@ struct string_stream_alloc_context {
gfp_t gfp;
};
-static int string_stream_init(struct kunit_resource *res, void *context)
+struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp)
{
struct string_stream *stream;
- struct string_stream_alloc_context *ctx = context;
- stream = kunit_kzalloc(ctx->test, sizeof(*stream), ctx->gfp);
+ stream = kunit_kzalloc(test, sizeof(*stream), gfp);
if (!stream)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- res->data = stream;
- stream->gfp = ctx->gfp;
- stream->test = ctx->test;
+ stream->gfp = gfp;
+ stream->test = test;
INIT_LIST_HEAD(&stream->fragments);
spin_lock_init(&stream->lock);
- return 0;
+ return stream;
}
-static void string_stream_free(struct kunit_resource *res)
+void string_stream_destroy(struct string_stream *stream)
{
- struct string_stream *stream = res->data;
-
string_stream_clear(stream);
}
-
-struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp)
-{
- struct string_stream_alloc_context context = {
- .test = test,
- .gfp = gfp
- };
-
- return kunit_alloc_resource(test,
- string_stream_init,
- string_stream_free,
- gfp,
- &context);
-}
-
-int string_stream_destroy(struct string_stream *stream)
-{
- return kunit_destroy_resource(stream->test,
- kunit_resource_instance_match,
- stream);
-}
diff --git a/lib/kunit/string-stream.h b/lib/kunit/string-stream.h
index 43f9508a55b4..b669f9a75a94 100644
--- a/lib/kunit/string-stream.h
+++ b/lib/kunit/string-stream.h
@@ -14,7 +14,6 @@
#include <linux/stdarg.h>
struct string_stream_fragment {
- struct kunit *test;
struct list_head node;
char *fragment;
};
@@ -46,6 +45,6 @@ int string_stream_append(struct string_stream *stream,
bool string_stream_is_empty(struct string_stream *stream);
-int string_stream_destroy(struct string_stream *stream);
+void string_stream_destroy(struct string_stream *stream);
#endif /* _KUNIT_STRING_STREAM_H */
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 1e54373309a4..2a6992fe7c3e 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -258,14 +258,14 @@ static void kunit_print_string_stream(struct kunit *test,
static void kunit_fail(struct kunit *test, const struct kunit_loc *loc,
enum kunit_assert_type type, const struct kunit_assert *assert,
- const struct va_format *message)
+ assert_format_t assert_format, const struct va_format *message)
{
struct string_stream *stream;
kunit_set_failure(test);
stream = alloc_string_stream(test, GFP_KERNEL);
- if (!stream) {
+ if (IS_ERR(stream)) {
WARN(true,
"Could not allocate stream to print failed assertion in %s:%d\n",
loc->file,
@@ -274,11 +274,11 @@ static void kunit_fail(struct kunit *test, const struct kunit_loc *loc,
}
kunit_assert_prologue(loc, type, stream);
- assert->format(assert, message, stream);
+ assert_format(assert, message, stream);
kunit_print_string_stream(test, stream);
- WARN_ON(string_stream_destroy(stream));
+ string_stream_destroy(stream);
}
static void __noreturn kunit_abort(struct kunit *test)
@@ -298,6 +298,7 @@ void kunit_do_failed_assertion(struct kunit *test,
const struct kunit_loc *loc,
enum kunit_assert_type type,
const struct kunit_assert *assert,
+ assert_format_t assert_format,
const char *fmt, ...)
{
va_list args;
@@ -307,7 +308,7 @@ void kunit_do_failed_assertion(struct kunit *test,
message.fmt = fmt;
message.va = &args;
- kunit_fail(test, loc, type, assert, &message);
+ kunit_fail(test, loc, type, assert, assert_format, &message);
va_end(args);
@@ -713,21 +714,20 @@ void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(kunit_kmalloc_array);
-void kunit_kfree(struct kunit *test, const void *ptr)
+static inline bool kunit_kfree_match(struct kunit *test,
+ struct kunit_resource *res, void *match_data)
{
- struct kunit_resource *res;
-
- res = kunit_find_resource(test, kunit_resource_instance_match,
- (void *)ptr);
-
- /*
- * Removing the resource from the list of resources drops the
- * reference count to 1; the final put will trigger the free.
- */
- kunit_remove_resource(test, res);
+ /* Only match resources allocated with kunit_kmalloc() and friends. */
+ return res->free == kunit_kmalloc_array_free && res->data == match_data;
+}
- kunit_put_resource(res);
+void kunit_kfree(struct kunit *test, const void *ptr)
+{
+ if (!ptr)
+ return;
+ if (kunit_destroy_resource(test, kunit_kfree_match, (void *)ptr))
+ KUNIT_FAIL(test, "kunit_kfree: %px already freed or not allocated by kunit", ptr);
}
EXPORT_SYMBOL_GPL(kunit_kfree);
diff --git a/lib/llist.c b/lib/llist.c
index 611ce4881a87..7d78b736e8af 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -30,7 +30,7 @@ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
do {
new_last->next = first = READ_ONCE(head->first);
- } while (cmpxchg(&head->first, first, new_first) != first);
+ } while (!try_cmpxchg(&head->first, &first, new_first));
return !first;
}
@@ -52,18 +52,14 @@ EXPORT_SYMBOL_GPL(llist_add_batch);
*/
struct llist_node *llist_del_first(struct llist_head *head)
{
- struct llist_node *entry, *old_entry, *next;
+ struct llist_node *entry, *next;
entry = smp_load_acquire(&head->first);
- for (;;) {
+ do {
if (entry == NULL)
return NULL;
- old_entry = entry;
next = READ_ONCE(entry->next);
- entry = cmpxchg(&head->first, old_entry, next);
- if (entry == old_entry)
- break;
- }
+ } while (!try_cmpxchg(&head->first, &entry, next));
return entry;
}
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
new file mode 100644
index 000000000000..df352f6ccc24
--- /dev/null
+++ b/lib/maple_tree.c
@@ -0,0 +1,7152 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Maple Tree implementation
+ * Copyright (c) 2018-2022 Oracle Corporation
+ * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
+ * Matthew Wilcox <willy@infradead.org>
+ */
+
+/*
+ * DOC: Interesting implementation details of the Maple Tree
+ *
+ * Each node type has a number of slots for entries and a number of slots for
+ * pivots. In the case of dense nodes, the pivots are implied by the position
+ * and are simply the slot index + the minimum of the node.
+ *
+ * In regular B-Tree terms, pivots are called keys. The term pivot is used to
+ * indicate that the tree is specifying ranges, Pivots may appear in the
+ * subtree with an entry attached to the value where as keys are unique to a
+ * specific position of a B-tree. Pivot values are inclusive of the slot with
+ * the same index.
+ *
+ *
+ * The following illustrates the layout of a range64 nodes slots and pivots.
+ *
+ *
+ * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
+ * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬
+ * │ │ │ │ │ │ │ │ └─ Implied maximum
+ * │ │ │ │ │ │ │ └─ Pivot 14
+ * │ │ │ │ │ │ └─ Pivot 13
+ * │ │ │ │ │ └─ Pivot 12
+ * │ │ │ │ └─ Pivot 11
+ * │ │ │ └─ Pivot 2
+ * │ │ └─ Pivot 1
+ * │ └─ Pivot 0
+ * └─ Implied minimum
+ *
+ * Slot contents:
+ * Internal (non-leaf) nodes contain pointers to other nodes.
+ * Leaf nodes contain entries.
+ *
+ * The location of interest is often referred to as an offset. All offsets have
+ * a slot, but the last offset has an implied pivot from the node above (or
+ * UINT_MAX for the root node.
+ *
+ * Ranges complicate certain write activities. When modifying any of
+ * the B-tree variants, it is known that one entry will either be added or
+ * deleted. When modifying the Maple Tree, one store operation may overwrite
+ * the entire data set, or one half of the tree, or the middle half of the tree.
+ *
+ */
+
+
+#include <linux/maple_tree.h>
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/limits.h>
+#include <asm/barrier.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/maple_tree.h>
+
+#define MA_ROOT_PARENT 1
+
+/*
+ * Maple state flags
+ * * MA_STATE_BULK - Bulk insert mode
+ * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
+ * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
+ */
+#define MA_STATE_BULK 1
+#define MA_STATE_REBALANCE 2
+#define MA_STATE_PREALLOC 4
+
+#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
+#define ma_mnode_ptr(x) ((struct maple_node *)(x))
+#define ma_enode_ptr(x) ((struct maple_enode *)(x))
+static struct kmem_cache *maple_node_cache;
+
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+static const unsigned long mt_max[] = {
+ [maple_dense] = MAPLE_NODE_SLOTS,
+ [maple_leaf_64] = ULONG_MAX,
+ [maple_range_64] = ULONG_MAX,
+ [maple_arange_64] = ULONG_MAX,
+};
+#define mt_node_max(x) mt_max[mte_node_type(x)]
+#endif
+
+static const unsigned char mt_slots[] = {
+ [maple_dense] = MAPLE_NODE_SLOTS,
+ [maple_leaf_64] = MAPLE_RANGE64_SLOTS,
+ [maple_range_64] = MAPLE_RANGE64_SLOTS,
+ [maple_arange_64] = MAPLE_ARANGE64_SLOTS,
+};
+#define mt_slot_count(x) mt_slots[mte_node_type(x)]
+
+static const unsigned char mt_pivots[] = {
+ [maple_dense] = 0,
+ [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1,
+ [maple_range_64] = MAPLE_RANGE64_SLOTS - 1,
+ [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1,
+};
+#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
+
+static const unsigned char mt_min_slots[] = {
+ [maple_dense] = MAPLE_NODE_SLOTS / 2,
+ [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
+ [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
+ [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1,
+};
+#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
+
+#define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2)
+#define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1)
+
+struct maple_big_node {
+ struct maple_pnode *parent;
+ unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
+ union {
+ struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
+ struct {
+ unsigned long padding[MAPLE_BIG_NODE_GAPS];
+ unsigned long gap[MAPLE_BIG_NODE_GAPS];
+ };
+ };
+ unsigned char b_end;
+ enum maple_type type;
+};
+
+/*
+ * The maple_subtree_state is used to build a tree to replace a segment of an
+ * existing tree in a more atomic way. Any walkers of the older tree will hit a
+ * dead node and restart on updates.
+ */
+struct maple_subtree_state {
+ struct ma_state *orig_l; /* Original left side of subtree */
+ struct ma_state *orig_r; /* Original right side of subtree */
+ struct ma_state *l; /* New left side of subtree */
+ struct ma_state *m; /* New middle of subtree (rare) */
+ struct ma_state *r; /* New right side of subtree */
+ struct ma_topiary *free; /* nodes to be freed */
+ struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */
+ struct maple_big_node *bn;
+};
+
+/* Functions */
+static inline struct maple_node *mt_alloc_one(gfp_t gfp)
+{
+ return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
+}
+
+static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
+{
+ return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
+ nodes);
+}
+
+static inline void mt_free_bulk(size_t size, void __rcu **nodes)
+{
+ kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
+}
+
+static void mt_free_rcu(struct rcu_head *head)
+{
+ struct maple_node *node = container_of(head, struct maple_node, rcu);
+
+ kmem_cache_free(maple_node_cache, node);
+}
+
+/*
+ * ma_free_rcu() - Use rcu callback to free a maple node
+ * @node: The node to free
+ *
+ * The maple tree uses the parent pointer to indicate this node is no longer in
+ * use and will be freed.
+ */
+static void ma_free_rcu(struct maple_node *node)
+{
+ node->parent = ma_parent_ptr(node);
+ call_rcu(&node->rcu, mt_free_rcu);
+}
+
+
+static void mas_set_height(struct ma_state *mas)
+{
+ unsigned int new_flags = mas->tree->ma_flags;
+
+ new_flags &= ~MT_FLAGS_HEIGHT_MASK;
+ BUG_ON(mas->depth > MAPLE_HEIGHT_MAX);
+ new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
+ mas->tree->ma_flags = new_flags;
+}
+
+static unsigned int mas_mt_height(struct ma_state *mas)
+{
+ return mt_height(mas->tree);
+}
+
+static inline enum maple_type mte_node_type(const struct maple_enode *entry)
+{
+ return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
+ MAPLE_NODE_TYPE_MASK;
+}
+
+static inline bool ma_is_dense(const enum maple_type type)
+{
+ return type < maple_leaf_64;
+}
+
+static inline bool ma_is_leaf(const enum maple_type type)
+{
+ return type < maple_range_64;
+}
+
+static inline bool mte_is_leaf(const struct maple_enode *entry)
+{
+ return ma_is_leaf(mte_node_type(entry));
+}
+
+/*
+ * We also reserve values with the bottom two bits set to '10' which are
+ * below 4096
+ */
+static inline bool mt_is_reserved(const void *entry)
+{
+ return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
+ xa_is_internal(entry);
+}
+
+static inline void mas_set_err(struct ma_state *mas, long err)
+{
+ mas->node = MA_ERROR(err);
+}
+
+static inline bool mas_is_ptr(struct ma_state *mas)
+{
+ return mas->node == MAS_ROOT;
+}
+
+static inline bool mas_is_start(struct ma_state *mas)
+{
+ return mas->node == MAS_START;
+}
+
+bool mas_is_err(struct ma_state *mas)
+{
+ return xa_is_err(mas->node);
+}
+
+static inline bool mas_searchable(struct ma_state *mas)
+{
+ if (mas_is_none(mas))
+ return false;
+
+ if (mas_is_ptr(mas))
+ return false;
+
+ return true;
+}
+
+static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
+{
+ return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
+}
+
+/*
+ * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
+ * @entry: The maple encoded node
+ *
+ * Return: a maple topiary pointer
+ */
+static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
+{
+ return (struct maple_topiary *)
+ ((unsigned long)entry & ~MAPLE_NODE_MASK);
+}
+
+/*
+ * mas_mn() - Get the maple state node.
+ * @mas: The maple state
+ *
+ * Return: the maple node (not encoded - bare pointer).
+ */
+static inline struct maple_node *mas_mn(const struct ma_state *mas)
+{
+ return mte_to_node(mas->node);
+}
+
+/*
+ * mte_set_node_dead() - Set a maple encoded node as dead.
+ * @mn: The maple encoded node.
+ */
+static inline void mte_set_node_dead(struct maple_enode *mn)
+{
+ mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
+ smp_wmb(); /* Needed for RCU */
+}
+
+/* Bit 1 indicates the root is a node */
+#define MAPLE_ROOT_NODE 0x02
+/* maple_type stored bit 3-6 */
+#define MAPLE_ENODE_TYPE_SHIFT 0x03
+/* Bit 2 means a NULL somewhere below */
+#define MAPLE_ENODE_NULL 0x04
+
+static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
+ enum maple_type type)
+{
+ return (void *)((unsigned long)node |
+ (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
+}
+
+static inline void *mte_mk_root(const struct maple_enode *node)
+{
+ return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
+}
+
+static inline void *mte_safe_root(const struct maple_enode *node)
+{
+ return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
+}
+
+static inline void mte_set_full(const struct maple_enode *node)
+{
+ node = (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
+}
+
+static inline void mte_clear_full(const struct maple_enode *node)
+{
+ node = (void *)((unsigned long)node | MAPLE_ENODE_NULL);
+}
+
+static inline bool ma_is_root(struct maple_node *node)
+{
+ return ((unsigned long)node->parent & MA_ROOT_PARENT);
+}
+
+static inline bool mte_is_root(const struct maple_enode *node)
+{
+ return ma_is_root(mte_to_node(node));
+}
+
+static inline bool mas_is_root_limits(const struct ma_state *mas)
+{
+ return !mas->min && mas->max == ULONG_MAX;
+}
+
+static inline bool mt_is_alloc(struct maple_tree *mt)
+{
+ return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
+}
+
+/*
+ * The Parent Pointer
+ * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
+ * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16
+ * bit values need an extra bit to store the offset. This extra bit comes from
+ * a reuse of the last bit in the node type. This is possible by using bit 1 to
+ * indicate if bit 2 is part of the type or the slot.
+ *
+ * Note types:
+ * 0x??1 = Root
+ * 0x?00 = 16 bit nodes
+ * 0x010 = 32 bit nodes
+ * 0x110 = 64 bit nodes
+ *
+ * Slot size and alignment
+ * 0b??1 : Root
+ * 0b?00 : 16 bit values, type in 0-1, slot in 2-7
+ * 0b010 : 32 bit values, type in 0-2, slot in 3-7
+ * 0b110 : 64 bit values, type in 0-2, slot in 3-7
+ */
+
+#define MAPLE_PARENT_ROOT 0x01
+
+#define MAPLE_PARENT_SLOT_SHIFT 0x03
+#define MAPLE_PARENT_SLOT_MASK 0xF8
+
+#define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
+#define MAPLE_PARENT_16B_SLOT_MASK 0xFC
+
+#define MAPLE_PARENT_RANGE64 0x06
+#define MAPLE_PARENT_RANGE32 0x04
+#define MAPLE_PARENT_NOT_RANGE16 0x02
+
+/*
+ * mte_parent_shift() - Get the parent shift for the slot storage.
+ * @parent: The parent pointer cast as an unsigned long
+ * Return: The shift into that pointer to the star to of the slot
+ */
+static inline unsigned long mte_parent_shift(unsigned long parent)
+{
+ /* Note bit 1 == 0 means 16B */
+ if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
+ return MAPLE_PARENT_SLOT_SHIFT;
+
+ return MAPLE_PARENT_16B_SLOT_SHIFT;
+}
+
+/*
+ * mte_parent_slot_mask() - Get the slot mask for the parent.
+ * @parent: The parent pointer cast as an unsigned long.
+ * Return: The slot mask for that parent.
+ */
+static inline unsigned long mte_parent_slot_mask(unsigned long parent)
+{
+ /* Note bit 1 == 0 means 16B */
+ if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
+ return MAPLE_PARENT_SLOT_MASK;
+
+ return MAPLE_PARENT_16B_SLOT_MASK;
+}
+
+/*
+ * mas_parent_enum() - Return the maple_type of the parent from the stored
+ * parent type.
+ * @mas: The maple state
+ * @node: The maple_enode to extract the parent's enum
+ * Return: The node->parent maple_type
+ */
+static inline
+enum maple_type mte_parent_enum(struct maple_enode *p_enode,
+ struct maple_tree *mt)
+{
+ unsigned long p_type;
+
+ p_type = (unsigned long)p_enode;
+ if (p_type & MAPLE_PARENT_ROOT)
+ return 0; /* Validated in the caller. */
+
+ p_type &= MAPLE_NODE_MASK;
+ p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
+
+ switch (p_type) {
+ case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
+ if (mt_is_alloc(mt))
+ return maple_arange_64;
+ return maple_range_64;
+ }
+
+ return 0;
+}
+
+static inline
+enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode)
+{
+ return mte_parent_enum(ma_enode_ptr(mte_to_node(enode)->parent), mas->tree);
+}
+
+/*
+ * mte_set_parent() - Set the parent node and encode the slot
+ * @enode: The encoded maple node.
+ * @parent: The encoded maple node that is the parent of @enode.
+ * @slot: The slot that @enode resides in @parent.
+ *
+ * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
+ * parent type.
+ */
+static inline
+void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent,
+ unsigned char slot)
+{
+ unsigned long val = (unsigned long) parent;
+ unsigned long shift;
+ unsigned long type;
+ enum maple_type p_type = mte_node_type(parent);
+
+ BUG_ON(p_type == maple_dense);
+ BUG_ON(p_type == maple_leaf_64);
+
+ switch (p_type) {
+ case maple_range_64:
+ case maple_arange_64:
+ shift = MAPLE_PARENT_SLOT_SHIFT;
+ type = MAPLE_PARENT_RANGE64;
+ break;
+ default:
+ case maple_dense:
+ case maple_leaf_64:
+ shift = type = 0;
+ break;
+ }
+
+ val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
+ val |= (slot << shift) | type;
+ mte_to_node(enode)->parent = ma_parent_ptr(val);
+}
+
+/*
+ * mte_parent_slot() - get the parent slot of @enode.
+ * @enode: The encoded maple node.
+ *
+ * Return: The slot in the parent node where @enode resides.
+ */
+static inline unsigned int mte_parent_slot(const struct maple_enode *enode)
+{
+ unsigned long val = (unsigned long) mte_to_node(enode)->parent;
+
+ /* Root. */
+ if (val & 1)
+ return 0;
+
+ /*
+ * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
+ * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
+ */
+ return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
+}
+
+/*
+ * mte_parent() - Get the parent of @node.
+ * @node: The encoded maple node.
+ *
+ * Return: The parent maple node.
+ */
+static inline struct maple_node *mte_parent(const struct maple_enode *enode)
+{
+ return (void *)((unsigned long)
+ (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
+}
+
+/*
+ * ma_dead_node() - check if the @enode is dead.
+ * @enode: The encoded maple node
+ *
+ * Return: true if dead, false otherwise.
+ */
+static inline bool ma_dead_node(const struct maple_node *node)
+{
+ struct maple_node *parent = (void *)((unsigned long)
+ node->parent & ~MAPLE_NODE_MASK);
+
+ return (parent == node);
+}
+/*
+ * mte_dead_node() - check if the @enode is dead.
+ * @enode: The encoded maple node
+ *
+ * Return: true if dead, false otherwise.
+ */
+static inline bool mte_dead_node(const struct maple_enode *enode)
+{
+ struct maple_node *parent, *node;
+
+ node = mte_to_node(enode);
+ parent = mte_parent(enode);
+ return (parent == node);
+}
+
+/*
+ * mas_allocated() - Get the number of nodes allocated in a maple state.
+ * @mas: The maple state
+ *
+ * The ma_state alloc member is overloaded to hold a pointer to the first
+ * allocated node or to the number of requested nodes to allocate. If bit 0 is
+ * set, then the alloc contains the number of requested nodes. If there is an
+ * allocated node, then the total allocated nodes is in that node.
+ *
+ * Return: The total number of nodes allocated
+ */
+static inline unsigned long mas_allocated(const struct ma_state *mas)
+{
+ if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
+ return 0;
+
+ return mas->alloc->total;
+}
+
+/*
+ * mas_set_alloc_req() - Set the requested number of allocations.
+ * @mas: the maple state
+ * @count: the number of allocations.
+ *
+ * The requested number of allocations is either in the first allocated node,
+ * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
+ * no allocated node. Set the request either in the node or do the necessary
+ * encoding to store in @mas->alloc directly.
+ */
+static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
+{
+ if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
+ if (!count)
+ mas->alloc = NULL;
+ else
+ mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
+ return;
+ }
+
+ mas->alloc->request_count = count;
+}
+
+/*
+ * mas_alloc_req() - get the requested number of allocations.
+ * @mas: The maple state
+ *
+ * The alloc count is either stored directly in @mas, or in
+ * @mas->alloc->request_count if there is at least one node allocated. Decode
+ * the request count if it's stored directly in @mas->alloc.
+ *
+ * Return: The allocation request count.
+ */
+static inline unsigned int mas_alloc_req(const struct ma_state *mas)
+{
+ if ((unsigned long)mas->alloc & 0x1)
+ return (unsigned long)(mas->alloc) >> 1;
+ else if (mas->alloc)
+ return mas->alloc->request_count;
+ return 0;
+}
+
+/*
+ * ma_pivots() - Get a pointer to the maple node pivots.
+ * @node - the maple node
+ * @type - the node type
+ *
+ * Return: A pointer to the maple node pivots
+ */
+static inline unsigned long *ma_pivots(struct maple_node *node,
+ enum maple_type type)
+{
+ switch (type) {
+ case maple_arange_64:
+ return node->ma64.pivot;
+ case maple_range_64:
+ case maple_leaf_64:
+ return node->mr64.pivot;
+ case maple_dense:
+ return NULL;
+ }
+ return NULL;
+}
+
+/*
+ * ma_gaps() - Get a pointer to the maple node gaps.
+ * @node - the maple node
+ * @type - the node type
+ *
+ * Return: A pointer to the maple node gaps
+ */
+static inline unsigned long *ma_gaps(struct maple_node *node,
+ enum maple_type type)
+{
+ switch (type) {
+ case maple_arange_64:
+ return node->ma64.gap;
+ case maple_range_64:
+ case maple_leaf_64:
+ case maple_dense:
+ return NULL;
+ }
+ return NULL;
+}
+
+/*
+ * mte_pivot() - Get the pivot at @piv of the maple encoded node.
+ * @mn: The maple encoded node.
+ * @piv: The pivot.
+ *
+ * Return: the pivot at @piv of @mn.
+ */
+static inline unsigned long mte_pivot(const struct maple_enode *mn,
+ unsigned char piv)
+{
+ struct maple_node *node = mte_to_node(mn);
+
+ if (piv >= mt_pivots[piv]) {
+ WARN_ON(1);
+ return 0;
+ }
+ switch (mte_node_type(mn)) {
+ case maple_arange_64:
+ return node->ma64.pivot[piv];
+ case maple_range_64:
+ case maple_leaf_64:
+ return node->mr64.pivot[piv];
+ case maple_dense:
+ return 0;
+ }
+ return 0;
+}
+
+/*
+ * mas_safe_pivot() - get the pivot at @piv or mas->max.
+ * @mas: The maple state
+ * @pivots: The pointer to the maple node pivots
+ * @piv: The pivot to fetch
+ * @type: The maple node type
+ *
+ * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
+ * otherwise.
+ */
+static inline unsigned long
+mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
+ unsigned char piv, enum maple_type type)
+{
+ if (piv >= mt_pivots[type])
+ return mas->max;
+
+ return pivots[piv];
+}
+
+/*
+ * mas_safe_min() - Return the minimum for a given offset.
+ * @mas: The maple state
+ * @pivots: The pointer to the maple node pivots
+ * @offset: The offset into the pivot array
+ *
+ * Return: The minimum range value that is contained in @offset.
+ */
+static inline unsigned long
+mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
+{
+ if (likely(offset))
+ return pivots[offset - 1] + 1;
+
+ return mas->min;
+}
+
+/*
+ * mas_logical_pivot() - Get the logical pivot of a given offset.
+ * @mas: The maple state
+ * @pivots: The pointer to the maple node pivots
+ * @offset: The offset into the pivot array
+ * @type: The maple node type
+ *
+ * When there is no value at a pivot (beyond the end of the data), then the
+ * pivot is actually @mas->max.
+ *
+ * Return: the logical pivot of a given @offset.
+ */
+static inline unsigned long
+mas_logical_pivot(struct ma_state *mas, unsigned long *pivots,
+ unsigned char offset, enum maple_type type)
+{
+ unsigned long lpiv = mas_safe_pivot(mas, pivots, offset, type);
+
+ if (likely(lpiv))
+ return lpiv;
+
+ if (likely(offset))
+ return mas->max;
+
+ return lpiv;
+}
+
+/*
+ * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
+ * @mn: The encoded maple node
+ * @piv: The pivot offset
+ * @val: The value of the pivot
+ */
+static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
+ unsigned long val)
+{
+ struct maple_node *node = mte_to_node(mn);
+ enum maple_type type = mte_node_type(mn);
+
+ BUG_ON(piv >= mt_pivots[type]);
+ switch (type) {
+ default:
+ case maple_range_64:
+ case maple_leaf_64:
+ node->mr64.pivot[piv] = val;
+ break;
+ case maple_arange_64:
+ node->ma64.pivot[piv] = val;
+ break;
+ case maple_dense:
+ break;
+ }
+
+}
+
+/*
+ * ma_slots() - Get a pointer to the maple node slots.
+ * @mn: The maple node
+ * @mt: The maple node type
+ *
+ * Return: A pointer to the maple node slots
+ */
+static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
+{
+ switch (mt) {
+ default:
+ case maple_arange_64:
+ return mn->ma64.slot;
+ case maple_range_64:
+ case maple_leaf_64:
+ return mn->mr64.slot;
+ case maple_dense:
+ return mn->slot;
+ }
+}
+
+static inline bool mt_locked(const struct maple_tree *mt)
+{
+ return mt_external_lock(mt) ? mt_lock_is_held(mt) :
+ lockdep_is_held(&mt->ma_lock);
+}
+
+static inline void *mt_slot(const struct maple_tree *mt,
+ void __rcu **slots, unsigned char offset)
+{
+ return rcu_dereference_check(slots[offset], mt_locked(mt));
+}
+
+/*
+ * mas_slot_locked() - Get the slot value when holding the maple tree lock.
+ * @mas: The maple state
+ * @slots: The pointer to the slots
+ * @offset: The offset into the slots array to fetch
+ *
+ * Return: The entry stored in @slots at the @offset.
+ */
+static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
+ unsigned char offset)
+{
+ return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
+}
+
+/*
+ * mas_slot() - Get the slot value when not holding the maple tree lock.
+ * @mas: The maple state
+ * @slots: The pointer to the slots
+ * @offset: The offset into the slots array to fetch
+ *
+ * Return: The entry stored in @slots at the @offset
+ */
+static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
+ unsigned char offset)
+{
+ return mt_slot(mas->tree, slots, offset);
+}
+
+/*
+ * mas_root() - Get the maple tree root.
+ * @mas: The maple state.
+ *
+ * Return: The pointer to the root of the tree
+ */
+static inline void *mas_root(struct ma_state *mas)
+{
+ return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
+}
+
+static inline void *mt_root_locked(struct maple_tree *mt)
+{
+ return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
+}
+
+/*
+ * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
+ * @mas: The maple state.
+ *
+ * Return: The pointer to the root of the tree
+ */
+static inline void *mas_root_locked(struct ma_state *mas)
+{
+ return mt_root_locked(mas->tree);
+}
+
+static inline struct maple_metadata *ma_meta(struct maple_node *mn,
+ enum maple_type mt)
+{
+ switch (mt) {
+ case maple_arange_64:
+ return &mn->ma64.meta;
+ default:
+ return &mn->mr64.meta;
+ }
+}
+
+/*
+ * ma_set_meta() - Set the metadata information of a node.
+ * @mn: The maple node
+ * @mt: The maple node type
+ * @offset: The offset of the highest sub-gap in this node.
+ * @end: The end of the data in this node.
+ */
+static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
+ unsigned char offset, unsigned char end)
+{
+ struct maple_metadata *meta = ma_meta(mn, mt);
+
+ meta->gap = offset;
+ meta->end = end;
+}
+
+/*
+ * ma_meta_end() - Get the data end of a node from the metadata
+ * @mn: The maple node
+ * @mt: The maple node type
+ */
+static inline unsigned char ma_meta_end(struct maple_node *mn,
+ enum maple_type mt)
+{
+ struct maple_metadata *meta = ma_meta(mn, mt);
+
+ return meta->end;
+}
+
+/*
+ * ma_meta_gap() - Get the largest gap location of a node from the metadata
+ * @mn: The maple node
+ * @mt: The maple node type
+ */
+static inline unsigned char ma_meta_gap(struct maple_node *mn,
+ enum maple_type mt)
+{
+ BUG_ON(mt != maple_arange_64);
+
+ return mn->ma64.meta.gap;
+}
+
+/*
+ * ma_set_meta_gap() - Set the largest gap location in a nodes metadata
+ * @mn: The maple node
+ * @mn: The maple node type
+ * @offset: The location of the largest gap.
+ */
+static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
+ unsigned char offset)
+{
+
+ struct maple_metadata *meta = ma_meta(mn, mt);
+
+ meta->gap = offset;
+}
+
+/*
+ * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
+ * @mat - the ma_topiary, a linked list of dead nodes.
+ * @dead_enode - the node to be marked as dead and added to the tail of the list
+ *
+ * Add the @dead_enode to the linked list in @mat.
+ */
+static inline void mat_add(struct ma_topiary *mat,
+ struct maple_enode *dead_enode)
+{
+ mte_set_node_dead(dead_enode);
+ mte_to_mat(dead_enode)->next = NULL;
+ if (!mat->tail) {
+ mat->tail = mat->head = dead_enode;
+ return;
+ }
+
+ mte_to_mat(mat->tail)->next = dead_enode;
+ mat->tail = dead_enode;
+}
+
+static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
+static inline void mas_free(struct ma_state *mas, struct maple_enode *used);
+
+/*
+ * mas_mat_free() - Free all nodes in a dead list.
+ * @mas - the maple state
+ * @mat - the ma_topiary linked list of dead nodes to free.
+ *
+ * Free walk a dead list.
+ */
+static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat)
+{
+ struct maple_enode *next;
+
+ while (mat->head) {
+ next = mte_to_mat(mat->head)->next;
+ mas_free(mas, mat->head);
+ mat->head = next;
+ }
+}
+
+/*
+ * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
+ * @mas - the maple state
+ * @mat - the ma_topiary linked list of dead nodes to free.
+ *
+ * Destroy walk a dead list.
+ */
+static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
+{
+ struct maple_enode *next;
+
+ while (mat->head) {
+ next = mte_to_mat(mat->head)->next;
+ mte_destroy_walk(mat->head, mat->mtree);
+ mat->head = next;
+ }
+}
+/*
+ * mas_descend() - Descend into the slot stored in the ma_state.
+ * @mas - the maple state.
+ *
+ * Note: Not RCU safe, only use in write side or debug code.
+ */
+static inline void mas_descend(struct ma_state *mas)
+{
+ enum maple_type type;
+ unsigned long *pivots;
+ struct maple_node *node;
+ void __rcu **slots;
+
+ node = mas_mn(mas);
+ type = mte_node_type(mas->node);
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+
+ if (mas->offset)
+ mas->min = pivots[mas->offset - 1] + 1;
+ mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
+ mas->node = mas_slot(mas, slots, mas->offset);
+}
+
+/*
+ * mte_set_gap() - Set a maple node gap.
+ * @mn: The encoded maple node
+ * @gap: The offset of the gap to set
+ * @val: The gap value
+ */
+static inline void mte_set_gap(const struct maple_enode *mn,
+ unsigned char gap, unsigned long val)
+{
+ switch (mte_node_type(mn)) {
+ default:
+ break;
+ case maple_arange_64:
+ mte_to_node(mn)->ma64.gap[gap] = val;
+ break;
+ }
+}
+
+/*
+ * mas_ascend() - Walk up a level of the tree.
+ * @mas: The maple state
+ *
+ * Sets the @mas->max and @mas->min to the correct values when walking up. This
+ * may cause several levels of walking up to find the correct min and max.
+ * May find a dead node which will cause a premature return.
+ * Return: 1 on dead node, 0 otherwise
+ */
+static int mas_ascend(struct ma_state *mas)
+{
+ struct maple_enode *p_enode; /* parent enode. */
+ struct maple_enode *a_enode; /* ancestor enode. */
+ struct maple_node *a_node; /* ancestor node. */
+ struct maple_node *p_node; /* parent node. */
+ unsigned char a_slot;
+ enum maple_type a_type;
+ unsigned long min, max;
+ unsigned long *pivots;
+ unsigned char offset;
+ bool set_max = false, set_min = false;
+
+ a_node = mas_mn(mas);
+ if (ma_is_root(a_node)) {
+ mas->offset = 0;
+ return 0;
+ }
+
+ p_node = mte_parent(mas->node);
+ if (unlikely(a_node == p_node))
+ return 1;
+ a_type = mas_parent_enum(mas, mas->node);
+ offset = mte_parent_slot(mas->node);
+ a_enode = mt_mk_node(p_node, a_type);
+
+ /* Check to make sure all parent information is still accurate */
+ if (p_node != mte_parent(mas->node))
+ return 1;
+
+ mas->node = a_enode;
+ mas->offset = offset;
+
+ if (mte_is_root(a_enode)) {
+ mas->max = ULONG_MAX;
+ mas->min = 0;
+ return 0;
+ }
+
+ min = 0;
+ max = ULONG_MAX;
+ do {
+ p_enode = a_enode;
+ a_type = mas_parent_enum(mas, p_enode);
+ a_node = mte_parent(p_enode);
+ a_slot = mte_parent_slot(p_enode);
+ pivots = ma_pivots(a_node, a_type);
+ a_enode = mt_mk_node(a_node, a_type);
+
+ if (!set_min && a_slot) {
+ set_min = true;
+ min = pivots[a_slot - 1] + 1;
+ }
+
+ if (!set_max && a_slot < mt_pivots[a_type]) {
+ set_max = true;
+ max = pivots[a_slot];
+ }
+
+ if (unlikely(ma_dead_node(a_node)))
+ return 1;
+
+ if (unlikely(ma_is_root(a_node)))
+ break;
+
+ } while (!set_min || !set_max);
+
+ mas->max = max;
+ mas->min = min;
+ return 0;
+}
+
+/*
+ * mas_pop_node() - Get a previously allocated maple node from the maple state.
+ * @mas: The maple state
+ *
+ * Return: A pointer to a maple node.
+ */
+static inline struct maple_node *mas_pop_node(struct ma_state *mas)
+{
+ struct maple_alloc *ret, *node = mas->alloc;
+ unsigned long total = mas_allocated(mas);
+
+ /* nothing or a request pending. */
+ if (unlikely(!total))
+ return NULL;
+
+ if (total == 1) {
+ /* single allocation in this ma_state */
+ mas->alloc = NULL;
+ ret = node;
+ goto single_node;
+ }
+
+ if (!node->node_count) {
+ /* Single allocation in this node. */
+ mas->alloc = node->slot[0];
+ node->slot[0] = NULL;
+ mas->alloc->total = node->total - 1;
+ ret = node;
+ goto new_head;
+ }
+
+ node->total--;
+ ret = node->slot[node->node_count];
+ node->slot[node->node_count--] = NULL;
+
+single_node:
+new_head:
+ ret->total = 0;
+ ret->node_count = 0;
+ if (ret->request_count) {
+ mas_set_alloc_req(mas, ret->request_count + 1);
+ ret->request_count = 0;
+ }
+ return (struct maple_node *)ret;
+}
+
+/*
+ * mas_push_node() - Push a node back on the maple state allocation.
+ * @mas: The maple state
+ * @used: The used maple node
+ *
+ * Stores the maple node back into @mas->alloc for reuse. Updates allocated and
+ * requested node count as necessary.
+ */
+static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
+{
+ struct maple_alloc *reuse = (struct maple_alloc *)used;
+ struct maple_alloc *head = mas->alloc;
+ unsigned long count;
+ unsigned int requested = mas_alloc_req(mas);
+
+ memset(reuse, 0, sizeof(*reuse));
+ count = mas_allocated(mas);
+
+ if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
+ if (head->slot[0])
+ head->node_count++;
+ head->slot[head->node_count] = reuse;
+ head->total++;
+ goto done;
+ }
+
+ reuse->total = 1;
+ if ((head) && !((unsigned long)head & 0x1)) {
+ head->request_count = 0;
+ reuse->slot[0] = head;
+ reuse->total += head->total;
+ }
+
+ mas->alloc = reuse;
+done:
+ if (requested > 1)
+ mas_set_alloc_req(mas, requested - 1);
+}
+
+/*
+ * mas_alloc_nodes() - Allocate nodes into a maple state
+ * @mas: The maple state
+ * @gfp: The GFP Flags
+ */
+static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
+{
+ struct maple_alloc *node;
+ unsigned long allocated = mas_allocated(mas);
+ unsigned long success = allocated;
+ unsigned int requested = mas_alloc_req(mas);
+ unsigned int count;
+ void **slots = NULL;
+ unsigned int max_req = 0;
+
+ if (!requested)
+ return;
+
+ mas_set_alloc_req(mas, 0);
+ if (mas->mas_flags & MA_STATE_PREALLOC) {
+ if (allocated)
+ return;
+ WARN_ON(!allocated);
+ }
+
+ if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
+ node = (struct maple_alloc *)mt_alloc_one(gfp);
+ if (!node)
+ goto nomem_one;
+
+ if (allocated)
+ node->slot[0] = mas->alloc;
+
+ success++;
+ mas->alloc = node;
+ requested--;
+ }
+
+ node = mas->alloc;
+ while (requested) {
+ max_req = MAPLE_ALLOC_SLOTS;
+ if (node->slot[0]) {
+ unsigned int offset = node->node_count + 1;
+
+ slots = (void **)&node->slot[offset];
+ max_req -= offset;
+ } else {
+ slots = (void **)&node->slot;
+ }
+
+ max_req = min(requested, max_req);
+ count = mt_alloc_bulk(gfp, max_req, slots);
+ if (!count)
+ goto nomem_bulk;
+
+ node->node_count += count;
+ /* zero indexed. */
+ if (slots == (void **)&node->slot)
+ node->node_count--;
+
+ success += count;
+ node = node->slot[0];
+ requested -= count;
+ }
+ mas->alloc->total = success;
+ return;
+
+nomem_bulk:
+ /* Clean up potential freed allocations on bulk failure */
+ memset(slots, 0, max_req * sizeof(unsigned long));
+nomem_one:
+ mas_set_alloc_req(mas, requested);
+ if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
+ mas->alloc->total = success;
+ mas_set_err(mas, -ENOMEM);
+ return;
+
+}
+
+/*
+ * mas_free() - Free an encoded maple node
+ * @mas: The maple state
+ * @used: The encoded maple node to free.
+ *
+ * Uses rcu free if necessary, pushes @used back on the maple state allocations
+ * otherwise.
+ */
+static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
+{
+ struct maple_node *tmp = mte_to_node(used);
+
+ if (mt_in_rcu(mas->tree))
+ ma_free_rcu(tmp);
+ else
+ mas_push_node(mas, tmp);
+}
+
+/*
+ * mas_node_count() - Check if enough nodes are allocated and request more if
+ * there is not enough nodes.
+ * @mas: The maple state
+ * @count: The number of nodes needed
+ * @gfp: the gfp flags
+ */
+static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
+{
+ unsigned long allocated = mas_allocated(mas);
+
+ if (allocated < count) {
+ mas_set_alloc_req(mas, count - allocated);
+ mas_alloc_nodes(mas, gfp);
+ }
+}
+
+/*
+ * mas_node_count() - Check if enough nodes are allocated and request more if
+ * there is not enough nodes.
+ * @mas: The maple state
+ * @count: The number of nodes needed
+ *
+ * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
+ */
+static void mas_node_count(struct ma_state *mas, int count)
+{
+ return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
+}
+
+/*
+ * mas_start() - Sets up maple state for operations.
+ * @mas: The maple state.
+ *
+ * If mas->node == MAS_START, then set the min, max, depth, and offset to
+ * defaults.
+ *
+ * Return:
+ * - If mas->node is an error or not MAS_START, return NULL.
+ * - If it's an empty tree: NULL & mas->node == MAS_NONE
+ * - If it's a single entry: The entry & mas->node == MAS_ROOT
+ * - If it's a tree: NULL & mas->node == safe root node.
+ */
+static inline struct maple_enode *mas_start(struct ma_state *mas)
+{
+ if (likely(mas_is_start(mas))) {
+ struct maple_enode *root;
+
+ mas->node = MAS_NONE;
+ mas->min = 0;
+ mas->max = ULONG_MAX;
+ mas->depth = 0;
+ mas->offset = 0;
+
+ root = mas_root(mas);
+ /* Tree with nodes */
+ if (likely(xa_is_node(root))) {
+ mas->depth = 1;
+ mas->node = mte_safe_root(root);
+ return NULL;
+ }
+
+ /* empty tree */
+ if (unlikely(!root)) {
+ mas->offset = MAPLE_NODE_SLOTS;
+ return NULL;
+ }
+
+ /* Single entry tree */
+ mas->node = MAS_ROOT;
+ mas->offset = MAPLE_NODE_SLOTS;
+
+ /* Single entry tree. */
+ if (mas->index > 0)
+ return NULL;
+
+ return root;
+ }
+
+ return NULL;
+}
+
+/*
+ * ma_data_end() - Find the end of the data in a node.
+ * @node: The maple node
+ * @type: The maple node type
+ * @pivots: The array of pivots in the node
+ * @max: The maximum value in the node
+ *
+ * Uses metadata to find the end of the data when possible.
+ * Return: The zero indexed last slot with data (may be null).
+ */
+static inline unsigned char ma_data_end(struct maple_node *node,
+ enum maple_type type,
+ unsigned long *pivots,
+ unsigned long max)
+{
+ unsigned char offset;
+
+ if (type == maple_arange_64)
+ return ma_meta_end(node, type);
+
+ offset = mt_pivots[type] - 1;
+ if (likely(!pivots[offset]))
+ return ma_meta_end(node, type);
+
+ if (likely(pivots[offset] == max))
+ return offset;
+
+ return mt_pivots[type];
+}
+
+/*
+ * mas_data_end() - Find the end of the data (slot).
+ * @mas: the maple state
+ *
+ * This method is optimized to check the metadata of a node if the node type
+ * supports data end metadata.
+ *
+ * Return: The zero indexed last slot with data (may be null).
+ */
+static inline unsigned char mas_data_end(struct ma_state *mas)
+{
+ enum maple_type type;
+ struct maple_node *node;
+ unsigned char offset;
+ unsigned long *pivots;
+
+ type = mte_node_type(mas->node);
+ node = mas_mn(mas);
+ if (type == maple_arange_64)
+ return ma_meta_end(node, type);
+
+ pivots = ma_pivots(node, type);
+ offset = mt_pivots[type] - 1;
+ if (likely(!pivots[offset]))
+ return ma_meta_end(node, type);
+
+ if (likely(pivots[offset] == mas->max))
+ return offset;
+
+ return mt_pivots[type];
+}
+
+/*
+ * mas_leaf_max_gap() - Returns the largest gap in a leaf node
+ * @mas - the maple state
+ *
+ * Return: The maximum gap in the leaf.
+ */
+static unsigned long mas_leaf_max_gap(struct ma_state *mas)
+{
+ enum maple_type mt;
+ unsigned long pstart, gap, max_gap;
+ struct maple_node *mn;
+ unsigned long *pivots;
+ void __rcu **slots;
+ unsigned char i;
+ unsigned char max_piv;
+
+ mt = mte_node_type(mas->node);
+ mn = mas_mn(mas);
+ slots = ma_slots(mn, mt);
+ max_gap = 0;
+ if (unlikely(ma_is_dense(mt))) {
+ gap = 0;
+ for (i = 0; i < mt_slots[mt]; i++) {
+ if (slots[i]) {
+ if (gap > max_gap)
+ max_gap = gap;
+ gap = 0;
+ } else {
+ gap++;
+ }
+ }
+ if (gap > max_gap)
+ max_gap = gap;
+ return max_gap;
+ }
+
+ /*
+ * Check the first implied pivot optimizes the loop below and slot 1 may
+ * be skipped if there is a gap in slot 0.
+ */
+ pivots = ma_pivots(mn, mt);
+ if (likely(!slots[0])) {
+ max_gap = pivots[0] - mas->min + 1;
+ i = 2;
+ } else {
+ i = 1;
+ }
+
+ /* reduce max_piv as the special case is checked before the loop */
+ max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
+ /*
+ * Check end implied pivot which can only be a gap on the right most
+ * node.
+ */
+ if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
+ gap = ULONG_MAX - pivots[max_piv];
+ if (gap > max_gap)
+ max_gap = gap;
+ }
+
+ for (; i <= max_piv; i++) {
+ /* data == no gap. */
+ if (likely(slots[i]))
+ continue;
+
+ pstart = pivots[i - 1];
+ gap = pivots[i] - pstart;
+ if (gap > max_gap)
+ max_gap = gap;
+
+ /* There cannot be two gaps in a row. */
+ i++;
+ }
+ return max_gap;
+}
+
+/*
+ * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
+ * @node: The maple node
+ * @gaps: The pointer to the gaps
+ * @mt: The maple node type
+ * @*off: Pointer to store the offset location of the gap.
+ *
+ * Uses the metadata data end to scan backwards across set gaps.
+ *
+ * Return: The maximum gap value
+ */
+static inline unsigned long
+ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
+ unsigned char *off)
+{
+ unsigned char offset, i;
+ unsigned long max_gap = 0;
+
+ i = offset = ma_meta_end(node, mt);
+ do {
+ if (gaps[i] > max_gap) {
+ max_gap = gaps[i];
+ offset = i;
+ }
+ } while (i--);
+
+ *off = offset;
+ return max_gap;
+}
+
+/*
+ * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
+ * @mas: The maple state.
+ *
+ * If the metadata gap is set to MAPLE_ARANGE64_META_MAX, there is no gap.
+ *
+ * Return: The gap value.
+ */
+static inline unsigned long mas_max_gap(struct ma_state *mas)
+{
+ unsigned long *gaps;
+ unsigned char offset;
+ enum maple_type mt;
+ struct maple_node *node;
+
+ mt = mte_node_type(mas->node);
+ if (ma_is_leaf(mt))
+ return mas_leaf_max_gap(mas);
+
+ node = mas_mn(mas);
+ offset = ma_meta_gap(node, mt);
+ if (offset == MAPLE_ARANGE64_META_MAX)
+ return 0;
+
+ gaps = ma_gaps(node, mt);
+ return gaps[offset];
+}
+
+/*
+ * mas_parent_gap() - Set the parent gap and any gaps above, as needed
+ * @mas: The maple state
+ * @offset: The gap offset in the parent to set
+ * @new: The new gap value.
+ *
+ * Set the parent gap then continue to set the gap upwards, using the metadata
+ * of the parent to see if it is necessary to check the node above.
+ */
+static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
+ unsigned long new)
+{
+ unsigned long meta_gap = 0;
+ struct maple_node *pnode;
+ struct maple_enode *penode;
+ unsigned long *pgaps;
+ unsigned char meta_offset;
+ enum maple_type pmt;
+
+ pnode = mte_parent(mas->node);
+ pmt = mas_parent_enum(mas, mas->node);
+ penode = mt_mk_node(pnode, pmt);
+ pgaps = ma_gaps(pnode, pmt);
+
+ascend:
+ meta_offset = ma_meta_gap(pnode, pmt);
+ if (meta_offset == MAPLE_ARANGE64_META_MAX)
+ meta_gap = 0;
+ else
+ meta_gap = pgaps[meta_offset];
+
+ pgaps[offset] = new;
+
+ if (meta_gap == new)
+ return;
+
+ if (offset != meta_offset) {
+ if (meta_gap > new)
+ return;
+
+ ma_set_meta_gap(pnode, pmt, offset);
+ } else if (new < meta_gap) {
+ meta_offset = 15;
+ new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
+ ma_set_meta_gap(pnode, pmt, meta_offset);
+ }
+
+ if (ma_is_root(pnode))
+ return;
+
+ /* Go to the parent node. */
+ pnode = mte_parent(penode);
+ pmt = mas_parent_enum(mas, penode);
+ pgaps = ma_gaps(pnode, pmt);
+ offset = mte_parent_slot(penode);
+ penode = mt_mk_node(pnode, pmt);
+ goto ascend;
+}
+
+/*
+ * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
+ * @mas - the maple state.
+ */
+static inline void mas_update_gap(struct ma_state *mas)
+{
+ unsigned char pslot;
+ unsigned long p_gap;
+ unsigned long max_gap;
+
+ if (!mt_is_alloc(mas->tree))
+ return;
+
+ if (mte_is_root(mas->node))
+ return;
+
+ max_gap = mas_max_gap(mas);
+
+ pslot = mte_parent_slot(mas->node);
+ p_gap = ma_gaps(mte_parent(mas->node),
+ mas_parent_enum(mas, mas->node))[pslot];
+
+ if (p_gap != max_gap)
+ mas_parent_gap(mas, pslot, max_gap);
+}
+
+/*
+ * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
+ * @parent with the slot encoded.
+ * @mas - the maple state (for the tree)
+ * @parent - the maple encoded node containing the children.
+ */
+static inline void mas_adopt_children(struct ma_state *mas,
+ struct maple_enode *parent)
+{
+ enum maple_type type = mte_node_type(parent);
+ struct maple_node *node = mas_mn(mas);
+ void __rcu **slots = ma_slots(node, type);
+ unsigned long *pivots = ma_pivots(node, type);
+ struct maple_enode *child;
+ unsigned char offset;
+
+ offset = ma_data_end(node, type, pivots, mas->max);
+ do {
+ child = mas_slot_locked(mas, slots, offset);
+ mte_set_parent(child, parent, offset);
+ } while (offset--);
+}
+
+/*
+ * mas_replace() - Replace a maple node in the tree with mas->node. Uses the
+ * parent encoding to locate the maple node in the tree.
+ * @mas - the ma_state to use for operations.
+ * @advanced - boolean to adopt the child nodes and free the old node (false) or
+ * leave the node (true) and handle the adoption and free elsewhere.
+ */
+static inline void mas_replace(struct ma_state *mas, bool advanced)
+ __must_hold(mas->tree->lock)
+{
+ struct maple_node *mn = mas_mn(mas);
+ struct maple_enode *old_enode;
+ unsigned char offset = 0;
+ void __rcu **slots = NULL;
+
+ if (ma_is_root(mn)) {
+ old_enode = mas_root_locked(mas);
+ } else {
+ offset = mte_parent_slot(mas->node);
+ slots = ma_slots(mte_parent(mas->node),
+ mas_parent_enum(mas, mas->node));
+ old_enode = mas_slot_locked(mas, slots, offset);
+ }
+
+ if (!advanced && !mte_is_leaf(mas->node))
+ mas_adopt_children(mas, mas->node);
+
+ if (mte_is_root(mas->node)) {
+ mn->parent = ma_parent_ptr(
+ ((unsigned long)mas->tree | MA_ROOT_PARENT));
+ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
+ mas_set_height(mas);
+ } else {
+ rcu_assign_pointer(slots[offset], mas->node);
+ }
+
+ if (!advanced)
+ mas_free(mas, old_enode);
+}
+
+/*
+ * mas_new_child() - Find the new child of a node.
+ * @mas: the maple state
+ * @child: the maple state to store the child.
+ */
+static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child)
+ __must_hold(mas->tree->lock)
+{
+ enum maple_type mt;
+ unsigned char offset;
+ unsigned char end;
+ unsigned long *pivots;
+ struct maple_enode *entry;
+ struct maple_node *node;
+ void __rcu **slots;
+
+ mt = mte_node_type(mas->node);
+ node = mas_mn(mas);
+ slots = ma_slots(node, mt);
+ pivots = ma_pivots(node, mt);
+ end = ma_data_end(node, mt, pivots, mas->max);
+ for (offset = mas->offset; offset <= end; offset++) {
+ entry = mas_slot_locked(mas, slots, offset);
+ if (mte_parent(entry) == node) {
+ *child = *mas;
+ mas->offset = offset + 1;
+ child->offset = offset;
+ mas_descend(child);
+ child->offset = 0;
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
+ * old data or set b_node->b_end.
+ * @b_node: the maple_big_node
+ * @shift: the shift count
+ */
+static inline void mab_shift_right(struct maple_big_node *b_node,
+ unsigned char shift)
+{
+ unsigned long size = b_node->b_end * sizeof(unsigned long);
+
+ memmove(b_node->pivot + shift, b_node->pivot, size);
+ memmove(b_node->slot + shift, b_node->slot, size);
+ if (b_node->type == maple_arange_64)
+ memmove(b_node->gap + shift, b_node->gap, size);
+}
+
+/*
+ * mab_middle_node() - Check if a middle node is needed (unlikely)
+ * @b_node: the maple_big_node that contains the data.
+ * @size: the amount of data in the b_node
+ * @split: the potential split location
+ * @slot_count: the size that can be stored in a single node being considered.
+ *
+ * Return: true if a middle node is required.
+ */
+static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
+ unsigned char slot_count)
+{
+ unsigned char size = b_node->b_end;
+
+ if (size >= 2 * slot_count)
+ return true;
+
+ if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
+ return true;
+
+ return false;
+}
+
+/*
+ * mab_no_null_split() - ensure the split doesn't fall on a NULL
+ * @b_node: the maple_big_node with the data
+ * @split: the suggested split location
+ * @slot_count: the number of slots in the node being considered.
+ *
+ * Return: the split location.
+ */
+static inline int mab_no_null_split(struct maple_big_node *b_node,
+ unsigned char split, unsigned char slot_count)
+{
+ if (!b_node->slot[split]) {
+ /*
+ * If the split is less than the max slot && the right side will
+ * still be sufficient, then increment the split on NULL.
+ */
+ if ((split < slot_count - 1) &&
+ (b_node->b_end - split) > (mt_min_slots[b_node->type]))
+ split++;
+ else
+ split--;
+ }
+ return split;
+}
+
+/*
+ * mab_calc_split() - Calculate the split location and if there needs to be two
+ * splits.
+ * @bn: The maple_big_node with the data
+ * @mid_split: The second split, if required. 0 otherwise.
+ *
+ * Return: The first split location. The middle split is set in @mid_split.
+ */
+static inline int mab_calc_split(struct ma_state *mas,
+ struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
+{
+ unsigned char b_end = bn->b_end;
+ int split = b_end / 2; /* Assume equal split. */
+ unsigned char slot_min, slot_count = mt_slots[bn->type];
+
+ /*
+ * To support gap tracking, all NULL entries are kept together and a node cannot
+ * end on a NULL entry, with the exception of the left-most leaf. The
+ * limitation means that the split of a node must be checked for this condition
+ * and be able to put more data in one direction or the other.
+ */
+ if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
+ *mid_split = 0;
+ split = b_end - mt_min_slots[bn->type];
+
+ if (!ma_is_leaf(bn->type))
+ return split;
+
+ mas->mas_flags |= MA_STATE_REBALANCE;
+ if (!bn->slot[split])
+ split--;
+ return split;
+ }
+
+ /*
+ * Although extremely rare, it is possible to enter what is known as the 3-way
+ * split scenario. The 3-way split comes about by means of a store of a range
+ * that overwrites the end and beginning of two full nodes. The result is a set
+ * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can
+ * also be located in different parent nodes which are also full. This can
+ * carry upwards all the way to the root in the worst case.
+ */
+ if (unlikely(mab_middle_node(bn, split, slot_count))) {
+ split = b_end / 3;
+ *mid_split = split * 2;
+ } else {
+ slot_min = mt_min_slots[bn->type];
+
+ *mid_split = 0;
+ /*
+ * Avoid having a range less than the slot count unless it
+ * causes one node to be deficient.
+ * NOTE: mt_min_slots is 1 based, b_end and split are zero.
+ */
+ while (((bn->pivot[split] - min) < slot_count - 1) &&
+ (split < slot_count - 1) && (b_end - split > slot_min))
+ split++;
+ }
+
+ /* Avoid ending a node on a NULL entry */
+ split = mab_no_null_split(bn, split, slot_count);
+ if (!(*mid_split))
+ return split;
+
+ *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
+
+ return split;
+}
+
+/*
+ * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
+ * and set @b_node->b_end to the next free slot.
+ * @mas: The maple state
+ * @mas_start: The starting slot to copy
+ * @mas_end: The end slot to copy (inclusively)
+ * @b_node: The maple_big_node to place the data
+ * @mab_start: The starting location in maple_big_node to store the data.
+ */
+static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
+ unsigned char mas_end, struct maple_big_node *b_node,
+ unsigned char mab_start)
+{
+ enum maple_type mt;
+ struct maple_node *node;
+ void __rcu **slots;
+ unsigned long *pivots, *gaps;
+ int i = mas_start, j = mab_start;
+ unsigned char piv_end;
+
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ pivots = ma_pivots(node, mt);
+ if (!i) {
+ b_node->pivot[j] = pivots[i++];
+ if (unlikely(i > mas_end))
+ goto complete;
+ j++;
+ }
+
+ piv_end = min(mas_end, mt_pivots[mt]);
+ for (; i < piv_end; i++, j++) {
+ b_node->pivot[j] = pivots[i];
+ if (unlikely(!b_node->pivot[j]))
+ break;
+
+ if (unlikely(mas->max == b_node->pivot[j]))
+ goto complete;
+ }
+
+ if (likely(i <= mas_end))
+ b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
+
+complete:
+ b_node->b_end = ++j;
+ j -= mab_start;
+ slots = ma_slots(node, mt);
+ memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
+ if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
+ gaps = ma_gaps(node, mt);
+ memcpy(b_node->gap + mab_start, gaps + mas_start,
+ sizeof(unsigned long) * j);
+ }
+}
+
+/*
+ * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
+ * @mas: The maple state
+ * @node: The maple node
+ * @pivots: pointer to the maple node pivots
+ * @mt: The maple type
+ * @end: The assumed end
+ *
+ * Note, end may be incremented within this function but not modified at the
+ * source. This is fine since the metadata is the last thing to be stored in a
+ * node during a write.
+ */
+static inline void mas_leaf_set_meta(struct ma_state *mas,
+ struct maple_node *node, unsigned long *pivots,
+ enum maple_type mt, unsigned char end)
+{
+ /* There is no room for metadata already */
+ if (mt_pivots[mt] <= end)
+ return;
+
+ if (pivots[end] && pivots[end] < mas->max)
+ end++;
+
+ if (end < mt_slots[mt] - 1)
+ ma_set_meta(node, mt, 0, end);
+}
+
+/*
+ * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
+ * @b_node: the maple_big_node that has the data
+ * @mab_start: the start location in @b_node.
+ * @mab_end: The end location in @b_node (inclusively)
+ * @mas: The maple state with the maple encoded node.
+ */
+static inline void mab_mas_cp(struct maple_big_node *b_node,
+ unsigned char mab_start, unsigned char mab_end,
+ struct ma_state *mas, bool new_max)
+{
+ int i, j = 0;
+ enum maple_type mt = mte_node_type(mas->node);
+ struct maple_node *node = mte_to_node(mas->node);
+ void __rcu **slots = ma_slots(node, mt);
+ unsigned long *pivots = ma_pivots(node, mt);
+ unsigned long *gaps = NULL;
+ unsigned char end;
+
+ if (mab_end - mab_start > mt_pivots[mt])
+ mab_end--;
+
+ if (!pivots[mt_pivots[mt] - 1])
+ slots[mt_pivots[mt]] = NULL;
+
+ i = mab_start;
+ do {
+ pivots[j++] = b_node->pivot[i++];
+ } while (i <= mab_end && likely(b_node->pivot[i]));
+
+ memcpy(slots, b_node->slot + mab_start,
+ sizeof(void *) * (i - mab_start));
+
+ if (new_max)
+ mas->max = b_node->pivot[i - 1];
+
+ end = j - 1;
+ if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
+ unsigned long max_gap = 0;
+ unsigned char offset = 15;
+
+ gaps = ma_gaps(node, mt);
+ do {
+ gaps[--j] = b_node->gap[--i];
+ if (gaps[j] > max_gap) {
+ offset = j;
+ max_gap = gaps[j];
+ }
+ } while (j);
+
+ ma_set_meta(node, mt, offset, end);
+ } else {
+ mas_leaf_set_meta(mas, node, pivots, mt, end);
+ }
+}
+
+/*
+ * mas_descend_adopt() - Descend through a sub-tree and adopt children.
+ * @mas: the maple state with the maple encoded node of the sub-tree.
+ *
+ * Descend through a sub-tree and adopt children who do not have the correct
+ * parents set. Follow the parents which have the correct parents as they are
+ * the new entries which need to be followed to find other incorrectly set
+ * parents.
+ */
+static inline void mas_descend_adopt(struct ma_state *mas)
+{
+ struct ma_state list[3], next[3];
+ int i, n;
+
+ /*
+ * At each level there may be up to 3 correct parent pointers which indicates
+ * the new nodes which need to be walked to find any new nodes at a lower level.
+ */
+
+ for (i = 0; i < 3; i++) {
+ list[i] = *mas;
+ list[i].offset = 0;
+ next[i].offset = 0;
+ }
+ next[0] = *mas;
+
+ while (!mte_is_leaf(list[0].node)) {
+ n = 0;
+ for (i = 0; i < 3; i++) {
+ if (mas_is_none(&list[i]))
+ continue;
+
+ if (i && list[i-1].node == list[i].node)
+ continue;
+
+ while ((n < 3) && (mas_new_child(&list[i], &next[n])))
+ n++;
+
+ mas_adopt_children(&list[i], list[i].node);
+ }
+
+ while (n < 3)
+ next[n++].node = MAS_NONE;
+
+ /* descend by setting the list to the children */
+ for (i = 0; i < 3; i++)
+ list[i] = next[i];
+ }
+}
+
+/*
+ * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
+ * @mas: The maple state
+ * @end: The maple node end
+ * @mt: The maple node type
+ */
+static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
+ enum maple_type mt)
+{
+ if (!(mas->mas_flags & MA_STATE_BULK))
+ return;
+
+ if (mte_is_root(mas->node))
+ return;
+
+ if (end > mt_min_slots[mt]) {
+ mas->mas_flags &= ~MA_STATE_REBALANCE;
+ return;
+ }
+}
+
+/*
+ * mas_store_b_node() - Store an @entry into the b_node while also copying the
+ * data from a maple encoded node.
+ * @wr_mas: the maple write state
+ * @b_node: the maple_big_node to fill with data
+ * @offset_end: the offset to end copying
+ *
+ * Return: The actual end of the data stored in @b_node
+ */
+static inline void mas_store_b_node(struct ma_wr_state *wr_mas,
+ struct maple_big_node *b_node, unsigned char offset_end)
+{
+ unsigned char slot;
+ unsigned char b_end;
+ /* Possible underflow of piv will wrap back to 0 before use. */
+ unsigned long piv;
+ struct ma_state *mas = wr_mas->mas;
+
+ b_node->type = wr_mas->type;
+ b_end = 0;
+ slot = mas->offset;
+ if (slot) {
+ /* Copy start data up to insert. */
+ mas_mab_cp(mas, 0, slot - 1, b_node, 0);
+ b_end = b_node->b_end;
+ piv = b_node->pivot[b_end - 1];
+ } else
+ piv = mas->min - 1;
+
+ if (piv + 1 < mas->index) {
+ /* Handle range starting after old range */
+ b_node->slot[b_end] = wr_mas->content;
+ if (!wr_mas->content)
+ b_node->gap[b_end] = mas->index - 1 - piv;
+ b_node->pivot[b_end++] = mas->index - 1;
+ }
+
+ /* Store the new entry. */
+ mas->offset = b_end;
+ b_node->slot[b_end] = wr_mas->entry;
+ b_node->pivot[b_end] = mas->last;
+
+ /* Appended. */
+ if (mas->last >= mas->max)
+ goto b_end;
+
+ /* Handle new range ending before old range ends */
+ piv = mas_logical_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
+ if (piv > mas->last) {
+ if (piv == ULONG_MAX)
+ mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
+
+ if (offset_end != slot)
+ wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
+ offset_end);
+
+ b_node->slot[++b_end] = wr_mas->content;
+ if (!wr_mas->content)
+ b_node->gap[b_end] = piv - mas->last + 1;
+ b_node->pivot[b_end] = piv;
+ }
+
+ slot = offset_end + 1;
+ if (slot > wr_mas->node_end)
+ goto b_end;
+
+ /* Copy end data to the end of the node. */
+ mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end);
+ b_node->b_end--;
+ return;
+
+b_end:
+ b_node->b_end = b_end;
+}
+
+/*
+ * mas_prev_sibling() - Find the previous node with the same parent.
+ * @mas: the maple state
+ *
+ * Return: True if there is a previous sibling, false otherwise.
+ */
+static inline bool mas_prev_sibling(struct ma_state *mas)
+{
+ unsigned int p_slot = mte_parent_slot(mas->node);
+
+ if (mte_is_root(mas->node))
+ return false;
+
+ if (!p_slot)
+ return false;
+
+ mas_ascend(mas);
+ mas->offset = p_slot - 1;
+ mas_descend(mas);
+ return true;
+}
+
+/*
+ * mas_next_sibling() - Find the next node with the same parent.
+ * @mas: the maple state
+ *
+ * Return: true if there is a next sibling, false otherwise.
+ */
+static inline bool mas_next_sibling(struct ma_state *mas)
+{
+ MA_STATE(parent, mas->tree, mas->index, mas->last);
+
+ if (mte_is_root(mas->node))
+ return false;
+
+ parent = *mas;
+ mas_ascend(&parent);
+ parent.offset = mte_parent_slot(mas->node) + 1;
+ if (parent.offset > mas_data_end(&parent))
+ return false;
+
+ *mas = parent;
+ mas_descend(mas);
+ return true;
+}
+
+/*
+ * mte_node_or_node() - Return the encoded node or MAS_NONE.
+ * @enode: The encoded maple node.
+ *
+ * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state.
+ *
+ * Return: @enode or MAS_NONE
+ */
+static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
+{
+ if (enode)
+ return enode;
+
+ return ma_enode_ptr(MAS_NONE);
+}
+
+/*
+ * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
+ * @wr_mas: The maple write state
+ *
+ * Uses mas_slot_locked() and does not need to worry about dead nodes.
+ */
+static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+ unsigned char count;
+ unsigned char offset;
+ unsigned long index, min, max;
+
+ if (unlikely(ma_is_dense(wr_mas->type))) {
+ wr_mas->r_max = wr_mas->r_min = mas->index;
+ mas->offset = mas->index = mas->min;
+ return;
+ }
+
+ wr_mas->node = mas_mn(wr_mas->mas);
+ wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
+ count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type,
+ wr_mas->pivots, mas->max);
+ offset = mas->offset;
+ min = mas_safe_min(mas, wr_mas->pivots, offset);
+ if (unlikely(offset == count))
+ goto max;
+
+ max = wr_mas->pivots[offset];
+ index = mas->index;
+ if (unlikely(index <= max))
+ goto done;
+
+ if (unlikely(!max && offset))
+ goto max;
+
+ min = max + 1;
+ while (++offset < count) {
+ max = wr_mas->pivots[offset];
+ if (index <= max)
+ goto done;
+ else if (unlikely(!max))
+ break;
+
+ min = max + 1;
+ }
+
+max:
+ max = mas->max;
+done:
+ wr_mas->r_max = max;
+ wr_mas->r_min = min;
+ wr_mas->offset_end = mas->offset = offset;
+}
+
+/*
+ * mas_topiary_range() - Add a range of slots to the topiary.
+ * @mas: The maple state
+ * @destroy: The topiary to add the slots (usually destroy)
+ * @start: The starting slot inclusively
+ * @end: The end slot inclusively
+ */
+static inline void mas_topiary_range(struct ma_state *mas,
+ struct ma_topiary *destroy, unsigned char start, unsigned char end)
+{
+ void __rcu **slots;
+ unsigned char offset;
+
+ MT_BUG_ON(mas->tree, mte_is_leaf(mas->node));
+ slots = ma_slots(mas_mn(mas), mte_node_type(mas->node));
+ for (offset = start; offset <= end; offset++) {
+ struct maple_enode *enode = mas_slot_locked(mas, slots, offset);
+
+ if (mte_dead_node(enode))
+ continue;
+
+ mat_add(destroy, enode);
+ }
+}
+
+/*
+ * mast_topiary() - Add the portions of the tree to the removal list; either to
+ * be freed or discarded (destroy walk).
+ * @mast: The maple_subtree_state.
+ */
+static inline void mast_topiary(struct maple_subtree_state *mast)
+{
+ MA_WR_STATE(wr_mas, mast->orig_l, NULL);
+ unsigned char r_start, r_end;
+ unsigned char l_start, l_end;
+ void __rcu **l_slots, **r_slots;
+
+ wr_mas.type = mte_node_type(mast->orig_l->node);
+ mast->orig_l->index = mast->orig_l->last;
+ mas_wr_node_walk(&wr_mas);
+ l_start = mast->orig_l->offset + 1;
+ l_end = mas_data_end(mast->orig_l);
+ r_start = 0;
+ r_end = mast->orig_r->offset;
+
+ if (r_end)
+ r_end--;
+
+ l_slots = ma_slots(mas_mn(mast->orig_l),
+ mte_node_type(mast->orig_l->node));
+
+ r_slots = ma_slots(mas_mn(mast->orig_r),
+ mte_node_type(mast->orig_r->node));
+
+ if ((l_start < l_end) &&
+ mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) {
+ l_start++;
+ }
+
+ if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) {
+ if (r_end)
+ r_end--;
+ }
+
+ if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node))
+ return;
+
+ /* At the node where left and right sides meet, add the parts between */
+ if (mast->orig_l->node == mast->orig_r->node) {
+ return mas_topiary_range(mast->orig_l, mast->destroy,
+ l_start, r_end);
+ }
+
+ /* mast->orig_r is different and consumed. */
+ if (mte_is_leaf(mast->orig_r->node))
+ return;
+
+ if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end)))
+ l_end--;
+
+
+ if (l_start <= l_end)
+ mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end);
+
+ if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start)))
+ r_start++;
+
+ if (r_start <= r_end)
+ mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end);
+}
+
+/*
+ * mast_rebalance_next() - Rebalance against the next node
+ * @mast: The maple subtree state
+ * @old_r: The encoded maple node to the right (next node).
+ */
+static inline void mast_rebalance_next(struct maple_subtree_state *mast)
+{
+ unsigned char b_end = mast->bn->b_end;
+
+ mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
+ mast->bn, b_end);
+ mast->orig_r->last = mast->orig_r->max;
+}
+
+/*
+ * mast_rebalance_prev() - Rebalance against the previous node
+ * @mast: The maple subtree state
+ * @old_l: The encoded maple node to the left (previous node)
+ */
+static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
+{
+ unsigned char end = mas_data_end(mast->orig_l) + 1;
+ unsigned char b_end = mast->bn->b_end;
+
+ mab_shift_right(mast->bn, end);
+ mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
+ mast->l->min = mast->orig_l->min;
+ mast->orig_l->index = mast->orig_l->min;
+ mast->bn->b_end = end + b_end;
+ mast->l->offset += end;
+}
+
+/*
+ * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring
+ * the node to the right. Checking the nodes to the right then the left at each
+ * level upwards until root is reached. Free and destroy as needed.
+ * Data is copied into the @mast->bn.
+ * @mast: The maple_subtree_state.
+ */
+static inline
+bool mast_spanning_rebalance(struct maple_subtree_state *mast)
+{
+ struct ma_state r_tmp = *mast->orig_r;
+ struct ma_state l_tmp = *mast->orig_l;
+ struct maple_enode *ancestor = NULL;
+ unsigned char start, end;
+ unsigned char depth = 0;
+
+ r_tmp = *mast->orig_r;
+ l_tmp = *mast->orig_l;
+ do {
+ mas_ascend(mast->orig_r);
+ mas_ascend(mast->orig_l);
+ depth++;
+ if (!ancestor &&
+ (mast->orig_r->node == mast->orig_l->node)) {
+ ancestor = mast->orig_r->node;
+ end = mast->orig_r->offset - 1;
+ start = mast->orig_l->offset + 1;
+ }
+
+ if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
+ if (!ancestor) {
+ ancestor = mast->orig_r->node;
+ start = 0;
+ }
+
+ mast->orig_r->offset++;
+ do {
+ mas_descend(mast->orig_r);
+ mast->orig_r->offset = 0;
+ depth--;
+ } while (depth);
+
+ mast_rebalance_next(mast);
+ do {
+ unsigned char l_off = 0;
+ struct maple_enode *child = r_tmp.node;
+
+ mas_ascend(&r_tmp);
+ if (ancestor == r_tmp.node)
+ l_off = start;
+
+ if (r_tmp.offset)
+ r_tmp.offset--;
+
+ if (l_off < r_tmp.offset)
+ mas_topiary_range(&r_tmp, mast->destroy,
+ l_off, r_tmp.offset);
+
+ if (l_tmp.node != child)
+ mat_add(mast->free, child);
+
+ } while (r_tmp.node != ancestor);
+
+ *mast->orig_l = l_tmp;
+ return true;
+
+ } else if (mast->orig_l->offset != 0) {
+ if (!ancestor) {
+ ancestor = mast->orig_l->node;
+ end = mas_data_end(mast->orig_l);
+ }
+
+ mast->orig_l->offset--;
+ do {
+ mas_descend(mast->orig_l);
+ mast->orig_l->offset =
+ mas_data_end(mast->orig_l);
+ depth--;
+ } while (depth);
+
+ mast_rebalance_prev(mast);
+ do {
+ unsigned char r_off;
+ struct maple_enode *child = l_tmp.node;
+
+ mas_ascend(&l_tmp);
+ if (ancestor == l_tmp.node)
+ r_off = end;
+ else
+ r_off = mas_data_end(&l_tmp);
+
+ if (l_tmp.offset < r_off)
+ l_tmp.offset++;
+
+ if (l_tmp.offset < r_off)
+ mas_topiary_range(&l_tmp, mast->destroy,
+ l_tmp.offset, r_off);
+
+ if (r_tmp.node != child)
+ mat_add(mast->free, child);
+
+ } while (l_tmp.node != ancestor);
+
+ *mast->orig_r = r_tmp;
+ return true;
+ }
+ } while (!mte_is_root(mast->orig_r->node));
+
+ *mast->orig_r = r_tmp;
+ *mast->orig_l = l_tmp;
+ return false;
+}
+
+/*
+ * mast_ascend_free() - Add current original maple state nodes to the free list
+ * and ascend.
+ * @mast: the maple subtree state.
+ *
+ * Ascend the original left and right sides and add the previous nodes to the
+ * free list. Set the slots to point to the correct location in the new nodes.
+ */
+static inline void
+mast_ascend_free(struct maple_subtree_state *mast)
+{
+ MA_WR_STATE(wr_mas, mast->orig_r, NULL);
+ struct maple_enode *left = mast->orig_l->node;
+ struct maple_enode *right = mast->orig_r->node;
+
+ mas_ascend(mast->orig_l);
+ mas_ascend(mast->orig_r);
+ mat_add(mast->free, left);
+
+ if (left != right)
+ mat_add(mast->free, right);
+
+ mast->orig_r->offset = 0;
+ mast->orig_r->index = mast->r->max;
+ /* last should be larger than or equal to index */
+ if (mast->orig_r->last < mast->orig_r->index)
+ mast->orig_r->last = mast->orig_r->index;
+ /*
+ * The node may not contain the value so set slot to ensure all
+ * of the nodes contents are freed or destroyed.
+ */
+ wr_mas.type = mte_node_type(mast->orig_r->node);
+ mas_wr_node_walk(&wr_mas);
+ /* Set up the left side of things */
+ mast->orig_l->offset = 0;
+ mast->orig_l->index = mast->l->min;
+ wr_mas.mas = mast->orig_l;
+ wr_mas.type = mte_node_type(mast->orig_l->node);
+ mas_wr_node_walk(&wr_mas);
+
+ mast->bn->type = wr_mas.type;
+}
+
+/*
+ * mas_new_ma_node() - Create and return a new maple node. Helper function.
+ * @mas: the maple state with the allocations.
+ * @b_node: the maple_big_node with the type encoding.
+ *
+ * Use the node type from the maple_big_node to allocate a new node from the
+ * ma_state. This function exists mainly for code readability.
+ *
+ * Return: A new maple encoded node
+ */
+static inline struct maple_enode
+*mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
+{
+ return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
+}
+
+/*
+ * mas_mab_to_node() - Set up right and middle nodes
+ *
+ * @mas: the maple state that contains the allocations.
+ * @b_node: the node which contains the data.
+ * @left: The pointer which will have the left node
+ * @right: The pointer which may have the right node
+ * @middle: the pointer which may have the middle node (rare)
+ * @mid_split: the split location for the middle node
+ *
+ * Return: the split of left.
+ */
+static inline unsigned char mas_mab_to_node(struct ma_state *mas,
+ struct maple_big_node *b_node, struct maple_enode **left,
+ struct maple_enode **right, struct maple_enode **middle,
+ unsigned char *mid_split, unsigned long min)
+{
+ unsigned char split = 0;
+ unsigned char slot_count = mt_slots[b_node->type];
+
+ *left = mas_new_ma_node(mas, b_node);
+ *right = NULL;
+ *middle = NULL;
+ *mid_split = 0;
+
+ if (b_node->b_end < slot_count) {
+ split = b_node->b_end;
+ } else {
+ split = mab_calc_split(mas, b_node, mid_split, min);
+ *right = mas_new_ma_node(mas, b_node);
+ }
+
+ if (*mid_split)
+ *middle = mas_new_ma_node(mas, b_node);
+
+ return split;
+
+}
+
+/*
+ * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
+ * pointer.
+ * @b_node - the big node to add the entry
+ * @mas - the maple state to get the pivot (mas->max)
+ * @entry - the entry to add, if NULL nothing happens.
+ */
+static inline void mab_set_b_end(struct maple_big_node *b_node,
+ struct ma_state *mas,
+ void *entry)
+{
+ if (!entry)
+ return;
+
+ b_node->slot[b_node->b_end] = entry;
+ if (mt_is_alloc(mas->tree))
+ b_node->gap[b_node->b_end] = mas_max_gap(mas);
+ b_node->pivot[b_node->b_end++] = mas->max;
+}
+
+/*
+ * mas_set_split_parent() - combine_then_separate helper function. Sets the parent
+ * of @mas->node to either @left or @right, depending on @slot and @split
+ *
+ * @mas - the maple state with the node that needs a parent
+ * @left - possible parent 1
+ * @right - possible parent 2
+ * @slot - the slot the mas->node was placed
+ * @split - the split location between @left and @right
+ */
+static inline void mas_set_split_parent(struct ma_state *mas,
+ struct maple_enode *left,
+ struct maple_enode *right,
+ unsigned char *slot, unsigned char split)
+{
+ if (mas_is_none(mas))
+ return;
+
+ if ((*slot) <= split)
+ mte_set_parent(mas->node, left, *slot);
+ else if (right)
+ mte_set_parent(mas->node, right, (*slot) - split - 1);
+
+ (*slot)++;
+}
+
+/*
+ * mte_mid_split_check() - Check if the next node passes the mid-split
+ * @**l: Pointer to left encoded maple node.
+ * @**m: Pointer to middle encoded maple node.
+ * @**r: Pointer to right encoded maple node.
+ * @slot: The offset
+ * @*split: The split location.
+ * @mid_split: The middle split.
+ */
+static inline void mte_mid_split_check(struct maple_enode **l,
+ struct maple_enode **r,
+ struct maple_enode *right,
+ unsigned char slot,
+ unsigned char *split,
+ unsigned char mid_split)
+{
+ if (*r == right)
+ return;
+
+ if (slot < mid_split)
+ return;
+
+ *l = *r;
+ *r = right;
+ *split = mid_split;
+}
+
+/*
+ * mast_set_split_parents() - Helper function to set three nodes parents. Slot
+ * is taken from @mast->l.
+ * @mast - the maple subtree state
+ * @left - the left node
+ * @right - the right node
+ * @split - the split location.
+ */
+static inline void mast_set_split_parents(struct maple_subtree_state *mast,
+ struct maple_enode *left,
+ struct maple_enode *middle,
+ struct maple_enode *right,
+ unsigned char split,
+ unsigned char mid_split)
+{
+ unsigned char slot;
+ struct maple_enode *l = left;
+ struct maple_enode *r = right;
+
+ if (mas_is_none(mast->l))
+ return;
+
+ if (middle)
+ r = middle;
+
+ slot = mast->l->offset;
+
+ mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
+ mas_set_split_parent(mast->l, l, r, &slot, split);
+
+ mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
+ mas_set_split_parent(mast->m, l, r, &slot, split);
+
+ mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
+ mas_set_split_parent(mast->r, l, r, &slot, split);
+}
+
+/*
+ * mas_wmb_replace() - Write memory barrier and replace
+ * @mas: The maple state
+ * @free: the maple topiary list of nodes to free
+ * @destroy: The maple topiary list of nodes to destroy (walk and free)
+ *
+ * Updates gap as necessary.
+ */
+static inline void mas_wmb_replace(struct ma_state *mas,
+ struct ma_topiary *free,
+ struct ma_topiary *destroy)
+{
+ /* All nodes must see old data as dead prior to replacing that data */
+ smp_wmb(); /* Needed for RCU */
+
+ /* Insert the new data in the tree */
+ mas_replace(mas, true);
+
+ if (!mte_is_leaf(mas->node))
+ mas_descend_adopt(mas);
+
+ mas_mat_free(mas, free);
+
+ if (destroy)
+ mas_mat_destroy(mas, destroy);
+
+ if (mte_is_leaf(mas->node))
+ return;
+
+ mas_update_gap(mas);
+}
+
+/*
+ * mast_new_root() - Set a new tree root during subtree creation
+ * @mast: The maple subtree state
+ * @mas: The maple state
+ */
+static inline void mast_new_root(struct maple_subtree_state *mast,
+ struct ma_state *mas)
+{
+ mas_mn(mast->l)->parent =
+ ma_parent_ptr(((unsigned long)mas->tree | MA_ROOT_PARENT));
+ if (!mte_dead_node(mast->orig_l->node) &&
+ !mte_is_root(mast->orig_l->node)) {
+ do {
+ mast_ascend_free(mast);
+ mast_topiary(mast);
+ } while (!mte_is_root(mast->orig_l->node));
+ }
+ if ((mast->orig_l->node != mas->node) &&
+ (mast->l->depth > mas_mt_height(mas))) {
+ mat_add(mast->free, mas->node);
+ }
+}
+
+/*
+ * mast_cp_to_nodes() - Copy data out to nodes.
+ * @mast: The maple subtree state
+ * @left: The left encoded maple node
+ * @middle: The middle encoded maple node
+ * @right: The right encoded maple node
+ * @split: The location to split between left and (middle ? middle : right)
+ * @mid_split: The location to split between middle and right.
+ */
+static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
+ struct maple_enode *left, struct maple_enode *middle,
+ struct maple_enode *right, unsigned char split, unsigned char mid_split)
+{
+ bool new_lmax = true;
+
+ mast->l->node = mte_node_or_none(left);
+ mast->m->node = mte_node_or_none(middle);
+ mast->r->node = mte_node_or_none(right);
+
+ mast->l->min = mast->orig_l->min;
+ if (split == mast->bn->b_end) {
+ mast->l->max = mast->orig_r->max;
+ new_lmax = false;
+ }
+
+ mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
+
+ if (middle) {
+ mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
+ mast->m->min = mast->bn->pivot[split] + 1;
+ split = mid_split;
+ }
+
+ mast->r->max = mast->orig_r->max;
+ if (right) {
+ mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
+ mast->r->min = mast->bn->pivot[split] + 1;
+ }
+}
+
+/*
+ * mast_combine_cp_left - Copy in the original left side of the tree into the
+ * combined data set in the maple subtree state big node.
+ * @mast: The maple subtree state
+ */
+static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
+{
+ unsigned char l_slot = mast->orig_l->offset;
+
+ if (!l_slot)
+ return;
+
+ mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
+}
+
+/*
+ * mast_combine_cp_right: Copy in the original right side of the tree into the
+ * combined data set in the maple subtree state big node.
+ * @mast: The maple subtree state
+ */
+static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
+{
+ if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
+ return;
+
+ mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
+ mt_slot_count(mast->orig_r->node), mast->bn,
+ mast->bn->b_end);
+ mast->orig_r->last = mast->orig_r->max;
+}
+
+/*
+ * mast_sufficient: Check if the maple subtree state has enough data in the big
+ * node to create at least one sufficient node
+ * @mast: the maple subtree state
+ */
+static inline bool mast_sufficient(struct maple_subtree_state *mast)
+{
+ if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
+ return true;
+
+ return false;
+}
+
+/*
+ * mast_overflow: Check if there is too much data in the subtree state for a
+ * single node.
+ * @mast: The maple subtree state
+ */
+static inline bool mast_overflow(struct maple_subtree_state *mast)
+{
+ if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
+ return true;
+
+ return false;
+}
+
+static inline void *mtree_range_walk(struct ma_state *mas)
+{
+ unsigned long *pivots;
+ unsigned char offset;
+ struct maple_node *node;
+ struct maple_enode *next, *last;
+ enum maple_type type;
+ void __rcu **slots;
+ unsigned char end;
+ unsigned long max, min;
+ unsigned long prev_max, prev_min;
+
+ next = mas->node;
+ min = mas->min;
+ max = mas->max;
+ do {
+ offset = 0;
+ last = next;
+ node = mte_to_node(next);
+ type = mte_node_type(next);
+ pivots = ma_pivots(node, type);
+ end = ma_data_end(node, type, pivots, max);
+ if (unlikely(ma_dead_node(node)))
+ goto dead_node;
+
+ if (pivots[offset] >= mas->index) {
+ prev_max = max;
+ prev_min = min;
+ max = pivots[offset];
+ goto next;
+ }
+
+ do {
+ offset++;
+ } while ((offset < end) && (pivots[offset] < mas->index));
+
+ prev_min = min;
+ min = pivots[offset - 1] + 1;
+ prev_max = max;
+ if (likely(offset < end && pivots[offset]))
+ max = pivots[offset];
+
+next:
+ slots = ma_slots(node, type);
+ next = mt_slot(mas->tree, slots, offset);
+ if (unlikely(ma_dead_node(node)))
+ goto dead_node;
+ } while (!ma_is_leaf(type));
+
+ mas->offset = offset;
+ mas->index = min;
+ mas->last = max;
+ mas->min = prev_min;
+ mas->max = prev_max;
+ mas->node = last;
+ return (void *) next;
+
+dead_node:
+ mas_reset(mas);
+ return NULL;
+}
+
+/*
+ * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
+ * @mas: The starting maple state
+ * @mast: The maple_subtree_state, keeps track of 4 maple states.
+ * @count: The estimated count of iterations needed.
+ *
+ * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
+ * is hit. First @b_node is split into two entries which are inserted into the
+ * next iteration of the loop. @b_node is returned populated with the final
+ * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the
+ * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
+ * to account of what has been copied into the new sub-tree. The update of
+ * orig_l_mas->last is used in mas_consume to find the slots that will need to
+ * be either freed or destroyed. orig_l_mas->depth keeps track of the height of
+ * the new sub-tree in case the sub-tree becomes the full tree.
+ *
+ * Return: the number of elements in b_node during the last loop.
+ */
+static int mas_spanning_rebalance(struct ma_state *mas,
+ struct maple_subtree_state *mast, unsigned char count)
+{
+ unsigned char split, mid_split;
+ unsigned char slot = 0;
+ struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
+
+ MA_STATE(l_mas, mas->tree, mas->index, mas->index);
+ MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(m_mas, mas->tree, mas->index, mas->index);
+ MA_TOPIARY(free, mas->tree);
+ MA_TOPIARY(destroy, mas->tree);
+
+ /*
+ * The tree needs to be rebalanced and leaves need to be kept at the same level.
+ * Rebalancing is done by use of the ``struct maple_topiary``.
+ */
+ mast->l = &l_mas;
+ mast->m = &m_mas;
+ mast->r = &r_mas;
+ mast->free = &free;
+ mast->destroy = &destroy;
+ l_mas.node = r_mas.node = m_mas.node = MAS_NONE;
+ if (!(mast->orig_l->min && mast->orig_r->max == ULONG_MAX) &&
+ unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
+ mast_spanning_rebalance(mast);
+
+ mast->orig_l->depth = 0;
+
+ /*
+ * Each level of the tree is examined and balanced, pushing data to the left or
+ * right, or rebalancing against left or right nodes is employed to avoid
+ * rippling up the tree to limit the amount of churn. Once a new sub-section of
+ * the tree is created, there may be a mix of new and old nodes. The old nodes
+ * will have the incorrect parent pointers and currently be in two trees: the
+ * original tree and the partially new tree. To remedy the parent pointers in
+ * the old tree, the new data is swapped into the active tree and a walk down
+ * the tree is performed and the parent pointers are updated.
+ * See mas_descend_adopt() for more information..
+ */
+ while (count--) {
+ mast->bn->b_end--;
+ mast->bn->type = mte_node_type(mast->orig_l->node);
+ split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
+ &mid_split, mast->orig_l->min);
+ mast_set_split_parents(mast, left, middle, right, split,
+ mid_split);
+ mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
+
+ /*
+ * Copy data from next level in the tree to mast->bn from next
+ * iteration
+ */
+ memset(mast->bn, 0, sizeof(struct maple_big_node));
+ mast->bn->type = mte_node_type(left);
+ mast->orig_l->depth++;
+
+ /* Root already stored in l->node. */
+ if (mas_is_root_limits(mast->l))
+ goto new_root;
+
+ mast_ascend_free(mast);
+ mast_combine_cp_left(mast);
+ l_mas.offset = mast->bn->b_end;
+ mab_set_b_end(mast->bn, &l_mas, left);
+ mab_set_b_end(mast->bn, &m_mas, middle);
+ mab_set_b_end(mast->bn, &r_mas, right);
+
+ /* Copy anything necessary out of the right node. */
+ mast_combine_cp_right(mast);
+ mast_topiary(mast);
+ mast->orig_l->last = mast->orig_l->max;
+
+ if (mast_sufficient(mast))
+ continue;
+
+ if (mast_overflow(mast))
+ continue;
+
+ /* May be a new root stored in mast->bn */
+ if (mas_is_root_limits(mast->orig_l))
+ break;
+
+ mast_spanning_rebalance(mast);
+
+ /* rebalancing from other nodes may require another loop. */
+ if (!count)
+ count++;
+ }
+
+ l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
+ mte_node_type(mast->orig_l->node));
+ mast->orig_l->depth++;
+ mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
+ mte_set_parent(left, l_mas.node, slot);
+ if (middle)
+ mte_set_parent(middle, l_mas.node, ++slot);
+
+ if (right)
+ mte_set_parent(right, l_mas.node, ++slot);
+
+ if (mas_is_root_limits(mast->l)) {
+new_root:
+ mast_new_root(mast, mas);
+ } else {
+ mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
+ }
+
+ if (!mte_dead_node(mast->orig_l->node))
+ mat_add(&free, mast->orig_l->node);
+
+ mas->depth = mast->orig_l->depth;
+ *mast->orig_l = l_mas;
+ mte_set_node_dead(mas->node);
+
+ /* Set up mas for insertion. */
+ mast->orig_l->depth = mas->depth;
+ mast->orig_l->alloc = mas->alloc;
+ *mas = *mast->orig_l;
+ mas_wmb_replace(mas, &free, &destroy);
+ mtree_range_walk(mas);
+ return mast->bn->b_end;
+}
+
+/*
+ * mas_rebalance() - Rebalance a given node.
+ * @mas: The maple state
+ * @b_node: The big maple node.
+ *
+ * Rebalance two nodes into a single node or two new nodes that are sufficient.
+ * Continue upwards until tree is sufficient.
+ *
+ * Return: the number of elements in b_node during the last loop.
+ */
+static inline int mas_rebalance(struct ma_state *mas,
+ struct maple_big_node *b_node)
+{
+ char empty_count = mas_mt_height(mas);
+ struct maple_subtree_state mast;
+ unsigned char shift, b_end = ++b_node->b_end;
+
+ MA_STATE(l_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+
+ trace_ma_op(__func__, mas);
+
+ /*
+ * Rebalancing occurs if a node is insufficient. Data is rebalanced
+ * against the node to the right if it exists, otherwise the node to the
+ * left of this node is rebalanced against this node. If rebalancing
+ * causes just one node to be produced instead of two, then the parent
+ * is also examined and rebalanced if it is insufficient. Every level
+ * tries to combine the data in the same way. If one node contains the
+ * entire range of the tree, then that node is used as a new root node.
+ */
+ mas_node_count(mas, 1 + empty_count * 3);
+ if (mas_is_err(mas))
+ return 0;
+
+ mast.orig_l = &l_mas;
+ mast.orig_r = &r_mas;
+ mast.bn = b_node;
+ mast.bn->type = mte_node_type(mas->node);
+
+ l_mas = r_mas = *mas;
+
+ if (mas_next_sibling(&r_mas)) {
+ mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
+ r_mas.last = r_mas.index = r_mas.max;
+ } else {
+ mas_prev_sibling(&l_mas);
+ shift = mas_data_end(&l_mas) + 1;
+ mab_shift_right(b_node, shift);
+ mas->offset += shift;
+ mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
+ b_node->b_end = shift + b_end;
+ l_mas.index = l_mas.last = l_mas.min;
+ }
+
+ return mas_spanning_rebalance(mas, &mast, empty_count);
+}
+
+/*
+ * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
+ * state.
+ * @mas: The maple state
+ * @end: The end of the left-most node.
+ *
+ * During a mass-insert event (such as forking), it may be necessary to
+ * rebalance the left-most node when it is not sufficient.
+ */
+static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
+{
+ enum maple_type mt = mte_node_type(mas->node);
+ struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
+ struct maple_enode *eparent;
+ unsigned char offset, tmp, split = mt_slots[mt] / 2;
+ void __rcu **l_slots, **slots;
+ unsigned long *l_pivs, *pivs, gap;
+ bool in_rcu = mt_in_rcu(mas->tree);
+
+ MA_STATE(l_mas, mas->tree, mas->index, mas->last);
+
+ l_mas = *mas;
+ mas_prev_sibling(&l_mas);
+
+ /* set up node. */
+ if (in_rcu) {
+ /* Allocate for both left and right as well as parent. */
+ mas_node_count(mas, 3);
+ if (mas_is_err(mas))
+ return;
+
+ newnode = mas_pop_node(mas);
+ } else {
+ newnode = &reuse;
+ }
+
+ node = mas_mn(mas);
+ newnode->parent = node->parent;
+ slots = ma_slots(newnode, mt);
+ pivs = ma_pivots(newnode, mt);
+ left = mas_mn(&l_mas);
+ l_slots = ma_slots(left, mt);
+ l_pivs = ma_pivots(left, mt);
+ if (!l_slots[split])
+ split++;
+ tmp = mas_data_end(&l_mas) - split;
+
+ memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
+ memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
+ pivs[tmp] = l_mas.max;
+ memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
+ memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
+
+ l_mas.max = l_pivs[split];
+ mas->min = l_mas.max + 1;
+ eparent = mt_mk_node(mte_parent(l_mas.node),
+ mas_parent_enum(&l_mas, l_mas.node));
+ tmp += end;
+ if (!in_rcu) {
+ unsigned char max_p = mt_pivots[mt];
+ unsigned char max_s = mt_slots[mt];
+
+ if (tmp < max_p)
+ memset(pivs + tmp, 0,
+ sizeof(unsigned long *) * (max_p - tmp));
+
+ if (tmp < mt_slots[mt])
+ memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
+
+ memcpy(node, newnode, sizeof(struct maple_node));
+ ma_set_meta(node, mt, 0, tmp - 1);
+ mte_set_pivot(eparent, mte_parent_slot(l_mas.node),
+ l_pivs[split]);
+
+ /* Remove data from l_pivs. */
+ tmp = split + 1;
+ memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
+ memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
+ ma_set_meta(left, mt, 0, split);
+
+ goto done;
+ }
+
+ /* RCU requires replacing both l_mas, mas, and parent. */
+ mas->node = mt_mk_node(newnode, mt);
+ ma_set_meta(newnode, mt, 0, tmp);
+
+ new_left = mas_pop_node(mas);
+ new_left->parent = left->parent;
+ mt = mte_node_type(l_mas.node);
+ slots = ma_slots(new_left, mt);
+ pivs = ma_pivots(new_left, mt);
+ memcpy(slots, l_slots, sizeof(void *) * split);
+ memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
+ ma_set_meta(new_left, mt, 0, split);
+ l_mas.node = mt_mk_node(new_left, mt);
+
+ /* replace parent. */
+ offset = mte_parent_slot(mas->node);
+ mt = mas_parent_enum(&l_mas, l_mas.node);
+ parent = mas_pop_node(mas);
+ slots = ma_slots(parent, mt);
+ pivs = ma_pivots(parent, mt);
+ memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node));
+ rcu_assign_pointer(slots[offset], mas->node);
+ rcu_assign_pointer(slots[offset - 1], l_mas.node);
+ pivs[offset - 1] = l_mas.max;
+ eparent = mt_mk_node(parent, mt);
+done:
+ gap = mas_leaf_max_gap(mas);
+ mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
+ gap = mas_leaf_max_gap(&l_mas);
+ mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
+ mas_ascend(mas);
+
+ if (in_rcu)
+ mas_replace(mas, false);
+
+ mas_update_gap(mas);
+}
+
+/*
+ * mas_split_final_node() - Split the final node in a subtree operation.
+ * @mast: the maple subtree state
+ * @mas: The maple state
+ * @height: The height of the tree in case it's a new root.
+ */
+static inline bool mas_split_final_node(struct maple_subtree_state *mast,
+ struct ma_state *mas, int height)
+{
+ struct maple_enode *ancestor;
+
+ if (mte_is_root(mas->node)) {
+ if (mt_is_alloc(mas->tree))
+ mast->bn->type = maple_arange_64;
+ else
+ mast->bn->type = maple_range_64;
+ mas->depth = height;
+ }
+ /*
+ * Only a single node is used here, could be root.
+ * The Big_node data should just fit in a single node.
+ */
+ ancestor = mas_new_ma_node(mas, mast->bn);
+ mte_set_parent(mast->l->node, ancestor, mast->l->offset);
+ mte_set_parent(mast->r->node, ancestor, mast->r->offset);
+ mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
+
+ mast->l->node = ancestor;
+ mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
+ mas->offset = mast->bn->b_end - 1;
+ return true;
+}
+
+/*
+ * mast_fill_bnode() - Copy data into the big node in the subtree state
+ * @mast: The maple subtree state
+ * @mas: the maple state
+ * @skip: The number of entries to skip for new nodes insertion.
+ */
+static inline void mast_fill_bnode(struct maple_subtree_state *mast,
+ struct ma_state *mas,
+ unsigned char skip)
+{
+ bool cp = true;
+ struct maple_enode *old = mas->node;
+ unsigned char split;
+
+ memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap));
+ memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot));
+ memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot));
+ mast->bn->b_end = 0;
+
+ if (mte_is_root(mas->node)) {
+ cp = false;
+ } else {
+ mas_ascend(mas);
+ mat_add(mast->free, old);
+ mas->offset = mte_parent_slot(mas->node);
+ }
+
+ if (cp && mast->l->offset)
+ mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
+
+ split = mast->bn->b_end;
+ mab_set_b_end(mast->bn, mast->l, mast->l->node);
+ mast->r->offset = mast->bn->b_end;
+ mab_set_b_end(mast->bn, mast->r, mast->r->node);
+ if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
+ cp = false;
+
+ if (cp)
+ mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
+ mast->bn, mast->bn->b_end);
+
+ mast->bn->b_end--;
+ mast->bn->type = mte_node_type(mas->node);
+}
+
+/*
+ * mast_split_data() - Split the data in the subtree state big node into regular
+ * nodes.
+ * @mast: The maple subtree state
+ * @mas: The maple state
+ * @split: The location to split the big node
+ */
+static inline void mast_split_data(struct maple_subtree_state *mast,
+ struct ma_state *mas, unsigned char split)
+{
+ unsigned char p_slot;
+
+ mab_mas_cp(mast->bn, 0, split, mast->l, true);
+ mte_set_pivot(mast->r->node, 0, mast->r->max);
+ mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
+ mast->l->offset = mte_parent_slot(mas->node);
+ mast->l->max = mast->bn->pivot[split];
+ mast->r->min = mast->l->max + 1;
+ if (mte_is_leaf(mas->node))
+ return;
+
+ p_slot = mast->orig_l->offset;
+ mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
+ &p_slot, split);
+ mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
+ &p_slot, split);
+}
+
+/*
+ * mas_push_data() - Instead of splitting a node, it is beneficial to push the
+ * data to the right or left node if there is room.
+ * @mas: The maple state
+ * @height: The current height of the maple state
+ * @mast: The maple subtree state
+ * @left: Push left or not.
+ *
+ * Keeping the height of the tree low means faster lookups.
+ *
+ * Return: True if pushed, false otherwise.
+ */
+static inline bool mas_push_data(struct ma_state *mas, int height,
+ struct maple_subtree_state *mast, bool left)
+{
+ unsigned char slot_total = mast->bn->b_end;
+ unsigned char end, space, split;
+
+ MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
+ tmp_mas = *mas;
+ tmp_mas.depth = mast->l->depth;
+
+ if (left && !mas_prev_sibling(&tmp_mas))
+ return false;
+ else if (!left && !mas_next_sibling(&tmp_mas))
+ return false;
+
+ end = mas_data_end(&tmp_mas);
+ slot_total += end;
+ space = 2 * mt_slot_count(mas->node) - 2;
+ /* -2 instead of -1 to ensure there isn't a triple split */
+ if (ma_is_leaf(mast->bn->type))
+ space--;
+
+ if (mas->max == ULONG_MAX)
+ space--;
+
+ if (slot_total >= space)
+ return false;
+
+ /* Get the data; Fill mast->bn */
+ mast->bn->b_end++;
+ if (left) {
+ mab_shift_right(mast->bn, end + 1);
+ mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
+ mast->bn->b_end = slot_total + 1;
+ } else {
+ mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
+ }
+
+ /* Configure mast for splitting of mast->bn */
+ split = mt_slots[mast->bn->type] - 2;
+ if (left) {
+ /* Switch mas to prev node */
+ mat_add(mast->free, mas->node);
+ *mas = tmp_mas;
+ /* Start using mast->l for the left side. */
+ tmp_mas.node = mast->l->node;
+ *mast->l = tmp_mas;
+ } else {
+ mat_add(mast->free, tmp_mas.node);
+ tmp_mas.node = mast->r->node;
+ *mast->r = tmp_mas;
+ split = slot_total - split;
+ }
+ split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
+ /* Update parent slot for split calculation. */
+ if (left)
+ mast->orig_l->offset += end + 1;
+
+ mast_split_data(mast, mas, split);
+ mast_fill_bnode(mast, mas, 2);
+ mas_split_final_node(mast, mas, height + 1);
+ return true;
+}
+
+/*
+ * mas_split() - Split data that is too big for one node into two.
+ * @mas: The maple state
+ * @b_node: The maple big node
+ * Return: 1 on success, 0 on failure.
+ */
+static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
+{
+
+ struct maple_subtree_state mast;
+ int height = 0;
+ unsigned char mid_split, split = 0;
+
+ /*
+ * Splitting is handled differently from any other B-tree; the Maple
+ * Tree splits upwards. Splitting up means that the split operation
+ * occurs when the walk of the tree hits the leaves and not on the way
+ * down. The reason for splitting up is that it is impossible to know
+ * how much space will be needed until the leaf is (or leaves are)
+ * reached. Since overwriting data is allowed and a range could
+ * overwrite more than one range or result in changing one entry into 3
+ * entries, it is impossible to know if a split is required until the
+ * data is examined.
+ *
+ * Splitting is a balancing act between keeping allocations to a minimum
+ * and avoiding a 'jitter' event where a tree is expanded to make room
+ * for an entry followed by a contraction when the entry is removed. To
+ * accomplish the balance, there are empty slots remaining in both left
+ * and right nodes after a split.
+ */
+ MA_STATE(l_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
+ MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
+ MA_TOPIARY(mat, mas->tree);
+
+ trace_ma_op(__func__, mas);
+ mas->depth = mas_mt_height(mas);
+ /* Allocation failures will happen early. */
+ mas_node_count(mas, 1 + mas->depth * 2);
+ if (mas_is_err(mas))
+ return 0;
+
+ mast.l = &l_mas;
+ mast.r = &r_mas;
+ mast.orig_l = &prev_l_mas;
+ mast.orig_r = &prev_r_mas;
+ mast.free = &mat;
+ mast.bn = b_node;
+
+ while (height++ <= mas->depth) {
+ if (mt_slots[b_node->type] > b_node->b_end) {
+ mas_split_final_node(&mast, mas, height);
+ break;
+ }
+
+ l_mas = r_mas = *mas;
+ l_mas.node = mas_new_ma_node(mas, b_node);
+ r_mas.node = mas_new_ma_node(mas, b_node);
+ /*
+ * Another way that 'jitter' is avoided is to terminate a split up early if the
+ * left or right node has space to spare. This is referred to as "pushing left"
+ * or "pushing right" and is similar to the B* tree, except the nodes left or
+ * right can rarely be reused due to RCU, but the ripple upwards is halted which
+ * is a significant savings.
+ */
+ /* Try to push left. */
+ if (mas_push_data(mas, height, &mast, true))
+ break;
+
+ /* Try to push right. */
+ if (mas_push_data(mas, height, &mast, false))
+ break;
+
+ split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
+ mast_split_data(&mast, mas, split);
+ /*
+ * Usually correct, mab_mas_cp in the above call overwrites
+ * r->max.
+ */
+ mast.r->max = mas->max;
+ mast_fill_bnode(&mast, mas, 1);
+ prev_l_mas = *mast.l;
+ prev_r_mas = *mast.r;
+ }
+
+ /* Set the original node as dead */
+ mat_add(mast.free, mas->node);
+ mas->node = l_mas.node;
+ mas_wmb_replace(mas, mast.free, NULL);
+ mtree_range_walk(mas);
+ return 1;
+}
+
+/*
+ * mas_reuse_node() - Reuse the node to store the data.
+ * @wr_mas: The maple write state
+ * @bn: The maple big node
+ * @end: The end of the data.
+ *
+ * Will always return false in RCU mode.
+ *
+ * Return: True if node was reused, false otherwise.
+ */
+static inline bool mas_reuse_node(struct ma_wr_state *wr_mas,
+ struct maple_big_node *bn, unsigned char end)
+{
+ /* Need to be rcu safe. */
+ if (mt_in_rcu(wr_mas->mas->tree))
+ return false;
+
+ if (end > bn->b_end) {
+ int clear = mt_slots[wr_mas->type] - bn->b_end;
+
+ memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--);
+ memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear);
+ }
+ mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false);
+ return true;
+}
+
+/*
+ * mas_commit_b_node() - Commit the big node into the tree.
+ * @wr_mas: The maple write state
+ * @b_node: The maple big node
+ * @end: The end of the data.
+ */
+static inline int mas_commit_b_node(struct ma_wr_state *wr_mas,
+ struct maple_big_node *b_node, unsigned char end)
+{
+ struct maple_node *node;
+ unsigned char b_end = b_node->b_end;
+ enum maple_type b_type = b_node->type;
+
+ if ((b_end < mt_min_slots[b_type]) &&
+ (!mte_is_root(wr_mas->mas->node)) &&
+ (mas_mt_height(wr_mas->mas) > 1))
+ return mas_rebalance(wr_mas->mas, b_node);
+
+ if (b_end >= mt_slots[b_type])
+ return mas_split(wr_mas->mas, b_node);
+
+ if (mas_reuse_node(wr_mas, b_node, end))
+ goto reuse_node;
+
+ mas_node_count(wr_mas->mas, 1);
+ if (mas_is_err(wr_mas->mas))
+ return 0;
+
+ node = mas_pop_node(wr_mas->mas);
+ node->parent = mas_mn(wr_mas->mas)->parent;
+ wr_mas->mas->node = mt_mk_node(node, b_type);
+ mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false);
+ mas_replace(wr_mas->mas, false);
+reuse_node:
+ mas_update_gap(wr_mas->mas);
+ return 1;
+}
+
+/*
+ * mas_root_expand() - Expand a root to a node
+ * @mas: The maple state
+ * @entry: The entry to store into the tree
+ */
+static inline int mas_root_expand(struct ma_state *mas, void *entry)
+{
+ void *contents = mas_root_locked(mas);
+ enum maple_type type = maple_leaf_64;
+ struct maple_node *node;
+ void __rcu **slots;
+ unsigned long *pivots;
+ int slot = 0;
+
+ mas_node_count(mas, 1);
+ if (unlikely(mas_is_err(mas)))
+ return 0;
+
+ node = mas_pop_node(mas);
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ node->parent = ma_parent_ptr(
+ ((unsigned long)mas->tree | MA_ROOT_PARENT));
+ mas->node = mt_mk_node(node, type);
+
+ if (mas->index) {
+ if (contents) {
+ rcu_assign_pointer(slots[slot], contents);
+ if (likely(mas->index > 1))
+ slot++;
+ }
+ pivots[slot++] = mas->index - 1;
+ }
+
+ rcu_assign_pointer(slots[slot], entry);
+ mas->offset = slot;
+ pivots[slot] = mas->last;
+ if (mas->last != ULONG_MAX)
+ slot++;
+ mas->depth = 1;
+ mas_set_height(mas);
+
+ /* swap the new root into the tree */
+ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
+ ma_set_meta(node, maple_leaf_64, 0, slot);
+ return slot;
+}
+
+static inline void mas_store_root(struct ma_state *mas, void *entry)
+{
+ if (likely((mas->last != 0) || (mas->index != 0)))
+ mas_root_expand(mas, entry);
+ else if (((unsigned long) (entry) & 3) == 2)
+ mas_root_expand(mas, entry);
+ else {
+ rcu_assign_pointer(mas->tree->ma_root, entry);
+ mas->node = MAS_START;
+ }
+}
+
+/*
+ * mas_is_span_wr() - Check if the write needs to be treated as a write that
+ * spans the node.
+ * @mas: The maple state
+ * @piv: The pivot value being written
+ * @type: The maple node type
+ * @entry: The data to write
+ *
+ * Spanning writes are writes that start in one node and end in another OR if
+ * the write of a %NULL will cause the node to end with a %NULL.
+ *
+ * Return: True if this is a spanning write, false otherwise.
+ */
+static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
+{
+ unsigned long max;
+ unsigned long last = wr_mas->mas->last;
+ unsigned long piv = wr_mas->r_max;
+ enum maple_type type = wr_mas->type;
+ void *entry = wr_mas->entry;
+
+ /* Contained in this pivot */
+ if (piv > last)
+ return false;
+
+ max = wr_mas->mas->max;
+ if (unlikely(ma_is_leaf(type))) {
+ /* Fits in the node, but may span slots. */
+ if (last < max)
+ return false;
+
+ /* Writes to the end of the node but not null. */
+ if ((last == max) && entry)
+ return false;
+
+ /*
+ * Writing ULONG_MAX is not a spanning write regardless of the
+ * value being written as long as the range fits in the node.
+ */
+ if ((last == ULONG_MAX) && (last == max))
+ return false;
+ } else if (piv == last) {
+ if (entry)
+ return false;
+
+ /* Detect spanning store wr walk */
+ if (last == ULONG_MAX)
+ return false;
+ }
+
+ trace_ma_write(__func__, wr_mas->mas, piv, entry);
+
+ return true;
+}
+
+static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
+{
+ wr_mas->type = mte_node_type(wr_mas->mas->node);
+ mas_wr_node_walk(wr_mas);
+ wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
+}
+
+static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
+{
+ wr_mas->mas->max = wr_mas->r_max;
+ wr_mas->mas->min = wr_mas->r_min;
+ wr_mas->mas->node = wr_mas->content;
+ wr_mas->mas->offset = 0;
+ wr_mas->mas->depth++;
+}
+/*
+ * mas_wr_walk() - Walk the tree for a write.
+ * @wr_mas: The maple write state
+ *
+ * Uses mas_slot_locked() and does not need to worry about dead nodes.
+ *
+ * Return: True if it's contained in a node, false on spanning write.
+ */
+static bool mas_wr_walk(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+
+ while (true) {
+ mas_wr_walk_descend(wr_mas);
+ if (unlikely(mas_is_span_wr(wr_mas)))
+ return false;
+
+ wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
+ mas->offset);
+ if (ma_is_leaf(wr_mas->type))
+ return true;
+
+ mas_wr_walk_traverse(wr_mas);
+ }
+
+ return true;
+}
+
+static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+
+ while (true) {
+ mas_wr_walk_descend(wr_mas);
+ wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
+ mas->offset);
+ if (ma_is_leaf(wr_mas->type))
+ return true;
+ mas_wr_walk_traverse(wr_mas);
+
+ }
+ return true;
+}
+/*
+ * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
+ * @l_wr_mas: The left maple write state
+ * @r_wr_mas: The right maple write state
+ */
+static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
+ struct ma_wr_state *r_wr_mas)
+{
+ struct ma_state *r_mas = r_wr_mas->mas;
+ struct ma_state *l_mas = l_wr_mas->mas;
+ unsigned char l_slot;
+
+ l_slot = l_mas->offset;
+ if (!l_wr_mas->content)
+ l_mas->index = l_wr_mas->r_min;
+
+ if ((l_mas->index == l_wr_mas->r_min) &&
+ (l_slot &&
+ !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
+ if (l_slot > 1)
+ l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
+ else
+ l_mas->index = l_mas->min;
+
+ l_mas->offset = l_slot - 1;
+ }
+
+ if (!r_wr_mas->content) {
+ if (r_mas->last < r_wr_mas->r_max)
+ r_mas->last = r_wr_mas->r_max;
+ r_mas->offset++;
+ } else if ((r_mas->last == r_wr_mas->r_max) &&
+ (r_mas->last < r_mas->max) &&
+ !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
+ r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
+ r_wr_mas->type, r_mas->offset + 1);
+ r_mas->offset++;
+ }
+}
+
+static inline void *mas_state_walk(struct ma_state *mas)
+{
+ void *entry;
+
+ entry = mas_start(mas);
+ if (mas_is_none(mas))
+ return NULL;
+
+ if (mas_is_ptr(mas))
+ return entry;
+
+ return mtree_range_walk(mas);
+}
+
+/*
+ * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
+ * to date.
+ *
+ * @mas: The maple state.
+ *
+ * Note: Leaves mas in undesirable state.
+ * Return: The entry for @mas->index or %NULL on dead node.
+ */
+static inline void *mtree_lookup_walk(struct ma_state *mas)
+{
+ unsigned long *pivots;
+ unsigned char offset;
+ struct maple_node *node;
+ struct maple_enode *next;
+ enum maple_type type;
+ void __rcu **slots;
+ unsigned char end;
+ unsigned long max;
+
+ next = mas->node;
+ max = ULONG_MAX;
+ do {
+ offset = 0;
+ node = mte_to_node(next);
+ type = mte_node_type(next);
+ pivots = ma_pivots(node, type);
+ end = ma_data_end(node, type, pivots, max);
+ if (unlikely(ma_dead_node(node)))
+ goto dead_node;
+
+ if (pivots[offset] >= mas->index)
+ goto next;
+
+ do {
+ offset++;
+ } while ((offset < end) && (pivots[offset] < mas->index));
+
+ if (likely(offset > end))
+ max = pivots[offset];
+
+next:
+ slots = ma_slots(node, type);
+ next = mt_slot(mas->tree, slots, offset);
+ if (unlikely(ma_dead_node(node)))
+ goto dead_node;
+ } while (!ma_is_leaf(type));
+
+ return (void *) next;
+
+dead_node:
+ mas_reset(mas);
+ return NULL;
+}
+
+/*
+ * mas_new_root() - Create a new root node that only contains the entry passed
+ * in.
+ * @mas: The maple state
+ * @entry: The entry to store.
+ *
+ * Only valid when the index == 0 and the last == ULONG_MAX
+ *
+ * Return 0 on error, 1 on success.
+ */
+static inline int mas_new_root(struct ma_state *mas, void *entry)
+{
+ struct maple_enode *root = mas_root_locked(mas);
+ enum maple_type type = maple_leaf_64;
+ struct maple_node *node;
+ void __rcu **slots;
+ unsigned long *pivots;
+
+ if (!entry && !mas->index && mas->last == ULONG_MAX) {
+ mas->depth = 0;
+ mas_set_height(mas);
+ rcu_assign_pointer(mas->tree->ma_root, entry);
+ mas->node = MAS_START;
+ goto done;
+ }
+
+ mas_node_count(mas, 1);
+ if (mas_is_err(mas))
+ return 0;
+
+ node = mas_pop_node(mas);
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ node->parent = ma_parent_ptr(
+ ((unsigned long)mas->tree | MA_ROOT_PARENT));
+ mas->node = mt_mk_node(node, type);
+ rcu_assign_pointer(slots[0], entry);
+ pivots[0] = mas->last;
+ mas->depth = 1;
+ mas_set_height(mas);
+ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
+
+done:
+ if (xa_is_node(root))
+ mte_destroy_walk(root, mas->tree);
+
+ return 1;
+}
+/*
+ * mas_wr_spanning_store() - Create a subtree with the store operation completed
+ * and new nodes where necessary, then place the sub-tree in the actual tree.
+ * Note that mas is expected to point to the node which caused the store to
+ * span.
+ * @wr_mas: The maple write state
+ *
+ * Return: 0 on error, positive on success.
+ */
+static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
+{
+ struct maple_subtree_state mast;
+ struct maple_big_node b_node;
+ struct ma_state *mas;
+ unsigned char height;
+
+ /* Left and Right side of spanning store */
+ MA_STATE(l_mas, NULL, 0, 0);
+ MA_STATE(r_mas, NULL, 0, 0);
+
+ MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
+ MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
+
+ /*
+ * A store operation that spans multiple nodes is called a spanning
+ * store and is handled early in the store call stack by the function
+ * mas_is_span_wr(). When a spanning store is identified, the maple
+ * state is duplicated. The first maple state walks the left tree path
+ * to ``index``, the duplicate walks the right tree path to ``last``.
+ * The data in the two nodes are combined into a single node, two nodes,
+ * or possibly three nodes (see the 3-way split above). A ``NULL``
+ * written to the last entry of a node is considered a spanning store as
+ * a rebalance is required for the operation to complete and an overflow
+ * of data may happen.
+ */
+ mas = wr_mas->mas;
+ trace_ma_op(__func__, mas);
+
+ if (unlikely(!mas->index && mas->last == ULONG_MAX))
+ return mas_new_root(mas, wr_mas->entry);
+ /*
+ * Node rebalancing may occur due to this store, so there may be three new
+ * entries per level plus a new root.
+ */
+ height = mas_mt_height(mas);
+ mas_node_count(mas, 1 + height * 3);
+ if (mas_is_err(mas))
+ return 0;
+
+ /*
+ * Set up right side. Need to get to the next offset after the spanning
+ * store to ensure it's not NULL and to combine both the next node and
+ * the node with the start together.
+ */
+ r_mas = *mas;
+ /* Avoid overflow, walk to next slot in the tree. */
+ if (r_mas.last + 1)
+ r_mas.last++;
+
+ r_mas.index = r_mas.last;
+ mas_wr_walk_index(&r_wr_mas);
+ r_mas.last = r_mas.index = mas->last;
+
+ /* Set up left side. */
+ l_mas = *mas;
+ mas_wr_walk_index(&l_wr_mas);
+
+ if (!wr_mas->entry) {
+ mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
+ mas->offset = l_mas.offset;
+ mas->index = l_mas.index;
+ mas->last = l_mas.last = r_mas.last;
+ }
+
+ /* expanding NULLs may make this cover the entire range */
+ if (!l_mas.index && r_mas.last == ULONG_MAX) {
+ mas_set_range(mas, 0, ULONG_MAX);
+ return mas_new_root(mas, wr_mas->entry);
+ }
+
+ memset(&b_node, 0, sizeof(struct maple_big_node));
+ /* Copy l_mas and store the value in b_node. */
+ mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
+ /* Copy r_mas into b_node. */
+ if (r_mas.offset <= r_wr_mas.node_end)
+ mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
+ &b_node, b_node.b_end + 1);
+ else
+ b_node.b_end++;
+
+ /* Stop spanning searches by searching for just index. */
+ l_mas.index = l_mas.last = mas->index;
+
+ mast.bn = &b_node;
+ mast.orig_l = &l_mas;
+ mast.orig_r = &r_mas;
+ /* Combine l_mas and r_mas and split them up evenly again. */
+ return mas_spanning_rebalance(mas, &mast, height + 1);
+}
+
+/*
+ * mas_wr_node_store() - Attempt to store the value in a node
+ * @wr_mas: The maple write state
+ *
+ * Attempts to reuse the node, but may allocate.
+ *
+ * Return: True if stored, false otherwise
+ */
+static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+ void __rcu **dst_slots;
+ unsigned long *dst_pivots;
+ unsigned char dst_offset;
+ unsigned char new_end = wr_mas->node_end;
+ unsigned char offset;
+ unsigned char node_slots = mt_slots[wr_mas->type];
+ struct maple_node reuse, *newnode;
+ unsigned char copy_size, max_piv = mt_pivots[wr_mas->type];
+ bool in_rcu = mt_in_rcu(mas->tree);
+
+ offset = mas->offset;
+ if (mas->last == wr_mas->r_max) {
+ /* runs right to the end of the node */
+ if (mas->last == mas->max)
+ new_end = offset;
+ /* don't copy this offset */
+ wr_mas->offset_end++;
+ } else if (mas->last < wr_mas->r_max) {
+ /* new range ends in this range */
+ if (unlikely(wr_mas->r_max == ULONG_MAX))
+ mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type);
+
+ new_end++;
+ } else {
+ if (wr_mas->end_piv == mas->last)
+ wr_mas->offset_end++;
+
+ new_end -= wr_mas->offset_end - offset - 1;
+ }
+
+ /* new range starts within a range */
+ if (wr_mas->r_min < mas->index)
+ new_end++;
+
+ /* Not enough room */
+ if (new_end >= node_slots)
+ return false;
+
+ /* Not enough data. */
+ if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) &&
+ !(mas->mas_flags & MA_STATE_BULK))
+ return false;
+
+ /* set up node. */
+ if (in_rcu) {
+ mas_node_count(mas, 1);
+ if (mas_is_err(mas))
+ return false;
+
+ newnode = mas_pop_node(mas);
+ } else {
+ memset(&reuse, 0, sizeof(struct maple_node));
+ newnode = &reuse;
+ }
+
+ newnode->parent = mas_mn(mas)->parent;
+ dst_pivots = ma_pivots(newnode, wr_mas->type);
+ dst_slots = ma_slots(newnode, wr_mas->type);
+ /* Copy from start to insert point */
+ memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * (offset + 1));
+ memcpy(dst_slots, wr_mas->slots, sizeof(void *) * (offset + 1));
+ dst_offset = offset;
+
+ /* Handle insert of new range starting after old range */
+ if (wr_mas->r_min < mas->index) {
+ mas->offset++;
+ rcu_assign_pointer(dst_slots[dst_offset], wr_mas->content);
+ dst_pivots[dst_offset++] = mas->index - 1;
+ }
+
+ /* Store the new entry and range end. */
+ if (dst_offset < max_piv)
+ dst_pivots[dst_offset] = mas->last;
+ mas->offset = dst_offset;
+ rcu_assign_pointer(dst_slots[dst_offset], wr_mas->entry);
+
+ /*
+ * this range wrote to the end of the node or it overwrote the rest of
+ * the data
+ */
+ if (wr_mas->offset_end > wr_mas->node_end || mas->last >= mas->max) {
+ new_end = dst_offset;
+ goto done;
+ }
+
+ dst_offset++;
+ /* Copy to the end of node if necessary. */
+ copy_size = wr_mas->node_end - wr_mas->offset_end + 1;
+ memcpy(dst_slots + dst_offset, wr_mas->slots + wr_mas->offset_end,
+ sizeof(void *) * copy_size);
+ if (dst_offset < max_piv) {
+ if (copy_size > max_piv - dst_offset)
+ copy_size = max_piv - dst_offset;
+
+ memcpy(dst_pivots + dst_offset,
+ wr_mas->pivots + wr_mas->offset_end,
+ sizeof(unsigned long) * copy_size);
+ }
+
+ if ((wr_mas->node_end == node_slots - 1) && (new_end < node_slots - 1))
+ dst_pivots[new_end] = mas->max;
+
+done:
+ mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
+ if (in_rcu) {
+ mas->node = mt_mk_node(newnode, wr_mas->type);
+ mas_replace(mas, false);
+ } else {
+ memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
+ }
+ trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ mas_update_gap(mas);
+ return true;
+}
+
+/*
+ * mas_wr_slot_store: Attempt to store a value in a slot.
+ * @wr_mas: the maple write state
+ *
+ * Return: True if stored, false otherwise
+ */
+static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+ unsigned long lmax; /* Logical max. */
+ unsigned char offset = mas->offset;
+
+ if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
+ (offset != wr_mas->node_end)))
+ return false;
+
+ if (offset == wr_mas->node_end - 1)
+ lmax = mas->max;
+ else
+ lmax = wr_mas->pivots[offset + 1];
+
+ /* going to overwrite too many slots. */
+ if (lmax < mas->last)
+ return false;
+
+ if (wr_mas->r_min == mas->index) {
+ /* overwriting two or more ranges with one. */
+ if (lmax == mas->last)
+ return false;
+
+ /* Overwriting all of offset and a portion of offset + 1. */
+ rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
+ wr_mas->pivots[offset] = mas->last;
+ goto done;
+ }
+
+ /* Doesn't end on the next range end. */
+ if (lmax != mas->last)
+ return false;
+
+ /* Overwriting a portion of offset and all of offset + 1 */
+ if ((offset + 1 < mt_pivots[wr_mas->type]) &&
+ (wr_mas->entry || wr_mas->pivots[offset + 1]))
+ wr_mas->pivots[offset + 1] = mas->last;
+
+ rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
+ wr_mas->pivots[offset] = mas->index - 1;
+ mas->offset++; /* Keep mas accurate. */
+
+done:
+ trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ mas_update_gap(mas);
+ return true;
+}
+
+static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
+{
+ while ((wr_mas->mas->last > wr_mas->end_piv) &&
+ (wr_mas->offset_end < wr_mas->node_end))
+ wr_mas->end_piv = wr_mas->pivots[++wr_mas->offset_end];
+
+ if (wr_mas->mas->last > wr_mas->end_piv)
+ wr_mas->end_piv = wr_mas->mas->max;
+}
+
+static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+
+ if (mas->last < wr_mas->end_piv && !wr_mas->slots[wr_mas->offset_end])
+ mas->last = wr_mas->end_piv;
+
+ /* Check next slot(s) if we are overwriting the end */
+ if ((mas->last == wr_mas->end_piv) &&
+ (wr_mas->node_end != wr_mas->offset_end) &&
+ !wr_mas->slots[wr_mas->offset_end + 1]) {
+ wr_mas->offset_end++;
+ if (wr_mas->offset_end == wr_mas->node_end)
+ mas->last = mas->max;
+ else
+ mas->last = wr_mas->pivots[wr_mas->offset_end];
+ wr_mas->end_piv = mas->last;
+ }
+
+ if (!wr_mas->content) {
+ /* If this one is null, the next and prev are not */
+ mas->index = wr_mas->r_min;
+ } else {
+ /* Check prev slot if we are overwriting the start */
+ if (mas->index == wr_mas->r_min && mas->offset &&
+ !wr_mas->slots[mas->offset - 1]) {
+ mas->offset--;
+ wr_mas->r_min = mas->index =
+ mas_safe_min(mas, wr_mas->pivots, mas->offset);
+ wr_mas->r_max = wr_mas->pivots[mas->offset];
+ }
+ }
+}
+
+static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
+{
+ unsigned char end = wr_mas->node_end;
+ unsigned char new_end = end + 1;
+ struct ma_state *mas = wr_mas->mas;
+ unsigned char node_pivots = mt_pivots[wr_mas->type];
+
+ if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) {
+ if (new_end < node_pivots)
+ wr_mas->pivots[new_end] = wr_mas->pivots[end];
+
+ if (new_end < node_pivots)
+ ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
+
+ rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->entry);
+ mas->offset = new_end;
+ wr_mas->pivots[end] = mas->index - 1;
+
+ return true;
+ }
+
+ if ((mas->index == wr_mas->r_min) && (mas->last < wr_mas->r_max)) {
+ if (new_end < node_pivots)
+ wr_mas->pivots[new_end] = wr_mas->pivots[end];
+
+ rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content);
+ if (new_end < node_pivots)
+ ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
+
+ wr_mas->pivots[end] = mas->last;
+ rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * mas_wr_bnode() - Slow path for a modification.
+ * @wr_mas: The write maple state
+ *
+ * This is where split, rebalance end up.
+ */
+static void mas_wr_bnode(struct ma_wr_state *wr_mas)
+{
+ struct maple_big_node b_node;
+
+ trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
+ memset(&b_node, 0, sizeof(struct maple_big_node));
+ mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
+ mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end);
+}
+
+static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
+{
+ unsigned char node_slots;
+ unsigned char node_size;
+ struct ma_state *mas = wr_mas->mas;
+
+ /* Direct replacement */
+ if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) {
+ rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
+ if (!!wr_mas->entry ^ !!wr_mas->content)
+ mas_update_gap(mas);
+ return;
+ }
+
+ /* Attempt to append */
+ node_slots = mt_slots[wr_mas->type];
+ node_size = wr_mas->node_end - wr_mas->offset_end + mas->offset + 2;
+ if (mas->max == ULONG_MAX)
+ node_size++;
+
+ /* slot and node store will not fit, go to the slow path */
+ if (unlikely(node_size >= node_slots))
+ goto slow_path;
+
+ if (wr_mas->entry && (wr_mas->node_end < node_slots - 1) &&
+ (mas->offset == wr_mas->node_end) && mas_wr_append(wr_mas)) {
+ if (!wr_mas->content || !wr_mas->entry)
+ mas_update_gap(mas);
+ return;
+ }
+
+ if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
+ return;
+ else if (mas_wr_node_store(wr_mas))
+ return;
+
+ if (mas_is_err(mas))
+ return;
+
+slow_path:
+ mas_wr_bnode(wr_mas);
+}
+
+/*
+ * mas_wr_store_entry() - Internal call to store a value
+ * @mas: The maple state
+ * @entry: The entry to store.
+ *
+ * Return: The contents that was stored at the index.
+ */
+static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+
+ wr_mas->content = mas_start(mas);
+ if (mas_is_none(mas) || mas_is_ptr(mas)) {
+ mas_store_root(mas, wr_mas->entry);
+ return wr_mas->content;
+ }
+
+ if (unlikely(!mas_wr_walk(wr_mas))) {
+ mas_wr_spanning_store(wr_mas);
+ return wr_mas->content;
+ }
+
+ /* At this point, we are at the leaf node that needs to be altered. */
+ wr_mas->end_piv = wr_mas->r_max;
+ mas_wr_end_piv(wr_mas);
+
+ if (!wr_mas->entry)
+ mas_wr_extend_null(wr_mas);
+
+ /* New root for a single pointer */
+ if (unlikely(!mas->index && mas->last == ULONG_MAX)) {
+ mas_new_root(mas, wr_mas->entry);
+ return wr_mas->content;
+ }
+
+ mas_wr_modify(wr_mas);
+ return wr_mas->content;
+}
+
+/**
+ * mas_insert() - Internal call to insert a value
+ * @mas: The maple state
+ * @entry: The entry to store
+ *
+ * Return: %NULL or the contents that already exists at the requested index
+ * otherwise. The maple state needs to be checked for error conditions.
+ */
+static inline void *mas_insert(struct ma_state *mas, void *entry)
+{
+ MA_WR_STATE(wr_mas, mas, entry);
+
+ /*
+ * Inserting a new range inserts either 0, 1, or 2 pivots within the
+ * tree. If the insert fits exactly into an existing gap with a value
+ * of NULL, then the slot only needs to be written with the new value.
+ * If the range being inserted is adjacent to another range, then only a
+ * single pivot needs to be inserted (as well as writing the entry). If
+ * the new range is within a gap but does not touch any other ranges,
+ * then two pivots need to be inserted: the start - 1, and the end. As
+ * usual, the entry must be written. Most operations require a new node
+ * to be allocated and replace an existing node to ensure RCU safety,
+ * when in RCU mode. The exception to requiring a newly allocated node
+ * is when inserting at the end of a node (appending). When done
+ * carefully, appending can reuse the node in place.
+ */
+ wr_mas.content = mas_start(mas);
+ if (wr_mas.content)
+ goto exists;
+
+ if (mas_is_none(mas) || mas_is_ptr(mas)) {
+ mas_store_root(mas, entry);
+ return NULL;
+ }
+
+ /* spanning writes always overwrite something */
+ if (!mas_wr_walk(&wr_mas))
+ goto exists;
+
+ /* At this point, we are at the leaf node that needs to be altered. */
+ wr_mas.offset_end = mas->offset;
+ wr_mas.end_piv = wr_mas.r_max;
+
+ if (wr_mas.content || (mas->last > wr_mas.r_max))
+ goto exists;
+
+ if (!entry)
+ return NULL;
+
+ mas_wr_modify(&wr_mas);
+ return wr_mas.content;
+
+exists:
+ mas_set_err(mas, -EEXIST);
+ return wr_mas.content;
+
+}
+
+/*
+ * mas_prev_node() - Find the prev non-null entry at the same level in the
+ * tree. The prev value will be mas->node[mas->offset] or MAS_NONE.
+ * @mas: The maple state
+ * @min: The lower limit to search
+ *
+ * The prev node value will be mas->node[mas->offset] or MAS_NONE.
+ * Return: 1 if the node is dead, 0 otherwise.
+ */
+static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
+{
+ enum maple_type mt;
+ int offset, level;
+ void __rcu **slots;
+ struct maple_node *node;
+ struct maple_enode *enode;
+ unsigned long *pivots;
+
+ if (mas_is_none(mas))
+ return 0;
+
+ level = 0;
+ do {
+ node = mas_mn(mas);
+ if (ma_is_root(node))
+ goto no_entry;
+
+ /* Walk up. */
+ if (unlikely(mas_ascend(mas)))
+ return 1;
+ offset = mas->offset;
+ level++;
+ } while (!offset);
+
+ offset--;
+ mt = mte_node_type(mas->node);
+ node = mas_mn(mas);
+ slots = ma_slots(node, mt);
+ pivots = ma_pivots(node, mt);
+ mas->max = pivots[offset];
+ if (offset)
+ mas->min = pivots[offset - 1] + 1;
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ if (mas->max < min)
+ goto no_entry_min;
+
+ while (level > 1) {
+ level--;
+ enode = mas_slot(mas, slots, offset);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ mas->node = enode;
+ mt = mte_node_type(mas->node);
+ node = mas_mn(mas);
+ slots = ma_slots(node, mt);
+ pivots = ma_pivots(node, mt);
+ offset = ma_data_end(node, mt, pivots, mas->max);
+ if (offset)
+ mas->min = pivots[offset - 1] + 1;
+
+ if (offset < mt_pivots[mt])
+ mas->max = pivots[offset];
+
+ if (mas->max < min)
+ goto no_entry;
+ }
+
+ mas->node = mas_slot(mas, slots, offset);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ mas->offset = mas_data_end(mas);
+ if (unlikely(mte_dead_node(mas->node)))
+ return 1;
+
+ return 0;
+
+no_entry_min:
+ mas->offset = offset;
+ if (offset)
+ mas->min = pivots[offset - 1] + 1;
+no_entry:
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ mas->node = MAS_NONE;
+ return 0;
+}
+
+/*
+ * mas_next_node() - Get the next node at the same level in the tree.
+ * @mas: The maple state
+ * @max: The maximum pivot value to check.
+ *
+ * The next value will be mas->node[mas->offset] or MAS_NONE.
+ * Return: 1 on dead node, 0 otherwise.
+ */
+static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
+ unsigned long max)
+{
+ unsigned long min, pivot;
+ unsigned long *pivots;
+ struct maple_enode *enode;
+ int level = 0;
+ unsigned char offset;
+ enum maple_type mt;
+ void __rcu **slots;
+
+ if (mas->max >= max)
+ goto no_entry;
+
+ level = 0;
+ do {
+ if (ma_is_root(node))
+ goto no_entry;
+
+ min = mas->max + 1;
+ if (min > max)
+ goto no_entry;
+
+ if (unlikely(mas_ascend(mas)))
+ return 1;
+
+ offset = mas->offset;
+ level++;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ pivots = ma_pivots(node, mt);
+ } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
+
+ slots = ma_slots(node, mt);
+ pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
+ while (unlikely(level > 1)) {
+ /* Descend, if necessary */
+ enode = mas_slot(mas, slots, offset);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ mas->node = enode;
+ level--;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ slots = ma_slots(node, mt);
+ pivots = ma_pivots(node, mt);
+ offset = 0;
+ pivot = pivots[0];
+ }
+
+ enode = mas_slot(mas, slots, offset);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ mas->node = enode;
+ mas->min = min;
+ mas->max = pivot;
+ return 0;
+
+no_entry:
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ mas->node = MAS_NONE;
+ return 0;
+}
+
+/*
+ * mas_next_nentry() - Get the next node entry
+ * @mas: The maple state
+ * @max: The maximum value to check
+ * @*range_start: Pointer to store the start of the range.
+ *
+ * Sets @mas->offset to the offset of the next node entry, @mas->last to the
+ * pivot of the entry.
+ *
+ * Return: The next entry, %NULL otherwise
+ */
+static inline void *mas_next_nentry(struct ma_state *mas,
+ struct maple_node *node, unsigned long max, enum maple_type type)
+{
+ unsigned char count;
+ unsigned long pivot;
+ unsigned long *pivots;
+ void __rcu **slots;
+ void *entry;
+
+ if (mas->last == mas->max) {
+ mas->index = mas->max;
+ return NULL;
+ }
+
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ mas->index = mas_safe_min(mas, pivots, mas->offset);
+ if (ma_dead_node(node))
+ return NULL;
+
+ if (mas->index > max)
+ return NULL;
+
+ count = ma_data_end(node, type, pivots, mas->max);
+ if (mas->offset > count)
+ return NULL;
+
+ while (mas->offset < count) {
+ pivot = pivots[mas->offset];
+ entry = mas_slot(mas, slots, mas->offset);
+ if (ma_dead_node(node))
+ return NULL;
+
+ if (entry)
+ goto found;
+
+ if (pivot >= max)
+ return NULL;
+
+ mas->index = pivot + 1;
+ mas->offset++;
+ }
+
+ if (mas->index > mas->max) {
+ mas->index = mas->last;
+ return NULL;
+ }
+
+ pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
+ entry = mas_slot(mas, slots, mas->offset);
+ if (ma_dead_node(node))
+ return NULL;
+
+ if (!pivot)
+ return NULL;
+
+ if (!entry)
+ return NULL;
+
+found:
+ mas->last = pivot;
+ return entry;
+}
+
+static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
+{
+
+retry:
+ mas_set(mas, index);
+ mas_state_walk(mas);
+ if (mas_is_start(mas))
+ goto retry;
+
+ return;
+
+}
+
+/*
+ * mas_next_entry() - Internal function to get the next entry.
+ * @mas: The maple state
+ * @limit: The maximum range start.
+ *
+ * Set the @mas->node to the next entry and the range_start to
+ * the beginning value for the entry. Does not check beyond @limit.
+ * Sets @mas->index and @mas->last to the limit if it is hit.
+ * Restarts on dead nodes.
+ *
+ * Return: the next entry or %NULL.
+ */
+static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
+{
+ void *entry = NULL;
+ struct maple_enode *prev_node;
+ struct maple_node *node;
+ unsigned char offset;
+ unsigned long last;
+ enum maple_type mt;
+
+ last = mas->last;
+retry:
+ offset = mas->offset;
+ prev_node = mas->node;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ mas->offset++;
+ if (unlikely(mas->offset >= mt_slots[mt])) {
+ mas->offset = mt_slots[mt] - 1;
+ goto next_node;
+ }
+
+ while (!mas_is_none(mas)) {
+ entry = mas_next_nentry(mas, node, limit, mt);
+ if (unlikely(ma_dead_node(node))) {
+ mas_rewalk(mas, last);
+ goto retry;
+ }
+
+ if (likely(entry))
+ return entry;
+
+ if (unlikely((mas->index > limit)))
+ break;
+
+next_node:
+ prev_node = mas->node;
+ offset = mas->offset;
+ if (unlikely(mas_next_node(mas, node, limit))) {
+ mas_rewalk(mas, last);
+ goto retry;
+ }
+ mas->offset = 0;
+ node = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ }
+
+ mas->index = mas->last = limit;
+ mas->offset = offset;
+ mas->node = prev_node;
+ return NULL;
+}
+
+/*
+ * mas_prev_nentry() - Get the previous node entry.
+ * @mas: The maple state.
+ * @limit: The lower limit to check for a value.
+ *
+ * Return: the entry, %NULL otherwise.
+ */
+static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit,
+ unsigned long index)
+{
+ unsigned long pivot, min;
+ unsigned char offset;
+ struct maple_node *mn;
+ enum maple_type mt;
+ unsigned long *pivots;
+ void __rcu **slots;
+ void *entry;
+
+retry:
+ if (!mas->offset)
+ return NULL;
+
+ mn = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ offset = mas->offset - 1;
+ if (offset >= mt_slots[mt])
+ offset = mt_slots[mt] - 1;
+
+ slots = ma_slots(mn, mt);
+ pivots = ma_pivots(mn, mt);
+ if (offset == mt_pivots[mt])
+ pivot = mas->max;
+ else
+ pivot = pivots[offset];
+
+ if (unlikely(ma_dead_node(mn))) {
+ mas_rewalk(mas, index);
+ goto retry;
+ }
+
+ while (offset && ((!mas_slot(mas, slots, offset) && pivot >= limit) ||
+ !pivot))
+ pivot = pivots[--offset];
+
+ min = mas_safe_min(mas, pivots, offset);
+ entry = mas_slot(mas, slots, offset);
+ if (unlikely(ma_dead_node(mn))) {
+ mas_rewalk(mas, index);
+ goto retry;
+ }
+
+ if (likely(entry)) {
+ mas->offset = offset;
+ mas->last = pivot;
+ mas->index = min;
+ }
+ return entry;
+}
+
+static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min)
+{
+ void *entry;
+
+retry:
+ while (likely(!mas_is_none(mas))) {
+ entry = mas_prev_nentry(mas, min, mas->index);
+ if (unlikely(mas->last < min))
+ goto not_found;
+
+ if (likely(entry))
+ return entry;
+
+ if (unlikely(mas_prev_node(mas, min))) {
+ mas_rewalk(mas, mas->index);
+ goto retry;
+ }
+
+ mas->offset++;
+ }
+
+ mas->offset--;
+not_found:
+ mas->index = mas->last = min;
+ return NULL;
+}
+
+/*
+ * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the
+ * highest gap address of a given size in a given node and descend.
+ * @mas: The maple state
+ * @size: The needed size.
+ *
+ * Return: True if found in a leaf, false otherwise.
+ *
+ */
+static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
+{
+ enum maple_type type = mte_node_type(mas->node);
+ struct maple_node *node = mas_mn(mas);
+ unsigned long *pivots, *gaps;
+ void __rcu **slots;
+ unsigned long gap = 0;
+ unsigned long max, min, index;
+ unsigned char offset;
+
+ if (unlikely(mas_is_err(mas)))
+ return true;
+
+ if (ma_is_dense(type)) {
+ /* dense nodes. */
+ mas->offset = (unsigned char)(mas->index - mas->min);
+ return true;
+ }
+
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ gaps = ma_gaps(node, type);
+ offset = mas->offset;
+ min = mas_safe_min(mas, pivots, offset);
+ /* Skip out of bounds. */
+ while (mas->last < min)
+ min = mas_safe_min(mas, pivots, --offset);
+
+ max = mas_safe_pivot(mas, pivots, offset, type);
+ index = mas->index;
+ while (index <= max) {
+ gap = 0;
+ if (gaps)
+ gap = gaps[offset];
+ else if (!mas_slot(mas, slots, offset))
+ gap = max - min + 1;
+
+ if (gap) {
+ if ((size <= gap) && (size <= mas->last - min + 1))
+ break;
+
+ if (!gaps) {
+ /* Skip the next slot, it cannot be a gap. */
+ if (offset < 2)
+ goto ascend;
+
+ offset -= 2;
+ max = pivots[offset];
+ min = mas_safe_min(mas, pivots, offset);
+ continue;
+ }
+ }
+
+ if (!offset)
+ goto ascend;
+
+ offset--;
+ max = min - 1;
+ min = mas_safe_min(mas, pivots, offset);
+ }
+
+ if (unlikely(index > max)) {
+ mas_set_err(mas, -EBUSY);
+ return false;
+ }
+
+ if (unlikely(ma_is_leaf(type))) {
+ mas->offset = offset;
+ mas->min = min;
+ mas->max = min + gap - 1;
+ return true;
+ }
+
+ /* descend, only happens under lock. */
+ mas->node = mas_slot(mas, slots, offset);
+ mas->min = min;
+ mas->max = max;
+ mas->offset = mas_data_end(mas);
+ return false;
+
+ascend:
+ if (mte_is_root(mas->node))
+ mas_set_err(mas, -EBUSY);
+
+ return false;
+}
+
+static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
+{
+ enum maple_type type = mte_node_type(mas->node);
+ unsigned long pivot, min, gap = 0;
+ unsigned char offset;
+ unsigned long *gaps;
+ unsigned long *pivots = ma_pivots(mas_mn(mas), type);
+ void __rcu **slots = ma_slots(mas_mn(mas), type);
+ bool found = false;
+
+ if (ma_is_dense(type)) {
+ mas->offset = (unsigned char)(mas->index - mas->min);
+ return true;
+ }
+
+ gaps = ma_gaps(mte_to_node(mas->node), type);
+ offset = mas->offset;
+ min = mas_safe_min(mas, pivots, offset);
+ for (; offset < mt_slots[type]; offset++) {
+ pivot = mas_safe_pivot(mas, pivots, offset, type);
+ if (offset && !pivot)
+ break;
+
+ /* Not within lower bounds */
+ if (mas->index > pivot)
+ goto next_slot;
+
+ if (gaps)
+ gap = gaps[offset];
+ else if (!mas_slot(mas, slots, offset))
+ gap = min(pivot, mas->last) - max(mas->index, min) + 1;
+ else
+ goto next_slot;
+
+ if (gap >= size) {
+ if (ma_is_leaf(type)) {
+ found = true;
+ goto done;
+ }
+ if (mas->index <= pivot) {
+ mas->node = mas_slot(mas, slots, offset);
+ mas->min = min;
+ mas->max = pivot;
+ offset = 0;
+ break;
+ }
+ }
+next_slot:
+ min = pivot + 1;
+ if (mas->last <= pivot) {
+ mas_set_err(mas, -EBUSY);
+ return true;
+ }
+ }
+
+ if (mte_is_root(mas->node))
+ found = true;
+done:
+ mas->offset = offset;
+ return found;
+}
+
+/**
+ * mas_walk() - Search for @mas->index in the tree.
+ * @mas: The maple state.
+ *
+ * mas->index and mas->last will be set to the range if there is a value. If
+ * mas->node is MAS_NONE, reset to MAS_START.
+ *
+ * Return: the entry at the location or %NULL.
+ */
+void *mas_walk(struct ma_state *mas)
+{
+ void *entry;
+
+retry:
+ entry = mas_state_walk(mas);
+ if (mas_is_start(mas))
+ goto retry;
+
+ if (mas_is_ptr(mas)) {
+ if (!mas->index) {
+ mas->last = 0;
+ } else {
+ mas->index = 1;
+ mas->last = ULONG_MAX;
+ }
+ return entry;
+ }
+
+ if (mas_is_none(mas)) {
+ mas->index = 0;
+ mas->last = ULONG_MAX;
+ }
+
+ return entry;
+}
+EXPORT_SYMBOL_GPL(mas_walk);
+
+static inline bool mas_rewind_node(struct ma_state *mas)
+{
+ unsigned char slot;
+
+ do {
+ if (mte_is_root(mas->node)) {
+ slot = mas->offset;
+ if (!slot)
+ return false;
+ } else {
+ mas_ascend(mas);
+ slot = mas->offset;
+ }
+ } while (!slot);
+
+ mas->offset = --slot;
+ return true;
+}
+
+/*
+ * mas_skip_node() - Internal function. Skip over a node.
+ * @mas: The maple state.
+ *
+ * Return: true if there is another node, false otherwise.
+ */
+static inline bool mas_skip_node(struct ma_state *mas)
+{
+ unsigned char slot, slot_count;
+ unsigned long *pivots;
+ enum maple_type mt;
+
+ mt = mte_node_type(mas->node);
+ slot_count = mt_slots[mt] - 1;
+ do {
+ if (mte_is_root(mas->node)) {
+ slot = mas->offset;
+ if (slot > slot_count) {
+ mas_set_err(mas, -EBUSY);
+ return false;
+ }
+ } else {
+ mas_ascend(mas);
+ slot = mas->offset;
+ mt = mte_node_type(mas->node);
+ slot_count = mt_slots[mt] - 1;
+ }
+ } while (slot > slot_count);
+
+ mas->offset = ++slot;
+ pivots = ma_pivots(mas_mn(mas), mt);
+ if (slot > 0)
+ mas->min = pivots[slot - 1] + 1;
+
+ if (slot <= slot_count)
+ mas->max = pivots[slot];
+
+ return true;
+}
+
+/*
+ * mas_awalk() - Allocation walk. Search from low address to high, for a gap of
+ * @size
+ * @mas: The maple state
+ * @size: The size of the gap required
+ *
+ * Search between @mas->index and @mas->last for a gap of @size.
+ */
+static inline void mas_awalk(struct ma_state *mas, unsigned long size)
+{
+ struct maple_enode *last = NULL;
+
+ /*
+ * There are 4 options:
+ * go to child (descend)
+ * go back to parent (ascend)
+ * no gap found. (return, slot == MAPLE_NODE_SLOTS)
+ * found the gap. (return, slot != MAPLE_NODE_SLOTS)
+ */
+ while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
+ if (last == mas->node)
+ mas_skip_node(mas);
+ else
+ last = mas->node;
+ }
+}
+
+/*
+ * mas_fill_gap() - Fill a located gap with @entry.
+ * @mas: The maple state
+ * @entry: The value to store
+ * @slot: The offset into the node to store the @entry
+ * @size: The size of the entry
+ * @index: The start location
+ */
+static inline void mas_fill_gap(struct ma_state *mas, void *entry,
+ unsigned char slot, unsigned long size, unsigned long *index)
+{
+ MA_WR_STATE(wr_mas, mas, entry);
+ unsigned char pslot = mte_parent_slot(mas->node);
+ struct maple_enode *mn = mas->node;
+ unsigned long *pivots;
+ enum maple_type ptype;
+ /*
+ * mas->index is the start address for the search
+ * which may no longer be needed.
+ * mas->last is the end address for the search
+ */
+
+ *index = mas->index;
+ mas->last = mas->index + size - 1;
+
+ /*
+ * It is possible that using mas->max and mas->min to correctly
+ * calculate the index and last will cause an issue in the gap
+ * calculation, so fix the ma_state here
+ */
+ mas_ascend(mas);
+ ptype = mte_node_type(mas->node);
+ pivots = ma_pivots(mas_mn(mas), ptype);
+ mas->max = mas_safe_pivot(mas, pivots, pslot, ptype);
+ mas->min = mas_safe_min(mas, pivots, pslot);
+ mas->node = mn;
+ mas->offset = slot;
+ mas_wr_store_entry(&wr_mas);
+}
+
+/*
+ * mas_sparse_area() - Internal function. Return upper or lower limit when
+ * searching for a gap in an empty tree.
+ * @mas: The maple state
+ * @min: the minimum range
+ * @max: The maximum range
+ * @size: The size of the gap
+ * @fwd: Searching forward or back
+ */
+static inline void mas_sparse_area(struct ma_state *mas, unsigned long min,
+ unsigned long max, unsigned long size, bool fwd)
+{
+ unsigned long start = 0;
+
+ if (!unlikely(mas_is_none(mas)))
+ start++;
+ /* mas_is_ptr */
+
+ if (start < min)
+ start = min;
+
+ if (fwd) {
+ mas->index = start;
+ mas->last = start + size - 1;
+ return;
+ }
+
+ mas->index = max;
+}
+
+/*
+ * mas_empty_area() - Get the lowest address within the range that is
+ * sufficient for the size requested.
+ * @mas: The maple state
+ * @min: The lowest value of the range
+ * @max: The highest value of the range
+ * @size: The size needed
+ */
+int mas_empty_area(struct ma_state *mas, unsigned long min,
+ unsigned long max, unsigned long size)
+{
+ unsigned char offset;
+ unsigned long *pivots;
+ enum maple_type mt;
+
+ if (mas_is_start(mas))
+ mas_start(mas);
+ else if (mas->offset >= 2)
+ mas->offset -= 2;
+ else if (!mas_skip_node(mas))
+ return -EBUSY;
+
+ /* Empty set */
+ if (mas_is_none(mas) || mas_is_ptr(mas)) {
+ mas_sparse_area(mas, min, max, size, true);
+ return 0;
+ }
+
+ /* The start of the window can only be within these values */
+ mas->index = min;
+ mas->last = max;
+ mas_awalk(mas, size);
+
+ if (unlikely(mas_is_err(mas)))
+ return xa_err(mas->node);
+
+ offset = mas->offset;
+ if (unlikely(offset == MAPLE_NODE_SLOTS))
+ return -EBUSY;
+
+ mt = mte_node_type(mas->node);
+ pivots = ma_pivots(mas_mn(mas), mt);
+ if (offset)
+ mas->min = pivots[offset - 1] + 1;
+
+ if (offset < mt_pivots[mt])
+ mas->max = pivots[offset];
+
+ if (mas->index < mas->min)
+ mas->index = mas->min;
+
+ mas->last = mas->index + size - 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mas_empty_area);
+
+/*
+ * mas_empty_area_rev() - Get the highest address within the range that is
+ * sufficient for the size requested.
+ * @mas: The maple state
+ * @min: The lowest value of the range
+ * @max: The highest value of the range
+ * @size: The size needed
+ */
+int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
+ unsigned long max, unsigned long size)
+{
+ struct maple_enode *last = mas->node;
+
+ if (mas_is_start(mas)) {
+ mas_start(mas);
+ mas->offset = mas_data_end(mas);
+ } else if (mas->offset >= 2) {
+ mas->offset -= 2;
+ } else if (!mas_rewind_node(mas)) {
+ return -EBUSY;
+ }
+
+ /* Empty set. */
+ if (mas_is_none(mas) || mas_is_ptr(mas)) {
+ mas_sparse_area(mas, min, max, size, false);
+ return 0;
+ }
+
+ /* The start of the window can only be within these values. */
+ mas->index = min;
+ mas->last = max;
+
+ while (!mas_rev_awalk(mas, size)) {
+ if (last == mas->node) {
+ if (!mas_rewind_node(mas))
+ return -EBUSY;
+ } else {
+ last = mas->node;
+ }
+ }
+
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
+ return -EBUSY;
+
+ /*
+ * mas_rev_awalk() has set mas->min and mas->max to the gap values. If
+ * the maximum is outside the window we are searching, then use the last
+ * location in the search.
+ * mas->max and mas->min is the range of the gap.
+ * mas->index and mas->last are currently set to the search range.
+ */
+
+ /* Trim the upper limit to the max. */
+ if (mas->max <= mas->last)
+ mas->last = mas->max;
+
+ mas->index = mas->last - size + 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mas_empty_area_rev);
+
+static inline int mas_alloc(struct ma_state *mas, void *entry,
+ unsigned long size, unsigned long *index)
+{
+ unsigned long min;
+
+ mas_start(mas);
+ if (mas_is_none(mas) || mas_is_ptr(mas)) {
+ mas_root_expand(mas, entry);
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ if (!mas->index)
+ return mte_pivot(mas->node, 0);
+ return mte_pivot(mas->node, 1);
+ }
+
+ /* Must be walking a tree. */
+ mas_awalk(mas, size);
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ if (mas->offset == MAPLE_NODE_SLOTS)
+ goto no_gap;
+
+ /*
+ * At this point, mas->node points to the right node and we have an
+ * offset that has a sufficient gap.
+ */
+ min = mas->min;
+ if (mas->offset)
+ min = mte_pivot(mas->node, mas->offset - 1) + 1;
+
+ if (mas->index < min)
+ mas->index = min;
+
+ mas_fill_gap(mas, entry, mas->offset, size, index);
+ return 0;
+
+no_gap:
+ return -EBUSY;
+}
+
+static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
+ unsigned long max, void *entry,
+ unsigned long size, unsigned long *index)
+{
+ int ret = 0;
+
+ ret = mas_empty_area_rev(mas, min, max, size);
+ if (ret)
+ return ret;
+
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ if (mas->offset == MAPLE_NODE_SLOTS)
+ goto no_gap;
+
+ mas_fill_gap(mas, entry, mas->offset, size, index);
+ return 0;
+
+no_gap:
+ return -EBUSY;
+}
+
+/*
+ * mas_dead_leaves() - Mark all leaves of a node as dead.
+ * @mas: The maple state
+ * @slots: Pointer to the slot array
+ *
+ * Must hold the write lock.
+ *
+ * Return: The number of leaves marked as dead.
+ */
+static inline
+unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
+{
+ struct maple_node *node;
+ enum maple_type type;
+ void *entry;
+ int offset;
+
+ for (offset = 0; offset < mt_slot_count(mas->node); offset++) {
+ entry = mas_slot_locked(mas, slots, offset);
+ type = mte_node_type(entry);
+ node = mte_to_node(entry);
+ /* Use both node and type to catch LE & BE metadata */
+ if (!node || !type)
+ break;
+
+ mte_set_node_dead(entry);
+ smp_wmb(); /* Needed for RCU */
+ node->type = type;
+ rcu_assign_pointer(slots[offset], node);
+ }
+
+ return offset;
+}
+
+static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
+{
+ struct maple_node *node, *next;
+ void __rcu **slots = NULL;
+
+ next = mas_mn(mas);
+ do {
+ mas->node = ma_enode_ptr(next);
+ node = mas_mn(mas);
+ slots = ma_slots(node, node->type);
+ next = mas_slot_locked(mas, slots, offset);
+ offset = 0;
+ } while (!ma_is_leaf(next->type));
+
+ return slots;
+}
+
+static void mt_free_walk(struct rcu_head *head)
+{
+ void __rcu **slots;
+ struct maple_node *node, *start;
+ struct maple_tree mt;
+ unsigned char offset;
+ enum maple_type type;
+ MA_STATE(mas, &mt, 0, 0);
+
+ node = container_of(head, struct maple_node, rcu);
+
+ if (ma_is_leaf(node->type))
+ goto free_leaf;
+
+ mt_init_flags(&mt, node->ma_flags);
+ mas_lock(&mas);
+ start = node;
+ mas.node = mt_mk_node(node, node->type);
+ slots = mas_dead_walk(&mas, 0);
+ node = mas_mn(&mas);
+ do {
+ mt_free_bulk(node->slot_len, slots);
+ offset = node->parent_slot + 1;
+ mas.node = node->piv_parent;
+ if (mas_mn(&mas) == node)
+ goto start_slots_free;
+
+ type = mte_node_type(mas.node);
+ slots = ma_slots(mte_to_node(mas.node), type);
+ if ((offset < mt_slots[type]) && (slots[offset]))
+ slots = mas_dead_walk(&mas, offset);
+
+ node = mas_mn(&mas);
+ } while ((node != start) || (node->slot_len < offset));
+
+ slots = ma_slots(node, node->type);
+ mt_free_bulk(node->slot_len, slots);
+
+start_slots_free:
+ mas_unlock(&mas);
+free_leaf:
+ mt_free_rcu(&node->rcu);
+}
+
+static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
+ struct maple_enode *prev, unsigned char offset)
+{
+ struct maple_node *node;
+ struct maple_enode *next = mas->node;
+ void __rcu **slots = NULL;
+
+ do {
+ mas->node = next;
+ node = mas_mn(mas);
+ slots = ma_slots(node, mte_node_type(mas->node));
+ next = mas_slot_locked(mas, slots, 0);
+ if ((mte_dead_node(next)))
+ next = mas_slot_locked(mas, slots, 1);
+
+ mte_set_node_dead(mas->node);
+ node->type = mte_node_type(mas->node);
+ node->piv_parent = prev;
+ node->parent_slot = offset;
+ offset = 0;
+ prev = mas->node;
+ } while (!mte_is_leaf(next));
+
+ return slots;
+}
+
+static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
+ bool free)
+{
+ void __rcu **slots;
+ struct maple_node *node = mte_to_node(enode);
+ struct maple_enode *start;
+ struct maple_tree mt;
+
+ MA_STATE(mas, &mt, 0, 0);
+
+ if (mte_is_leaf(enode))
+ goto free_leaf;
+
+ mt_init_flags(&mt, ma_flags);
+ mas_lock(&mas);
+
+ mas.node = start = enode;
+ slots = mas_destroy_descend(&mas, start, 0);
+ node = mas_mn(&mas);
+ do {
+ enum maple_type type;
+ unsigned char offset;
+ struct maple_enode *parent, *tmp;
+
+ node->slot_len = mas_dead_leaves(&mas, slots);
+ if (free)
+ mt_free_bulk(node->slot_len, slots);
+ offset = node->parent_slot + 1;
+ mas.node = node->piv_parent;
+ if (mas_mn(&mas) == node)
+ goto start_slots_free;
+
+ type = mte_node_type(mas.node);
+ slots = ma_slots(mte_to_node(mas.node), type);
+ if (offset >= mt_slots[type])
+ goto next;
+
+ tmp = mas_slot_locked(&mas, slots, offset);
+ if (mte_node_type(tmp) && mte_to_node(tmp)) {
+ parent = mas.node;
+ mas.node = tmp;
+ slots = mas_destroy_descend(&mas, parent, offset);
+ }
+next:
+ node = mas_mn(&mas);
+ } while (start != mas.node);
+
+ node = mas_mn(&mas);
+ node->slot_len = mas_dead_leaves(&mas, slots);
+ if (free)
+ mt_free_bulk(node->slot_len, slots);
+
+start_slots_free:
+ mas_unlock(&mas);
+
+free_leaf:
+ if (free)
+ mt_free_rcu(&node->rcu);
+}
+
+/*
+ * mte_destroy_walk() - Free a tree or sub-tree.
+ * @enode - the encoded maple node (maple_enode) to start
+ * @mn - the tree to free - needed for node types.
+ *
+ * Must hold the write lock.
+ */
+static inline void mte_destroy_walk(struct maple_enode *enode,
+ struct maple_tree *mt)
+{
+ struct maple_node *node = mte_to_node(enode);
+
+ if (mt_in_rcu(mt)) {
+ mt_destroy_walk(enode, mt->ma_flags, false);
+ call_rcu(&node->rcu, mt_free_walk);
+ } else {
+ mt_destroy_walk(enode, mt->ma_flags, true);
+ }
+}
+
+static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
+{
+ if (!mas_is_start(wr_mas->mas)) {
+ if (mas_is_none(wr_mas->mas)) {
+ mas_reset(wr_mas->mas);
+ } else {
+ wr_mas->r_max = wr_mas->mas->max;
+ wr_mas->type = mte_node_type(wr_mas->mas->node);
+ if (mas_is_span_wr(wr_mas))
+ mas_reset(wr_mas->mas);
+ }
+ }
+
+}
+
+/* Interface */
+
+/**
+ * mas_store() - Store an @entry.
+ * @mas: The maple state.
+ * @entry: The entry to store.
+ *
+ * The @mas->index and @mas->last is used to set the range for the @entry.
+ * Note: The @mas should have pre-allocated entries to ensure there is memory to
+ * store the entry. Please see mas_expected_entries()/mas_destroy() for more details.
+ *
+ * Return: the first entry between mas->index and mas->last or %NULL.
+ */
+void *mas_store(struct ma_state *mas, void *entry)
+{
+ MA_WR_STATE(wr_mas, mas, entry);
+
+ trace_ma_write(__func__, mas, 0, entry);
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+ if (mas->index > mas->last)
+ pr_err("Error %lu > %lu %p\n", mas->index, mas->last, entry);
+ MT_BUG_ON(mas->tree, mas->index > mas->last);
+ if (mas->index > mas->last) {
+ mas_set_err(mas, -EINVAL);
+ return NULL;
+ }
+
+#endif
+
+ /*
+ * Storing is the same operation as insert with the added caveat that it
+ * can overwrite entries. Although this seems simple enough, one may
+ * want to examine what happens if a single store operation was to
+ * overwrite multiple entries within a self-balancing B-Tree.
+ */
+ mas_wr_store_setup(&wr_mas);
+ mas_wr_store_entry(&wr_mas);
+ return wr_mas.content;
+}
+EXPORT_SYMBOL_GPL(mas_store);
+
+/**
+ * mas_store_gfp() - Store a value into the tree.
+ * @mas: The maple state
+ * @entry: The entry to store
+ * @gfp: The GFP_FLAGS to use for allocations if necessary.
+ *
+ * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
+ * be allocated.
+ */
+int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
+{
+ MA_WR_STATE(wr_mas, mas, entry);
+
+ mas_wr_store_setup(&wr_mas);
+ trace_ma_write(__func__, mas, 0, entry);
+retry:
+ mas_wr_store_entry(&wr_mas);
+ if (unlikely(mas_nomem(mas, gfp)))
+ goto retry;
+
+ if (unlikely(mas_is_err(mas)))
+ return xa_err(mas->node);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mas_store_gfp);
+
+/**
+ * mas_store_prealloc() - Store a value into the tree using memory
+ * preallocated in the maple state.
+ * @mas: The maple state
+ * @entry: The entry to store.
+ */
+void mas_store_prealloc(struct ma_state *mas, void *entry)
+{
+ MA_WR_STATE(wr_mas, mas, entry);
+
+ mas_wr_store_setup(&wr_mas);
+ trace_ma_write(__func__, mas, 0, entry);
+ mas_wr_store_entry(&wr_mas);
+ BUG_ON(mas_is_err(mas));
+ mas_destroy(mas);
+}
+EXPORT_SYMBOL_GPL(mas_store_prealloc);
+
+/**
+ * mas_preallocate() - Preallocate enough nodes for a store operation
+ * @mas: The maple state
+ * @entry: The entry that will be stored
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated.
+ */
+int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
+{
+ int ret;
+
+ mas_node_count_gfp(mas, 1 + mas_mt_height(mas) * 3, gfp);
+ mas->mas_flags |= MA_STATE_PREALLOC;
+ if (likely(!mas_is_err(mas)))
+ return 0;
+
+ mas_set_alloc_req(mas, 0);
+ ret = xa_err(mas->node);
+ mas_reset(mas);
+ mas_destroy(mas);
+ mas_reset(mas);
+ return ret;
+}
+
+/*
+ * mas_destroy() - destroy a maple state.
+ * @mas: The maple state
+ *
+ * Upon completion, check the left-most node and rebalance against the node to
+ * the right if necessary. Frees any allocated nodes associated with this maple
+ * state.
+ */
+void mas_destroy(struct ma_state *mas)
+{
+ struct maple_alloc *node;
+
+ /*
+ * When using mas_for_each() to insert an expected number of elements,
+ * it is possible that the number inserted is less than the expected
+ * number. To fix an invalid final node, a check is performed here to
+ * rebalance the previous node with the final node.
+ */
+ if (mas->mas_flags & MA_STATE_REBALANCE) {
+ unsigned char end;
+
+ if (mas_is_start(mas))
+ mas_start(mas);
+
+ mtree_range_walk(mas);
+ end = mas_data_end(mas) + 1;
+ if (end < mt_min_slot_count(mas->node) - 1)
+ mas_destroy_rebalance(mas, end);
+
+ mas->mas_flags &= ~MA_STATE_REBALANCE;
+ }
+ mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
+
+ while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
+ node = mas->alloc;
+ mas->alloc = node->slot[0];
+ if (node->node_count > 0)
+ mt_free_bulk(node->node_count,
+ (void __rcu **)&node->slot[1]);
+ kmem_cache_free(maple_node_cache, node);
+ }
+ mas->alloc = NULL;
+}
+EXPORT_SYMBOL_GPL(mas_destroy);
+
+/*
+ * mas_expected_entries() - Set the expected number of entries that will be inserted.
+ * @mas: The maple state
+ * @nr_entries: The number of expected entries.
+ *
+ * This will attempt to pre-allocate enough nodes to store the expected number
+ * of entries. The allocations will occur using the bulk allocator interface
+ * for speed. Please call mas_destroy() on the @mas after inserting the entries
+ * to ensure any unused nodes are freed.
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated.
+ */
+int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
+{
+ int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
+ struct maple_enode *enode = mas->node;
+ int nr_nodes;
+ int ret;
+
+ /*
+ * Sometimes it is necessary to duplicate a tree to a new tree, such as
+ * forking a process and duplicating the VMAs from one tree to a new
+ * tree. When such a situation arises, it is known that the new tree is
+ * not going to be used until the entire tree is populated. For
+ * performance reasons, it is best to use a bulk load with RCU disabled.
+ * This allows for optimistic splitting that favours the left and reuse
+ * of nodes during the operation.
+ */
+
+ /* Optimize splitting for bulk insert in-order */
+ mas->mas_flags |= MA_STATE_BULK;
+
+ /*
+ * Avoid overflow, assume a gap between each entry and a trailing null.
+ * If this is wrong, it just means allocation can happen during
+ * insertion of entries.
+ */
+ nr_nodes = max(nr_entries, nr_entries * 2 + 1);
+ if (!mt_is_alloc(mas->tree))
+ nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
+
+ /* Leaves; reduce slots to keep space for expansion */
+ nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
+ /* Internal nodes */
+ nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
+ /* Add working room for split (2 nodes) + new parents */
+ mas_node_count(mas, nr_nodes + 3);
+
+ /* Detect if allocations run out */
+ mas->mas_flags |= MA_STATE_PREALLOC;
+
+ if (!mas_is_err(mas))
+ return 0;
+
+ ret = xa_err(mas->node);
+ mas->node = enode;
+ mas_destroy(mas);
+ return ret;
+
+}
+EXPORT_SYMBOL_GPL(mas_expected_entries);
+
+/**
+ * mas_next() - Get the next entry.
+ * @mas: The maple state
+ * @max: The maximum index to check.
+ *
+ * Returns the next entry after @mas->index.
+ * Must hold rcu_read_lock or the write lock.
+ * Can return the zero entry.
+ *
+ * Return: The next entry or %NULL
+ */
+void *mas_next(struct ma_state *mas, unsigned long max)
+{
+ if (mas_is_none(mas) || mas_is_paused(mas))
+ mas->node = MAS_START;
+
+ if (mas_is_start(mas))
+ mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
+
+ if (mas_is_ptr(mas)) {
+ if (!mas->index) {
+ mas->index = 1;
+ mas->last = ULONG_MAX;
+ }
+ return NULL;
+ }
+
+ if (mas->last == ULONG_MAX)
+ return NULL;
+
+ /* Retries on dead nodes handled by mas_next_entry */
+ return mas_next_entry(mas, max);
+}
+EXPORT_SYMBOL_GPL(mas_next);
+
+/**
+ * mt_next() - get the next value in the maple tree
+ * @mt: The maple tree
+ * @index: The start index
+ * @max: The maximum index to check
+ *
+ * Return: The entry at @index or higher, or %NULL if nothing is found.
+ */
+void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
+{
+ void *entry = NULL;
+ MA_STATE(mas, mt, index, index);
+
+ rcu_read_lock();
+ entry = mas_next(&mas, max);
+ rcu_read_unlock();
+ return entry;
+}
+EXPORT_SYMBOL_GPL(mt_next);
+
+/**
+ * mas_prev() - Get the previous entry
+ * @mas: The maple state
+ * @min: The minimum value to check.
+ *
+ * Must hold rcu_read_lock or the write lock.
+ * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not
+ * searchable nodes.
+ *
+ * Return: the previous value or %NULL.
+ */
+void *mas_prev(struct ma_state *mas, unsigned long min)
+{
+ if (!mas->index) {
+ /* Nothing comes before 0 */
+ mas->last = 0;
+ return NULL;
+ }
+
+ if (unlikely(mas_is_ptr(mas)))
+ return NULL;
+
+ if (mas_is_none(mas) || mas_is_paused(mas))
+ mas->node = MAS_START;
+
+ if (mas_is_start(mas)) {
+ mas_walk(mas);
+ if (!mas->index)
+ return NULL;
+ }
+
+ if (mas_is_ptr(mas)) {
+ if (!mas->index) {
+ mas->last = 0;
+ return NULL;
+ }
+
+ mas->index = mas->last = 0;
+ return mas_root_locked(mas);
+ }
+ return mas_prev_entry(mas, min);
+}
+EXPORT_SYMBOL_GPL(mas_prev);
+
+/**
+ * mt_prev() - get the previous value in the maple tree
+ * @mt: The maple tree
+ * @index: The start index
+ * @min: The minimum index to check
+ *
+ * Return: The entry at @index or lower, or %NULL if nothing is found.
+ */
+void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
+{
+ void *entry = NULL;
+ MA_STATE(mas, mt, index, index);
+
+ rcu_read_lock();
+ entry = mas_prev(&mas, min);
+ rcu_read_unlock();
+ return entry;
+}
+EXPORT_SYMBOL_GPL(mt_prev);
+
+/**
+ * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
+ * @mas: The maple state to pause
+ *
+ * Some users need to pause a walk and drop the lock they're holding in
+ * order to yield to a higher priority thread or carry out an operation
+ * on an entry. Those users should call this function before they drop
+ * the lock. It resets the @mas to be suitable for the next iteration
+ * of the loop after the user has reacquired the lock. If most entries
+ * found during a walk require you to call mas_pause(), the mt_for_each()
+ * iterator may be more appropriate.
+ *
+ */
+void mas_pause(struct ma_state *mas)
+{
+ mas->node = MAS_PAUSE;
+}
+EXPORT_SYMBOL_GPL(mas_pause);
+
+/**
+ * mas_find() - On the first call, find the entry at or after mas->index up to
+ * %max. Otherwise, find the entry after mas->index.
+ * @mas: The maple state
+ * @max: The maximum value to check.
+ *
+ * Must hold rcu_read_lock or the write lock.
+ * If an entry exists, last and index are updated accordingly.
+ * May set @mas->node to MAS_NONE.
+ *
+ * Return: The entry or %NULL.
+ */
+void *mas_find(struct ma_state *mas, unsigned long max)
+{
+ if (unlikely(mas_is_paused(mas))) {
+ if (unlikely(mas->last == ULONG_MAX)) {
+ mas->node = MAS_NONE;
+ return NULL;
+ }
+ mas->node = MAS_START;
+ mas->index = ++mas->last;
+ }
+
+ if (unlikely(mas_is_start(mas))) {
+ /* First run or continue */
+ void *entry;
+
+ if (mas->index > max)
+ return NULL;
+
+ entry = mas_walk(mas);
+ if (entry)
+ return entry;
+ }
+
+ if (unlikely(!mas_searchable(mas)))
+ return NULL;
+
+ /* Retries on dead nodes handled by mas_next_entry */
+ return mas_next_entry(mas, max);
+}
+EXPORT_SYMBOL_GPL(mas_find);
+
+/**
+ * mas_find_rev: On the first call, find the first non-null entry at or below
+ * mas->index down to %min. Otherwise find the first non-null entry below
+ * mas->index down to %min.
+ * @mas: The maple state
+ * @min: The minimum value to check.
+ *
+ * Must hold rcu_read_lock or the write lock.
+ * If an entry exists, last and index are updated accordingly.
+ * May set @mas->node to MAS_NONE.
+ *
+ * Return: The entry or %NULL.
+ */
+void *mas_find_rev(struct ma_state *mas, unsigned long min)
+{
+ if (unlikely(mas_is_paused(mas))) {
+ if (unlikely(mas->last == ULONG_MAX)) {
+ mas->node = MAS_NONE;
+ return NULL;
+ }
+ mas->node = MAS_START;
+ mas->last = --mas->index;
+ }
+
+ if (unlikely(mas_is_start(mas))) {
+ /* First run or continue */
+ void *entry;
+
+ if (mas->index < min)
+ return NULL;
+
+ entry = mas_walk(mas);
+ if (entry)
+ return entry;
+ }
+
+ if (unlikely(!mas_searchable(mas)))
+ return NULL;
+
+ if (mas->index < min)
+ return NULL;
+
+ /* Retries on dead nodes handled by mas_next_entry */
+ return mas_prev_entry(mas, min);
+}
+EXPORT_SYMBOL_GPL(mas_find_rev);
+
+/**
+ * mas_erase() - Find the range in which index resides and erase the entire
+ * range.
+ * @mas: The maple state
+ *
+ * Must hold the write lock.
+ * Searches for @mas->index, sets @mas->index and @mas->last to the range and
+ * erases that range.
+ *
+ * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
+ */
+void *mas_erase(struct ma_state *mas)
+{
+ void *entry;
+ MA_WR_STATE(wr_mas, mas, NULL);
+
+ if (mas_is_none(mas) || mas_is_paused(mas))
+ mas->node = MAS_START;
+
+ /* Retry unnecessary when holding the write lock. */
+ entry = mas_state_walk(mas);
+ if (!entry)
+ return NULL;
+
+write_retry:
+ /* Must reset to ensure spanning writes of last slot are detected */
+ mas_reset(mas);
+ mas_wr_store_setup(&wr_mas);
+ mas_wr_store_entry(&wr_mas);
+ if (mas_nomem(mas, GFP_KERNEL))
+ goto write_retry;
+
+ return entry;
+}
+EXPORT_SYMBOL_GPL(mas_erase);
+
+/**
+ * mas_nomem() - Check if there was an error allocating and do the allocation
+ * if necessary If there are allocations, then free them.
+ * @mas: The maple state
+ * @gfp: The GFP_FLAGS to use for allocations
+ * Return: true on allocation, false otherwise.
+ */
+bool mas_nomem(struct ma_state *mas, gfp_t gfp)
+ __must_hold(mas->tree->lock)
+{
+ if (likely(mas->node != MA_ERROR(-ENOMEM))) {
+ mas_destroy(mas);
+ return false;
+ }
+
+ if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
+ mtree_unlock(mas->tree);
+ mas_alloc_nodes(mas, gfp);
+ mtree_lock(mas->tree);
+ } else {
+ mas_alloc_nodes(mas, gfp);
+ }
+
+ if (!mas_allocated(mas))
+ return false;
+
+ mas->node = MAS_START;
+ return true;
+}
+
+void __init maple_tree_init(void)
+{
+ maple_node_cache = kmem_cache_create("maple_node",
+ sizeof(struct maple_node), sizeof(struct maple_node),
+ SLAB_PANIC, NULL);
+}
+
+/**
+ * mtree_load() - Load a value stored in a maple tree
+ * @mt: The maple tree
+ * @index: The index to load
+ *
+ * Return: the entry or %NULL
+ */
+void *mtree_load(struct maple_tree *mt, unsigned long index)
+{
+ MA_STATE(mas, mt, index, index);
+ void *entry;
+
+ trace_ma_read(__func__, &mas);
+ rcu_read_lock();
+retry:
+ entry = mas_start(&mas);
+ if (unlikely(mas_is_none(&mas)))
+ goto unlock;
+
+ if (unlikely(mas_is_ptr(&mas))) {
+ if (index)
+ entry = NULL;
+
+ goto unlock;
+ }
+
+ entry = mtree_lookup_walk(&mas);
+ if (!entry && unlikely(mas_is_start(&mas)))
+ goto retry;
+unlock:
+ rcu_read_unlock();
+ if (xa_is_zero(entry))
+ return NULL;
+
+ return entry;
+}
+EXPORT_SYMBOL(mtree_load);
+
+/**
+ * mtree_store_range() - Store an entry at a given range.
+ * @mt: The maple tree
+ * @index: The start of the range
+ * @last: The end of the range
+ * @entry: The entry to store
+ * @gfp: The GFP_FLAGS to use for allocations
+ *
+ * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
+ * be allocated.
+ */
+int mtree_store_range(struct maple_tree *mt, unsigned long index,
+ unsigned long last, void *entry, gfp_t gfp)
+{
+ MA_STATE(mas, mt, index, last);
+ MA_WR_STATE(wr_mas, &mas, entry);
+
+ trace_ma_write(__func__, &mas, 0, entry);
+ if (WARN_ON_ONCE(xa_is_advanced(entry)))
+ return -EINVAL;
+
+ if (index > last)
+ return -EINVAL;
+
+ mtree_lock(mt);
+retry:
+ mas_wr_store_entry(&wr_mas);
+ if (mas_nomem(&mas, gfp))
+ goto retry;
+
+ mtree_unlock(mt);
+ if (mas_is_err(&mas))
+ return xa_err(mas.node);
+
+ return 0;
+}
+EXPORT_SYMBOL(mtree_store_range);
+
+/**
+ * mtree_store() - Store an entry at a given index.
+ * @mt: The maple tree
+ * @index: The index to store the value
+ * @entry: The entry to store
+ * @gfp: The GFP_FLAGS to use for allocations
+ *
+ * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
+ * be allocated.
+ */
+int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
+ gfp_t gfp)
+{
+ return mtree_store_range(mt, index, index, entry, gfp);
+}
+EXPORT_SYMBOL(mtree_store);
+
+/**
+ * mtree_insert_range() - Insert an entry at a give range if there is no value.
+ * @mt: The maple tree
+ * @first: The start of the range
+ * @last: The end of the range
+ * @entry: The entry to store
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid
+ * request, -ENOMEM if memory could not be allocated.
+ */
+int mtree_insert_range(struct maple_tree *mt, unsigned long first,
+ unsigned long last, void *entry, gfp_t gfp)
+{
+ MA_STATE(ms, mt, first, last);
+
+ if (WARN_ON_ONCE(xa_is_advanced(entry)))
+ return -EINVAL;
+
+ if (first > last)
+ return -EINVAL;
+
+ mtree_lock(mt);
+retry:
+ mas_insert(&ms, entry);
+ if (mas_nomem(&ms, gfp))
+ goto retry;
+
+ mtree_unlock(mt);
+ if (mas_is_err(&ms))
+ return xa_err(ms.node);
+
+ return 0;
+}
+EXPORT_SYMBOL(mtree_insert_range);
+
+/**
+ * mtree_insert() - Insert an entry at a give index if there is no value.
+ * @mt: The maple tree
+ * @index : The index to store the value
+ * @entry: The entry to store
+ * @gfp: The FGP_FLAGS to use for allocations.
+ *
+ * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid
+ * request, -ENOMEM if memory could not be allocated.
+ */
+int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
+ gfp_t gfp)
+{
+ return mtree_insert_range(mt, index, index, entry, gfp);
+}
+EXPORT_SYMBOL(mtree_insert);
+
+int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long size, unsigned long min,
+ unsigned long max, gfp_t gfp)
+{
+ int ret = 0;
+
+ MA_STATE(mas, mt, min, max - size);
+ if (!mt_is_alloc(mt))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(mt_is_reserved(entry)))
+ return -EINVAL;
+
+ if (min > max)
+ return -EINVAL;
+
+ if (max < size)
+ return -EINVAL;
+
+ if (!size)
+ return -EINVAL;
+
+ mtree_lock(mt);
+retry:
+ mas.offset = 0;
+ mas.index = min;
+ mas.last = max - size;
+ ret = mas_alloc(&mas, entry, size, startp);
+ if (mas_nomem(&mas, gfp))
+ goto retry;
+
+ mtree_unlock(mt);
+ return ret;
+}
+EXPORT_SYMBOL(mtree_alloc_range);
+
+int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long size, unsigned long min,
+ unsigned long max, gfp_t gfp)
+{
+ int ret = 0;
+
+ MA_STATE(mas, mt, min, max - size);
+ if (!mt_is_alloc(mt))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(mt_is_reserved(entry)))
+ return -EINVAL;
+
+ if (min >= max)
+ return -EINVAL;
+
+ if (max < size - 1)
+ return -EINVAL;
+
+ if (!size)
+ return -EINVAL;
+
+ mtree_lock(mt);
+retry:
+ ret = mas_rev_alloc(&mas, min, max, entry, size, startp);
+ if (mas_nomem(&mas, gfp))
+ goto retry;
+
+ mtree_unlock(mt);
+ return ret;
+}
+EXPORT_SYMBOL(mtree_alloc_rrange);
+
+/**
+ * mtree_erase() - Find an index and erase the entire range.
+ * @mt: The maple tree
+ * @index: The index to erase
+ *
+ * Erasing is the same as a walk to an entry then a store of a NULL to that
+ * ENTIRE range. In fact, it is implemented as such using the advanced API.
+ *
+ * Return: The entry stored at the @index or %NULL
+ */
+void *mtree_erase(struct maple_tree *mt, unsigned long index)
+{
+ void *entry = NULL;
+
+ MA_STATE(mas, mt, index, index);
+ trace_ma_op(__func__, &mas);
+
+ mtree_lock(mt);
+ entry = mas_erase(&mas);
+ mtree_unlock(mt);
+
+ return entry;
+}
+EXPORT_SYMBOL(mtree_erase);
+
+/**
+ * __mt_destroy() - Walk and free all nodes of a locked maple tree.
+ * @mt: The maple tree
+ *
+ * Note: Does not handle locking.
+ */
+void __mt_destroy(struct maple_tree *mt)
+{
+ void *root = mt_root_locked(mt);
+
+ rcu_assign_pointer(mt->ma_root, NULL);
+ if (xa_is_node(root))
+ mte_destroy_walk(root, mt);
+
+ mt->ma_flags = 0;
+}
+EXPORT_SYMBOL_GPL(__mt_destroy);
+
+/**
+ * mtree_destroy() - Destroy a maple tree
+ * @mt: The maple tree
+ *
+ * Frees all resources used by the tree. Handles locking.
+ */
+void mtree_destroy(struct maple_tree *mt)
+{
+ mtree_lock(mt);
+ __mt_destroy(mt);
+ mtree_unlock(mt);
+}
+EXPORT_SYMBOL(mtree_destroy);
+
+/**
+ * mt_find() - Search from the start up until an entry is found.
+ * @mt: The maple tree
+ * @index: Pointer which contains the start location of the search
+ * @max: The maximum value to check
+ *
+ * Handles locking. @index will be incremented to one beyond the range.
+ *
+ * Return: The entry at or after the @index or %NULL
+ */
+void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
+{
+ MA_STATE(mas, mt, *index, *index);
+ void *entry;
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+ unsigned long copy = *index;
+#endif
+
+ trace_ma_read(__func__, &mas);
+
+ if ((*index) > max)
+ return NULL;
+
+ rcu_read_lock();
+retry:
+ entry = mas_state_walk(&mas);
+ if (mas_is_start(&mas))
+ goto retry;
+
+ if (unlikely(xa_is_zero(entry)))
+ entry = NULL;
+
+ if (entry)
+ goto unlock;
+
+ while (mas_searchable(&mas) && (mas.index < max)) {
+ entry = mas_next_entry(&mas, max);
+ if (likely(entry && !xa_is_zero(entry)))
+ break;
+ }
+
+ if (unlikely(xa_is_zero(entry)))
+ entry = NULL;
+unlock:
+ rcu_read_unlock();
+ if (likely(entry)) {
+ *index = mas.last + 1;
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+ if ((*index) && (*index) <= copy)
+ pr_err("index not increased! %lx <= %lx\n",
+ *index, copy);
+ MT_BUG_ON(mt, (*index) && ((*index) <= copy));
+#endif
+ }
+
+ return entry;
+}
+EXPORT_SYMBOL(mt_find);
+
+/**
+ * mt_find_after() - Search from the start up until an entry is found.
+ * @mt: The maple tree
+ * @index: Pointer which contains the start location of the search
+ * @max: The maximum value to check
+ *
+ * Handles locking, detects wrapping on index == 0
+ *
+ * Return: The entry at or after the @index or %NULL
+ */
+void *mt_find_after(struct maple_tree *mt, unsigned long *index,
+ unsigned long max)
+{
+ if (!(*index))
+ return NULL;
+
+ return mt_find(mt, index, max);
+}
+EXPORT_SYMBOL(mt_find_after);
+
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+atomic_t maple_tree_tests_run;
+EXPORT_SYMBOL_GPL(maple_tree_tests_run);
+atomic_t maple_tree_tests_passed;
+EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
+
+#ifndef __KERNEL__
+extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
+void mt_set_non_kernel(unsigned int val)
+{
+ kmem_cache_set_non_kernel(maple_node_cache, val);
+}
+
+extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
+unsigned long mt_get_alloc_size(void)
+{
+ return kmem_cache_get_alloc(maple_node_cache);
+}
+
+extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
+void mt_zero_nr_tallocated(void)
+{
+ kmem_cache_zero_nr_tallocated(maple_node_cache);
+}
+
+extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
+unsigned int mt_nr_tallocated(void)
+{
+ return kmem_cache_nr_tallocated(maple_node_cache);
+}
+
+extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
+unsigned int mt_nr_allocated(void)
+{
+ return kmem_cache_nr_allocated(maple_node_cache);
+}
+
+/*
+ * mas_dead_node() - Check if the maple state is pointing to a dead node.
+ * @mas: The maple state
+ * @index: The index to restore in @mas.
+ *
+ * Used in test code.
+ * Return: 1 if @mas has been reset to MAS_START, 0 otherwise.
+ */
+static inline int mas_dead_node(struct ma_state *mas, unsigned long index)
+{
+ if (unlikely(!mas_searchable(mas) || mas_is_start(mas)))
+ return 0;
+
+ if (likely(!mte_dead_node(mas->node)))
+ return 0;
+
+ mas_rewalk(mas, index);
+ return 1;
+}
+
+void mt_cache_shrink(void)
+{
+}
+#else
+/*
+ * mt_cache_shrink() - For testing, don't use this.
+ *
+ * Certain testcases can trigger an OOM when combined with other memory
+ * debugging configuration options. This function is used to reduce the
+ * possibility of an out of memory even due to kmem_cache objects remaining
+ * around for longer than usual.
+ */
+void mt_cache_shrink(void)
+{
+ kmem_cache_shrink(maple_node_cache);
+
+}
+EXPORT_SYMBOL_GPL(mt_cache_shrink);
+
+#endif /* not defined __KERNEL__ */
+/*
+ * mas_get_slot() - Get the entry in the maple state node stored at @offset.
+ * @mas: The maple state
+ * @offset: The offset into the slot array to fetch.
+ *
+ * Return: The entry stored at @offset.
+ */
+static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
+ unsigned char offset)
+{
+ return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
+ offset);
+}
+
+
+/*
+ * mas_first_entry() - Go the first leaf and find the first entry.
+ * @mas: the maple state.
+ * @limit: the maximum index to check.
+ * @*r_start: Pointer to set to the range start.
+ *
+ * Sets mas->offset to the offset of the entry, r_start to the range minimum.
+ *
+ * Return: The first entry or MAS_NONE.
+ */
+static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
+ unsigned long limit, enum maple_type mt)
+
+{
+ unsigned long max;
+ unsigned long *pivots;
+ void __rcu **slots;
+ void *entry = NULL;
+
+ mas->index = mas->min;
+ if (mas->index > limit)
+ goto none;
+
+ max = mas->max;
+ mas->offset = 0;
+ while (likely(!ma_is_leaf(mt))) {
+ MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
+ slots = ma_slots(mn, mt);
+ pivots = ma_pivots(mn, mt);
+ max = pivots[0];
+ entry = mas_slot(mas, slots, 0);
+ if (unlikely(ma_dead_node(mn)))
+ return NULL;
+ mas->node = entry;
+ mn = mas_mn(mas);
+ mt = mte_node_type(mas->node);
+ }
+ MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
+
+ mas->max = max;
+ slots = ma_slots(mn, mt);
+ entry = mas_slot(mas, slots, 0);
+ if (unlikely(ma_dead_node(mn)))
+ return NULL;
+
+ /* Slot 0 or 1 must be set */
+ if (mas->index > limit)
+ goto none;
+
+ if (likely(entry))
+ return entry;
+
+ pivots = ma_pivots(mn, mt);
+ mas->index = pivots[0] + 1;
+ mas->offset = 1;
+ entry = mas_slot(mas, slots, 1);
+ if (unlikely(ma_dead_node(mn)))
+ return NULL;
+
+ if (mas->index > limit)
+ goto none;
+
+ if (likely(entry))
+ return entry;
+
+none:
+ if (likely(!ma_dead_node(mn)))
+ mas->node = MAS_NONE;
+ return NULL;
+}
+
+/* Depth first search, post-order */
+static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
+{
+
+ struct maple_enode *p = MAS_NONE, *mn = mas->node;
+ unsigned long p_min, p_max;
+
+ mas_next_node(mas, mas_mn(mas), max);
+ if (!mas_is_none(mas))
+ return;
+
+ if (mte_is_root(mn))
+ return;
+
+ mas->node = mn;
+ mas_ascend(mas);
+ while (mas->node != MAS_NONE) {
+ p = mas->node;
+ p_min = mas->min;
+ p_max = mas->max;
+ mas_prev_node(mas, 0);
+ }
+
+ if (p == MAS_NONE)
+ return;
+
+ mas->node = p;
+ mas->max = p_max;
+ mas->min = p_min;
+}
+
+/* Tree validations */
+static void mt_dump_node(const struct maple_tree *mt, void *entry,
+ unsigned long min, unsigned long max, unsigned int depth);
+static void mt_dump_range(unsigned long min, unsigned long max,
+ unsigned int depth)
+{
+ static const char spaces[] = " ";
+
+ if (min == max)
+ pr_info("%.*s%lu: ", depth * 2, spaces, min);
+ else
+ pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
+}
+
+static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
+ unsigned int depth)
+{
+ mt_dump_range(min, max, depth);
+
+ if (xa_is_value(entry))
+ pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry),
+ xa_to_value(entry), entry);
+ else if (xa_is_zero(entry))
+ pr_cont("zero (%ld)\n", xa_to_internal(entry));
+ else if (mt_is_reserved(entry))
+ pr_cont("UNKNOWN ENTRY (%p)\n", entry);
+ else
+ pr_cont("%p\n", entry);
+}
+
+static void mt_dump_range64(const struct maple_tree *mt, void *entry,
+ unsigned long min, unsigned long max, unsigned int depth)
+{
+ struct maple_range_64 *node = &mte_to_node(entry)->mr64;
+ bool leaf = mte_is_leaf(entry);
+ unsigned long first = min;
+ int i;
+
+ pr_cont(" contents: ");
+ for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++)
+ pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
+ pr_cont("%p\n", node->slot[i]);
+ for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
+ unsigned long last = max;
+
+ if (i < (MAPLE_RANGE64_SLOTS - 1))
+ last = node->pivot[i];
+ else if (!node->slot[i] && max != mt_max[mte_node_type(entry)])
+ break;
+ if (last == 0 && i > 0)
+ break;
+ if (leaf)
+ mt_dump_entry(mt_slot(mt, node->slot, i),
+ first, last, depth + 1);
+ else if (node->slot[i])
+ mt_dump_node(mt, mt_slot(mt, node->slot, i),
+ first, last, depth + 1);
+
+ if (last == max)
+ break;
+ if (last > max) {
+ pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
+ node, last, max, i);
+ break;
+ }
+ first = last + 1;
+ }
+}
+
+static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
+ unsigned long min, unsigned long max, unsigned int depth)
+{
+ struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
+ bool leaf = mte_is_leaf(entry);
+ unsigned long first = min;
+ int i;
+
+ pr_cont(" contents: ");
+ for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++)
+ pr_cont("%lu ", node->gap[i]);
+ pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
+ for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++)
+ pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
+ pr_cont("%p\n", node->slot[i]);
+ for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
+ unsigned long last = max;
+
+ if (i < (MAPLE_ARANGE64_SLOTS - 1))
+ last = node->pivot[i];
+ else if (!node->slot[i])
+ break;
+ if (last == 0 && i > 0)
+ break;
+ if (leaf)
+ mt_dump_entry(mt_slot(mt, node->slot, i),
+ first, last, depth + 1);
+ else if (node->slot[i])
+ mt_dump_node(mt, mt_slot(mt, node->slot, i),
+ first, last, depth + 1);
+
+ if (last == max)
+ break;
+ if (last > max) {
+ pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
+ node, last, max, i);
+ break;
+ }
+ first = last + 1;
+ }
+}
+
+static void mt_dump_node(const struct maple_tree *mt, void *entry,
+ unsigned long min, unsigned long max, unsigned int depth)
+{
+ struct maple_node *node = mte_to_node(entry);
+ unsigned int type = mte_node_type(entry);
+ unsigned int i;
+
+ mt_dump_range(min, max, depth);
+
+ pr_cont("node %p depth %d type %d parent %p", node, depth, type,
+ node ? node->parent : NULL);
+ switch (type) {
+ case maple_dense:
+ pr_cont("\n");
+ for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
+ if (min + i > max)
+ pr_cont("OUT OF RANGE: ");
+ mt_dump_entry(mt_slot(mt, node->slot, i),
+ min + i, min + i, depth);
+ }
+ break;
+ case maple_leaf_64:
+ case maple_range_64:
+ mt_dump_range64(mt, entry, min, max, depth);
+ break;
+ case maple_arange_64:
+ mt_dump_arange64(mt, entry, min, max, depth);
+ break;
+
+ default:
+ pr_cont(" UNKNOWN TYPE\n");
+ }
+}
+
+void mt_dump(const struct maple_tree *mt)
+{
+ void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
+
+ pr_info("maple_tree(%p) flags %X, height %u root %p\n",
+ mt, mt->ma_flags, mt_height(mt), entry);
+ if (!xa_is_node(entry))
+ mt_dump_entry(entry, 0, 0, 0);
+ else if (entry)
+ mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0);
+}
+EXPORT_SYMBOL_GPL(mt_dump);
+
+/*
+ * Calculate the maximum gap in a node and check if that's what is reported in
+ * the parent (unless root).
+ */
+static void mas_validate_gaps(struct ma_state *mas)
+{
+ struct maple_enode *mte = mas->node;
+ struct maple_node *p_mn;
+ unsigned long gap = 0, max_gap = 0;
+ unsigned long p_end, p_start = mas->min;
+ unsigned char p_slot;
+ unsigned long *gaps = NULL;
+ unsigned long *pivots = ma_pivots(mte_to_node(mte), mte_node_type(mte));
+ int i;
+
+ if (ma_is_dense(mte_node_type(mte))) {
+ for (i = 0; i < mt_slot_count(mte); i++) {
+ if (mas_get_slot(mas, i)) {
+ if (gap > max_gap)
+ max_gap = gap;
+ gap = 0;
+ continue;
+ }
+ gap++;
+ }
+ goto counted;
+ }
+
+ gaps = ma_gaps(mte_to_node(mte), mte_node_type(mte));
+ for (i = 0; i < mt_slot_count(mte); i++) {
+ p_end = mas_logical_pivot(mas, pivots, i, mte_node_type(mte));
+
+ if (!gaps) {
+ if (mas_get_slot(mas, i)) {
+ gap = 0;
+ goto not_empty;
+ }
+
+ gap += p_end - p_start + 1;
+ } else {
+ void *entry = mas_get_slot(mas, i);
+
+ gap = gaps[i];
+ if (!entry) {
+ if (gap != p_end - p_start + 1) {
+ pr_err("%p[%u] -> %p %lu != %lu - %lu + 1\n",
+ mas_mn(mas), i,
+ mas_get_slot(mas, i), gap,
+ p_end, p_start);
+ mt_dump(mas->tree);
+
+ MT_BUG_ON(mas->tree,
+ gap != p_end - p_start + 1);
+ }
+ } else {
+ if (gap > p_end - p_start + 1) {
+ pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n",
+ mas_mn(mas), i, gap, p_end, p_start,
+ p_end - p_start + 1);
+ MT_BUG_ON(mas->tree,
+ gap > p_end - p_start + 1);
+ }
+ }
+ }
+
+ if (gap > max_gap)
+ max_gap = gap;
+not_empty:
+ p_start = p_end + 1;
+ if (p_end >= mas->max)
+ break;
+ }
+
+counted:
+ if (mte_is_root(mte))
+ return;
+
+ p_slot = mte_parent_slot(mas->node);
+ p_mn = mte_parent(mte);
+ MT_BUG_ON(mas->tree, max_gap > mas->max);
+ if (ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap) {
+ pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap);
+ mt_dump(mas->tree);
+ }
+
+ MT_BUG_ON(mas->tree,
+ ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap);
+}
+
+static void mas_validate_parent_slot(struct ma_state *mas)
+{
+ struct maple_node *parent;
+ struct maple_enode *node;
+ enum maple_type p_type = mas_parent_enum(mas, mas->node);
+ unsigned char p_slot = mte_parent_slot(mas->node);
+ void __rcu **slots;
+ int i;
+
+ if (mte_is_root(mas->node))
+ return;
+
+ parent = mte_parent(mas->node);
+ slots = ma_slots(parent, p_type);
+ MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
+
+ /* Check prev/next parent slot for duplicate node entry */
+
+ for (i = 0; i < mt_slots[p_type]; i++) {
+ node = mas_slot(mas, slots, i);
+ if (i == p_slot) {
+ if (node != mas->node)
+ pr_err("parent %p[%u] does not have %p\n",
+ parent, i, mas_mn(mas));
+ MT_BUG_ON(mas->tree, node != mas->node);
+ } else if (node == mas->node) {
+ pr_err("Invalid child %p at parent %p[%u] p_slot %u\n",
+ mas_mn(mas), parent, i, p_slot);
+ MT_BUG_ON(mas->tree, node == mas->node);
+ }
+ }
+}
+
+static void mas_validate_child_slot(struct ma_state *mas)
+{
+ enum maple_type type = mte_node_type(mas->node);
+ void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
+ unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
+ struct maple_enode *child;
+ unsigned char i;
+
+ if (mte_is_leaf(mas->node))
+ return;
+
+ for (i = 0; i < mt_slots[type]; i++) {
+ child = mas_slot(mas, slots, i);
+ if (!pivots[i] || pivots[i] == mas->max)
+ break;
+
+ if (!child)
+ break;
+
+ if (mte_parent_slot(child) != i) {
+ pr_err("Slot error at %p[%u]: child %p has pslot %u\n",
+ mas_mn(mas), i, mte_to_node(child),
+ mte_parent_slot(child));
+ MT_BUG_ON(mas->tree, 1);
+ }
+
+ if (mte_parent(child) != mte_to_node(mas->node)) {
+ pr_err("child %p has parent %p not %p\n",
+ mte_to_node(child), mte_parent(child),
+ mte_to_node(mas->node));
+ MT_BUG_ON(mas->tree, 1);
+ }
+ }
+}
+
+/*
+ * Validate all pivots are within mas->min and mas->max.
+ */
+static void mas_validate_limits(struct ma_state *mas)
+{
+ int i;
+ unsigned long prev_piv = 0;
+ enum maple_type type = mte_node_type(mas->node);
+ void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
+ unsigned long *pivots = ma_pivots(mas_mn(mas), type);
+
+ /* all limits are fine here. */
+ if (mte_is_root(mas->node))
+ return;
+
+ for (i = 0; i < mt_slots[type]; i++) {
+ unsigned long piv;
+
+ piv = mas_safe_pivot(mas, pivots, i, type);
+
+ if (!piv && (i != 0))
+ break;
+
+ if (!mte_is_leaf(mas->node)) {
+ void *entry = mas_slot(mas, slots, i);
+
+ if (!entry)
+ pr_err("%p[%u] cannot be null\n",
+ mas_mn(mas), i);
+
+ MT_BUG_ON(mas->tree, !entry);
+ }
+
+ if (prev_piv > piv) {
+ pr_err("%p[%u] piv %lu < prev_piv %lu\n",
+ mas_mn(mas), i, piv, prev_piv);
+ MT_BUG_ON(mas->tree, piv < prev_piv);
+ }
+
+ if (piv < mas->min) {
+ pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i,
+ piv, mas->min);
+ MT_BUG_ON(mas->tree, piv < mas->min);
+ }
+ if (piv > mas->max) {
+ pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i,
+ piv, mas->max);
+ MT_BUG_ON(mas->tree, piv > mas->max);
+ }
+ prev_piv = piv;
+ if (piv == mas->max)
+ break;
+ }
+ for (i += 1; i < mt_slots[type]; i++) {
+ void *entry = mas_slot(mas, slots, i);
+
+ if (entry && (i != mt_slots[type] - 1)) {
+ pr_err("%p[%u] should not have entry %p\n", mas_mn(mas),
+ i, entry);
+ MT_BUG_ON(mas->tree, entry != NULL);
+ }
+
+ if (i < mt_pivots[type]) {
+ unsigned long piv = pivots[i];
+
+ if (!piv)
+ continue;
+
+ pr_err("%p[%u] should not have piv %lu\n",
+ mas_mn(mas), i, piv);
+ MT_BUG_ON(mas->tree, i < mt_pivots[type] - 1);
+ }
+ }
+}
+
+static void mt_validate_nulls(struct maple_tree *mt)
+{
+ void *entry, *last = (void *)1;
+ unsigned char offset = 0;
+ void __rcu **slots;
+ MA_STATE(mas, mt, 0, 0);
+
+ mas_start(&mas);
+ if (mas_is_none(&mas) || (mas.node == MAS_ROOT))
+ return;
+
+ while (!mte_is_leaf(mas.node))
+ mas_descend(&mas);
+
+ slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
+ do {
+ entry = mas_slot(&mas, slots, offset);
+ if (!last && !entry) {
+ pr_err("Sequential nulls end at %p[%u]\n",
+ mas_mn(&mas), offset);
+ }
+ MT_BUG_ON(mt, !last && !entry);
+ last = entry;
+ if (offset == mas_data_end(&mas)) {
+ mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
+ if (mas_is_none(&mas))
+ return;
+ offset = 0;
+ slots = ma_slots(mte_to_node(mas.node),
+ mte_node_type(mas.node));
+ } else {
+ offset++;
+ }
+
+ } while (!mas_is_none(&mas));
+}
+
+/*
+ * validate a maple tree by checking:
+ * 1. The limits (pivots are within mas->min to mas->max)
+ * 2. The gap is correctly set in the parents
+ */
+void mt_validate(struct maple_tree *mt)
+{
+ unsigned char end;
+
+ MA_STATE(mas, mt, 0, 0);
+ rcu_read_lock();
+ mas_start(&mas);
+ if (!mas_searchable(&mas))
+ goto done;
+
+ mas_first_entry(&mas, mas_mn(&mas), ULONG_MAX, mte_node_type(mas.node));
+ while (!mas_is_none(&mas)) {
+ MT_BUG_ON(mas.tree, mte_dead_node(mas.node));
+ if (!mte_is_root(mas.node)) {
+ end = mas_data_end(&mas);
+ if ((end < mt_min_slot_count(mas.node)) &&
+ (mas.max != ULONG_MAX)) {
+ pr_err("Invalid size %u of %p\n", end,
+ mas_mn(&mas));
+ MT_BUG_ON(mas.tree, 1);
+ }
+
+ }
+ mas_validate_parent_slot(&mas);
+ mas_validate_child_slot(&mas);
+ mas_validate_limits(&mas);
+ if (mt_is_alloc(mt))
+ mas_validate_gaps(&mas);
+ mas_dfs_postorder(&mas, ULONG_MAX);
+ }
+ mt_validate_nulls(mt);
+done:
+ rcu_read_unlock();
+
+}
+EXPORT_SYMBOL_GPL(mt_validate);
+
+#endif /* CONFIG_DEBUG_MAPLE_TREE */
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 40f22b177d69..b67a53e29b8f 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -124,10 +124,12 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
range->max = U8_MAX;
break;
case NLA_U16:
+ case NLA_BE16:
case NLA_BINARY:
range->max = U16_MAX;
break;
case NLA_U32:
+ case NLA_BE32:
range->max = U32_MAX;
break;
case NLA_U64:
@@ -159,31 +161,6 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
}
}
-static u64 nla_get_attr_bo(const struct nla_policy *pt,
- const struct nlattr *nla)
-{
- switch (pt->type) {
- case NLA_U16:
- if (pt->network_byte_order)
- return ntohs(nla_get_be16(nla));
-
- return nla_get_u16(nla);
- case NLA_U32:
- if (pt->network_byte_order)
- return ntohl(nla_get_be32(nla));
-
- return nla_get_u32(nla);
- case NLA_U64:
- if (pt->network_byte_order)
- return be64_to_cpu(nla_get_be64(nla));
-
- return nla_get_u64(nla);
- }
-
- WARN_ON_ONCE(1);
- return 0;
-}
-
static int nla_validate_range_unsigned(const struct nla_policy *pt,
const struct nlattr *nla,
struct netlink_ext_ack *extack,
@@ -197,9 +174,13 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt,
value = nla_get_u8(nla);
break;
case NLA_U16:
+ value = nla_get_u16(nla);
+ break;
case NLA_U32:
+ value = nla_get_u32(nla);
+ break;
case NLA_U64:
- value = nla_get_attr_bo(pt, nla);
+ value = nla_get_u64(nla);
break;
case NLA_MSECS:
value = nla_get_u64(nla);
@@ -207,6 +188,12 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt,
case NLA_BINARY:
value = nla_len(nla);
break;
+ case NLA_BE16:
+ value = ntohs(nla_get_be16(nla));
+ break;
+ case NLA_BE32:
+ value = ntohl(nla_get_be32(nla));
+ break;
default:
return -EINVAL;
}
@@ -334,6 +321,8 @@ static int nla_validate_int_range(const struct nla_policy *pt,
case NLA_U64:
case NLA_MSECS:
case NLA_BINARY:
+ case NLA_BE16:
+ case NLA_BE32:
return nla_validate_range_unsigned(pt, nla, extack, validate);
case NLA_S8:
case NLA_S16:
diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c
index 5369634701fa..b8556a2e7bb1 100644
--- a/lib/overflow_kunit.c
+++ b/lib/overflow_kunit.c
@@ -16,6 +16,34 @@
#include <linux/types.h>
#include <linux/vmalloc.h>
+#define SKIP(cond, reason) do { \
+ if (cond) { \
+ kunit_skip(test, reason); \
+ return; \
+ } \
+} while (0)
+
+/*
+ * Clang 11 and earlier generate unwanted libcalls for signed output
+ * on unsigned input.
+ */
+#if defined(CONFIG_CC_IS_CLANG) && __clang_major__ <= 11
+# define SKIP_SIGN_MISMATCH(t) SKIP(t, "Clang 11 unwanted libcalls")
+#else
+# define SKIP_SIGN_MISMATCH(t) do { } while (0)
+#endif
+
+/*
+ * Clang 13 and earlier generate unwanted libcalls for 64-bit tests on
+ * 32-bit hosts.
+ */
+#if defined(CONFIG_CC_IS_CLANG) && __clang_major__ <= 13 && \
+ BITS_PER_LONG != 64
+# define SKIP_64_ON_32(t) SKIP(t, "Clang 13 unwanted libcalls")
+#else
+# define SKIP_64_ON_32(t) do { } while (0)
+#endif
+
#define DEFINE_TEST_ARRAY_TYPED(t1, t2, t) \
static const struct test_ ## t1 ## _ ## t2 ## __ ## t { \
t1 a; \
@@ -94,7 +122,6 @@ DEFINE_TEST_ARRAY(u32) = {
{-4U, 5U, 1U, -9U, -20U, true, false, true},
};
-#if BITS_PER_LONG == 64
DEFINE_TEST_ARRAY(u64) = {
{0, 0, 0, 0, 0, false, false, false},
{1, 1, 2, 0, 1, false, false, false},
@@ -118,7 +145,6 @@ DEFINE_TEST_ARRAY(u64) = {
false, true, false},
{-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true},
};
-#endif
DEFINE_TEST_ARRAY(s8) = {
{0, 0, 0, 0, 0, false, false, false},
@@ -194,7 +220,6 @@ DEFINE_TEST_ARRAY(s32) = {
{S32_MAX, S32_MAX, -2, 0, 1, true, false, true},
};
-#if BITS_PER_LONG == 64
DEFINE_TEST_ARRAY(s64) = {
{0, 0, 0, 0, 0, false, false, false},
@@ -223,7 +248,6 @@ DEFINE_TEST_ARRAY(s64) = {
{-128, -1, -129, -127, 128, false, false, false},
{0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false},
};
-#endif
#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \
int _a_orig = a, _a_bump = a + 1; \
@@ -246,7 +270,7 @@ DEFINE_TEST_ARRAY(s64) = {
#define DEFINE_TEST_FUNC_TYPED(n, t, fmt) \
static void do_test_ ## n(struct kunit *test, const struct test_ ## n *p) \
-{ \
+{ \
check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \
check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \
check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \
@@ -257,6 +281,12 @@ static void do_test_ ## n(struct kunit *test, const struct test_ ## n *p) \
static void n ## _overflow_test(struct kunit *test) { \
unsigned i; \
\
+ SKIP_64_ON_32(__same_type(t, u64)); \
+ SKIP_64_ON_32(__same_type(t, s64)); \
+ SKIP_SIGN_MISMATCH(__same_type(n ## _tests[0].a, u32) && \
+ __same_type(n ## _tests[0].b, u32) && \
+ __same_type(n ## _tests[0].sum, int)); \
+ \
for (i = 0; i < ARRAY_SIZE(n ## _tests); ++i) \
do_test_ ## n(test, &n ## _tests[i]); \
kunit_info(test, "%zu %s arithmetic tests finished\n", \
@@ -272,10 +302,8 @@ DEFINE_TEST_FUNC(u16, "%d");
DEFINE_TEST_FUNC(s16, "%d");
DEFINE_TEST_FUNC(u32, "%u");
DEFINE_TEST_FUNC(s32, "%d");
-#if BITS_PER_LONG == 64
DEFINE_TEST_FUNC(u64, "%llu");
DEFINE_TEST_FUNC(s64, "%lld");
-#endif
DEFINE_TEST_ARRAY_TYPED(u32, u32, u8) = {
{0, 0, 0, 0, 0, false, false, false},
@@ -715,13 +743,10 @@ static struct kunit_case overflow_test_cases[] = {
KUNIT_CASE(s16_s16__s16_overflow_test),
KUNIT_CASE(u32_u32__u32_overflow_test),
KUNIT_CASE(s32_s32__s32_overflow_test),
-/* Clang 13 and earlier generate unwanted libcalls on 32-bit. */
-#if BITS_PER_LONG == 64
KUNIT_CASE(u64_u64__u64_overflow_test),
KUNIT_CASE(s64_s64__s64_overflow_test),
-#endif
- KUNIT_CASE(u32_u32__u8_overflow_test),
KUNIT_CASE(u32_u32__int_overflow_test),
+ KUNIT_CASE(u32_u32__u8_overflow_test),
KUNIT_CASE(u8_u8__int_overflow_test),
KUNIT_CASE(int_int__u8_overflow_test),
KUNIT_CASE(shift_sane_test),
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e5c5315da274..668f6aa6a75d 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
percpu_ref_noop_confirm_switch;
percpu_ref_get(ref); /* put after confirmation */
- call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu);
+ call_rcu_hurry(&ref->data->rcu,
+ percpu_ref_switch_to_atomic_rcu);
}
static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index 9e597e1f91a4..b25dfc9c7759 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -13,8 +13,7 @@
#include <linux/raid/pq.h>
#include <asm/fpu/api.h>
-
-asm(".include \"asm/vx-insn.h\"\n");
+#include <asm/vx-insn.h>
#define NSIZE 16
diff --git a/lib/random32.c b/lib/random32.c
index d5d9029362cb..32060b852668 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -47,7 +47,7 @@
* @state: pointer to state structure holding seeded state.
*
* This is used for pseudo-randomness with no outside seeding.
- * For more random results, use prandom_u32().
+ * For more random results, use get_random_u32().
*/
u32 prandom_u32_state(struct rnd_state *state)
{
@@ -69,7 +69,7 @@ EXPORT_SYMBOL(prandom_u32_state);
* @bytes: the requested number of bytes
*
* This is used for pseudo-randomness with no outside seeding.
- * For more random results, use prandom_bytes().
+ * For more random results, use get_random_bytes().
*/
void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
{
diff --git a/lib/reed_solomon/test_rslib.c b/lib/reed_solomon/test_rslib.c
index d9d1c33aebda..848e7eb5da92 100644
--- a/lib/reed_solomon/test_rslib.c
+++ b/lib/reed_solomon/test_rslib.c
@@ -164,7 +164,7 @@ static int get_rcw_we(struct rs_control *rs, struct wspace *ws,
/* Load c with random data and encode */
for (i = 0; i < dlen; i++)
- c[i] = prandom_u32() & nn;
+ c[i] = get_random_u32() & nn;
memset(c + dlen, 0, nroots * sizeof(*c));
encode_rs16(rs, c, dlen, c + dlen, 0);
@@ -178,12 +178,12 @@ static int get_rcw_we(struct rs_control *rs, struct wspace *ws,
for (i = 0; i < errs; i++) {
do {
/* Error value must be nonzero */
- errval = prandom_u32() & nn;
+ errval = get_random_u32() & nn;
} while (errval == 0);
do {
/* Must not choose the same location twice */
- errloc = prandom_u32() % len;
+ errloc = prandom_u32_max(len);
} while (errlocs[errloc] != 0);
errlocs[errloc] = 1;
@@ -194,19 +194,19 @@ static int get_rcw_we(struct rs_control *rs, struct wspace *ws,
for (i = 0; i < eras; i++) {
do {
/* Must not choose the same location twice */
- errloc = prandom_u32() % len;
+ errloc = prandom_u32_max(len);
} while (errlocs[errloc] != 0);
derrlocs[i] = errloc;
- if (ewsc && (prandom_u32() & 1)) {
+ if (ewsc && prandom_u32_max(2)) {
/* Erasure with the symbol intact */
errlocs[errloc] = 2;
} else {
/* Erasure with corrupted symbol */
do {
/* Error value must be nonzero */
- errval = prandom_u32() & nn;
+ errval = get_random_u32() & nn;
} while (errval == 0);
errlocs[errloc] = 1;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index a8108a962dfd..7280ae8ca88c 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -21,7 +21,7 @@ static int init_alloc_hint(struct sbitmap *sb, gfp_t flags)
int i;
for_each_possible_cpu(i)
- *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32() % depth;
+ *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32_max(depth);
}
return 0;
}
@@ -33,7 +33,7 @@ static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb,
hint = this_cpu_read(*sb->alloc_hint);
if (unlikely(hint >= depth)) {
- hint = depth ? prandom_u32() % depth : 0;
+ hint = depth ? prandom_u32_max(depth) : 0;
this_cpu_write(*sb->alloc_hint, hint);
}
diff --git a/lib/sg_pool.c b/lib/sg_pool.c
index a0b1a52cd6f7..9bfe60ca3f37 100644
--- a/lib/sg_pool.c
+++ b/lib/sg_pool.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/slab.h>
@@ -177,16 +177,4 @@ cleanup_sdb:
return -ENOMEM;
}
-static __exit void sg_pool_exit(void)
-{
- int i;
-
- for (i = 0; i < SG_MEMPOOL_NR; i++) {
- struct sg_pool *sgp = sg_pools + i;
- mempool_destroy(sgp->pool);
- kmem_cache_destroy(sgp->slab);
- }
-}
-
-module_init(sg_pool_init);
-module_exit(sg_pool_exit);
+subsys_initcall(sg_pool_init);
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 1c26c14ffbb9..0d7585cde2a6 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -8,13 +8,13 @@
#include <linux/mm.h>
#include <linux/cma.h>
-void show_mem(unsigned int filter, nodemask_t *nodemask)
+void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
{
pg_data_t *pgdat;
unsigned long total = 0, reserved = 0, highmem = 0;
printk("Mem-Info:\n");
- show_free_areas(filter, nodemask);
+ __show_free_areas(filter, nodemask, max_zone_idx);
for_each_online_pgdat(pgdat) {
int zoneid;
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 7a0564d7cb7a..bdf358d520b4 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -9,10 +9,25 @@
static struct kunit_resource resource;
static int slab_errors;
+/*
+ * Wrapper function for kmem_cache_create(), which reduces 2 parameters:
+ * 'align' and 'ctor', and sets SLAB_SKIP_KFENCE flag to avoid getting an
+ * object from kfence pool, where the operation could be caught by both
+ * our test and kfence sanity check.
+ */
+static struct kmem_cache *test_kmem_cache_create(const char *name,
+ unsigned int size, slab_flags_t flags)
+{
+ struct kmem_cache *s = kmem_cache_create(name, size, 0,
+ (flags | SLAB_NO_USER_FLAGS), NULL);
+ s->flags |= SLAB_SKIP_KFENCE;
+ return s;
+}
+
static void test_clobber_zone(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
- SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_alloc", 64,
+ SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
@@ -29,8 +44,8 @@ static void test_clobber_zone(struct kunit *test)
#ifndef CONFIG_KASAN
static void test_next_pointer(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
- SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_next_ptr_free",
+ 64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
unsigned long tmp;
unsigned long *ptr_addr;
@@ -74,8 +89,8 @@ static void test_next_pointer(struct kunit *test)
static void test_first_word(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
- SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_1th_word_free",
+ 64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
@@ -89,8 +104,8 @@ static void test_first_word(struct kunit *test)
static void test_clobber_50th_byte(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
- SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_50th_word_free",
+ 64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
@@ -105,8 +120,8 @@ static void test_clobber_50th_byte(struct kunit *test)
static void test_clobber_redzone_free(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
- SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_free", 64,
+ SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
@@ -120,6 +135,27 @@ static void test_clobber_redzone_free(struct kunit *test)
kmem_cache_destroy(s);
}
+static void test_kmalloc_redzone_access(struct kunit *test)
+{
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
+ SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
+ u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
+
+ kasan_disable_current();
+
+ /* Suppress the -Warray-bounds warning */
+ OPTIMIZER_HIDE_VAR(p);
+ p[18] = 0xab;
+ p[19] = 0xab;
+
+ validate_slab_cache(s);
+ KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+ kasan_enable_current();
+ kmem_cache_free(s, p);
+ kmem_cache_destroy(s);
+}
+
static int test_init(struct kunit *test)
{
slab_errors = 0;
@@ -139,6 +175,7 @@ static struct kunit_case test_cases[] = {
#endif
KUNIT_CASE(test_clobber_redzone_free),
+ KUNIT_CASE(test_kmalloc_redzone_access),
{}
};
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index e73fda23388d..79e894cf8406 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -43,7 +43,8 @@
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
STACK_ALLOC_ALIGN)
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
- STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
+ STACK_ALLOC_NULL_PROTECTION_BITS - \
+ STACK_ALLOC_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS)
#define STACK_ALLOC_SLABS_CAP 8192
#define STACK_ALLOC_MAX_SLABS \
(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
@@ -56,6 +57,7 @@ union handle_parts {
u32 slabindex : STACK_ALLOC_INDEX_BITS;
u32 offset : STACK_ALLOC_OFFSET_BITS;
u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
+ u32 extra : STACK_DEPOT_EXTRA_BITS;
};
};
@@ -77,6 +79,14 @@ static int next_slab_inited;
static size_t depot_offset;
static DEFINE_RAW_SPINLOCK(depot_lock);
+unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
+{
+ union handle_parts parts = { .handle = handle };
+
+ return parts.extra;
+}
+EXPORT_SYMBOL(stack_depot_get_extra_bits);
+
static bool init_stack_slab(void **prealloc)
{
if (!*prealloc)
@@ -140,6 +150,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
stack->handle.slabindex = depot_index;
stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
stack->handle.valid = 1;
+ stack->handle.extra = 0;
memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
depot_offset += required_size;
@@ -382,6 +393,7 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
*
* @entries: Pointer to storage array
* @nr_entries: Size of the storage array
+ * @extra_bits: Flags to store in unused bits of depot_stack_handle_t
* @alloc_flags: Allocation gfp flags
* @can_alloc: Allocate stack slabs (increased chance of failure if false)
*
@@ -393,6 +405,10 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
* If the stack trace in @entries is from an interrupt, only the portion up to
* interrupt entry is saved.
*
+ * Additional opaque flags can be passed in @extra_bits, stored in the unused
+ * bits of the stack handle, and retrieved using stack_depot_get_extra_bits()
+ * without calling stack_depot_fetch().
+ *
* Context: Any context, but setting @can_alloc to %false is required if
* alloc_pages() cannot be used from the current context. Currently
* this is the case from contexts where neither %GFP_ATOMIC nor
@@ -402,10 +418,11 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
*/
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
+ unsigned int extra_bits,
gfp_t alloc_flags, bool can_alloc)
{
struct stack_record *found = NULL, **bucket;
- depot_stack_handle_t retval = 0;
+ union handle_parts retval = { .handle = 0 };
struct page *page = NULL;
void *prealloc = NULL;
unsigned long flags;
@@ -489,9 +506,11 @@ exit:
free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
}
if (found)
- retval = found->handle.handle;
+ retval.handle = found->handle.handle;
fast_exit:
- return retval;
+ retval.extra = extra_bits;
+
+ return retval.handle;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);
@@ -511,6 +530,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries,
gfp_t alloc_flags)
{
- return __stack_depot_save(entries, nr_entries, alloc_flags, true);
+ return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
diff --git a/lib/string.c b/lib/string.c
index 6f334420f687..3371d26a0e39 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -197,6 +197,14 @@ ssize_t strscpy(char *dest, const char *src, size_t count)
max = 0;
#endif
+ /*
+ * read_word_at_a_time() below may read uninitialized bytes after the
+ * trailing zero and use them in comparisons. Disable this optimization
+ * under KMSAN to prevent false positive reports.
+ */
+ if (IS_ENABLED(CONFIG_KMSAN))
+ max = 0;
+
while (max >= sizeof(unsigned long)) {
unsigned long c, data;
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 437d8e6b7cb1..86fadd3ba08c 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -587,7 +587,7 @@ static int __init test_string_helpers_init(void)
for (i = 0; i < UNESCAPE_ALL_MASK + 1; i++)
test_string_unescape("unescape", i, false);
test_string_unescape("unescape inplace",
- get_random_int() % (UNESCAPE_ANY + 1), true);
+ prandom_u32_max(UNESCAPE_ANY + 1), true);
/* Without dictionary */
for (i = 0; i < ESCAPE_ALL_MASK + 1; i++)
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 98754ff9fe68..a8005ad3bd58 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -16,6 +16,8 @@
#include "../tools/testing/selftests/kselftest_module.h"
+#define EXP1_IN_BITS (sizeof(exp1) * 8)
+
KSTM_MODULE_GLOBALS();
static char pbl_buffer[PAGE_SIZE] __initdata;
@@ -219,6 +221,47 @@ static void __init test_zero_clear(void)
expect_eq_pbl("", bmap, 1024);
}
+static void __init test_find_nth_bit(void)
+{
+ unsigned long b, bit, cnt = 0;
+ DECLARE_BITMAP(bmap, 64 * 3);
+
+ bitmap_zero(bmap, 64 * 3);
+ __set_bit(10, bmap);
+ __set_bit(20, bmap);
+ __set_bit(30, bmap);
+ __set_bit(40, bmap);
+ __set_bit(50, bmap);
+ __set_bit(60, bmap);
+ __set_bit(80, bmap);
+ __set_bit(123, bmap);
+
+ expect_eq_uint(10, find_nth_bit(bmap, 64 * 3, 0));
+ expect_eq_uint(20, find_nth_bit(bmap, 64 * 3, 1));
+ expect_eq_uint(30, find_nth_bit(bmap, 64 * 3, 2));
+ expect_eq_uint(40, find_nth_bit(bmap, 64 * 3, 3));
+ expect_eq_uint(50, find_nth_bit(bmap, 64 * 3, 4));
+ expect_eq_uint(60, find_nth_bit(bmap, 64 * 3, 5));
+ expect_eq_uint(80, find_nth_bit(bmap, 64 * 3, 6));
+ expect_eq_uint(123, find_nth_bit(bmap, 64 * 3, 7));
+ expect_eq_uint(64 * 3, find_nth_bit(bmap, 64 * 3, 8));
+
+ expect_eq_uint(10, find_nth_bit(bmap, 64 * 3 - 1, 0));
+ expect_eq_uint(20, find_nth_bit(bmap, 64 * 3 - 1, 1));
+ expect_eq_uint(30, find_nth_bit(bmap, 64 * 3 - 1, 2));
+ expect_eq_uint(40, find_nth_bit(bmap, 64 * 3 - 1, 3));
+ expect_eq_uint(50, find_nth_bit(bmap, 64 * 3 - 1, 4));
+ expect_eq_uint(60, find_nth_bit(bmap, 64 * 3 - 1, 5));
+ expect_eq_uint(80, find_nth_bit(bmap, 64 * 3 - 1, 6));
+ expect_eq_uint(123, find_nth_bit(bmap, 64 * 3 - 1, 7));
+ expect_eq_uint(64 * 3 - 1, find_nth_bit(bmap, 64 * 3 - 1, 8));
+
+ for_each_set_bit(bit, exp1, EXP1_IN_BITS) {
+ b = find_nth_bit(exp1, EXP1_IN_BITS, cnt++);
+ expect_eq_uint(b, bit);
+ }
+}
+
static void __init test_fill_set(void)
{
DECLARE_BITMAP(bmap, 1024);
@@ -557,8 +600,6 @@ static void __init test_bitmap_parse(void)
}
}
-#define EXP1_IN_BITS (sizeof(exp1) * 8)
-
static void __init test_bitmap_arr32(void)
{
unsigned int nbits, next_bit;
@@ -685,6 +726,239 @@ static void __init test_for_each_set_clump8(void)
expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump);
}
+static void __init test_for_each_set_bit_wrap(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int wr, bit;
+
+ bitmap_zero(orig, 500);
+
+ /* Set individual bits */
+ for (bit = 0; bit < 500; bit += 10)
+ bitmap_set(orig, bit, 1);
+
+ /* Set range of bits */
+ bitmap_set(orig, 100, 50);
+
+ for (wr = 0; wr < 500; wr++) {
+ bitmap_zero(copy, 500);
+
+ for_each_set_bit_wrap(bit, orig, 500, wr)
+ bitmap_set(copy, bit, 1);
+
+ expect_eq_bitmap(orig, copy, 500);
+ }
+}
+
+static void __init test_for_each_set_bit(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int bit;
+
+ bitmap_zero(orig, 500);
+ bitmap_zero(copy, 500);
+
+ /* Set individual bits */
+ for (bit = 0; bit < 500; bit += 10)
+ bitmap_set(orig, bit, 1);
+
+ /* Set range of bits */
+ bitmap_set(orig, 100, 50);
+
+ for_each_set_bit(bit, orig, 500)
+ bitmap_set(copy, bit, 1);
+
+ expect_eq_bitmap(orig, copy, 500);
+}
+
+static void __init test_for_each_set_bit_from(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int wr, bit;
+
+ bitmap_zero(orig, 500);
+
+ /* Set individual bits */
+ for (bit = 0; bit < 500; bit += 10)
+ bitmap_set(orig, bit, 1);
+
+ /* Set range of bits */
+ bitmap_set(orig, 100, 50);
+
+ for (wr = 0; wr < 500; wr++) {
+ DECLARE_BITMAP(tmp, 500);
+
+ bitmap_zero(copy, 500);
+ bit = wr;
+
+ for_each_set_bit_from(bit, orig, 500)
+ bitmap_set(copy, bit, 1);
+
+ bitmap_copy(tmp, orig, 500);
+ bitmap_clear(tmp, 0, wr);
+ expect_eq_bitmap(tmp, copy, 500);
+ }
+}
+
+static void __init test_for_each_clear_bit(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int bit;
+
+ bitmap_fill(orig, 500);
+ bitmap_fill(copy, 500);
+
+ /* Set individual bits */
+ for (bit = 0; bit < 500; bit += 10)
+ bitmap_clear(orig, bit, 1);
+
+ /* Set range of bits */
+ bitmap_clear(orig, 100, 50);
+
+ for_each_clear_bit(bit, orig, 500)
+ bitmap_clear(copy, bit, 1);
+
+ expect_eq_bitmap(orig, copy, 500);
+}
+
+static void __init test_for_each_clear_bit_from(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int wr, bit;
+
+ bitmap_fill(orig, 500);
+
+ /* Set individual bits */
+ for (bit = 0; bit < 500; bit += 10)
+ bitmap_clear(orig, bit, 1);
+
+ /* Set range of bits */
+ bitmap_clear(orig, 100, 50);
+
+ for (wr = 0; wr < 500; wr++) {
+ DECLARE_BITMAP(tmp, 500);
+
+ bitmap_fill(copy, 500);
+ bit = wr;
+
+ for_each_clear_bit_from(bit, orig, 500)
+ bitmap_clear(copy, bit, 1);
+
+ bitmap_copy(tmp, orig, 500);
+ bitmap_set(tmp, 0, wr);
+ expect_eq_bitmap(tmp, copy, 500);
+ }
+}
+
+static void __init test_for_each_set_bitrange(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int s, e;
+
+ bitmap_zero(orig, 500);
+ bitmap_zero(copy, 500);
+
+ /* Set individual bits */
+ for (s = 0; s < 500; s += 10)
+ bitmap_set(orig, s, 1);
+
+ /* Set range of bits */
+ bitmap_set(orig, 100, 50);
+
+ for_each_set_bitrange(s, e, orig, 500)
+ bitmap_set(copy, s, e-s);
+
+ expect_eq_bitmap(orig, copy, 500);
+}
+
+static void __init test_for_each_clear_bitrange(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int s, e;
+
+ bitmap_fill(orig, 500);
+ bitmap_fill(copy, 500);
+
+ /* Set individual bits */
+ for (s = 0; s < 500; s += 10)
+ bitmap_clear(orig, s, 1);
+
+ /* Set range of bits */
+ bitmap_clear(orig, 100, 50);
+
+ for_each_clear_bitrange(s, e, orig, 500)
+ bitmap_clear(copy, s, e-s);
+
+ expect_eq_bitmap(orig, copy, 500);
+}
+
+static void __init test_for_each_set_bitrange_from(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int wr, s, e;
+
+ bitmap_zero(orig, 500);
+
+ /* Set individual bits */
+ for (s = 0; s < 500; s += 10)
+ bitmap_set(orig, s, 1);
+
+ /* Set range of bits */
+ bitmap_set(orig, 100, 50);
+
+ for (wr = 0; wr < 500; wr++) {
+ DECLARE_BITMAP(tmp, 500);
+
+ bitmap_zero(copy, 500);
+ s = wr;
+
+ for_each_set_bitrange_from(s, e, orig, 500)
+ bitmap_set(copy, s, e - s);
+
+ bitmap_copy(tmp, orig, 500);
+ bitmap_clear(tmp, 0, wr);
+ expect_eq_bitmap(tmp, copy, 500);
+ }
+}
+
+static void __init test_for_each_clear_bitrange_from(void)
+{
+ DECLARE_BITMAP(orig, 500);
+ DECLARE_BITMAP(copy, 500);
+ unsigned int wr, s, e;
+
+ bitmap_fill(orig, 500);
+
+ /* Set individual bits */
+ for (s = 0; s < 500; s += 10)
+ bitmap_clear(orig, s, 1);
+
+ /* Set range of bits */
+ bitmap_set(orig, 100, 50);
+
+ for (wr = 0; wr < 500; wr++) {
+ DECLARE_BITMAP(tmp, 500);
+
+ bitmap_fill(copy, 500);
+ s = wr;
+
+ for_each_clear_bitrange_from(s, e, orig, 500)
+ bitmap_clear(copy, s, e - s);
+
+ bitmap_copy(tmp, orig, 500);
+ bitmap_set(tmp, 0, wr);
+ expect_eq_bitmap(tmp, copy, 500);
+ }
+}
+
struct test_bitmap_cut {
unsigned int first;
unsigned int cut;
@@ -948,10 +1222,21 @@ static void __init selftest(void)
test_bitmap_parselist();
test_bitmap_printlist();
test_mem_optimisations();
- test_for_each_set_clump8();
test_bitmap_cut();
test_bitmap_print_buf();
test_bitmap_const_eval();
+
+ test_find_nth_bit();
+ test_for_each_set_bit();
+ test_for_each_set_bit_from();
+ test_for_each_clear_bit();
+ test_for_each_clear_bit_from();
+ test_for_each_set_bitrange();
+ test_for_each_clear_bitrange();
+ test_for_each_set_bitrange_from();
+ test_for_each_clear_bitrange_from();
+ test_for_each_set_clump8();
+ test_for_each_set_bit_wrap();
}
KSTM_MODULE_LOADERS(test_bitmap);
diff --git a/lib/test_fprobe.c b/lib/test_fprobe.c
index ed70637a2ffa..e0381b3ec410 100644
--- a/lib/test_fprobe.c
+++ b/lib/test_fprobe.c
@@ -145,7 +145,7 @@ static unsigned long get_ftrace_location(void *func)
static int fprobe_test_init(struct kunit *test)
{
do {
- rand1 = prandom_u32();
+ rand1 = get_random_u32();
} while (rand1 <= div_factor);
target = fprobe_selftest_target;
diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c
index 5144899d3c6b..0927f44cd478 100644
--- a/lib/test_hexdump.c
+++ b/lib/test_hexdump.c
@@ -149,7 +149,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize,
static void __init test_hexdump_set(int rowsize, bool ascii)
{
size_t d = min_t(size_t, sizeof(data_b), rowsize);
- size_t len = get_random_int() % d + 1;
+ size_t len = prandom_u32_max(d) + 1;
test_hexdump(len, rowsize, 4, ascii);
test_hexdump(len, rowsize, 2, ascii);
@@ -208,11 +208,11 @@ static void __init test_hexdump_overflow(size_t buflen, size_t len,
static void __init test_hexdump_overflow_set(size_t buflen, bool ascii)
{
unsigned int i = 0;
- int rs = (get_random_int() % 2 + 1) * 16;
+ int rs = (prandom_u32_max(2) + 1) * 16;
do {
int gs = 1 << i;
- size_t len = get_random_int() % rs + gs;
+ size_t len = prandom_u32_max(rs) + gs;
test_hexdump_overflow(buflen, rounddown(len, gs), rs, gs, ascii);
} while (i++ < 3);
@@ -223,11 +223,11 @@ static int __init test_hexdump_init(void)
unsigned int i;
int rowsize;
- rowsize = (get_random_int() % 2 + 1) * 16;
+ rowsize = (prandom_u32_max(2) + 1) * 16;
for (i = 0; i < 16; i++)
test_hexdump_set(rowsize, false);
- rowsize = (get_random_int() % 2 + 1) * 16;
+ rowsize = (prandom_u32_max(2) + 1) * 16;
for (i = 0; i < 16; i++)
test_hexdump_set(rowsize, true);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e3965cafd27c..67e6f83fe0f8 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -100,6 +100,7 @@ struct dmirror {
struct dmirror_chunk {
struct dev_pagemap pagemap;
struct dmirror_device *mdevice;
+ bool remove;
};
/*
@@ -107,8 +108,8 @@ struct dmirror_chunk {
*/
struct dmirror_device {
struct cdev cdevice;
- struct hmm_devmem *devmem;
unsigned int zone_device_type;
+ struct device device;
unsigned int devmem_capacity;
unsigned int devmem_count;
@@ -192,11 +193,15 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp)
return 0;
}
+static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
+{
+ return container_of(page->pgmap, struct dmirror_chunk, pagemap);
+}
+
static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
- return container_of(page->pgmap, struct dmirror_chunk,
- pagemap)->mdevice;
+ return dmirror_page_to_chunk(page)->mdevice;
}
static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
@@ -627,8 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
goto error;
}
+ zone_device_page_init(dpage);
dpage->zone_device_data = rpage;
- lock_page(dpage);
return dpage;
error:
@@ -907,7 +912,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
struct vm_area_struct *vma;
unsigned long src_pfns[64] = { 0 };
unsigned long dst_pfns[64] = { 0 };
- struct migrate_vma args;
+ struct migrate_vma args = { 0 };
unsigned long next;
int ret;
@@ -968,7 +973,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
unsigned long src_pfns[64] = { 0 };
unsigned long dst_pfns[64] = { 0 };
struct dmirror_bounce bounce;
- struct migrate_vma args;
+ struct migrate_vma args = { 0 };
unsigned long next;
int ret;
@@ -1218,6 +1223,85 @@ static int dmirror_snapshot(struct dmirror *dmirror,
return ret;
}
+static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
+{
+ unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
+ unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
+ unsigned long npages = end_pfn - start_pfn + 1;
+ unsigned long i;
+ unsigned long *src_pfns;
+ unsigned long *dst_pfns;
+
+ src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
+ dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
+
+ migrate_device_range(src_pfns, start_pfn, npages);
+ for (i = 0; i < npages; i++) {
+ struct page *dpage, *spage;
+
+ spage = migrate_pfn_to_page(src_pfns[i]);
+ if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ if (WARN_ON(!is_device_private_page(spage) &&
+ !is_device_coherent_page(spage)))
+ continue;
+ spage = BACKING_PAGE(spage);
+ dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
+ lock_page(dpage);
+ copy_highpage(dpage, spage);
+ dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
+ if (src_pfns[i] & MIGRATE_PFN_WRITE)
+ dst_pfns[i] |= MIGRATE_PFN_WRITE;
+ }
+ migrate_device_pages(src_pfns, dst_pfns, npages);
+ migrate_device_finalize(src_pfns, dst_pfns, npages);
+ kfree(src_pfns);
+ kfree(dst_pfns);
+}
+
+/* Removes free pages from the free list so they can't be re-allocated */
+static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
+{
+ struct dmirror_device *mdevice = devmem->mdevice;
+ struct page *page;
+
+ for (page = mdevice->free_pages; page; page = page->zone_device_data)
+ if (dmirror_page_to_chunk(page) == devmem)
+ mdevice->free_pages = page->zone_device_data;
+}
+
+static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
+{
+ unsigned int i;
+
+ mutex_lock(&mdevice->devmem_lock);
+ if (mdevice->devmem_chunks) {
+ for (i = 0; i < mdevice->devmem_count; i++) {
+ struct dmirror_chunk *devmem =
+ mdevice->devmem_chunks[i];
+
+ spin_lock(&mdevice->lock);
+ devmem->remove = true;
+ dmirror_remove_free_pages(devmem);
+ spin_unlock(&mdevice->lock);
+
+ dmirror_device_evict_chunk(devmem);
+ memunmap_pages(&devmem->pagemap);
+ if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+ release_mem_region(devmem->pagemap.range.start,
+ range_len(&devmem->pagemap.range));
+ kfree(devmem);
+ }
+ mdevice->devmem_count = 0;
+ mdevice->devmem_capacity = 0;
+ mdevice->free_pages = NULL;
+ kfree(mdevice->devmem_chunks);
+ mdevice->devmem_chunks = NULL;
+ }
+ mutex_unlock(&mdevice->devmem_lock);
+}
+
static long dmirror_fops_unlocked_ioctl(struct file *filp,
unsigned int command,
unsigned long arg)
@@ -1272,6 +1356,11 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
ret = dmirror_snapshot(dmirror, &cmd);
break;
+ case HMM_DMIRROR_RELEASE:
+ dmirror_device_remove_chunks(dmirror->mdevice);
+ ret = 0;
+ break;
+
default:
return -EINVAL;
}
@@ -1326,15 +1415,19 @@ static void dmirror_devmem_free(struct page *page)
mdevice = dmirror_page_to_device(page);
spin_lock(&mdevice->lock);
- mdevice->cfree++;
- page->zone_device_data = mdevice->free_pages;
- mdevice->free_pages = page;
+
+ /* Return page to our allocator if not freeing the chunk */
+ if (!dmirror_page_to_chunk(page)->remove) {
+ mdevice->cfree++;
+ page->zone_device_data = mdevice->free_pages;
+ mdevice->free_pages = page;
+ }
spin_unlock(&mdevice->lock);
}
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
- struct migrate_vma args;
+ struct migrate_vma args = { 0 };
unsigned long src_pfns = 0;
unsigned long dst_pfns = 0;
struct page *rpage;
@@ -1357,6 +1450,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
args.dst = &dst_pfns;
args.pgmap_owner = dmirror->mdevice;
args.flags = dmirror_select_device(dmirror);
+ args.fault_page = vmf->page;
if (migrate_vma_setup(&args))
return VM_FAULT_SIGBUS;
@@ -1390,7 +1484,14 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
cdev_init(&mdevice->cdevice, &dmirror_fops);
mdevice->cdevice.owner = THIS_MODULE;
- ret = cdev_add(&mdevice->cdevice, dev, 1);
+ device_initialize(&mdevice->device);
+ mdevice->device.devt = dev;
+
+ ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
+ if (ret)
+ return ret;
+
+ ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
if (ret)
return ret;
@@ -1400,23 +1501,8 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
static void dmirror_device_remove(struct dmirror_device *mdevice)
{
- unsigned int i;
-
- if (mdevice->devmem_chunks) {
- for (i = 0; i < mdevice->devmem_count; i++) {
- struct dmirror_chunk *devmem =
- mdevice->devmem_chunks[i];
-
- memunmap_pages(&devmem->pagemap);
- if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
- release_mem_region(devmem->pagemap.range.start,
- range_len(&devmem->pagemap.range));
- kfree(devmem);
- }
- kfree(mdevice->devmem_chunks);
- }
-
- cdev_del(&mdevice->cdevice);
+ dmirror_device_remove_chunks(mdevice);
+ cdev_device_del(&mdevice->cdevice, &mdevice->device);
}
static int __init hmm_dmirror_init(void)
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index e31d58c9034a..8c818a2cf4f6 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -36,6 +36,7 @@ struct hmm_dmirror_cmd {
#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x04, struct hmm_dmirror_cmd)
#define HMM_DMIRROR_EXCLUSIVE _IOWR('H', 0x05, struct hmm_dmirror_cmd)
#define HMM_DMIRROR_CHECK_EXCLUSIVE _IOWR('H', 0x06, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_RELEASE _IOWR('H', 0x07, struct hmm_dmirror_cmd)
/*
* Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
deleted file mode 100644
index 58c1b01ccfe2..000000000000
--- a/lib/test_kasan.c
+++ /dev/null
@@ -1,1426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
- */
-
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/kasan.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/printk.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-#include <linux/vmalloc.h>
-#include <linux/set_memory.h>
-
-#include <asm/page.h>
-
-#include <kunit/test.h>
-
-#include "../mm/kasan/kasan.h"
-
-#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
-
-/*
- * Some tests use these global variables to store return values from function
- * calls that could otherwise be eliminated by the compiler as dead code.
- */
-void *kasan_ptr_result;
-int kasan_int_result;
-
-static struct kunit_resource resource;
-static struct kunit_kasan_status test_status;
-static bool multishot;
-
-/*
- * Temporarily enable multi-shot mode. Otherwise, KASAN would only report the
- * first detected bug and panic the kernel if panic_on_warn is enabled. For
- * hardware tag-based KASAN also allow tag checking to be reenabled for each
- * test, see the comment for KUNIT_EXPECT_KASAN_FAIL().
- */
-static int kasan_test_init(struct kunit *test)
-{
- if (!kasan_enabled()) {
- kunit_err(test, "can't run KASAN tests with KASAN disabled");
- return -1;
- }
-
- multishot = kasan_save_enable_multi_shot();
- test_status.report_found = false;
- test_status.sync_fault = false;
- kunit_add_named_resource(test, NULL, NULL, &resource,
- "kasan_status", &test_status);
- return 0;
-}
-
-static void kasan_test_exit(struct kunit *test)
-{
- kasan_restore_multi_shot(multishot);
- KUNIT_EXPECT_FALSE(test, test_status.report_found);
-}
-
-/**
- * KUNIT_EXPECT_KASAN_FAIL() - check that the executed expression produces a
- * KASAN report; causes a test failure otherwise. This relies on a KUnit
- * resource named "kasan_status". Do not use this name for KUnit resources
- * outside of KASAN tests.
- *
- * For hardware tag-based KASAN, when a synchronous tag fault happens, tag
- * checking is auto-disabled. When this happens, this test handler reenables
- * tag checking. As tag checking can be only disabled or enabled per CPU,
- * this handler disables migration (preemption).
- *
- * Since the compiler doesn't see that the expression can change the test_status
- * fields, it can reorder or optimize away the accesses to those fields.
- * Use READ/WRITE_ONCE() for the accesses and compiler barriers around the
- * expression to prevent that.
- *
- * In between KUNIT_EXPECT_KASAN_FAIL checks, test_status.report_found is kept
- * as false. This allows detecting KASAN reports that happen outside of the
- * checks by asserting !test_status.report_found at the start of
- * KUNIT_EXPECT_KASAN_FAIL and in kasan_test_exit.
- */
-#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \
- if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \
- kasan_sync_fault_possible()) \
- migrate_disable(); \
- KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found)); \
- barrier(); \
- expression; \
- barrier(); \
- if (kasan_async_fault_possible()) \
- kasan_force_async_fault(); \
- if (!READ_ONCE(test_status.report_found)) { \
- KUNIT_FAIL(test, KUNIT_SUBTEST_INDENT "KASAN failure " \
- "expected in \"" #expression \
- "\", but none occurred"); \
- } \
- if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \
- kasan_sync_fault_possible()) { \
- if (READ_ONCE(test_status.report_found) && \
- READ_ONCE(test_status.sync_fault)) \
- kasan_enable_tagging(); \
- migrate_enable(); \
- } \
- WRITE_ONCE(test_status.report_found, false); \
-} while (0)
-
-#define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do { \
- if (!IS_ENABLED(config)) \
- kunit_skip((test), "Test requires " #config "=y"); \
-} while (0)
-
-#define KASAN_TEST_NEEDS_CONFIG_OFF(test, config) do { \
- if (IS_ENABLED(config)) \
- kunit_skip((test), "Test requires " #config "=n"); \
-} while (0)
-
-static void kmalloc_oob_right(struct kunit *test)
-{
- char *ptr;
- size_t size = 128 - KASAN_GRANULE_SIZE - 5;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- /*
- * An unaligned access past the requested kmalloc size.
- * Only generic KASAN can precisely detect these.
- */
- if (IS_ENABLED(CONFIG_KASAN_GENERIC))
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 'x');
-
- /*
- * An aligned access into the first out-of-bounds granule that falls
- * within the aligned kmalloc object.
- */
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + 5] = 'y');
-
- /* Out-of-bounds access past the aligned kmalloc object. */
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] =
- ptr[size + KASAN_GRANULE_SIZE + 5]);
-
- kfree(ptr);
-}
-
-static void kmalloc_oob_left(struct kunit *test)
-{
- char *ptr;
- size_t size = 15;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1));
- kfree(ptr);
-}
-
-static void kmalloc_node_oob_right(struct kunit *test)
-{
- char *ptr;
- size_t size = 4096;
-
- ptr = kmalloc_node(size, GFP_KERNEL, 0);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
- kfree(ptr);
-}
-
-/*
- * These kmalloc_pagealloc_* tests try allocating a memory chunk that doesn't
- * fit into a slab cache and therefore is allocated via the page allocator
- * fallback. Since this kind of fallback is only implemented for SLUB, these
- * tests are limited to that allocator.
- */
-static void kmalloc_pagealloc_oob_right(struct kunit *test)
-{
- char *ptr;
- size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0);
-
- kfree(ptr);
-}
-
-static void kmalloc_pagealloc_uaf(struct kunit *test)
-{
- char *ptr;
- size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- kfree(ptr);
-
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
-}
-
-static void kmalloc_pagealloc_invalid_free(struct kunit *test)
-{
- char *ptr;
- size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- KUNIT_EXPECT_KASAN_FAIL(test, kfree(ptr + 1));
-}
-
-static void pagealloc_oob_right(struct kunit *test)
-{
- char *ptr;
- struct page *pages;
- size_t order = 4;
- size_t size = (1UL << (PAGE_SHIFT + order));
-
- /*
- * With generic KASAN page allocations have no redzones, thus
- * out-of-bounds detection is not guaranteed.
- * See https://bugzilla.kernel.org/show_bug.cgi?id=210503.
- */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
-
- pages = alloc_pages(GFP_KERNEL, order);
- ptr = page_address(pages);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
- free_pages((unsigned long)ptr, order);
-}
-
-static void pagealloc_uaf(struct kunit *test)
-{
- char *ptr;
- struct page *pages;
- size_t order = 4;
-
- pages = alloc_pages(GFP_KERNEL, order);
- ptr = page_address(pages);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- free_pages((unsigned long)ptr, order);
-
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
-}
-
-static void kmalloc_large_oob_right(struct kunit *test)
-{
- char *ptr;
- size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
-
- /*
- * Allocate a chunk that is large enough, but still fits into a slab
- * and does not trigger the page allocator fallback in SLUB.
- */
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
- kfree(ptr);
-}
-
-static void krealloc_more_oob_helper(struct kunit *test,
- size_t size1, size_t size2)
-{
- char *ptr1, *ptr2;
- size_t middle;
-
- KUNIT_ASSERT_LT(test, size1, size2);
- middle = size1 + (size2 - size1) / 2;
-
- ptr1 = kmalloc(size1, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
-
- ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
-
- /* All offsets up to size2 must be accessible. */
- ptr2[size1 - 1] = 'x';
- ptr2[size1] = 'x';
- ptr2[middle] = 'x';
- ptr2[size2 - 1] = 'x';
-
- /* Generic mode is precise, so unaligned size2 must be inaccessible. */
- if (IS_ENABLED(CONFIG_KASAN_GENERIC))
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
-
- /* For all modes first aligned offset after size2 must be inaccessible. */
- KUNIT_EXPECT_KASAN_FAIL(test,
- ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
-
- kfree(ptr2);
-}
-
-static void krealloc_less_oob_helper(struct kunit *test,
- size_t size1, size_t size2)
-{
- char *ptr1, *ptr2;
- size_t middle;
-
- KUNIT_ASSERT_LT(test, size2, size1);
- middle = size2 + (size1 - size2) / 2;
-
- ptr1 = kmalloc(size1, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
-
- ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
-
- /* Must be accessible for all modes. */
- ptr2[size2 - 1] = 'x';
-
- /* Generic mode is precise, so unaligned size2 must be inaccessible. */
- if (IS_ENABLED(CONFIG_KASAN_GENERIC))
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
-
- /* For all modes first aligned offset after size2 must be inaccessible. */
- KUNIT_EXPECT_KASAN_FAIL(test,
- ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
-
- /*
- * For all modes all size2, middle, and size1 should land in separate
- * granules and thus the latter two offsets should be inaccessible.
- */
- KUNIT_EXPECT_LE(test, round_up(size2, KASAN_GRANULE_SIZE),
- round_down(middle, KASAN_GRANULE_SIZE));
- KUNIT_EXPECT_LE(test, round_up(middle, KASAN_GRANULE_SIZE),
- round_down(size1, KASAN_GRANULE_SIZE));
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[middle] = 'x');
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1 - 1] = 'x');
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1] = 'x');
-
- kfree(ptr2);
-}
-
-static void krealloc_more_oob(struct kunit *test)
-{
- krealloc_more_oob_helper(test, 201, 235);
-}
-
-static void krealloc_less_oob(struct kunit *test)
-{
- krealloc_less_oob_helper(test, 235, 201);
-}
-
-static void krealloc_pagealloc_more_oob(struct kunit *test)
-{
- /* page_alloc fallback in only implemented for SLUB. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
-
- krealloc_more_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 201,
- KMALLOC_MAX_CACHE_SIZE + 235);
-}
-
-static void krealloc_pagealloc_less_oob(struct kunit *test)
-{
- /* page_alloc fallback in only implemented for SLUB. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
-
- krealloc_less_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 235,
- KMALLOC_MAX_CACHE_SIZE + 201);
-}
-
-/*
- * Check that krealloc() detects a use-after-free, returns NULL,
- * and doesn't unpoison the freed object.
- */
-static void krealloc_uaf(struct kunit *test)
-{
- char *ptr1, *ptr2;
- int size1 = 201;
- int size2 = 235;
-
- ptr1 = kmalloc(size1, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
- kfree(ptr1);
-
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2 = krealloc(ptr1, size2, GFP_KERNEL));
- KUNIT_ASSERT_NULL(test, ptr2);
- KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)ptr1);
-}
-
-static void kmalloc_oob_16(struct kunit *test)
-{
- struct {
- u64 words[2];
- } *ptr1, *ptr2;
-
- /* This test is specifically crafted for the generic mode. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
-
- ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
-
- ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
-
- OPTIMIZER_HIDE_VAR(ptr1);
- OPTIMIZER_HIDE_VAR(ptr2);
- KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2);
- kfree(ptr1);
- kfree(ptr2);
-}
-
-static void kmalloc_uaf_16(struct kunit *test)
-{
- struct {
- u64 words[2];
- } *ptr1, *ptr2;
-
- ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
-
- ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
- kfree(ptr2);
-
- KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2);
- kfree(ptr1);
-}
-
-/*
- * Note: in the memset tests below, the written range touches both valid and
- * invalid memory. This makes sure that the instrumentation does not only check
- * the starting address but the whole range.
- */
-
-static void kmalloc_oob_memset_2(struct kunit *test)
-{
- char *ptr;
- size_t size = 128 - KASAN_GRANULE_SIZE;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2));
- kfree(ptr);
-}
-
-static void kmalloc_oob_memset_4(struct kunit *test)
-{
- char *ptr;
- size_t size = 128 - KASAN_GRANULE_SIZE;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4));
- kfree(ptr);
-}
-
-static void kmalloc_oob_memset_8(struct kunit *test)
-{
- char *ptr;
- size_t size = 128 - KASAN_GRANULE_SIZE;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8));
- kfree(ptr);
-}
-
-static void kmalloc_oob_memset_16(struct kunit *test)
-{
- char *ptr;
- size_t size = 128 - KASAN_GRANULE_SIZE;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16));
- kfree(ptr);
-}
-
-static void kmalloc_oob_in_memset(struct kunit *test)
-{
- char *ptr;
- size_t size = 128 - KASAN_GRANULE_SIZE;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test,
- memset(ptr, 0, size + KASAN_GRANULE_SIZE));
- kfree(ptr);
-}
-
-static void kmalloc_memmove_negative_size(struct kunit *test)
-{
- char *ptr;
- size_t size = 64;
- size_t invalid_size = -2;
-
- /*
- * Hardware tag-based mode doesn't check memmove for negative size.
- * As a result, this test introduces a side-effect memory corruption,
- * which can result in a crash.
- */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_HW_TAGS);
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- memset((char *)ptr, 0, 64);
- OPTIMIZER_HIDE_VAR(ptr);
- OPTIMIZER_HIDE_VAR(invalid_size);
- KUNIT_EXPECT_KASAN_FAIL(test,
- memmove((char *)ptr, (char *)ptr + 4, invalid_size));
- kfree(ptr);
-}
-
-static void kmalloc_memmove_invalid_size(struct kunit *test)
-{
- char *ptr;
- size_t size = 64;
- volatile size_t invalid_size = size;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- memset((char *)ptr, 0, 64);
- OPTIMIZER_HIDE_VAR(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test,
- memmove((char *)ptr, (char *)ptr + 4, invalid_size));
- kfree(ptr);
-}
-
-static void kmalloc_uaf(struct kunit *test)
-{
- char *ptr;
- size_t size = 10;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- kfree(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[8]);
-}
-
-static void kmalloc_uaf_memset(struct kunit *test)
-{
- char *ptr;
- size_t size = 33;
-
- /*
- * Only generic KASAN uses quarantine, which is required to avoid a
- * kernel memory corruption this test causes.
- */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- kfree(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size));
-}
-
-static void kmalloc_uaf2(struct kunit *test)
-{
- char *ptr1, *ptr2;
- size_t size = 43;
- int counter = 0;
-
-again:
- ptr1 = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
-
- kfree(ptr1);
-
- ptr2 = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
-
- /*
- * For tag-based KASAN ptr1 and ptr2 tags might happen to be the same.
- * Allow up to 16 attempts at generating different tags.
- */
- if (!IS_ENABLED(CONFIG_KASAN_GENERIC) && ptr1 == ptr2 && counter++ < 16) {
- kfree(ptr2);
- goto again;
- }
-
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr1)[40]);
- KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2);
-
- kfree(ptr2);
-}
-
-static void kfree_via_page(struct kunit *test)
-{
- char *ptr;
- size_t size = 8;
- struct page *page;
- unsigned long offset;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- page = virt_to_page(ptr);
- offset = offset_in_page(ptr);
- kfree(page_address(page) + offset);
-}
-
-static void kfree_via_phys(struct kunit *test)
-{
- char *ptr;
- size_t size = 8;
- phys_addr_t phys;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- phys = virt_to_phys(ptr);
- kfree(phys_to_virt(phys));
-}
-
-static void kmem_cache_oob(struct kunit *test)
-{
- char *p;
- size_t size = 200;
- struct kmem_cache *cache;
-
- cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
-
- p = kmem_cache_alloc(cache, GFP_KERNEL);
- if (!p) {
- kunit_err(test, "Allocation failed: %s\n", __func__);
- kmem_cache_destroy(cache);
- return;
- }
-
- KUNIT_EXPECT_KASAN_FAIL(test, *p = p[size + OOB_TAG_OFF]);
-
- kmem_cache_free(cache, p);
- kmem_cache_destroy(cache);
-}
-
-static void kmem_cache_accounted(struct kunit *test)
-{
- int i;
- char *p;
- size_t size = 200;
- struct kmem_cache *cache;
-
- cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
-
- /*
- * Several allocations with a delay to allow for lazy per memcg kmem
- * cache creation.
- */
- for (i = 0; i < 5; i++) {
- p = kmem_cache_alloc(cache, GFP_KERNEL);
- if (!p)
- goto free_cache;
-
- kmem_cache_free(cache, p);
- msleep(100);
- }
-
-free_cache:
- kmem_cache_destroy(cache);
-}
-
-static void kmem_cache_bulk(struct kunit *test)
-{
- struct kmem_cache *cache;
- size_t size = 200;
- char *p[10];
- bool ret;
- int i;
-
- cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
-
- ret = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p), (void **)&p);
- if (!ret) {
- kunit_err(test, "Allocation failed: %s\n", __func__);
- kmem_cache_destroy(cache);
- return;
- }
-
- for (i = 0; i < ARRAY_SIZE(p); i++)
- p[i][0] = p[i][size - 1] = 42;
-
- kmem_cache_free_bulk(cache, ARRAY_SIZE(p), (void **)&p);
- kmem_cache_destroy(cache);
-}
-
-static char global_array[10];
-
-static void kasan_global_oob_right(struct kunit *test)
-{
- /*
- * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
- * from failing here and panicking the kernel, access the array via a
- * volatile pointer, which will prevent the compiler from being able to
- * determine the array bounds.
- *
- * This access uses a volatile pointer to char (char *volatile) rather
- * than the more conventional pointer to volatile char (volatile char *)
- * because we want to prevent the compiler from making inferences about
- * the pointer itself (i.e. its array bounds), not the data that it
- * refers to.
- */
- char *volatile array = global_array;
- char *p = &array[ARRAY_SIZE(global_array) + 3];
-
- /* Only generic mode instruments globals. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
-
- KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
-}
-
-static void kasan_global_oob_left(struct kunit *test)
-{
- char *volatile array = global_array;
- char *p = array - 3;
-
- /*
- * GCC is known to fail this test, skip it.
- * See https://bugzilla.kernel.org/show_bug.cgi?id=215051.
- */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_CC_IS_CLANG);
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
- KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
-}
-
-/* Check that ksize() makes the whole object accessible. */
-static void ksize_unpoisons_memory(struct kunit *test)
-{
- char *ptr;
- size_t size = 123, real_size;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- real_size = ksize(ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
-
- /* This access shouldn't trigger a KASAN report. */
- ptr[size] = 'x';
-
- /* This one must. */
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]);
-
- kfree(ptr);
-}
-
-/*
- * Check that a use-after-free is detected by ksize() and via normal accesses
- * after it.
- */
-static void ksize_uaf(struct kunit *test)
-{
- char *ptr;
- int size = 128 - KASAN_GRANULE_SIZE;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- kfree(ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr));
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]);
-}
-
-static void kasan_stack_oob(struct kunit *test)
-{
- char stack_array[10];
- /* See comment in kasan_global_oob_right. */
- char *volatile array = stack_array;
- char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF];
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
-
- KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
-}
-
-static void kasan_alloca_oob_left(struct kunit *test)
-{
- volatile int i = 10;
- char alloca_array[i];
- /* See comment in kasan_global_oob_right. */
- char *volatile array = alloca_array;
- char *p = array - 1;
-
- /* Only generic mode instruments dynamic allocas. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
-
- KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
-}
-
-static void kasan_alloca_oob_right(struct kunit *test)
-{
- volatile int i = 10;
- char alloca_array[i];
- /* See comment in kasan_global_oob_right. */
- char *volatile array = alloca_array;
- char *p = array + i;
-
- /* Only generic mode instruments dynamic allocas. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
-
- KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
-}
-
-static void kmem_cache_double_free(struct kunit *test)
-{
- char *p;
- size_t size = 200;
- struct kmem_cache *cache;
-
- cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
-
- p = kmem_cache_alloc(cache, GFP_KERNEL);
- if (!p) {
- kunit_err(test, "Allocation failed: %s\n", __func__);
- kmem_cache_destroy(cache);
- return;
- }
-
- kmem_cache_free(cache, p);
- KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p));
- kmem_cache_destroy(cache);
-}
-
-static void kmem_cache_invalid_free(struct kunit *test)
-{
- char *p;
- size_t size = 200;
- struct kmem_cache *cache;
-
- cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
- NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
-
- p = kmem_cache_alloc(cache, GFP_KERNEL);
- if (!p) {
- kunit_err(test, "Allocation failed: %s\n", __func__);
- kmem_cache_destroy(cache);
- return;
- }
-
- /* Trigger invalid free, the object doesn't get freed. */
- KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p + 1));
-
- /*
- * Properly free the object to prevent the "Objects remaining in
- * test_cache on __kmem_cache_shutdown" BUG failure.
- */
- kmem_cache_free(cache, p);
-
- kmem_cache_destroy(cache);
-}
-
-static void empty_cache_ctor(void *object) { }
-
-static void kmem_cache_double_destroy(struct kunit *test)
-{
- struct kmem_cache *cache;
-
- /* Provide a constructor to prevent cache merging. */
- cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
- kmem_cache_destroy(cache);
- KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache));
-}
-
-static void kasan_memchr(struct kunit *test)
-{
- char *ptr;
- size_t size = 24;
-
- /*
- * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT.
- * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details.
- */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT);
-
- if (OOB_TAG_OFF)
- size = round_up(size, OOB_TAG_OFF);
-
- ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- OPTIMIZER_HIDE_VAR(ptr);
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test,
- kasan_ptr_result = memchr(ptr, '1', size + 1));
-
- kfree(ptr);
-}
-
-static void kasan_memcmp(struct kunit *test)
-{
- char *ptr;
- size_t size = 24;
- int arr[9];
-
- /*
- * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT.
- * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details.
- */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT);
-
- if (OOB_TAG_OFF)
- size = round_up(size, OOB_TAG_OFF);
-
- ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- memset(arr, 0, sizeof(arr));
-
- OPTIMIZER_HIDE_VAR(ptr);
- OPTIMIZER_HIDE_VAR(size);
- KUNIT_EXPECT_KASAN_FAIL(test,
- kasan_int_result = memcmp(ptr, arr, size+1));
- kfree(ptr);
-}
-
-static void kasan_strings(struct kunit *test)
-{
- char *ptr;
- size_t size = 24;
-
- /*
- * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT.
- * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details.
- */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT);
-
- ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- kfree(ptr);
-
- /*
- * Try to cause only 1 invalid access (less spam in dmesg).
- * For that we need ptr to point to zeroed byte.
- * Skip metadata that could be stored in freed object so ptr
- * will likely point to zeroed byte.
- */
- ptr += 16;
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strchr(ptr, '1'));
-
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strrchr(ptr, '1'));
-
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strcmp(ptr, "2"));
-
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strncmp(ptr, "2", 1));
-
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strlen(ptr));
-
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1));
-}
-
-static void kasan_bitops_modify(struct kunit *test, int nr, void *addr)
-{
- KUNIT_EXPECT_KASAN_FAIL(test, set_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, change_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(nr, addr));
-}
-
-static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr)
-{
- KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __test_and_set_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit_lock(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, test_and_clear_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __test_and_clear_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr));
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr));
-
-#if defined(clear_bit_unlock_is_negative_byte)
- KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result =
- clear_bit_unlock_is_negative_byte(nr, addr));
-#endif
-}
-
-static void kasan_bitops_generic(struct kunit *test)
-{
- long *bits;
-
- /* This test is specifically crafted for the generic mode. */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
-
- /*
- * Allocate 1 more byte, which causes kzalloc to round up to 16 bytes;
- * this way we do not actually corrupt other memory.
- */
- bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits);
-
- /*
- * Below calls try to access bit within allocated memory; however, the
- * below accesses are still out-of-bounds, since bitops are defined to
- * operate on the whole long the bit is in.
- */
- kasan_bitops_modify(test, BITS_PER_LONG, bits);
-
- /*
- * Below calls try to access bit beyond allocated memory.
- */
- kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, bits);
-
- kfree(bits);
-}
-
-static void kasan_bitops_tags(struct kunit *test)
-{
- long *bits;
-
- /* This test is specifically crafted for tag-based modes. */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
-
- /* kmalloc-64 cache will be used and the last 16 bytes will be the redzone. */
- bits = kzalloc(48, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits);
-
- /* Do the accesses past the 48 allocated bytes, but within the redone. */
- kasan_bitops_modify(test, BITS_PER_LONG, (void *)bits + 48);
- kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, (void *)bits + 48);
-
- kfree(bits);
-}
-
-static void kmalloc_double_kzfree(struct kunit *test)
-{
- char *ptr;
- size_t size = 16;
-
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- kfree_sensitive(ptr);
- KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr));
-}
-
-static void vmalloc_helpers_tags(struct kunit *test)
-{
- void *ptr;
-
- /* This test is intended for tag-based modes. */
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
-
- ptr = vmalloc(PAGE_SIZE);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- /* Check that the returned pointer is tagged. */
- KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
-
- /* Make sure exported vmalloc helpers handle tagged pointers. */
- KUNIT_ASSERT_TRUE(test, is_vmalloc_addr(ptr));
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vmalloc_to_page(ptr));
-
-#if !IS_MODULE(CONFIG_KASAN_KUNIT_TEST)
- {
- int rv;
-
- /* Make sure vmalloc'ed memory permissions can be changed. */
- rv = set_memory_ro((unsigned long)ptr, 1);
- KUNIT_ASSERT_GE(test, rv, 0);
- rv = set_memory_rw((unsigned long)ptr, 1);
- KUNIT_ASSERT_GE(test, rv, 0);
- }
-#endif
-
- vfree(ptr);
-}
-
-static void vmalloc_oob(struct kunit *test)
-{
- char *v_ptr, *p_ptr;
- struct page *page;
- size_t size = PAGE_SIZE / 2 - KASAN_GRANULE_SIZE - 5;
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
-
- v_ptr = vmalloc(size);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr);
-
- OPTIMIZER_HIDE_VAR(v_ptr);
-
- /*
- * We have to be careful not to hit the guard page in vmalloc tests.
- * The MMU will catch that and crash us.
- */
-
- /* Make sure in-bounds accesses are valid. */
- v_ptr[0] = 0;
- v_ptr[size - 1] = 0;
-
- /*
- * An unaligned access past the requested vmalloc size.
- * Only generic KASAN can precisely detect these.
- */
- if (IS_ENABLED(CONFIG_KASAN_GENERIC))
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size]);
-
- /* An aligned access into the first out-of-bounds granule. */
- KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size + 5]);
-
- /* Check that in-bounds accesses to the physical page are valid. */
- page = vmalloc_to_page(v_ptr);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page);
- p_ptr = page_address(page);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr);
- p_ptr[0] = 0;
-
- vfree(v_ptr);
-
- /*
- * We can't check for use-after-unmap bugs in this nor in the following
- * vmalloc tests, as the page might be fully unmapped and accessing it
- * will crash the kernel.
- */
-}
-
-static void vmap_tags(struct kunit *test)
-{
- char *p_ptr, *v_ptr;
- struct page *p_page, *v_page;
-
- /*
- * This test is specifically crafted for the software tag-based mode,
- * the only tag-based mode that poisons vmap mappings.
- */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
-
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
-
- p_page = alloc_pages(GFP_KERNEL, 1);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_page);
- p_ptr = page_address(p_page);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr);
-
- v_ptr = vmap(&p_page, 1, VM_MAP, PAGE_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr);
-
- /*
- * We can't check for out-of-bounds bugs in this nor in the following
- * vmalloc tests, as allocations have page granularity and accessing
- * the guard page will crash the kernel.
- */
-
- KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL);
-
- /* Make sure that in-bounds accesses through both pointers work. */
- *p_ptr = 0;
- *v_ptr = 0;
-
- /* Make sure vmalloc_to_page() correctly recovers the page pointer. */
- v_page = vmalloc_to_page(v_ptr);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_page);
- KUNIT_EXPECT_PTR_EQ(test, p_page, v_page);
-
- vunmap(v_ptr);
- free_pages((unsigned long)p_ptr, 1);
-}
-
-static void vm_map_ram_tags(struct kunit *test)
-{
- char *p_ptr, *v_ptr;
- struct page *page;
-
- /*
- * This test is specifically crafted for the software tag-based mode,
- * the only tag-based mode that poisons vm_map_ram mappings.
- */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
-
- page = alloc_pages(GFP_KERNEL, 1);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page);
- p_ptr = page_address(page);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr);
-
- v_ptr = vm_map_ram(&page, 1, -1);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr);
-
- KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL);
-
- /* Make sure that in-bounds accesses through both pointers work. */
- *p_ptr = 0;
- *v_ptr = 0;
-
- vm_unmap_ram(v_ptr, 1);
- free_pages((unsigned long)p_ptr, 1);
-}
-
-static void vmalloc_percpu(struct kunit *test)
-{
- char __percpu *ptr;
- int cpu;
-
- /*
- * This test is specifically crafted for the software tag-based mode,
- * the only tag-based mode that poisons percpu mappings.
- */
- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
-
- ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
-
- for_each_possible_cpu(cpu) {
- char *c_ptr = per_cpu_ptr(ptr, cpu);
-
- KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
-
- /* Make sure that in-bounds accesses don't crash the kernel. */
- *c_ptr = 0;
- }
-
- free_percpu(ptr);
-}
-
-/*
- * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
- * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
- * modes.
- */
-static void match_all_not_assigned(struct kunit *test)
-{
- char *ptr;
- struct page *pages;
- int i, size, order;
-
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
-
- for (i = 0; i < 256; i++) {
- size = (get_random_int() % 1024) + 1;
- ptr = kmalloc(size, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
- kfree(ptr);
- }
-
- for (i = 0; i < 256; i++) {
- order = (get_random_int() % 4) + 1;
- pages = alloc_pages(GFP_KERNEL, order);
- ptr = page_address(pages);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
- free_pages((unsigned long)ptr, order);
- }
-
- if (!IS_ENABLED(CONFIG_KASAN_VMALLOC))
- return;
-
- for (i = 0; i < 256; i++) {
- size = (get_random_int() % 1024) + 1;
- ptr = vmalloc(size);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
- KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
- vfree(ptr);
- }
-}
-
-/* Check that 0xff works as a match-all pointer tag for tag-based modes. */
-static void match_all_ptr_tag(struct kunit *test)
-{
- char *ptr;
- u8 tag;
-
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
-
- ptr = kmalloc(128, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
- /* Backup the assigned tag. */
- tag = get_tag(ptr);
- KUNIT_EXPECT_NE(test, tag, (u8)KASAN_TAG_KERNEL);
-
- /* Reset the tag to 0xff.*/
- ptr = set_tag(ptr, KASAN_TAG_KERNEL);
-
- /* This access shouldn't trigger a KASAN report. */
- *ptr = 0;
-
- /* Recover the pointer tag and free. */
- ptr = set_tag(ptr, tag);
- kfree(ptr);
-}
-
-/* Check that there are no match-all memory tags for tag-based modes. */
-static void match_all_mem_tag(struct kunit *test)
-{
- char *ptr;
- int tag;
-
- KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
-
- ptr = kmalloc(128, GFP_KERNEL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
- KUNIT_EXPECT_NE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
-
- /* For each possible tag value not matching the pointer tag. */
- for (tag = KASAN_TAG_MIN; tag <= KASAN_TAG_KERNEL; tag++) {
- if (tag == get_tag(ptr))
- continue;
-
- /* Mark the first memory granule with the chosen memory tag. */
- kasan_poison(ptr, KASAN_GRANULE_SIZE, (u8)tag, false);
-
- /* This access must cause a KASAN report. */
- KUNIT_EXPECT_KASAN_FAIL(test, *ptr = 0);
- }
-
- /* Recover the memory tag and free. */
- kasan_poison(ptr, KASAN_GRANULE_SIZE, get_tag(ptr), false);
- kfree(ptr);
-}
-
-static struct kunit_case kasan_kunit_test_cases[] = {
- KUNIT_CASE(kmalloc_oob_right),
- KUNIT_CASE(kmalloc_oob_left),
- KUNIT_CASE(kmalloc_node_oob_right),
- KUNIT_CASE(kmalloc_pagealloc_oob_right),
- KUNIT_CASE(kmalloc_pagealloc_uaf),
- KUNIT_CASE(kmalloc_pagealloc_invalid_free),
- KUNIT_CASE(pagealloc_oob_right),
- KUNIT_CASE(pagealloc_uaf),
- KUNIT_CASE(kmalloc_large_oob_right),
- KUNIT_CASE(krealloc_more_oob),
- KUNIT_CASE(krealloc_less_oob),
- KUNIT_CASE(krealloc_pagealloc_more_oob),
- KUNIT_CASE(krealloc_pagealloc_less_oob),
- KUNIT_CASE(krealloc_uaf),
- KUNIT_CASE(kmalloc_oob_16),
- KUNIT_CASE(kmalloc_uaf_16),
- KUNIT_CASE(kmalloc_oob_in_memset),
- KUNIT_CASE(kmalloc_oob_memset_2),
- KUNIT_CASE(kmalloc_oob_memset_4),
- KUNIT_CASE(kmalloc_oob_memset_8),
- KUNIT_CASE(kmalloc_oob_memset_16),
- KUNIT_CASE(kmalloc_memmove_negative_size),
- KUNIT_CASE(kmalloc_memmove_invalid_size),
- KUNIT_CASE(kmalloc_uaf),
- KUNIT_CASE(kmalloc_uaf_memset),
- KUNIT_CASE(kmalloc_uaf2),
- KUNIT_CASE(kfree_via_page),
- KUNIT_CASE(kfree_via_phys),
- KUNIT_CASE(kmem_cache_oob),
- KUNIT_CASE(kmem_cache_accounted),
- KUNIT_CASE(kmem_cache_bulk),
- KUNIT_CASE(kasan_global_oob_right),
- KUNIT_CASE(kasan_global_oob_left),
- KUNIT_CASE(kasan_stack_oob),
- KUNIT_CASE(kasan_alloca_oob_left),
- KUNIT_CASE(kasan_alloca_oob_right),
- KUNIT_CASE(ksize_unpoisons_memory),
- KUNIT_CASE(ksize_uaf),
- KUNIT_CASE(kmem_cache_double_free),
- KUNIT_CASE(kmem_cache_invalid_free),
- KUNIT_CASE(kmem_cache_double_destroy),
- KUNIT_CASE(kasan_memchr),
- KUNIT_CASE(kasan_memcmp),
- KUNIT_CASE(kasan_strings),
- KUNIT_CASE(kasan_bitops_generic),
- KUNIT_CASE(kasan_bitops_tags),
- KUNIT_CASE(kmalloc_double_kzfree),
- KUNIT_CASE(vmalloc_helpers_tags),
- KUNIT_CASE(vmalloc_oob),
- KUNIT_CASE(vmap_tags),
- KUNIT_CASE(vm_map_ram_tags),
- KUNIT_CASE(vmalloc_percpu),
- KUNIT_CASE(match_all_not_assigned),
- KUNIT_CASE(match_all_ptr_tag),
- KUNIT_CASE(match_all_mem_tag),
- {}
-};
-
-static struct kunit_suite kasan_kunit_test_suite = {
- .name = "kasan",
- .init = kasan_test_init,
- .test_cases = kasan_kunit_test_cases,
- .exit = kasan_test_exit,
-};
-
-kunit_test_suite(kasan_kunit_test_suite);
-
-MODULE_LICENSE("GPL");
diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c
deleted file mode 100644
index b112cbc835e9..000000000000
--- a/lib/test_kasan_module.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
- */
-
-#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
-
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/printk.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#include "../mm/kasan/kasan.h"
-
-static noinline void __init copy_user_test(void)
-{
- char *kmem;
- char __user *usermem;
- size_t size = 128 - KASAN_GRANULE_SIZE;
- int __maybe_unused unused;
-
- kmem = kmalloc(size, GFP_KERNEL);
- if (!kmem)
- return;
-
- usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_ANONYMOUS | MAP_PRIVATE, 0);
- if (IS_ERR(usermem)) {
- pr_err("Failed to allocate user memory\n");
- kfree(kmem);
- return;
- }
-
- OPTIMIZER_HIDE_VAR(size);
-
- pr_info("out-of-bounds in copy_from_user()\n");
- unused = copy_from_user(kmem, usermem, size + 1);
-
- pr_info("out-of-bounds in copy_to_user()\n");
- unused = copy_to_user(usermem, kmem, size + 1);
-
- pr_info("out-of-bounds in __copy_from_user()\n");
- unused = __copy_from_user(kmem, usermem, size + 1);
-
- pr_info("out-of-bounds in __copy_to_user()\n");
- unused = __copy_to_user(usermem, kmem, size + 1);
-
- pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
- unused = __copy_from_user_inatomic(kmem, usermem, size + 1);
-
- pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
- unused = __copy_to_user_inatomic(usermem, kmem, size + 1);
-
- pr_info("out-of-bounds in strncpy_from_user()\n");
- unused = strncpy_from_user(kmem, usermem, size + 1);
-
- vm_munmap((unsigned long)usermem, PAGE_SIZE);
- kfree(kmem);
-}
-
-static struct kasan_rcu_info {
- int i;
- struct rcu_head rcu;
-} *global_rcu_ptr;
-
-static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp)
-{
- struct kasan_rcu_info *fp = container_of(rp,
- struct kasan_rcu_info, rcu);
-
- kfree(fp);
- ((volatile struct kasan_rcu_info *)fp)->i;
-}
-
-static noinline void __init kasan_rcu_uaf(void)
-{
- struct kasan_rcu_info *ptr;
-
- pr_info("use-after-free in kasan_rcu_reclaim\n");
- ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL);
- if (!ptr) {
- pr_err("Allocation failed\n");
- return;
- }
-
- global_rcu_ptr = rcu_dereference_protected(ptr, NULL);
- call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim);
-}
-
-static noinline void __init kasan_workqueue_work(struct work_struct *work)
-{
- kfree(work);
-}
-
-static noinline void __init kasan_workqueue_uaf(void)
-{
- struct workqueue_struct *workqueue;
- struct work_struct *work;
-
- workqueue = create_workqueue("kasan_wq_test");
- if (!workqueue) {
- pr_err("Allocation failed\n");
- return;
- }
- work = kmalloc(sizeof(struct work_struct), GFP_KERNEL);
- if (!work) {
- pr_err("Allocation failed\n");
- return;
- }
-
- INIT_WORK(work, kasan_workqueue_work);
- queue_work(workqueue, work);
- destroy_workqueue(workqueue);
-
- pr_info("use-after-free on workqueue\n");
- ((volatile struct work_struct *)work)->data;
-}
-
-static int __init test_kasan_module_init(void)
-{
- /*
- * Temporarily enable multi-shot mode. Otherwise, KASAN would only
- * report the first detected bug and panic the kernel if panic_on_warn
- * is enabled.
- */
- bool multishot = kasan_save_enable_multi_shot();
-
- copy_user_test();
- kasan_rcu_uaf();
- kasan_workqueue_uaf();
-
- kasan_restore_multi_shot(multishot);
- return -EAGAIN;
-}
-
-module_init(test_kasan_module_init);
-MODULE_LICENSE("GPL");
diff --git a/lib/test_kprobes.c b/lib/test_kprobes.c
index a5edc2ebc947..eeb1d728d974 100644
--- a/lib/test_kprobes.c
+++ b/lib/test_kprobes.c
@@ -341,7 +341,7 @@ static int kprobes_test_init(struct kunit *test)
stacktrace_driver = kprobe_stacktrace_driver;
do {
- rand1 = prandom_u32();
+ rand1 = get_random_u32();
} while (rand1 <= div_factor);
return 0;
}
diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c
index ade7a1ea0c8e..19ff229b9c3a 100644
--- a/lib/test_list_sort.c
+++ b/lib/test_list_sort.c
@@ -71,7 +71,7 @@ static void list_sort_test(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, el);
/* force some equivalencies */
- el->value = prandom_u32() % (TEST_LIST_LEN / 3);
+ el->value = prandom_u32_max(TEST_LIST_LEN / 3);
el->serial = i;
el->poison1 = TEST_POISON1;
el->poison2 = TEST_POISON2;
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
new file mode 100644
index 000000000000..f425f169ef08
--- /dev/null
+++ b/lib/test_maple_tree.c
@@ -0,0 +1,2767 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * test_maple_tree.c: Test the maple tree API
+ * Copyright (c) 2018-2022 Oracle Corporation
+ * Author: Liam R. Howlett <Liam.Howlett@Oracle.com>
+ *
+ * Any tests that only require the interface of the tree.
+ */
+
+#include <linux/maple_tree.h>
+#include <linux/module.h>
+
+#define MTREE_ALLOC_MAX 0x2000000000000Ul
+#ifndef CONFIG_DEBUG_MAPLE_TREE
+#define CONFIG_DEBUG_MAPLE_TREE
+#endif
+#define CONFIG_MAPLE_SEARCH
+#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31)
+
+/* #define BENCH_SLOT_STORE */
+/* #define BENCH_NODE_STORE */
+/* #define BENCH_AWALK */
+/* #define BENCH_WALK */
+/* #define BENCH_MT_FOR_EACH */
+/* #define BENCH_FORK */
+
+#ifdef __KERNEL__
+#define mt_set_non_kernel(x) do {} while (0)
+#define mt_zero_nr_tallocated(x) do {} while (0)
+#else
+#define cond_resched() do {} while (0)
+#endif
+static
+int mtree_insert_index(struct maple_tree *mt, unsigned long index, gfp_t gfp)
+{
+ return mtree_insert(mt, index, xa_mk_value(index & LONG_MAX), gfp);
+}
+
+static void mtree_erase_index(struct maple_tree *mt, unsigned long index)
+{
+ MT_BUG_ON(mt, mtree_erase(mt, index) != xa_mk_value(index & LONG_MAX));
+ MT_BUG_ON(mt, mtree_load(mt, index) != NULL);
+}
+
+static int mtree_test_insert(struct maple_tree *mt, unsigned long index,
+ void *ptr)
+{
+ return mtree_insert(mt, index, ptr, GFP_KERNEL);
+}
+
+static int mtree_test_store_range(struct maple_tree *mt, unsigned long start,
+ unsigned long end, void *ptr)
+{
+ return mtree_store_range(mt, start, end, ptr, GFP_KERNEL);
+}
+
+static int mtree_test_store(struct maple_tree *mt, unsigned long start,
+ void *ptr)
+{
+ return mtree_test_store_range(mt, start, start, ptr);
+}
+
+static int mtree_test_insert_range(struct maple_tree *mt, unsigned long start,
+ unsigned long end, void *ptr)
+{
+ return mtree_insert_range(mt, start, end, ptr, GFP_KERNEL);
+}
+
+static void *mtree_test_load(struct maple_tree *mt, unsigned long index)
+{
+ return mtree_load(mt, index);
+}
+
+static void *mtree_test_erase(struct maple_tree *mt, unsigned long index)
+{
+ return mtree_erase(mt, index);
+}
+
+#if defined(CONFIG_64BIT)
+static noinline void check_mtree_alloc_range(struct maple_tree *mt,
+ unsigned long start, unsigned long end, unsigned long size,
+ unsigned long expected, int eret, void *ptr)
+{
+
+ unsigned long result = expected + 1;
+ int ret;
+
+ ret = mtree_alloc_range(mt, &result, ptr, size, start, end,
+ GFP_KERNEL);
+ MT_BUG_ON(mt, ret != eret);
+ if (ret)
+ return;
+
+ MT_BUG_ON(mt, result != expected);
+}
+
+static noinline void check_mtree_alloc_rrange(struct maple_tree *mt,
+ unsigned long start, unsigned long end, unsigned long size,
+ unsigned long expected, int eret, void *ptr)
+{
+
+ unsigned long result = expected + 1;
+ int ret;
+
+ ret = mtree_alloc_rrange(mt, &result, ptr, size, start, end - 1,
+ GFP_KERNEL);
+ MT_BUG_ON(mt, ret != eret);
+ if (ret)
+ return;
+
+ MT_BUG_ON(mt, result != expected);
+}
+#endif
+
+static noinline void check_load(struct maple_tree *mt, unsigned long index,
+ void *ptr)
+{
+ void *ret = mtree_test_load(mt, index);
+
+ if (ret != ptr)
+ pr_err("Load %lu returned %p expect %p\n", index, ret, ptr);
+ MT_BUG_ON(mt, ret != ptr);
+}
+
+static noinline void check_store_range(struct maple_tree *mt,
+ unsigned long start, unsigned long end, void *ptr, int expected)
+{
+ int ret = -EINVAL;
+ unsigned long i;
+
+ ret = mtree_test_store_range(mt, start, end, ptr);
+ MT_BUG_ON(mt, ret != expected);
+
+ if (ret)
+ return;
+
+ for (i = start; i <= end; i++)
+ check_load(mt, i, ptr);
+}
+
+static noinline void check_insert_range(struct maple_tree *mt,
+ unsigned long start, unsigned long end, void *ptr, int expected)
+{
+ int ret = -EINVAL;
+ unsigned long i;
+
+ ret = mtree_test_insert_range(mt, start, end, ptr);
+ MT_BUG_ON(mt, ret != expected);
+
+ if (ret)
+ return;
+
+ for (i = start; i <= end; i++)
+ check_load(mt, i, ptr);
+}
+
+static noinline void check_insert(struct maple_tree *mt, unsigned long index,
+ void *ptr)
+{
+ int ret = -EINVAL;
+
+ ret = mtree_test_insert(mt, index, ptr);
+ MT_BUG_ON(mt, ret != 0);
+}
+
+static noinline void check_dup_insert(struct maple_tree *mt,
+ unsigned long index, void *ptr)
+{
+ int ret = -EINVAL;
+
+ ret = mtree_test_insert(mt, index, ptr);
+ MT_BUG_ON(mt, ret != -EEXIST);
+}
+
+
+static noinline
+void check_index_load(struct maple_tree *mt, unsigned long index)
+{
+ return check_load(mt, index, xa_mk_value(index & LONG_MAX));
+}
+
+static inline int not_empty(struct maple_node *node)
+{
+ int i;
+
+ if (node->parent)
+ return 1;
+
+ for (i = 0; i < ARRAY_SIZE(node->slot); i++)
+ if (node->slot[i])
+ return 1;
+
+ return 0;
+}
+
+
+static noinline void check_rev_seq(struct maple_tree *mt, unsigned long max,
+ bool verbose)
+{
+ unsigned long i = max, j;
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+
+ mt_zero_nr_tallocated();
+ while (i) {
+ MT_BUG_ON(mt, mtree_insert_index(mt, i, GFP_KERNEL));
+ for (j = i; j <= max; j++)
+ check_index_load(mt, j);
+
+ check_load(mt, i - 1, NULL);
+ mt_set_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mt_clear_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_height(mt));
+ i--;
+ }
+ check_load(mt, max + 1, NULL);
+
+#ifndef __KERNEL__
+ if (verbose) {
+ rcu_barrier();
+ mt_dump(mt);
+ pr_info(" %s test of 0-%lu %luK in %d active (%d total)\n",
+ __func__, max, mt_get_alloc_size()/1024, mt_nr_allocated(),
+ mt_nr_tallocated());
+ }
+#endif
+}
+
+static noinline void check_seq(struct maple_tree *mt, unsigned long max,
+ bool verbose)
+{
+ unsigned long i, j;
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+
+ mt_zero_nr_tallocated();
+ for (i = 0; i <= max; i++) {
+ MT_BUG_ON(mt, mtree_insert_index(mt, i, GFP_KERNEL));
+ for (j = 0; j <= i; j++)
+ check_index_load(mt, j);
+
+ if (i)
+ MT_BUG_ON(mt, !mt_height(mt));
+ check_load(mt, i + 1, NULL);
+ }
+
+#ifndef __KERNEL__
+ if (verbose) {
+ rcu_barrier();
+ mt_dump(mt);
+ pr_info(" seq test of 0-%lu %luK in %d active (%d total)\n",
+ max, mt_get_alloc_size()/1024, mt_nr_allocated(),
+ mt_nr_tallocated());
+ }
+#endif
+}
+
+static noinline void check_lb_not_empty(struct maple_tree *mt)
+{
+ unsigned long i, j;
+ unsigned long huge = 4000UL * 1000 * 1000;
+
+
+ i = huge;
+ while (i > 4096) {
+ check_insert(mt, i, (void *) i);
+ for (j = huge; j >= i; j /= 2) {
+ check_load(mt, j-1, NULL);
+ check_load(mt, j, (void *) j);
+ check_load(mt, j+1, NULL);
+ }
+ i /= 2;
+ }
+ mtree_destroy(mt);
+}
+
+static noinline void check_lower_bound_split(struct maple_tree *mt)
+{
+ MT_BUG_ON(mt, !mtree_empty(mt));
+ check_lb_not_empty(mt);
+}
+
+static noinline void check_upper_bound_split(struct maple_tree *mt)
+{
+ unsigned long i, j;
+ unsigned long huge;
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+
+ if (MAPLE_32BIT)
+ huge = 2147483647UL;
+ else
+ huge = 4000UL * 1000 * 1000;
+
+ i = 4096;
+ while (i < huge) {
+ check_insert(mt, i, (void *) i);
+ for (j = i; j >= huge; j *= 2) {
+ check_load(mt, j-1, NULL);
+ check_load(mt, j, (void *) j);
+ check_load(mt, j+1, NULL);
+ }
+ i *= 2;
+ }
+ mtree_destroy(mt);
+}
+
+static noinline void check_mid_split(struct maple_tree *mt)
+{
+ unsigned long huge = 8000UL * 1000 * 1000;
+
+ check_insert(mt, huge, (void *) huge);
+ check_insert(mt, 0, xa_mk_value(0));
+ check_lb_not_empty(mt);
+}
+
+static noinline void check_rev_find(struct maple_tree *mt)
+{
+ int i, nr_entries = 200;
+ void *val;
+ MA_STATE(mas, mt, 0, 0);
+
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ rcu_read_lock();
+ mas_set(&mas, 1000);
+ val = mas_find_rev(&mas, 1000);
+ MT_BUG_ON(mt, val != xa_mk_value(100));
+ val = mas_find_rev(&mas, 1000);
+ MT_BUG_ON(mt, val != NULL);
+
+ mas_set(&mas, 999);
+ val = mas_find_rev(&mas, 997);
+ MT_BUG_ON(mt, val != NULL);
+
+ mas_set(&mas, 1000);
+ val = mas_find_rev(&mas, 900);
+ MT_BUG_ON(mt, val != xa_mk_value(100));
+ val = mas_find_rev(&mas, 900);
+ MT_BUG_ON(mt, val != xa_mk_value(99));
+
+ mas_set(&mas, 20);
+ val = mas_find_rev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(2));
+ val = mas_find_rev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(1));
+ val = mas_find_rev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(0));
+ val = mas_find_rev(&mas, 0);
+ MT_BUG_ON(mt, val != NULL);
+ rcu_read_unlock();
+}
+
+static noinline void check_find(struct maple_tree *mt)
+{
+ unsigned long val = 0;
+ unsigned long count;
+ unsigned long max;
+ unsigned long top;
+ unsigned long last = 0, index = 0;
+ void *entry, *entry2;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ /* Insert 0. */
+ MT_BUG_ON(mt, mtree_insert_index(mt, val++, GFP_KERNEL));
+
+#if defined(CONFIG_64BIT)
+ top = 4398046511104UL;
+#else
+ top = ULONG_MAX;
+#endif
+
+ if (MAPLE_32BIT) {
+ count = 15;
+ } else {
+ count = 20;
+ }
+
+ for (int i = 0; i <= count; i++) {
+ if (val != 64)
+ MT_BUG_ON(mt, mtree_insert_index(mt, val, GFP_KERNEL));
+ else
+ MT_BUG_ON(mt, mtree_insert(mt, val,
+ XA_ZERO_ENTRY, GFP_KERNEL));
+
+ val <<= 2;
+ }
+
+ val = 0;
+ mas_set(&mas, val);
+ mas_lock(&mas);
+ while ((entry = mas_find(&mas, 268435456)) != NULL) {
+ if (val != 64)
+ MT_BUG_ON(mt, xa_mk_value(val) != entry);
+ else
+ MT_BUG_ON(mt, entry != XA_ZERO_ENTRY);
+
+ val <<= 2;
+ /* For zero check. */
+ if (!val)
+ val = 1;
+ }
+ mas_unlock(&mas);
+
+ val = 0;
+ mas_set(&mas, val);
+ mas_lock(&mas);
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ if (val != 64)
+ MT_BUG_ON(mt, xa_mk_value(val) != entry);
+ else
+ MT_BUG_ON(mt, entry != XA_ZERO_ENTRY);
+ val <<= 2;
+ /* For zero check. */
+ if (!val)
+ val = 1;
+ }
+ mas_unlock(&mas);
+
+ /* Test mas_pause */
+ val = 0;
+ mas_set(&mas, val);
+ mas_lock(&mas);
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ if (val != 64)
+ MT_BUG_ON(mt, xa_mk_value(val) != entry);
+ else
+ MT_BUG_ON(mt, entry != XA_ZERO_ENTRY);
+ val <<= 2;
+ /* For zero check. */
+ if (!val)
+ val = 1;
+
+ mas_pause(&mas);
+ mas_unlock(&mas);
+ mas_lock(&mas);
+ }
+ mas_unlock(&mas);
+
+ val = 0;
+ max = 300; /* A value big enough to include XA_ZERO_ENTRY at 64. */
+ mt_for_each(mt, entry, index, max) {
+ MT_BUG_ON(mt, xa_mk_value(val) != entry);
+ val <<= 2;
+ if (val == 64) /* Skip zero entry. */
+ val <<= 2;
+ /* For zero check. */
+ if (!val)
+ val = 1;
+ }
+
+ val = 0;
+ max = 0;
+ index = 0;
+ MT_BUG_ON(mt, mtree_insert_index(mt, ULONG_MAX, GFP_KERNEL));
+ mt_for_each(mt, entry, index, ULONG_MAX) {
+ if (val == top)
+ MT_BUG_ON(mt, entry != xa_mk_value(LONG_MAX));
+ else
+ MT_BUG_ON(mt, xa_mk_value(val) != entry);
+
+ /* Workaround for 32bit */
+ if ((val << 2) < val)
+ val = ULONG_MAX;
+ else
+ val <<= 2;
+
+ if (val == 64) /* Skip zero entry. */
+ val <<= 2;
+ /* For zero check. */
+ if (!val)
+ val = 1;
+ max++;
+ MT_BUG_ON(mt, max > 25);
+ }
+ mtree_erase_index(mt, ULONG_MAX);
+
+ mas_reset(&mas);
+ index = 17;
+ entry = mt_find(mt, &index, 512);
+ MT_BUG_ON(mt, xa_mk_value(256) != entry);
+
+ mas_reset(&mas);
+ index = 17;
+ entry = mt_find(mt, &index, 20);
+ MT_BUG_ON(mt, entry != NULL);
+
+
+ /* Range check.. */
+ /* Insert ULONG_MAX */
+ MT_BUG_ON(mt, mtree_insert_index(mt, ULONG_MAX, GFP_KERNEL));
+
+ val = 0;
+ mas_set(&mas, 0);
+ mas_lock(&mas);
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ if (val == 64)
+ MT_BUG_ON(mt, entry != XA_ZERO_ENTRY);
+ else if (val == top)
+ MT_BUG_ON(mt, entry != xa_mk_value(LONG_MAX));
+ else
+ MT_BUG_ON(mt, xa_mk_value(val) != entry);
+
+ /* Workaround for 32bit */
+ if ((val << 2) < val)
+ val = ULONG_MAX;
+ else
+ val <<= 2;
+
+ /* For zero check. */
+ if (!val)
+ val = 1;
+ mas_pause(&mas);
+ mas_unlock(&mas);
+ mas_lock(&mas);
+ }
+ mas_unlock(&mas);
+
+ mas_set(&mas, 1048576);
+ mas_lock(&mas);
+ entry = mas_find(&mas, 1048576);
+ mas_unlock(&mas);
+ MT_BUG_ON(mas.tree, entry == NULL);
+
+ /*
+ * Find last value.
+ * 1. get the expected value, leveraging the existence of an end entry
+ * 2. delete end entry
+ * 3. find the last value but searching for ULONG_MAX and then using
+ * prev
+ */
+ /* First, get the expected result. */
+ mas_lock(&mas);
+ mas_reset(&mas);
+ mas.index = ULONG_MAX; /* start at max.. */
+ entry = mas_find(&mas, ULONG_MAX);
+ entry = mas_prev(&mas, 0);
+ index = mas.index;
+ last = mas.last;
+
+ /* Erase the last entry. */
+ mas_reset(&mas);
+ mas.index = ULONG_MAX;
+ mas.last = ULONG_MAX;
+ mas_erase(&mas);
+
+ /* Get the previous value from MAS_START */
+ mas_reset(&mas);
+ entry2 = mas_prev(&mas, 0);
+
+ /* Check results. */
+ MT_BUG_ON(mt, entry != entry2);
+ MT_BUG_ON(mt, index != mas.index);
+ MT_BUG_ON(mt, last != mas.last);
+
+
+ mas.node = MAS_NONE;
+ mas.index = ULONG_MAX;
+ mas.last = ULONG_MAX;
+ entry2 = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, entry != entry2);
+
+ mas_set(&mas, 0);
+ MT_BUG_ON(mt, mas_prev(&mas, 0) != NULL);
+
+ mas_unlock(&mas);
+ mtree_destroy(mt);
+}
+
+static noinline void check_find_2(struct maple_tree *mt)
+{
+ unsigned long i, j;
+ void *entry;
+
+ MA_STATE(mas, mt, 0, 0);
+ rcu_read_lock();
+ mas_for_each(&mas, entry, ULONG_MAX)
+ MT_BUG_ON(mt, true);
+ rcu_read_unlock();
+
+ for (i = 0; i < 256; i++) {
+ mtree_insert_index(mt, i, GFP_KERNEL);
+ j = 0;
+ mas_set(&mas, 0);
+ rcu_read_lock();
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ MT_BUG_ON(mt, entry != xa_mk_value(j));
+ j++;
+ }
+ rcu_read_unlock();
+ MT_BUG_ON(mt, j != i + 1);
+ }
+
+ for (i = 0; i < 256; i++) {
+ mtree_erase_index(mt, i);
+ j = i + 1;
+ mas_set(&mas, 0);
+ rcu_read_lock();
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ if (xa_is_zero(entry))
+ continue;
+
+ MT_BUG_ON(mt, entry != xa_mk_value(j));
+ j++;
+ }
+ rcu_read_unlock();
+ MT_BUG_ON(mt, j != 256);
+ }
+
+ /*MT_BUG_ON(mt, !mtree_empty(mt)); */
+}
+
+
+#if defined(CONFIG_64BIT)
+static noinline void check_alloc_rev_range(struct maple_tree *mt)
+{
+ /*
+ * Generated by:
+ * cat /proc/self/maps | awk '{print $1}'|
+ * awk -F "-" '{printf "0x%s, 0x%s, ", $1, $2}'
+ */
+
+ unsigned long range[] = {
+ /* Inclusive , Exclusive. */
+ 0x565234af2000, 0x565234af4000,
+ 0x565234af4000, 0x565234af9000,
+ 0x565234af9000, 0x565234afb000,
+ 0x565234afc000, 0x565234afd000,
+ 0x565234afd000, 0x565234afe000,
+ 0x565235def000, 0x565235e10000,
+ 0x7f36d4bfd000, 0x7f36d4ee2000,
+ 0x7f36d4ee2000, 0x7f36d4f04000,
+ 0x7f36d4f04000, 0x7f36d504c000,
+ 0x7f36d504c000, 0x7f36d5098000,
+ 0x7f36d5098000, 0x7f36d5099000,
+ 0x7f36d5099000, 0x7f36d509d000,
+ 0x7f36d509d000, 0x7f36d509f000,
+ 0x7f36d509f000, 0x7f36d50a5000,
+ 0x7f36d50b9000, 0x7f36d50db000,
+ 0x7f36d50db000, 0x7f36d50dc000,
+ 0x7f36d50dc000, 0x7f36d50fa000,
+ 0x7f36d50fa000, 0x7f36d5102000,
+ 0x7f36d5102000, 0x7f36d5103000,
+ 0x7f36d5103000, 0x7f36d5104000,
+ 0x7f36d5104000, 0x7f36d5105000,
+ 0x7fff5876b000, 0x7fff5878d000,
+ 0x7fff5878e000, 0x7fff58791000,
+ 0x7fff58791000, 0x7fff58793000,
+ };
+
+ unsigned long holes[] = {
+ /*
+ * Note: start of hole is INCLUSIVE
+ * end of hole is EXCLUSIVE
+ * (opposite of the above table.)
+ * Start of hole, end of hole, size of hole (+1)
+ */
+ 0x565234afb000, 0x565234afc000, 0x1000,
+ 0x565234afe000, 0x565235def000, 0x12F1000,
+ 0x565235e10000, 0x7f36d4bfd000, 0x28E49EDED000,
+ };
+
+ /*
+ * req_range consists of 4 values.
+ * 1. min index
+ * 2. max index
+ * 3. size
+ * 4. number that should be returned.
+ * 5. return value
+ */
+ unsigned long req_range[] = {
+ 0x565234af9000, /* Min */
+ 0x7fff58791000, /* Max */
+ 0x1000, /* Size */
+ 0x7fff5878d << 12, /* First rev hole of size 0x1000 */
+ 0, /* Return value success. */
+
+ 0x0, /* Min */
+ 0x565234AF1 << 12, /* Max */
+ 0x3000, /* Size */
+ 0x565234AEE << 12, /* max - 3. */
+ 0, /* Return value success. */
+
+ 0x0, /* Min */
+ -1, /* Max */
+ 0x1000, /* Size */
+ 562949953421311 << 12,/* First rev hole of size 0x1000 */
+ 0, /* Return value success. */
+
+ 0x0, /* Min */
+ 0x7F36D510A << 12, /* Max */
+ 0x4000, /* Size */
+ 0x7F36D5106 << 12, /* First rev hole of size 0x4000 */
+ 0, /* Return value success. */
+
+ /* Ascend test. */
+ 0x0,
+ 34148798629 << 12,
+ 19 << 12,
+ 34148797418 << 12,
+ 0x0,
+
+ /* Too big test. */
+ 0x0,
+ 18446744073709551615UL,
+ 562915594369134UL << 12,
+ 0x0,
+ -EBUSY,
+
+ };
+
+ int i, range_count = ARRAY_SIZE(range);
+ int req_range_count = ARRAY_SIZE(req_range);
+ unsigned long min = 0;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ mtree_store_range(mt, MTREE_ALLOC_MAX, ULONG_MAX, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+#define DEBUG_REV_RANGE 0
+ for (i = 0; i < range_count; i += 2) {
+ /* Inclusive, Inclusive (with the -1) */
+
+#if DEBUG_REV_RANGE
+ pr_debug("\t%s: Insert %lu-%lu\n", __func__, range[i] >> 12,
+ (range[i + 1] >> 12) - 1);
+#endif
+ check_insert_range(mt, range[i] >> 12, (range[i + 1] >> 12) - 1,
+ xa_mk_value(range[i] >> 12), 0);
+ mt_validate(mt);
+ }
+
+
+ mas_lock(&mas);
+ for (i = 0; i < ARRAY_SIZE(holes); i += 3) {
+#if DEBUG_REV_RANGE
+ pr_debug("Search from %lu-%lu for gap %lu should be at %lu\n",
+ min, holes[i+1]>>12, holes[i+2]>>12,
+ holes[i] >> 12);
+#endif
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, min,
+ holes[i+1] >> 12,
+ holes[i+2] >> 12));
+#if DEBUG_REV_RANGE
+ pr_debug("Found %lu %lu\n", mas.index, mas.last);
+ pr_debug("gap %lu %lu\n", (holes[i] >> 12),
+ (holes[i+1] >> 12));
+#endif
+ MT_BUG_ON(mt, mas.last + 1 != (holes[i+1] >> 12));
+ MT_BUG_ON(mt, mas.index != (holes[i+1] >> 12) - (holes[i+2] >> 12));
+ min = holes[i+1] >> 12;
+ mas_reset(&mas);
+ }
+
+ mas_unlock(&mas);
+ for (i = 0; i < req_range_count; i += 5) {
+#if DEBUG_REV_RANGE
+ pr_debug("\tReverse request between %lu-%lu size %lu, should get %lu\n",
+ req_range[i] >> 12,
+ (req_range[i + 1] >> 12) - 1,
+ req_range[i+2] >> 12,
+ req_range[i+3] >> 12);
+#endif
+ check_mtree_alloc_rrange(mt,
+ req_range[i] >> 12, /* start */
+ req_range[i+1] >> 12, /* end */
+ req_range[i+2] >> 12, /* size */
+ req_range[i+3] >> 12, /* expected address */
+ req_range[i+4], /* expected return */
+ xa_mk_value(req_range[i] >> 12)); /* pointer */
+ mt_validate(mt);
+ }
+
+ mt_set_non_kernel(1);
+ mtree_erase(mt, 34148798727); /* create a deleted range. */
+ check_mtree_alloc_rrange(mt, 0, 34359052173, 210253414,
+ 34148798725, 0, mt);
+
+ mtree_destroy(mt);
+}
+
+static noinline void check_alloc_range(struct maple_tree *mt)
+{
+ /*
+ * Generated by:
+ * cat /proc/self/maps|awk '{print $1}'|
+ * awk -F "-" '{printf "0x%s, 0x%s, ", $1, $2}'
+ */
+
+ unsigned long range[] = {
+ /* Inclusive , Exclusive. */
+ 0x565234af2000, 0x565234af4000,
+ 0x565234af4000, 0x565234af9000,
+ 0x565234af9000, 0x565234afb000,
+ 0x565234afc000, 0x565234afd000,
+ 0x565234afd000, 0x565234afe000,
+ 0x565235def000, 0x565235e10000,
+ 0x7f36d4bfd000, 0x7f36d4ee2000,
+ 0x7f36d4ee2000, 0x7f36d4f04000,
+ 0x7f36d4f04000, 0x7f36d504c000,
+ 0x7f36d504c000, 0x7f36d5098000,
+ 0x7f36d5098000, 0x7f36d5099000,
+ 0x7f36d5099000, 0x7f36d509d000,
+ 0x7f36d509d000, 0x7f36d509f000,
+ 0x7f36d509f000, 0x7f36d50a5000,
+ 0x7f36d50b9000, 0x7f36d50db000,
+ 0x7f36d50db000, 0x7f36d50dc000,
+ 0x7f36d50dc000, 0x7f36d50fa000,
+ 0x7f36d50fa000, 0x7f36d5102000,
+ 0x7f36d5102000, 0x7f36d5103000,
+ 0x7f36d5103000, 0x7f36d5104000,
+ 0x7f36d5104000, 0x7f36d5105000,
+ 0x7fff5876b000, 0x7fff5878d000,
+ 0x7fff5878e000, 0x7fff58791000,
+ 0x7fff58791000, 0x7fff58793000,
+ };
+ unsigned long holes[] = {
+ /* Start of hole, end of hole, size of hole (+1) */
+ 0x565234afb000, 0x565234afc000, 0x1000,
+ 0x565234afe000, 0x565235def000, 0x12F1000,
+ 0x565235e10000, 0x7f36d4bfd000, 0x28E49EDED000,
+ };
+
+ /*
+ * req_range consists of 4 values.
+ * 1. min index
+ * 2. max index
+ * 3. size
+ * 4. number that should be returned.
+ * 5. return value
+ */
+ unsigned long req_range[] = {
+ 0x565234af9000, /* Min */
+ 0x7fff58791000, /* Max */
+ 0x1000, /* Size */
+ 0x565234afb000, /* First hole in our data of size 1000. */
+ 0, /* Return value success. */
+
+ 0x0, /* Min */
+ 0x7fff58791000, /* Max */
+ 0x1F00, /* Size */
+ 0x0, /* First hole in our data of size 2000. */
+ 0, /* Return value success. */
+
+ /* Test ascend. */
+ 34148797436 << 12, /* Min */
+ 0x7fff587AF000, /* Max */
+ 0x3000, /* Size */
+ 34148798629 << 12, /* Expected location */
+ 0, /* Return value success. */
+
+ /* Test failing. */
+ 34148798623 << 12, /* Min */
+ 34148798683 << 12, /* Max */
+ 0x15000, /* Size */
+ 0, /* Expected location */
+ -EBUSY, /* Return value failed. */
+
+ /* Test filling entire gap. */
+ 34148798623 << 12, /* Min */
+ 0x7fff587AF000, /* Max */
+ 0x10000, /* Size */
+ 34148798632 << 12, /* Expected location */
+ 0, /* Return value success. */
+
+ /* Test walking off the end of root. */
+ 0, /* Min */
+ -1, /* Max */
+ -1, /* Size */
+ 0, /* Expected location */
+ -EBUSY, /* Return value failure. */
+
+ /* Test looking for too large a hole across entire range. */
+ 0, /* Min */
+ -1, /* Max */
+ 4503599618982063UL << 12, /* Size */
+ 34359052178 << 12, /* Expected location */
+ -EBUSY, /* Return failure. */
+ };
+ int i, range_count = ARRAY_SIZE(range);
+ int req_range_count = ARRAY_SIZE(req_range);
+ unsigned long min = 0x565234af2000;
+ MA_STATE(mas, mt, 0, 0);
+
+ mtree_store_range(mt, MTREE_ALLOC_MAX, ULONG_MAX, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ for (i = 0; i < range_count; i += 2) {
+#define DEBUG_ALLOC_RANGE 0
+#if DEBUG_ALLOC_RANGE
+ pr_debug("\tInsert %lu-%lu\n", range[i] >> 12,
+ (range[i + 1] >> 12) - 1);
+ mt_dump(mt);
+#endif
+ check_insert_range(mt, range[i] >> 12, (range[i + 1] >> 12) - 1,
+ xa_mk_value(range[i] >> 12), 0);
+ mt_validate(mt);
+ }
+
+
+
+ mas_lock(&mas);
+ for (i = 0; i < ARRAY_SIZE(holes); i += 3) {
+
+#if DEBUG_ALLOC_RANGE
+ pr_debug("\tGet empty %lu-%lu size %lu (%lx-%lx)\n", min >> 12,
+ holes[i+1] >> 12, holes[i+2] >> 12,
+ min, holes[i+1]);
+#endif
+ MT_BUG_ON(mt, mas_empty_area(&mas, min >> 12,
+ holes[i+1] >> 12,
+ holes[i+2] >> 12));
+ MT_BUG_ON(mt, mas.index != holes[i] >> 12);
+ min = holes[i+1];
+ mas_reset(&mas);
+ }
+ mas_unlock(&mas);
+ for (i = 0; i < req_range_count; i += 5) {
+#if DEBUG_ALLOC_RANGE
+ pr_debug("\tTest %d: %lu-%lu size %lu expected %lu (%lu-%lu)\n",
+ i/5, req_range[i] >> 12, req_range[i + 1] >> 12,
+ req_range[i + 2] >> 12, req_range[i + 3] >> 12,
+ req_range[i], req_range[i+1]);
+#endif
+ check_mtree_alloc_range(mt,
+ req_range[i] >> 12, /* start */
+ req_range[i+1] >> 12, /* end */
+ req_range[i+2] >> 12, /* size */
+ req_range[i+3] >> 12, /* expected address */
+ req_range[i+4], /* expected return */
+ xa_mk_value(req_range[i] >> 12)); /* pointer */
+ mt_validate(mt);
+#if DEBUG_ALLOC_RANGE
+ mt_dump(mt);
+#endif
+ }
+
+ mtree_destroy(mt);
+}
+#endif
+
+static noinline void check_ranges(struct maple_tree *mt)
+{
+ int i, val, val2;
+ unsigned long r[] = {
+ 10, 15,
+ 20, 25,
+ 17, 22, /* Overlaps previous range. */
+ 9, 1000, /* Huge. */
+ 100, 200,
+ 45, 168,
+ 118, 128,
+ };
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+ check_insert_range(mt, r[0], r[1], xa_mk_value(r[0]), 0);
+ check_insert_range(mt, r[2], r[3], xa_mk_value(r[2]), 0);
+ check_insert_range(mt, r[4], r[5], xa_mk_value(r[4]), -EEXIST);
+ MT_BUG_ON(mt, !mt_height(mt));
+ /* Store */
+ check_store_range(mt, r[4], r[5], xa_mk_value(r[4]), 0);
+ check_store_range(mt, r[6], r[7], xa_mk_value(r[6]), 0);
+ check_store_range(mt, r[8], r[9], xa_mk_value(r[8]), 0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+ MT_BUG_ON(mt, mt_height(mt));
+
+ check_seq(mt, 50, false);
+ mt_set_non_kernel(4);
+ check_store_range(mt, 5, 47, xa_mk_value(47), 0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ /* Create tree of 1-100 */
+ check_seq(mt, 100, false);
+ /* Store 45-168 */
+ mt_set_non_kernel(10);
+ check_store_range(mt, r[10], r[11], xa_mk_value(r[10]), 0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ /* Create tree of 1-200 */
+ check_seq(mt, 200, false);
+ /* Store 45-168 */
+ check_store_range(mt, r[10], r[11], xa_mk_value(r[10]), 0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ check_seq(mt, 30, false);
+ check_store_range(mt, 6, 18, xa_mk_value(6), 0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ /* Overwrite across multiple levels. */
+ /* Create tree of 1-400 */
+ check_seq(mt, 400, false);
+ mt_set_non_kernel(50);
+ /* Store 118-128 */
+ check_store_range(mt, r[12], r[13], xa_mk_value(r[12]), 0);
+ mt_set_non_kernel(50);
+ mtree_test_erase(mt, 140);
+ mtree_test_erase(mt, 141);
+ mtree_test_erase(mt, 142);
+ mtree_test_erase(mt, 143);
+ mtree_test_erase(mt, 130);
+ mtree_test_erase(mt, 131);
+ mtree_test_erase(mt, 132);
+ mtree_test_erase(mt, 133);
+ mtree_test_erase(mt, 134);
+ mtree_test_erase(mt, 135);
+ check_load(mt, r[12], xa_mk_value(r[12]));
+ check_load(mt, r[13], xa_mk_value(r[12]));
+ check_load(mt, r[13] - 1, xa_mk_value(r[12]));
+ check_load(mt, r[13] + 1, xa_mk_value(r[13] + 1));
+ check_load(mt, 135, NULL);
+ check_load(mt, 140, NULL);
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+
+
+ /* Overwrite multiple levels at the end of the tree (slot 7) */
+ mt_set_non_kernel(50);
+ check_seq(mt, 400, false);
+ check_store_range(mt, 353, 361, xa_mk_value(353), 0);
+ check_store_range(mt, 347, 352, xa_mk_value(347), 0);
+
+ check_load(mt, 346, xa_mk_value(346));
+ for (i = 347; i <= 352; i++)
+ check_load(mt, i, xa_mk_value(347));
+ for (i = 353; i <= 361; i++)
+ check_load(mt, i, xa_mk_value(353));
+ check_load(mt, 362, xa_mk_value(362));
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(50);
+ check_seq(mt, 400, false);
+ check_store_range(mt, 352, 364, NULL, 0);
+ check_store_range(mt, 351, 363, xa_mk_value(352), 0);
+ check_load(mt, 350, xa_mk_value(350));
+ check_load(mt, 351, xa_mk_value(352));
+ for (i = 352; i <= 363; i++)
+ check_load(mt, i, xa_mk_value(352));
+ check_load(mt, 364, NULL);
+ check_load(mt, 365, xa_mk_value(365));
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(5);
+ check_seq(mt, 400, false);
+ check_store_range(mt, 352, 364, NULL, 0);
+ check_store_range(mt, 351, 364, xa_mk_value(352), 0);
+ check_load(mt, 350, xa_mk_value(350));
+ check_load(mt, 351, xa_mk_value(352));
+ for (i = 352; i <= 364; i++)
+ check_load(mt, i, xa_mk_value(352));
+ check_load(mt, 365, xa_mk_value(365));
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+
+ mt_set_non_kernel(50);
+ check_seq(mt, 400, false);
+ check_store_range(mt, 362, 367, xa_mk_value(362), 0);
+ check_store_range(mt, 353, 361, xa_mk_value(353), 0);
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+ /*
+ * Interesting cases:
+ * 1. Overwrite the end of a node and end in the first entry of the next
+ * node.
+ * 2. Split a single range
+ * 3. Overwrite the start of a range
+ * 4. Overwrite the end of a range
+ * 5. Overwrite the entire range
+ * 6. Overwrite a range that causes multiple parent nodes to be
+ * combined
+ * 7. Overwrite a range that causes multiple parent nodes and part of
+ * root to be combined
+ * 8. Overwrite the whole tree
+ * 9. Try to overwrite the zero entry of an alloc tree.
+ * 10. Write a range larger than a nodes current pivot
+ */
+
+ mt_set_non_kernel(50);
+ for (i = 0; i <= 500; i++) {
+ val = i*5;
+ val2 = (i+1)*5;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ }
+ check_store_range(mt, 2400, 2400, xa_mk_value(2400), 0);
+ check_store_range(mt, 2411, 2411, xa_mk_value(2411), 0);
+ check_store_range(mt, 2412, 2412, xa_mk_value(2412), 0);
+ check_store_range(mt, 2396, 2400, xa_mk_value(4052020), 0);
+ check_store_range(mt, 2402, 2402, xa_mk_value(2402), 0);
+ mtree_destroy(mt);
+ mt_set_non_kernel(0);
+
+ mt_set_non_kernel(50);
+ for (i = 0; i <= 500; i++) {
+ val = i*5;
+ val2 = (i+1)*5;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ }
+ check_store_range(mt, 2422, 2422, xa_mk_value(2422), 0);
+ check_store_range(mt, 2424, 2424, xa_mk_value(2424), 0);
+ check_store_range(mt, 2425, 2425, xa_mk_value(2), 0);
+ check_store_range(mt, 2460, 2470, NULL, 0);
+ check_store_range(mt, 2435, 2460, xa_mk_value(2435), 0);
+ check_store_range(mt, 2461, 2470, xa_mk_value(2461), 0);
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ /* Test rebalance gaps */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mt_set_non_kernel(50);
+ for (i = 0; i <= 50; i++) {
+ val = i*10;
+ val2 = (i+1)*10;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ }
+ check_store_range(mt, 161, 161, xa_mk_value(161), 0);
+ check_store_range(mt, 162, 162, xa_mk_value(162), 0);
+ check_store_range(mt, 163, 163, xa_mk_value(163), 0);
+ check_store_range(mt, 240, 249, NULL, 0);
+ mtree_erase(mt, 200);
+ mtree_erase(mt, 210);
+ mtree_erase(mt, 220);
+ mtree_erase(mt, 230);
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= 500; i++) {
+ val = i*10;
+ val2 = (i+1)*10;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ }
+ check_store_range(mt, 4600, 4959, xa_mk_value(1), 0);
+ mt_validate(mt);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= 500; i++) {
+ val = i*10;
+ val2 = (i+1)*10;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ }
+ check_store_range(mt, 4811, 4811, xa_mk_value(4811), 0);
+ check_store_range(mt, 4812, 4812, xa_mk_value(4812), 0);
+ check_store_range(mt, 4861, 4861, xa_mk_value(4861), 0);
+ check_store_range(mt, 4862, 4862, xa_mk_value(4862), 0);
+ check_store_range(mt, 4842, 4849, NULL, 0);
+ mt_validate(mt);
+ MT_BUG_ON(mt, !mt_height(mt));
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= 1300; i++) {
+ val = i*10;
+ val2 = (i+1)*10;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ MT_BUG_ON(mt, mt_height(mt) >= 4);
+ }
+ /* Cause a 3 child split all the way up the tree. */
+ for (i = 5; i < 215; i += 10)
+ check_store_range(mt, 11450 + i, 11450 + i + 1, NULL, 0);
+ for (i = 5; i < 65; i += 10)
+ check_store_range(mt, 11770 + i, 11770 + i + 1, NULL, 0);
+
+ MT_BUG_ON(mt, mt_height(mt) >= 4);
+ for (i = 5; i < 45; i += 10)
+ check_store_range(mt, 11700 + i, 11700 + i + 1, NULL, 0);
+ if (!MAPLE_32BIT)
+ MT_BUG_ON(mt, mt_height(mt) < 4);
+ mtree_destroy(mt);
+
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= 1200; i++) {
+ val = i*10;
+ val2 = (i+1)*10;
+ check_store_range(mt, val, val2, xa_mk_value(val), 0);
+ MT_BUG_ON(mt, mt_height(mt) >= 4);
+ }
+ /* Fill parents and leaves before split. */
+ for (i = 5; i < 455; i += 10)
+ check_store_range(mt, 7800 + i, 7800 + i + 1, NULL, 0);
+
+ for (i = 1; i < 16; i++)
+ check_store_range(mt, 8185 + i, 8185 + i + 1,
+ xa_mk_value(8185+i), 0);
+ MT_BUG_ON(mt, mt_height(mt) >= 4);
+ /* triple split across multiple levels. */
+ check_store_range(mt, 8184, 8184, xa_mk_value(8184), 0);
+ if (!MAPLE_32BIT)
+ MT_BUG_ON(mt, mt_height(mt) != 4);
+}
+
+static noinline void check_next_entry(struct maple_tree *mt)
+{
+ void *entry = NULL;
+ unsigned long limit = 30, i = 0;
+ MA_STATE(mas, mt, i, i);
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+
+ check_seq(mt, limit, false);
+ rcu_read_lock();
+
+ /* Check the first one and get ma_state in the correct state. */
+ MT_BUG_ON(mt, mas_walk(&mas) != xa_mk_value(i++));
+ for ( ; i <= limit + 1; i++) {
+ entry = mas_next(&mas, limit);
+ if (i > limit)
+ MT_BUG_ON(mt, entry != NULL);
+ else
+ MT_BUG_ON(mt, xa_mk_value(i) != entry);
+ }
+ rcu_read_unlock();
+ mtree_destroy(mt);
+}
+
+static noinline void check_prev_entry(struct maple_tree *mt)
+{
+ unsigned long index = 16;
+ void *value;
+ int i;
+
+ MA_STATE(mas, mt, index, index);
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+ check_seq(mt, 30, false);
+
+ rcu_read_lock();
+ value = mas_find(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, value != xa_mk_value(index));
+ value = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, value != xa_mk_value(index - 1));
+ rcu_read_unlock();
+ mtree_destroy(mt);
+
+ /* Check limits on prev */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mas_lock(&mas);
+ for (i = 0; i <= index; i++) {
+ mas_set_range(&mas, i*10, i*10+5);
+ mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL);
+ }
+
+ mas_set(&mas, 20);
+ value = mas_walk(&mas);
+ MT_BUG_ON(mt, value != xa_mk_value(2));
+
+ value = mas_prev(&mas, 19);
+ MT_BUG_ON(mt, value != NULL);
+
+ mas_set(&mas, 80);
+ value = mas_walk(&mas);
+ MT_BUG_ON(mt, value != xa_mk_value(8));
+
+ value = mas_prev(&mas, 76);
+ MT_BUG_ON(mt, value != NULL);
+
+ mas_unlock(&mas);
+}
+
+static noinline void check_root_expand(struct maple_tree *mt)
+{
+ MA_STATE(mas, mt, 0, 0);
+ void *ptr;
+
+
+ mas_lock(&mas);
+ mas_set(&mas, 3);
+ ptr = mas_walk(&mas);
+ MT_BUG_ON(mt, ptr != NULL);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != ULONG_MAX);
+
+ ptr = &check_prev_entry;
+ mas_set(&mas, 1);
+ mas_store_gfp(&mas, ptr, GFP_KERNEL);
+
+ mas_set(&mas, 0);
+ ptr = mas_walk(&mas);
+ MT_BUG_ON(mt, ptr != NULL);
+
+ mas_set(&mas, 1);
+ ptr = mas_walk(&mas);
+ MT_BUG_ON(mt, ptr != &check_prev_entry);
+
+ mas_set(&mas, 2);
+ ptr = mas_walk(&mas);
+ MT_BUG_ON(mt, ptr != NULL);
+ mas_unlock(&mas);
+ mtree_destroy(mt);
+
+
+ mt_init_flags(mt, 0);
+ mas_lock(&mas);
+
+ mas_set(&mas, 0);
+ ptr = &check_prev_entry;
+ mas_store_gfp(&mas, ptr, GFP_KERNEL);
+
+ mas_set(&mas, 5);
+ ptr = mas_walk(&mas);
+ MT_BUG_ON(mt, ptr != NULL);
+ MT_BUG_ON(mt, mas.index != 1);
+ MT_BUG_ON(mt, mas.last != ULONG_MAX);
+
+ mas_set_range(&mas, 0, 100);
+ ptr = mas_walk(&mas);
+ MT_BUG_ON(mt, ptr != &check_prev_entry);
+ MT_BUG_ON(mt, mas.last != 0);
+ mas_unlock(&mas);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, 0);
+ mas_lock(&mas);
+
+ mas_set(&mas, 0);
+ ptr = (void *)((unsigned long) check_prev_entry | 1UL);
+ mas_store_gfp(&mas, ptr, GFP_KERNEL);
+ ptr = mas_next(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, ptr != NULL);
+ MT_BUG_ON(mt, (mas.index != 1) && (mas.last != ULONG_MAX));
+
+ mas_set(&mas, 1);
+ ptr = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, (mas.index != 0) && (mas.last != 0));
+ MT_BUG_ON(mt, ptr != (void *)((unsigned long) check_prev_entry | 1UL));
+
+ mas_unlock(&mas);
+
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, 0);
+ mas_lock(&mas);
+ mas_set(&mas, 0);
+ ptr = (void *)((unsigned long) check_prev_entry | 2UL);
+ mas_store_gfp(&mas, ptr, GFP_KERNEL);
+ ptr = mas_next(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, ptr != NULL);
+ MT_BUG_ON(mt, (mas.index != 1) && (mas.last != ULONG_MAX));
+
+ mas_set(&mas, 1);
+ ptr = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, (mas.index != 0) && (mas.last != 0));
+ MT_BUG_ON(mt, ptr != (void *)((unsigned long) check_prev_entry | 2UL));
+
+
+ mas_unlock(&mas);
+}
+
+static noinline void check_gap_combining(struct maple_tree *mt)
+{
+ struct maple_enode *mn1, *mn2;
+ void *entry;
+ unsigned long singletons = 100;
+ unsigned long *seq100;
+ unsigned long seq100_64[] = {
+ /* 0-5 */
+ 74, 75, 76,
+ 50, 100, 2,
+
+ /* 6-12 */
+ 44, 45, 46, 43,
+ 20, 50, 3,
+
+ /* 13-20*/
+ 80, 81, 82,
+ 76, 2, 79, 85, 4,
+ };
+
+ unsigned long seq100_32[] = {
+ /* 0-5 */
+ 61, 62, 63,
+ 50, 100, 2,
+
+ /* 6-12 */
+ 31, 32, 33, 30,
+ 20, 50, 3,
+
+ /* 13-20*/
+ 80, 81, 82,
+ 76, 2, 79, 85, 4,
+ };
+
+ unsigned long seq2000[] = {
+ 1152, 1151,
+ 1100, 1200, 2,
+ };
+ unsigned long seq400[] = {
+ 286, 318,
+ 256, 260, 266, 270, 275, 280, 290, 398,
+ 286, 310,
+ };
+
+ unsigned long index;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ if (MAPLE_32BIT)
+ seq100 = seq100_32;
+ else
+ seq100 = seq100_64;
+
+ index = seq100[0];
+ mas_set(&mas, index);
+ MT_BUG_ON(mt, !mtree_empty(mt));
+ check_seq(mt, singletons, false); /* create 100 singletons. */
+
+ mt_set_non_kernel(1);
+ mtree_test_erase(mt, seq100[2]);
+ check_load(mt, seq100[2], NULL);
+ mtree_test_erase(mt, seq100[1]);
+ check_load(mt, seq100[1], NULL);
+
+ rcu_read_lock();
+ entry = mas_find(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, entry != xa_mk_value(index));
+ mn1 = mas.node;
+ mas_next(&mas, ULONG_MAX);
+ entry = mas_next(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, entry != xa_mk_value(index + 4));
+ mn2 = mas.node;
+ MT_BUG_ON(mt, mn1 == mn2); /* test the test. */
+
+ /*
+ * At this point, there is a gap of 2 at index + 1 between seq100[3] and
+ * seq100[4]. Search for the gap.
+ */
+ mt_set_non_kernel(1);
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[3], seq100[4],
+ seq100[5]));
+ MT_BUG_ON(mt, mas.index != index + 1);
+ rcu_read_unlock();
+
+ mtree_test_erase(mt, seq100[6]);
+ check_load(mt, seq100[6], NULL);
+ mtree_test_erase(mt, seq100[7]);
+ check_load(mt, seq100[7], NULL);
+ mtree_test_erase(mt, seq100[8]);
+ index = seq100[9];
+
+ rcu_read_lock();
+ mas.index = index;
+ mas.last = index;
+ mas_reset(&mas);
+ entry = mas_find(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, entry != xa_mk_value(index));
+ mn1 = mas.node;
+ entry = mas_next(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, entry != xa_mk_value(index + 4));
+ mas_next(&mas, ULONG_MAX); /* go to the next entry. */
+ mn2 = mas.node;
+ MT_BUG_ON(mt, mn1 == mn2); /* test the next entry is in the next node. */
+
+ /*
+ * At this point, there is a gap of 3 at seq100[6]. Find it by
+ * searching 20 - 50 for size 3.
+ */
+ mas_reset(&mas);
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[10], seq100[11],
+ seq100[12]));
+ MT_BUG_ON(mt, mas.index != seq100[6]);
+ rcu_read_unlock();
+
+ mt_set_non_kernel(1);
+ mtree_store(mt, seq100[13], NULL, GFP_KERNEL);
+ check_load(mt, seq100[13], NULL);
+ check_load(mt, seq100[14], xa_mk_value(seq100[14]));
+ mtree_store(mt, seq100[14], NULL, GFP_KERNEL);
+ check_load(mt, seq100[13], NULL);
+ check_load(mt, seq100[14], NULL);
+
+ mas_reset(&mas);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[16], seq100[15],
+ seq100[17]));
+ MT_BUG_ON(mt, mas.index != seq100[13]);
+ mt_validate(mt);
+ rcu_read_unlock();
+
+ /*
+ * *DEPRECATED: no retries anymore* Test retry entry in the start of a
+ * gap.
+ */
+ mt_set_non_kernel(2);
+ mtree_test_store_range(mt, seq100[18], seq100[14], NULL);
+ mtree_test_erase(mt, seq100[15]);
+ mas_reset(&mas);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[16], seq100[19],
+ seq100[20]));
+ rcu_read_unlock();
+ MT_BUG_ON(mt, mas.index != seq100[18]);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* seq 2000 tests are for multi-level tree gaps */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_seq(mt, 2000, false);
+ mt_set_non_kernel(1);
+ mtree_test_erase(mt, seq2000[0]);
+ mtree_test_erase(mt, seq2000[1]);
+
+ mt_set_non_kernel(2);
+ mas_reset(&mas);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq2000[2], seq2000[3],
+ seq2000[4]));
+ MT_BUG_ON(mt, mas.index != seq2000[1]);
+ rcu_read_unlock();
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* seq 400 tests rebalancing over two levels. */
+ mt_set_non_kernel(99);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_seq(mt, 400, false);
+ mtree_test_store_range(mt, seq400[0], seq400[1], NULL);
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_seq(mt, 400, false);
+ mt_set_non_kernel(50);
+ mtree_test_store_range(mt, seq400[2], seq400[9],
+ xa_mk_value(seq400[2]));
+ mtree_test_store_range(mt, seq400[3], seq400[9],
+ xa_mk_value(seq400[3]));
+ mtree_test_store_range(mt, seq400[4], seq400[9],
+ xa_mk_value(seq400[4]));
+ mtree_test_store_range(mt, seq400[5], seq400[9],
+ xa_mk_value(seq400[5]));
+ mtree_test_store_range(mt, seq400[0], seq400[9],
+ xa_mk_value(seq400[0]));
+ mtree_test_store_range(mt, seq400[6], seq400[9],
+ xa_mk_value(seq400[6]));
+ mtree_test_store_range(mt, seq400[7], seq400[9],
+ xa_mk_value(seq400[7]));
+ mtree_test_store_range(mt, seq400[8], seq400[9],
+ xa_mk_value(seq400[8]));
+ mtree_test_store_range(mt, seq400[10], seq400[11],
+ xa_mk_value(seq400[10]));
+ mt_validate(mt);
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+}
+static noinline void check_node_overwrite(struct maple_tree *mt)
+{
+ int i, max = 4000;
+
+ for (i = 0; i < max; i++)
+ mtree_test_store_range(mt, i*100, i*100 + 50, xa_mk_value(i*100));
+
+ mtree_test_store_range(mt, 319951, 367950, NULL);
+ /*mt_dump(mt); */
+ mt_validate(mt);
+}
+
+#if defined(BENCH_SLOT_STORE)
+static noinline void bench_slot_store(struct maple_tree *mt)
+{
+ int i, brk = 105, max = 1040, brk_start = 100, count = 20000000;
+
+ for (i = 0; i < max; i += 10)
+ mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL);
+
+ for (i = 0; i < count; i++) {
+ mtree_store_range(mt, brk, brk, NULL, GFP_KERNEL);
+ mtree_store_range(mt, brk_start, brk, xa_mk_value(brk),
+ GFP_KERNEL);
+ }
+}
+#endif
+
+#if defined(BENCH_NODE_STORE)
+static noinline void bench_node_store(struct maple_tree *mt)
+{
+ int i, overwrite = 76, max = 240, count = 20000000;
+
+ for (i = 0; i < max; i += 10)
+ mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL);
+
+ for (i = 0; i < count; i++) {
+ mtree_store_range(mt, overwrite, overwrite + 15,
+ xa_mk_value(overwrite), GFP_KERNEL);
+
+ overwrite += 5;
+ if (overwrite >= 135)
+ overwrite = 76;
+ }
+}
+#endif
+
+#if defined(BENCH_AWALK)
+static noinline void bench_awalk(struct maple_tree *mt)
+{
+ int i, max = 2500, count = 50000000;
+ MA_STATE(mas, mt, 1470, 1470);
+
+ for (i = 0; i < max; i += 10)
+ mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL);
+
+ mtree_store_range(mt, 1470, 1475, NULL, GFP_KERNEL);
+
+ for (i = 0; i < count; i++) {
+ mas_empty_area_rev(&mas, 0, 2000, 10);
+ mas_reset(&mas);
+ }
+}
+#endif
+#if defined(BENCH_WALK)
+static noinline void bench_walk(struct maple_tree *mt)
+{
+ int i, max = 2500, count = 550000000;
+ MA_STATE(mas, mt, 1470, 1470);
+
+ for (i = 0; i < max; i += 10)
+ mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL);
+
+ for (i = 0; i < count; i++) {
+ mas_walk(&mas);
+ mas_reset(&mas);
+ }
+
+}
+#endif
+
+#if defined(BENCH_MT_FOR_EACH)
+static noinline void bench_mt_for_each(struct maple_tree *mt)
+{
+ int i, count = 1000000;
+ unsigned long max = 2500, index = 0;
+ void *entry;
+
+ for (i = 0; i < max; i += 5)
+ mtree_store_range(mt, i, i + 4, xa_mk_value(i), GFP_KERNEL);
+
+ for (i = 0; i < count; i++) {
+ unsigned long j = 0;
+
+ mt_for_each(mt, entry, index, max) {
+ MT_BUG_ON(mt, entry != xa_mk_value(j));
+ j += 5;
+ }
+
+ index = 0;
+ }
+
+}
+#endif
+
+/* check_forking - simulate the kernel forking sequence with the tree. */
+static noinline void check_forking(struct maple_tree *mt)
+{
+
+ struct maple_tree newmt;
+ int i, nr_entries = 134;
+ void *val;
+ MA_STATE(mas, mt, 0, 0);
+ MA_STATE(newmas, mt, 0, 0);
+
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ mt_set_non_kernel(99999);
+ mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE);
+ newmas.tree = &newmt;
+ mas_reset(&newmas);
+ mas_reset(&mas);
+ mas_lock(&newmas);
+ mas.index = 0;
+ mas.last = 0;
+ if (mas_expected_entries(&newmas, nr_entries)) {
+ pr_err("OOM!");
+ BUG_ON(1);
+ }
+ rcu_read_lock();
+ mas_for_each(&mas, val, ULONG_MAX) {
+ newmas.index = mas.index;
+ newmas.last = mas.last;
+ mas_store(&newmas, val);
+ }
+ rcu_read_unlock();
+ mas_destroy(&newmas);
+ mas_unlock(&newmas);
+ mt_validate(&newmt);
+ mt_set_non_kernel(0);
+ mtree_destroy(&newmt);
+}
+
+static noinline void check_mas_store_gfp(struct maple_tree *mt)
+{
+
+ struct maple_tree newmt;
+ int i, nr_entries = 135;
+ void *val;
+ MA_STATE(mas, mt, 0, 0);
+ MA_STATE(newmas, mt, 0, 0);
+
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ mt_set_non_kernel(99999);
+ mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE);
+ newmas.tree = &newmt;
+ rcu_read_lock();
+ mas_lock(&newmas);
+ mas_reset(&newmas);
+ mas_set(&mas, 0);
+ mas_for_each(&mas, val, ULONG_MAX) {
+ newmas.index = mas.index;
+ newmas.last = mas.last;
+ mas_store_gfp(&newmas, val, GFP_KERNEL);
+ }
+ mas_unlock(&newmas);
+ rcu_read_unlock();
+ mt_validate(&newmt);
+ mt_set_non_kernel(0);
+ mtree_destroy(&newmt);
+}
+
+#if defined(BENCH_FORK)
+static noinline void bench_forking(struct maple_tree *mt)
+{
+
+ struct maple_tree newmt;
+ int i, nr_entries = 134, nr_fork = 80000;
+ void *val;
+ MA_STATE(mas, mt, 0, 0);
+ MA_STATE(newmas, mt, 0, 0);
+
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ for (i = 0; i < nr_fork; i++) {
+ mt_set_non_kernel(99999);
+ mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE);
+ newmas.tree = &newmt;
+ mas_reset(&newmas);
+ mas_reset(&mas);
+ mas.index = 0;
+ mas.last = 0;
+ rcu_read_lock();
+ mas_lock(&newmas);
+ if (mas_expected_entries(&newmas, nr_entries)) {
+ printk("OOM!");
+ BUG_ON(1);
+ }
+ mas_for_each(&mas, val, ULONG_MAX) {
+ newmas.index = mas.index;
+ newmas.last = mas.last;
+ mas_store(&newmas, val);
+ }
+ mas_destroy(&newmas);
+ mas_unlock(&newmas);
+ rcu_read_unlock();
+ mt_validate(&newmt);
+ mt_set_non_kernel(0);
+ mtree_destroy(&newmt);
+ }
+}
+#endif
+
+static noinline void next_prev_test(struct maple_tree *mt)
+{
+ int i, nr_entries;
+ void *val;
+ MA_STATE(mas, mt, 0, 0);
+ struct maple_enode *mn;
+ unsigned long *level2;
+ unsigned long level2_64[] = {707, 1000, 710, 715, 720, 725};
+ unsigned long level2_32[] = {1747, 2000, 1750, 1755, 1760, 1765};
+
+ if (MAPLE_32BIT) {
+ nr_entries = 500;
+ level2 = level2_32;
+ } else {
+ nr_entries = 200;
+ level2 = level2_64;
+ }
+
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ mas_lock(&mas);
+ for (i = 0; i <= nr_entries / 2; i++) {
+ mas_next(&mas, 1000);
+ if (mas_is_none(&mas))
+ break;
+
+ }
+ mas_reset(&mas);
+ mas_set(&mas, 0);
+ i = 0;
+ mas_for_each(&mas, val, 1000) {
+ i++;
+ }
+
+ mas_reset(&mas);
+ mas_set(&mas, 0);
+ i = 0;
+ mas_for_each(&mas, val, 1000) {
+ mas_pause(&mas);
+ i++;
+ }
+
+ /*
+ * 680 - 685 = 0x61a00001930c
+ * 686 - 689 = NULL;
+ * 690 - 695 = 0x61a00001930c
+ * Check simple next/prev
+ */
+ mas_set(&mas, 686);
+ val = mas_walk(&mas);
+ MT_BUG_ON(mt, val != NULL);
+
+ val = mas_next(&mas, 1000);
+ MT_BUG_ON(mt, val != xa_mk_value(690 / 10));
+ MT_BUG_ON(mt, mas.index != 690);
+ MT_BUG_ON(mt, mas.last != 695);
+
+ val = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(680 / 10));
+ MT_BUG_ON(mt, mas.index != 680);
+ MT_BUG_ON(mt, mas.last != 685);
+
+ val = mas_next(&mas, 1000);
+ MT_BUG_ON(mt, val != xa_mk_value(690 / 10));
+ MT_BUG_ON(mt, mas.index != 690);
+ MT_BUG_ON(mt, mas.last != 695);
+
+ val = mas_next(&mas, 1000);
+ MT_BUG_ON(mt, val != xa_mk_value(700 / 10));
+ MT_BUG_ON(mt, mas.index != 700);
+ MT_BUG_ON(mt, mas.last != 705);
+
+ /* Check across node boundaries of the tree */
+ mas_set(&mas, 70);
+ val = mas_walk(&mas);
+ MT_BUG_ON(mt, val != xa_mk_value(70 / 10));
+ MT_BUG_ON(mt, mas.index != 70);
+ MT_BUG_ON(mt, mas.last != 75);
+
+ val = mas_next(&mas, 1000);
+ MT_BUG_ON(mt, val != xa_mk_value(80 / 10));
+ MT_BUG_ON(mt, mas.index != 80);
+ MT_BUG_ON(mt, mas.last != 85);
+
+ val = mas_prev(&mas, 70);
+ MT_BUG_ON(mt, val != xa_mk_value(70 / 10));
+ MT_BUG_ON(mt, mas.index != 70);
+ MT_BUG_ON(mt, mas.last != 75);
+
+ /* Check across two levels of the tree */
+ mas_reset(&mas);
+ mas_set(&mas, level2[0]);
+ val = mas_walk(&mas);
+ MT_BUG_ON(mt, val != NULL);
+ val = mas_next(&mas, level2[1]);
+ MT_BUG_ON(mt, val != xa_mk_value(level2[2] / 10));
+ MT_BUG_ON(mt, mas.index != level2[2]);
+ MT_BUG_ON(mt, mas.last != level2[3]);
+ mn = mas.node;
+
+ val = mas_next(&mas, level2[1]);
+ MT_BUG_ON(mt, val != xa_mk_value(level2[4] / 10));
+ MT_BUG_ON(mt, mas.index != level2[4]);
+ MT_BUG_ON(mt, mas.last != level2[5]);
+ MT_BUG_ON(mt, mn == mas.node);
+
+ val = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(level2[2] / 10));
+ MT_BUG_ON(mt, mas.index != level2[2]);
+ MT_BUG_ON(mt, mas.last != level2[3]);
+
+ /* Check running off the end and back on */
+ mas_set(&mas, nr_entries * 10);
+ val = mas_walk(&mas);
+ MT_BUG_ON(mt, val != xa_mk_value(nr_entries));
+ MT_BUG_ON(mt, mas.index != (nr_entries * 10));
+ MT_BUG_ON(mt, mas.last != (nr_entries * 10 + 5));
+
+ val = mas_next(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, val != NULL);
+ MT_BUG_ON(mt, mas.index != ULONG_MAX);
+ MT_BUG_ON(mt, mas.last != ULONG_MAX);
+
+ val = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(nr_entries));
+ MT_BUG_ON(mt, mas.index != (nr_entries * 10));
+ MT_BUG_ON(mt, mas.last != (nr_entries * 10 + 5));
+
+ /* Check running off the start and back on */
+ mas_reset(&mas);
+ mas_set(&mas, 10);
+ val = mas_walk(&mas);
+ MT_BUG_ON(mt, val != xa_mk_value(1));
+ MT_BUG_ON(mt, mas.index != 10);
+ MT_BUG_ON(mt, mas.last != 15);
+
+ val = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, val != xa_mk_value(0));
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 5);
+
+ val = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, val != NULL);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 0);
+
+ mas.index = 0;
+ mas.last = 5;
+ mas_store(&mas, NULL);
+ mas_reset(&mas);
+ mas_set(&mas, 10);
+ mas_walk(&mas);
+
+ val = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, val != NULL);
+ MT_BUG_ON(mt, mas.index != 0);
+ MT_BUG_ON(mt, mas.last != 0);
+ mas_unlock(&mas);
+
+ mtree_destroy(mt);
+
+ mt_init(mt);
+ mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL);
+ mtree_store_range(mt, 5, 5, xa_mk_value(5), GFP_KERNEL);
+ rcu_read_lock();
+ mas_set(&mas, 5);
+ val = mas_prev(&mas, 4);
+ MT_BUG_ON(mt, val != NULL);
+ rcu_read_unlock();
+}
+
+
+
+/* Test spanning writes that require balancing right sibling or right cousin */
+static noinline void check_spanning_relatives(struct maple_tree *mt)
+{
+
+ unsigned long i, nr_entries = 1000;
+
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+
+ mtree_store_range(mt, 9365, 9955, NULL, GFP_KERNEL);
+}
+
+static noinline void check_fuzzer(struct maple_tree *mt)
+{
+ /*
+ * 1. Causes a spanning rebalance of a single root node.
+ * Fixed by setting the correct limit in mast_cp_to_nodes() when the
+ * entire right side is consumed.
+ */
+ mtree_test_insert(mt, 88, (void *)0xb1);
+ mtree_test_insert(mt, 84, (void *)0xa9);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 4, (void *)0x9);
+ mtree_test_insert(mt, 14, (void *)0x1d);
+ mtree_test_insert(mt, 7, (void *)0xf);
+ mtree_test_insert(mt, 12, (void *)0x19);
+ mtree_test_insert(mt, 18, (void *)0x25);
+ mtree_test_store_range(mt, 8, 18, (void *)0x11);
+ mtree_destroy(mt);
+
+
+ /*
+ * 2. Cause a spanning rebalance of two nodes in root.
+ * Fixed by setting mast->r->max correctly.
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_store(mt, 87, (void *)0xaf);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_load(mt, 4);
+ mtree_test_insert(mt, 4, (void *)0x9);
+ mtree_test_store(mt, 8, (void *)0x11);
+ mtree_test_store(mt, 44, (void *)0x59);
+ mtree_test_store(mt, 68, (void *)0x89);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 43, (void *)0x57);
+ mtree_test_insert(mt, 24, (void *)0x31);
+ mtree_test_insert(mt, 844, (void *)0x699);
+ mtree_test_store(mt, 84, (void *)0xa9);
+ mtree_test_store(mt, 4, (void *)0x9);
+ mtree_test_erase(mt, 4);
+ mtree_test_load(mt, 5);
+ mtree_test_erase(mt, 0);
+ mtree_destroy(mt);
+
+ /*
+ * 3. Cause a node overflow on copy
+ * Fixed by using the correct check for node size in mas_wr_modify()
+ * Also discovered issue with metadata setting.
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_store_range(mt, 0, ULONG_MAX, (void *)0x1);
+ mtree_test_store(mt, 4, (void *)0x9);
+ mtree_test_erase(mt, 5);
+ mtree_test_erase(mt, 0);
+ mtree_test_erase(mt, 4);
+ mtree_test_store(mt, 5, (void *)0xb);
+ mtree_test_erase(mt, 5);
+ mtree_test_store(mt, 5, (void *)0xb);
+ mtree_test_erase(mt, 5);
+ mtree_test_erase(mt, 4);
+ mtree_test_store(mt, 4, (void *)0x9);
+ mtree_test_store(mt, 444, (void *)0x379);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_load(mt, 0);
+ mtree_test_store(mt, 5, (void *)0xb);
+ mtree_test_erase(mt, 0);
+ mtree_destroy(mt);
+
+ /*
+ * 4. spanning store failure due to writing incorrect pivot value at
+ * last slot.
+ * Fixed by setting mast->r->max correctly in mast_cp_to_nodes()
+ *
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_insert(mt, 261, (void *)0x20b);
+ mtree_test_store(mt, 516, (void *)0x409);
+ mtree_test_store(mt, 6, (void *)0xd);
+ mtree_test_insert(mt, 5, (void *)0xb);
+ mtree_test_insert(mt, 1256, (void *)0x9d1);
+ mtree_test_store(mt, 4, (void *)0x9);
+ mtree_test_erase(mt, 1);
+ mtree_test_store(mt, 56, (void *)0x71);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_store(mt, 24, (void *)0x31);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 2263, (void *)0x11af);
+ mtree_test_insert(mt, 446, (void *)0x37d);
+ mtree_test_store_range(mt, 6, 45, (void *)0xd);
+ mtree_test_store_range(mt, 3, 446, (void *)0x7);
+ mtree_destroy(mt);
+
+ /*
+ * 5. mas_wr_extend_null() may overflow slots.
+ * Fix by checking against wr_mas->node_end.
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_store(mt, 48, (void *)0x61);
+ mtree_test_store(mt, 3, (void *)0x7);
+ mtree_test_load(mt, 0);
+ mtree_test_store(mt, 88, (void *)0xb1);
+ mtree_test_store(mt, 81, (void *)0xa3);
+ mtree_test_insert(mt, 0, (void *)0x1);
+ mtree_test_insert(mt, 8, (void *)0x11);
+ mtree_test_insert(mt, 4, (void *)0x9);
+ mtree_test_insert(mt, 2480, (void *)0x1361);
+ mtree_test_insert(mt, ULONG_MAX,
+ (void *)0xffffffffffffffff);
+ mtree_test_erase(mt, ULONG_MAX);
+ mtree_destroy(mt);
+
+ /*
+ * 6. When reusing a node with an implied pivot and the node is
+ * shrinking, old data would be left in the implied slot
+ * Fixed by checking the last pivot for the mas->max and clear
+ * accordingly. This only affected the left-most node as that node is
+ * the only one allowed to end in NULL.
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_erase(mt, 3);
+ mtree_test_insert(mt, 22, (void *)0x2d);
+ mtree_test_insert(mt, 15, (void *)0x1f);
+ mtree_test_load(mt, 2);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 5, (void *)0xb);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 4, (void *)0x9);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 3);
+ mtree_test_insert(mt, 22, (void *)0x2d);
+ mtree_test_insert(mt, 15, (void *)0x1f);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 8, (void *)0x11);
+ mtree_test_load(mt, 2);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 5, (void *)0xb);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 4, (void *)0x9);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 3);
+ mtree_test_insert(mt, 22, (void *)0x2d);
+ mtree_test_insert(mt, 15, (void *)0x1f);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 8, (void *)0x11);
+ mtree_test_insert(mt, 12, (void *)0x19);
+ mtree_test_erase(mt, 1);
+ mtree_test_store_range(mt, 4, 62, (void *)0x9);
+ mtree_test_erase(mt, 62);
+ mtree_test_store_range(mt, 1, 0, (void *)0x3);
+ mtree_test_insert(mt, 11, (void *)0x17);
+ mtree_test_insert(mt, 3, (void *)0x7);
+ mtree_test_insert(mt, 3, (void *)0x7);
+ mtree_test_store(mt, 62, (void *)0x7d);
+ mtree_test_erase(mt, 62);
+ mtree_test_store_range(mt, 1, 15, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 22, (void *)0x2d);
+ mtree_test_insert(mt, 12, (void *)0x19);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 3, (void *)0x7);
+ mtree_test_store(mt, 62, (void *)0x7d);
+ mtree_test_erase(mt, 62);
+ mtree_test_insert(mt, 122, (void *)0xf5);
+ mtree_test_store(mt, 3, (void *)0x7);
+ mtree_test_insert(mt, 0, (void *)0x1);
+ mtree_test_store_range(mt, 0, 1, (void *)0x1);
+ mtree_test_insert(mt, 85, (void *)0xab);
+ mtree_test_insert(mt, 72, (void *)0x91);
+ mtree_test_insert(mt, 81, (void *)0xa3);
+ mtree_test_insert(mt, 726, (void *)0x5ad);
+ mtree_test_insert(mt, 0, (void *)0x1);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_store(mt, 51, (void *)0x67);
+ mtree_test_insert(mt, 611, (void *)0x4c7);
+ mtree_test_insert(mt, 485, (void *)0x3cb);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 0, (void *)0x1);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert_range(mt, 26, 1, (void *)0x35);
+ mtree_test_load(mt, 1);
+ mtree_test_store_range(mt, 1, 22, (void *)0x3);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_load(mt, 53);
+ mtree_test_load(mt, 1);
+ mtree_test_store_range(mt, 1, 1, (void *)0x3);
+ mtree_test_insert(mt, 222, (void *)0x1bd);
+ mtree_test_insert(mt, 485, (void *)0x3cb);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_load(mt, 0);
+ mtree_test_insert(mt, 21, (void *)0x2b);
+ mtree_test_insert(mt, 3, (void *)0x7);
+ mtree_test_store(mt, 621, (void *)0x4db);
+ mtree_test_insert(mt, 0, (void *)0x1);
+ mtree_test_erase(mt, 5);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_store(mt, 62, (void *)0x7d);
+ mtree_test_erase(mt, 62);
+ mtree_test_store_range(mt, 1, 0, (void *)0x3);
+ mtree_test_insert(mt, 22, (void *)0x2d);
+ mtree_test_insert(mt, 12, (void *)0x19);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_store_range(mt, 4, 62, (void *)0x9);
+ mtree_test_erase(mt, 62);
+ mtree_test_erase(mt, 1);
+ mtree_test_load(mt, 1);
+ mtree_test_store_range(mt, 1, 22, (void *)0x3);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_load(mt, 53);
+ mtree_test_load(mt, 1);
+ mtree_test_store_range(mt, 1, 1, (void *)0x3);
+ mtree_test_insert(mt, 222, (void *)0x1bd);
+ mtree_test_insert(mt, 485, (void *)0x3cb);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_load(mt, 0);
+ mtree_test_load(mt, 0);
+ mtree_destroy(mt);
+
+ /*
+ * 7. Previous fix was incomplete, fix mas_resuse_node() clearing of old
+ * data by overwriting it first - that way metadata is of no concern.
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_load(mt, 1);
+ mtree_test_insert(mt, 102, (void *)0xcd);
+ mtree_test_erase(mt, 2);
+ mtree_test_erase(mt, 0);
+ mtree_test_load(mt, 0);
+ mtree_test_insert(mt, 4, (void *)0x9);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 110, (void *)0xdd);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_insert_range(mt, 5, 0, (void *)0xb);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_store(mt, 112, (void *)0xe1);
+ mtree_test_insert(mt, 21, (void *)0x2b);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_insert_range(mt, 110, 2, (void *)0xdd);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_load(mt, 22);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 210, (void *)0x1a5);
+ mtree_test_store_range(mt, 0, 2, (void *)0x1);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_erase(mt, 2);
+ mtree_test_erase(mt, 22);
+ mtree_test_erase(mt, 1);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_load(mt, 112);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_insert_range(mt, 1, 2, (void *)0x3);
+ mtree_test_erase(mt, 0);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_erase(mt, 0);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_erase(mt, 2);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert_range(mt, 1, 2, (void *)0x3);
+ mtree_test_erase(mt, 0);
+ mtree_test_erase(mt, 2);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_load(mt, 112);
+ mtree_test_store_range(mt, 110, 12, (void *)0xdd);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_load(mt, 110);
+ mtree_test_insert_range(mt, 4, 71, (void *)0x9);
+ mtree_test_load(mt, 2);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_insert_range(mt, 11, 22, (void *)0x17);
+ mtree_test_erase(mt, 12);
+ mtree_test_store(mt, 2, (void *)0x5);
+ mtree_test_load(mt, 22);
+ mtree_destroy(mt);
+
+
+ /*
+ * 8. When rebalancing or spanning_rebalance(), the max of the new node
+ * may be set incorrectly to the final pivot and not the right max.
+ * Fix by setting the left max to orig right max if the entire node is
+ * consumed.
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_store(mt, 6, (void *)0xd);
+ mtree_test_store(mt, 67, (void *)0x87);
+ mtree_test_insert(mt, 15, (void *)0x1f);
+ mtree_test_insert(mt, 6716, (void *)0x3479);
+ mtree_test_store(mt, 61, (void *)0x7b);
+ mtree_test_insert(mt, 13, (void *)0x1b);
+ mtree_test_store(mt, 8, (void *)0x11);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_load(mt, 0);
+ mtree_test_erase(mt, 67167);
+ mtree_test_insert_range(mt, 6, 7167, (void *)0xd);
+ mtree_test_insert(mt, 6, (void *)0xd);
+ mtree_test_erase(mt, 67);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 667167);
+ mtree_test_insert(mt, 6, (void *)0xd);
+ mtree_test_store(mt, 67, (void *)0x87);
+ mtree_test_insert(mt, 5, (void *)0xb);
+ mtree_test_erase(mt, 1);
+ mtree_test_insert(mt, 6, (void *)0xd);
+ mtree_test_erase(mt, 67);
+ mtree_test_insert(mt, 15, (void *)0x1f);
+ mtree_test_insert(mt, 67167, (void *)0x20cbf);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_load(mt, 7);
+ mtree_test_insert(mt, 16, (void *)0x21);
+ mtree_test_insert(mt, 36, (void *)0x49);
+ mtree_test_store(mt, 67, (void *)0x87);
+ mtree_test_store(mt, 6, (void *)0xd);
+ mtree_test_insert(mt, 367, (void *)0x2df);
+ mtree_test_insert(mt, 115, (void *)0xe7);
+ mtree_test_store(mt, 0, (void *)0x1);
+ mtree_test_store_range(mt, 1, 3, (void *)0x3);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 67167);
+ mtree_test_insert_range(mt, 6, 47, (void *)0xd);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_insert_range(mt, 1, 67, (void *)0x3);
+ mtree_test_load(mt, 67);
+ mtree_test_insert(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 67167);
+ mtree_destroy(mt);
+
+ /*
+ * 9. spanning store to the end of data caused an invalid metadata
+ * length which resulted in a crash eventually.
+ * Fix by checking if there is a value in pivot before incrementing the
+ * metadata end in mab_mas_cp(). To ensure this doesn't happen again,
+ * abstract the two locations this happens into a function called
+ * mas_leaf_set_meta().
+ */
+ mt_init_flags(mt, 0);
+ mtree_test_insert(mt, 21, (void *)0x2b);
+ mtree_test_insert(mt, 12, (void *)0x19);
+ mtree_test_insert(mt, 6, (void *)0xd);
+ mtree_test_insert(mt, 8, (void *)0x11);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, 91, (void *)0xb7);
+ mtree_test_insert(mt, 18, (void *)0x25);
+ mtree_test_insert(mt, 81, (void *)0xa3);
+ mtree_test_store_range(mt, 0, 128, (void *)0x1);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_erase(mt, 8);
+ mtree_test_insert(mt, 11, (void *)0x17);
+ mtree_test_insert(mt, 8, (void *)0x11);
+ mtree_test_insert(mt, 21, (void *)0x2b);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, ULONG_MAX - 10, (void *)0xffffffffffffffeb);
+ mtree_test_erase(mt, ULONG_MAX - 10);
+ mtree_test_store_range(mt, 0, 281, (void *)0x1);
+ mtree_test_erase(mt, 2);
+ mtree_test_insert(mt, 1211, (void *)0x977);
+ mtree_test_insert(mt, 111, (void *)0xdf);
+ mtree_test_insert(mt, 13, (void *)0x1b);
+ mtree_test_insert(mt, 211, (void *)0x1a7);
+ mtree_test_insert(mt, 11, (void *)0x17);
+ mtree_test_insert(mt, 5, (void *)0xb);
+ mtree_test_insert(mt, 1218, (void *)0x985);
+ mtree_test_insert(mt, 61, (void *)0x7b);
+ mtree_test_store(mt, 1, (void *)0x3);
+ mtree_test_insert(mt, 121, (void *)0xf3);
+ mtree_test_insert(mt, 8, (void *)0x11);
+ mtree_test_insert(mt, 21, (void *)0x2b);
+ mtree_test_insert(mt, 2, (void *)0x5);
+ mtree_test_insert(mt, ULONG_MAX - 10, (void *)0xffffffffffffffeb);
+ mtree_test_erase(mt, ULONG_MAX - 10);
+}
+
+/* duplicate the tree with a specific gap */
+static noinline void check_dup_gaps(struct maple_tree *mt,
+ unsigned long nr_entries, bool zero_start,
+ unsigned long gap)
+{
+ unsigned long i = 0;
+ struct maple_tree newmt;
+ int ret;
+ void *tmp;
+ MA_STATE(mas, mt, 0, 0);
+ MA_STATE(newmas, &newmt, 0, 0);
+
+ if (!zero_start)
+ i = 1;
+
+ mt_zero_nr_tallocated();
+ for (; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, (i+1)*10 - gap,
+ xa_mk_value(i), GFP_KERNEL);
+
+ mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE);
+ mt_set_non_kernel(99999);
+ mas_lock(&newmas);
+ ret = mas_expected_entries(&newmas, nr_entries);
+ mt_set_non_kernel(0);
+ MT_BUG_ON(mt, ret != 0);
+
+ rcu_read_lock();
+ mas_for_each(&mas, tmp, ULONG_MAX) {
+ newmas.index = mas.index;
+ newmas.last = mas.last;
+ mas_store(&newmas, tmp);
+ }
+ rcu_read_unlock();
+ mas_destroy(&newmas);
+ mas_unlock(&newmas);
+
+ mtree_destroy(&newmt);
+}
+
+/* Duplicate many sizes of trees. Mainly to test expected entry values */
+static noinline void check_dup(struct maple_tree *mt)
+{
+ int i;
+ int big_start = 100010;
+
+ /* Check with a value at zero */
+ for (i = 10; i < 1000; i++) {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_dup_gaps(mt, i, true, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ }
+
+ cond_resched();
+ mt_cache_shrink();
+ /* Check with a value at zero, no gap */
+ for (i = 1000; i < 2000; i++) {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_dup_gaps(mt, i, true, 0);
+ mtree_destroy(mt);
+ rcu_barrier();
+ }
+
+ cond_resched();
+ mt_cache_shrink();
+ /* Check with a value at zero and unreasonably large */
+ for (i = big_start; i < big_start + 10; i++) {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_dup_gaps(mt, i, true, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ }
+
+ cond_resched();
+ mt_cache_shrink();
+ /* Small to medium size not starting at zero*/
+ for (i = 200; i < 1000; i++) {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_dup_gaps(mt, i, false, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ }
+
+ cond_resched();
+ mt_cache_shrink();
+ /* Unreasonably large not starting at zero*/
+ for (i = big_start; i < big_start + 10; i++) {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_dup_gaps(mt, i, false, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ cond_resched();
+ mt_cache_shrink();
+ }
+
+ /* Check non-allocation tree not starting at zero */
+ for (i = 1500; i < 3000; i++) {
+ mt_init_flags(mt, 0);
+ check_dup_gaps(mt, i, false, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ cond_resched();
+ if (i % 2 == 0)
+ mt_cache_shrink();
+ }
+
+ mt_cache_shrink();
+ /* Check non-allocation tree starting at zero */
+ for (i = 200; i < 1000; i++) {
+ mt_init_flags(mt, 0);
+ check_dup_gaps(mt, i, true, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ cond_resched();
+ }
+
+ mt_cache_shrink();
+ /* Unreasonably large */
+ for (i = big_start + 5; i < big_start + 10; i++) {
+ mt_init_flags(mt, 0);
+ check_dup_gaps(mt, i, true, 5);
+ mtree_destroy(mt);
+ rcu_barrier();
+ mt_cache_shrink();
+ cond_resched();
+ }
+}
+
+static DEFINE_MTREE(tree);
+static int maple_tree_seed(void)
+{
+ unsigned long set[] = {5015, 5014, 5017, 25, 1000,
+ 1001, 1002, 1003, 1005, 0,
+ 5003, 5002};
+ void *ptr = &set;
+
+ pr_info("\nTEST STARTING\n\n");
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_root_expand(&tree);
+ mtree_destroy(&tree);
+
+#if defined(BENCH_SLOT_STORE)
+#define BENCH
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ bench_slot_store(&tree);
+ mtree_destroy(&tree);
+ goto skip;
+#endif
+#if defined(BENCH_NODE_STORE)
+#define BENCH
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ bench_node_store(&tree);
+ mtree_destroy(&tree);
+ goto skip;
+#endif
+#if defined(BENCH_AWALK)
+#define BENCH
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ bench_awalk(&tree);
+ mtree_destroy(&tree);
+ goto skip;
+#endif
+#if defined(BENCH_WALK)
+#define BENCH
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ bench_walk(&tree);
+ mtree_destroy(&tree);
+ goto skip;
+#endif
+#if defined(BENCH_FORK)
+#define BENCH
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ bench_forking(&tree);
+ mtree_destroy(&tree);
+ goto skip;
+#endif
+#if defined(BENCH_MT_FOR_EACH)
+#define BENCH
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ bench_mt_for_each(&tree);
+ mtree_destroy(&tree);
+ goto skip;
+#endif
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_forking(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_mas_store_gfp(&tree);
+ mtree_destroy(&tree);
+
+ /* Test ranges (store and insert) */
+ mt_init_flags(&tree, 0);
+ check_ranges(&tree);
+ mtree_destroy(&tree);
+
+#if defined(CONFIG_64BIT)
+ /* These tests have ranges outside of 4GB */
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_alloc_range(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_alloc_rev_range(&tree);
+ mtree_destroy(&tree);
+#endif
+
+ mt_init_flags(&tree, 0);
+
+ check_load(&tree, set[0], NULL); /* See if 5015 -> NULL */
+
+ check_insert(&tree, set[9], &tree); /* Insert 0 */
+ check_load(&tree, set[9], &tree); /* See if 0 -> &tree */
+ check_load(&tree, set[0], NULL); /* See if 5015 -> NULL */
+
+ check_insert(&tree, set[10], ptr); /* Insert 5003 */
+ check_load(&tree, set[9], &tree); /* See if 0 -> &tree */
+ check_load(&tree, set[11], NULL); /* See if 5002 -> NULL */
+ check_load(&tree, set[10], ptr); /* See if 5003 -> ptr */
+
+ /* Clear out the tree */
+ mtree_destroy(&tree);
+
+ /* Try to insert, insert a dup, and load back what was inserted. */
+ mt_init_flags(&tree, 0);
+ check_insert(&tree, set[0], &tree); /* Insert 5015 */
+ check_dup_insert(&tree, set[0], &tree); /* Insert 5015 again */
+ check_load(&tree, set[0], &tree); /* See if 5015 -> &tree */
+
+ /*
+ * Second set of tests try to load a value that doesn't exist, inserts
+ * a second value, then loads the value again
+ */
+ check_load(&tree, set[1], NULL); /* See if 5014 -> NULL */
+ check_insert(&tree, set[1], ptr); /* insert 5014 -> ptr */
+ check_load(&tree, set[1], ptr); /* See if 5014 -> ptr */
+ check_load(&tree, set[0], &tree); /* See if 5015 -> &tree */
+ /*
+ * Tree currently contains:
+ * p[0]: 14 -> (nil) p[1]: 15 -> ptr p[2]: 16 -> &tree p[3]: 0 -> (nil)
+ */
+ check_insert(&tree, set[6], ptr); /* insert 1002 -> ptr */
+ check_insert(&tree, set[7], &tree); /* insert 1003 -> &tree */
+
+ check_load(&tree, set[0], &tree); /* See if 5015 -> &tree */
+ check_load(&tree, set[1], ptr); /* See if 5014 -> ptr */
+ check_load(&tree, set[6], ptr); /* See if 1002 -> ptr */
+ check_load(&tree, set[7], &tree); /* 1003 = &tree ? */
+
+ /* Clear out tree */
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, 0);
+ /* Test inserting into a NULL hole. */
+ check_insert(&tree, set[5], ptr); /* insert 1001 -> ptr */
+ check_insert(&tree, set[7], &tree); /* insert 1003 -> &tree */
+ check_insert(&tree, set[6], ptr); /* insert 1002 -> ptr */
+ check_load(&tree, set[5], ptr); /* See if 1001 -> ptr */
+ check_load(&tree, set[6], ptr); /* See if 1002 -> ptr */
+ check_load(&tree, set[7], &tree); /* See if 1003 -> &tree */
+
+ /* Clear out the tree */
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, 0);
+ /*
+ * set[] = {5015, 5014, 5017, 25, 1000,
+ * 1001, 1002, 1003, 1005, 0,
+ * 5003, 5002};
+ */
+
+ check_insert(&tree, set[0], ptr); /* 5015 */
+ check_insert(&tree, set[1], &tree); /* 5014 */
+ check_insert(&tree, set[2], ptr); /* 5017 */
+ check_insert(&tree, set[3], &tree); /* 25 */
+ check_load(&tree, set[0], ptr);
+ check_load(&tree, set[1], &tree);
+ check_load(&tree, set[2], ptr);
+ check_load(&tree, set[3], &tree);
+ check_insert(&tree, set[4], ptr); /* 1000 < Should split. */
+ check_load(&tree, set[0], ptr);
+ check_load(&tree, set[1], &tree);
+ check_load(&tree, set[2], ptr);
+ check_load(&tree, set[3], &tree); /*25 */
+ check_load(&tree, set[4], ptr);
+ check_insert(&tree, set[5], &tree); /* 1001 */
+ check_load(&tree, set[0], ptr);
+ check_load(&tree, set[1], &tree);
+ check_load(&tree, set[2], ptr);
+ check_load(&tree, set[3], &tree);
+ check_load(&tree, set[4], ptr);
+ check_load(&tree, set[5], &tree);
+ check_insert(&tree, set[6], ptr);
+ check_load(&tree, set[0], ptr);
+ check_load(&tree, set[1], &tree);
+ check_load(&tree, set[2], ptr);
+ check_load(&tree, set[3], &tree);
+ check_load(&tree, set[4], ptr);
+ check_load(&tree, set[5], &tree);
+ check_load(&tree, set[6], ptr);
+ check_insert(&tree, set[7], &tree);
+ check_load(&tree, set[0], ptr);
+ check_insert(&tree, set[8], ptr);
+
+ check_insert(&tree, set[9], &tree);
+
+ check_load(&tree, set[0], ptr);
+ check_load(&tree, set[1], &tree);
+ check_load(&tree, set[2], ptr);
+ check_load(&tree, set[3], &tree);
+ check_load(&tree, set[4], ptr);
+ check_load(&tree, set[5], &tree);
+ check_load(&tree, set[6], ptr);
+ check_load(&tree, set[9], &tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, 0);
+ check_seq(&tree, 16, false);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, 0);
+ check_seq(&tree, 1000, true);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_rev_seq(&tree, 1000, true);
+ mtree_destroy(&tree);
+
+ check_lower_bound_split(&tree);
+ check_upper_bound_split(&tree);
+ check_mid_split(&tree);
+
+ mt_init_flags(&tree, 0);
+ check_next_entry(&tree);
+ check_find(&tree);
+ check_find_2(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_prev_entry(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_gap_combining(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_node_overwrite(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ next_prev_test(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_spanning_relatives(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_rev_find(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, 0);
+ check_fuzzer(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_dup(&tree);
+ mtree_destroy(&tree);
+
+#if defined(BENCH)
+skip:
+#endif
+ rcu_barrier();
+ pr_info("maple_tree: %u of %u tests passed\n",
+ atomic_read(&maple_tree_tests_passed),
+ atomic_read(&maple_tree_tests_run));
+ if (atomic_read(&maple_tree_tests_run) ==
+ atomic_read(&maple_tree_tests_passed))
+ return 0;
+
+ return -EINVAL;
+}
+
+static void maple_tree_harvest(void)
+{
+
+}
+
+module_init(maple_tree_seed);
+module_exit(maple_tree_harvest);
+MODULE_AUTHOR("Liam R. Howlett <Liam.Howlett@Oracle.com>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index c95db11a6906..60e1984c060f 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -67,17 +67,24 @@ static int __init do_alloc_pages_order(int order, int *total_failures)
size_t size = PAGE_SIZE << order;
page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ goto err;
buf = page_address(page);
fill_with_garbage(buf, size);
__free_pages(page, order);
page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ goto err;
buf = page_address(page);
if (count_nonzero_bytes(buf, size))
(*total_failures)++;
fill_with_garbage(buf, size);
__free_pages(page, order);
return 1;
+err:
+ (*total_failures)++;
+ return 1;
}
/* Test the page allocator by calling alloc_pages with different orders. */
@@ -100,15 +107,22 @@ static int __init do_kmalloc_size(size_t size, int *total_failures)
void *buf;
buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ goto err;
fill_with_garbage(buf, size);
kfree(buf);
buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ goto err;
if (count_nonzero_bytes(buf, size))
(*total_failures)++;
fill_with_garbage(buf, size);
kfree(buf);
return 1;
+err:
+ (*total_failures)++;
+ return 1;
}
/* Test vmalloc() with given parameters. */
@@ -117,15 +131,22 @@ static int __init do_vmalloc_size(size_t size, int *total_failures)
void *buf;
buf = vmalloc(size);
+ if (!buf)
+ goto err;
fill_with_garbage(buf, size);
vfree(buf);
buf = vmalloc(size);
+ if (!buf)
+ goto err;
if (count_nonzero_bytes(buf, size))
(*total_failures)++;
fill_with_garbage(buf, size);
vfree(buf);
return 1;
+err:
+ (*total_failures)++;
+ return 1;
}
/* Test kmalloc()/vmalloc() by allocating objects of different sizes. */
diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c
index d19c8080fd4d..7b01b4387cfb 100644
--- a/lib/test_min_heap.c
+++ b/lib/test_min_heap.c
@@ -83,7 +83,7 @@ static __init int test_heapify_all(bool min_heap)
/* Test with randomly generated values. */
heap.nr = ARRAY_SIZE(values);
for (i = 0; i < heap.nr; i++)
- values[i] = get_random_int();
+ values[i] = get_random_u32();
min_heapify_all(&heap, &funcs);
err += pop_verify_heap(min_heap, &heap, &funcs);
@@ -116,7 +116,7 @@ static __init int test_heap_push(bool min_heap)
/* Test with randomly generated values. */
while (heap.nr < heap.size) {
- temp = get_random_int();
+ temp = get_random_u32();
min_heap_push(&heap, &temp, &funcs);
}
err += pop_verify_heap(min_heap, &heap, &funcs);
@@ -158,7 +158,7 @@ static __init int test_heap_pop_push(bool min_heap)
/* Test with randomly generated values. */
for (i = 0; i < ARRAY_SIZE(data); i++) {
- temp = get_random_int();
+ temp = get_random_u32();
min_heap_pop_push(&heap, &temp, &funcs);
}
err += pop_verify_heap(min_heap, &heap, &funcs);
diff --git a/lib/test_objagg.c b/lib/test_objagg.c
index da137939a410..c0c957c50635 100644
--- a/lib/test_objagg.c
+++ b/lib/test_objagg.c
@@ -157,7 +157,7 @@ static int test_nodelta_obj_get(struct world *world, struct objagg *objagg,
int err;
if (should_create_root)
- prandom_bytes(world->next_root_buf,
+ get_random_bytes(world->next_root_buf,
sizeof(world->next_root_buf));
objagg_obj = world_obj_get(world, objagg, key_id);
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 4bd15a593fbd..d6a5d4b5f884 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -179,18 +179,6 @@ test_number(void)
* behaviour.
*/
test("00|0|0|0|0", "%.2d|%.1d|%.0d|%.*d|%1.0d", 0, 0, 0, 0, 0, 0);
-#ifndef __CHAR_UNSIGNED__
- {
- /*
- * Passing a 'char' to a %02x specifier doesn't do
- * what was presumably the intention when char is
- * signed and the value is negative. One must either &
- * with 0xff or cast to u8.
- */
- char val = -16;
- test("0xfffffff0|0xf0|0xf0", "%#02x|%#02x|%#02x", val, val & 0xff, (u8)val);
- }
-#endif
}
static void __init
@@ -704,31 +692,29 @@ flags(void)
static void __init fwnode_pointer(void)
{
- const struct software_node softnodes[] = {
- { .name = "first", },
- { .name = "second", .parent = &softnodes[0], },
- { .name = "third", .parent = &softnodes[1], },
- { NULL /* Guardian */ }
- };
- const char * const full_name = "first/second/third";
+ const struct software_node first = { .name = "first" };
+ const struct software_node second = { .name = "second", .parent = &first };
+ const struct software_node third = { .name = "third", .parent = &second };
+ const struct software_node *group[] = { &first, &second, &third, NULL };
const char * const full_name_second = "first/second";
+ const char * const full_name_third = "first/second/third";
const char * const second_name = "second";
const char * const third_name = "third";
int rval;
- rval = software_node_register_nodes(softnodes);
+ rval = software_node_register_node_group(group);
if (rval) {
pr_warn("cannot register softnodes; rval %d\n", rval);
return;
}
- test(full_name_second, "%pfw", software_node_fwnode(&softnodes[1]));
- test(full_name, "%pfw", software_node_fwnode(&softnodes[2]));
- test(full_name, "%pfwf", software_node_fwnode(&softnodes[2]));
- test(second_name, "%pfwP", software_node_fwnode(&softnodes[1]));
- test(third_name, "%pfwP", software_node_fwnode(&softnodes[2]));
+ test(full_name_second, "%pfw", software_node_fwnode(&second));
+ test(full_name_third, "%pfw", software_node_fwnode(&third));
+ test(full_name_third, "%pfwf", software_node_fwnode(&third));
+ test(second_name, "%pfwP", software_node_fwnode(&second));
+ test(third_name, "%pfwP", software_node_fwnode(&third));
- software_node_unregister_nodes(softnodes);
+ software_node_unregister_node_group(group);
}
static void __init fourcc_pointer(void)
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 5a1dd4736b56..f2ba5787055a 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -291,7 +291,7 @@ static int __init test_rhltable(unsigned int entries)
if (WARN_ON(err))
goto out_free;
- k = prandom_u32();
+ k = get_random_u32();
ret = 0;
for (i = 0; i < entries; i++) {
rhl_test_objects[i].value.id = k;
@@ -369,18 +369,10 @@ static int __init test_rhltable(unsigned int entries)
pr_info("test %d random rhlist add/delete operations\n", entries);
for (j = 0; j < entries; j++) {
u32 i = prandom_u32_max(entries);
- u32 prand = prandom_u32();
+ u32 prand = prandom_u32_max(4);
cond_resched();
- if (prand == 0)
- prand = prandom_u32();
-
- if (prand & 1) {
- prand >>= 1;
- continue;
- }
-
err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
if (test_bit(i, obj_in_table)) {
clear_bit(i, obj_in_table);
@@ -393,35 +385,29 @@ static int __init test_rhltable(unsigned int entries)
}
if (prand & 1) {
- prand >>= 1;
- continue;
- }
-
- err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
- if (err == 0) {
- if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i))
- continue;
- } else {
- if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i))
- continue;
- }
-
- if (prand & 1) {
- prand >>= 1;
- continue;
+ err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ if (err == 0) {
+ if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i))
+ continue;
+ } else {
+ if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i))
+ continue;
+ }
}
- i = prandom_u32_max(entries);
- if (test_bit(i, obj_in_table)) {
- err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
- WARN(err, "cannot remove element at slot %d", i);
- if (err == 0)
- clear_bit(i, obj_in_table);
- } else {
- err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
- WARN(err, "failed to insert object %d", i);
- if (err == 0)
- set_bit(i, obj_in_table);
+ if (prand & 2) {
+ i = prandom_u32_max(entries);
+ if (test_bit(i, obj_in_table)) {
+ err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ WARN(err, "cannot remove element at slot %d", i);
+ if (err == 0)
+ clear_bit(i, obj_in_table);
+ } else {
+ err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ WARN(err, "failed to insert object %d", i);
+ if (err == 0)
+ set_bit(i, obj_in_table);
+ }
}
}
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 4f2f2d1bac56..cf7780572f5b 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -80,7 +80,7 @@ static int random_size_align_alloc_test(void)
int i;
for (i = 0; i < test_loop_count; i++) {
- rnd = prandom_u32();
+ rnd = get_random_u8();
/*
* Maximum 1024 pages, if PAGE_SIZE is 4096.
@@ -151,9 +151,7 @@ static int random_size_alloc_test(void)
int i;
for (i = 0; i < test_loop_count; i++) {
- n = prandom_u32();
- n = (n % 100) + 1;
-
+ n = prandom_u32_max(100) + 1;
p = vmalloc(n * PAGE_SIZE);
if (!p)
@@ -293,16 +291,12 @@ pcpu_alloc_test(void)
return -1;
for (i = 0; i < 35000; i++) {
- unsigned int r;
-
- r = prandom_u32();
- size = (r % (PAGE_SIZE / 4)) + 1;
+ size = prandom_u32_max(PAGE_SIZE / 4) + 1;
/*
* Maximum PAGE_SIZE
*/
- r = prandom_u32();
- align = 1 << ((r % 11) + 1);
+ align = 1 << (prandom_u32_max(11) + 1);
pcpu[i] = __alloc_percpu(size, align);
if (!pcpu[i])
@@ -393,14 +387,11 @@ static struct test_driver {
static void shuffle_array(int *arr, int n)
{
- unsigned int rnd;
int i, j;
for (i = n - 1; i > 0; i--) {
- rnd = prandom_u32();
-
/* Cut the range. */
- j = rnd % i;
+ j = prandom_u32_max(i);
/* Swap indexes. */
swap(arr[i], arr[j]);
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 7413dd300516..1505a52f23a0 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -12,8 +12,9 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n
unsigned long res = n;
might_fault();
if (!should_fail_usercopy() && likely(access_ok(from, n))) {
- instrument_copy_from_user(to, from, n);
+ instrument_copy_from_user_before(to, from, n);
res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
}
if (unlikely(res))
memset(to + (n - res), 0, res);
diff --git a/lib/uuid.c b/lib/uuid.c
index 562d53977cab..e309b4c5be3d 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(generate_random_guid);
static void __uuid_gen_common(__u8 b[16])
{
- prandom_bytes(b, 16);
+ get_random_bytes(b, 16);
/* reversion 0b10 */
b[8] = (b[8] & 0x3F) | 0x80;
}
diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile
index c415a685d61b..e814061d6aa0 100644
--- a/lib/vdso/Makefile
+++ b/lib/vdso/Makefile
@@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set)
endif
quiet_cmd_vdso_check = VDSOCHK $@
- cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \
+ cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \
then (echo >&2 "$@: dynamic relocations are not supported"; \
rm -f $@; /bin/false); fi
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 24f37bab8bc1..5b0611c00956 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -866,7 +866,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
* kptr_restrict==1 cannot be used in IRQ context
* because its test for CAP_SYSLOG would be meaningless.
*/
- if (in_irq() || in_serving_softirq() || in_nmi()) {
+ if (in_hardirq() || in_serving_softirq() || in_nmi()) {
if (spec.field_width == -1)
spec.field_width = 2 * sizeof(ptr);
return error_string(buf, end, "pK-error", spec);
diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
index fc45339fc3a3..440bd0007ae2 100644
--- a/lib/zstd/Makefile
+++ b/lib/zstd/Makefile
@@ -10,14 +10,10 @@
# ################################################################
obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
+obj-$(CONFIG_ZSTD_COMMON) += zstd_common.o
zstd_compress-y := \
zstd_compress_module.o \
- common/debug.o \
- common/entropy_common.o \
- common/error_private.o \
- common/fse_decompress.o \
- common/zstd_common.o \
compress/fse_compress.o \
compress/hist.o \
compress/huf_compress.o \
@@ -33,12 +29,14 @@ zstd_compress-y := \
zstd_decompress-y := \
zstd_decompress_module.o \
+ decompress/huf_decompress.o \
+ decompress/zstd_ddict.o \
+ decompress/zstd_decompress.o \
+ decompress/zstd_decompress_block.o \
+
+zstd_common-y := \
common/debug.o \
common/entropy_common.o \
common/error_private.o \
common/fse_decompress.o \
common/zstd_common.o \
- decompress/huf_decompress.o \
- decompress/zstd_ddict.o \
- decompress/zstd_decompress.o \
- decompress/zstd_decompress_block.o \
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
index 53b47a2b52ff..a311808c0d56 100644
--- a/lib/zstd/common/entropy_common.c
+++ b/lib/zstd/common/entropy_common.c
@@ -15,6 +15,7 @@
/* *************************************
* Dependencies
***************************************/
+#include <linux/module.h>
#include "mem.h"
#include "error_private.h" /* ERR_*, ERROR */
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
@@ -239,7 +240,7 @@ size_t FSE_readNCount(
{
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}
-
+EXPORT_SYMBOL_GPL(FSE_readNCount);
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
@@ -255,6 +256,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
}
+EXPORT_SYMBOL_GPL(HUF_readStats);
FORCE_INLINE_TEMPLATE size_t
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
@@ -355,3 +357,4 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
(void)bmi2;
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
+EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
index 3d7e35b309b5..0f1f63be25d9 100644
--- a/lib/zstd/common/zstd_common.c
+++ b/lib/zstd/common/zstd_common.c
@@ -13,6 +13,7 @@
/*-*************************************
* Dependencies
***************************************/
+#include <linux/module.h>
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h"
@@ -35,14 +36,17 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
* tells if a return value is an error code
* symbol is required for external callers */
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+EXPORT_SYMBOL_GPL(ZSTD_isError);
/*! ZSTD_getErrorName() :
* provides error code string from function result (useful for debugging) */
const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
/*! ZSTD_getError() :
* convert a `size_t` function result into a proper ZSTD_errorCode enum */
ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
/*! ZSTD_getErrorString() :
* provides error code string from enum */
@@ -59,6 +63,7 @@ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
return customMem.customAlloc(customMem.opaque, size);
return ZSTD_malloc(size);
}
+EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
@@ -71,6 +76,7 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
}
return ZSTD_calloc(1, size);
}
+EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
@@ -81,3 +87,7 @@ void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
ZSTD_free(ptr);
}
}
+EXPORT_SYMBOL_GPL(ZSTD_customFree);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Common");