diff options
Diffstat (limited to 'lib')
105 files changed, 15101 insertions, 2994 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index dc1ab2ed1dc6..ce2abffb9ed8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -24,6 +24,7 @@ config LINEAR_RANGES config PACKING bool "Generic bitfield packing and unpacking" + select BITREVERSE default n help This option provides the packing() helper function, which permits @@ -127,9 +128,6 @@ config TRACE_MMIO_ACCESS source "lib/crypto/Kconfig" -config LIB_MEMNEQ - bool - config CRC_CCITT tristate "CRC-CCITT functions" help @@ -343,12 +341,16 @@ config LZ4HC_COMPRESS config LZ4_DECOMPRESS tristate -config ZSTD_COMPRESS +config ZSTD_COMMON select XXHASH tristate +config ZSTD_COMPRESS + select ZSTD_COMMON + tristate + config ZSTD_DECOMPRESS - select XXHASH + select ZSTD_COMMON tristate source "lib/xz/Kconfig" @@ -478,6 +480,10 @@ config INTERVAL_TREE for more information. +config INTERVAL_TREE_SPAN_ITER + bool + depends on INTERVAL_TREE + config XARRAY_MULTI bool help @@ -527,6 +533,15 @@ config CPUMASK_OFFSTACK them on the stack. This is a bit more expensive, but avoids stack overflow. +config FORCE_NR_CPUS + bool "Set number of CPUs at compile time" + depends on SMP && EXPERT && !COMPILE_TEST + help + Say Yes if you have NR_CPUS set to an actual number of possible + CPUs in your system, not to a default value. This forces the core + code to rely on compile-time value and optimize kernel routines + better. + config CPU_RMAP bool depends on SMP @@ -662,6 +677,9 @@ config ARCH_HAS_PMEM_API config MEMREGION bool +config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION + bool + config ARCH_HAS_MEMREMAP_COMPAT_ALIGN bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 072e4b289c13..6831b798152d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -231,6 +231,11 @@ config DEBUG_INFO in the "Debug information" choice below, indicating that debug information will be generated for build targets. +# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which +# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215 +config AS_HAS_NON_CONST_LEB128 + def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:) + choice prompt "Debug information" depends on DEBUG_KERNEL @@ -253,6 +258,7 @@ config DEBUG_INFO_NONE config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT bool "Rely on the toolchain's implicit default DWARF version" select DEBUG_INFO + depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) help The implicit default version of DWARF debug info produced by a toolchain changes over time. @@ -264,8 +270,10 @@ config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT config DEBUG_INFO_DWARF4 bool "Generate DWARF Version 4 debuginfo" select DEBUG_INFO + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502) help - Generate DWARF v4 debug info. This requires gcc 4.5+ and gdb 7.0+. + Generate DWARF v4 debug info. This requires gcc 4.5+, binutils 2.35.2 + if using clang without clang's integrated assembler, and gdb 7.0+. If you have consumers of DWARF debug info that are not ready for newer revisions of DWARF, you may wish to choose this or have your @@ -274,7 +282,7 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" select DEBUG_INFO - depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc 5.0+ accepts the -gdwarf-5 flag but only had partial support for some @@ -387,13 +395,15 @@ endif # DEBUG_INFO config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 + default 0 if KMSAN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help - Tell gcc to warn at build time for stack frames larger than this. + Tell the compiler to warn at build time for stack frames larger than this. Setting this too low will cause a lot of warnings. Setting it to 0 disables the warning. @@ -803,6 +813,9 @@ config ARCH_HAS_DEBUG_VM_PGTABLE An architecture should select this when it can successfully build and run DEBUG_VM_PGTABLE. +config DEBUG_VM_IRQSOFF + def_bool DEBUG_VM && !PREEMPT_RT + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -812,13 +825,12 @@ config DEBUG_VM If unsure, say N. -config DEBUG_VM_VMACACHE - bool "Debug VMA caching" +config DEBUG_VM_MAPLE_TREE + bool "Debug VM maple trees" depends on DEBUG_VM + select DEBUG_MAPLE_TREE help - Enable this to turn on VMA caching debug information. Doing so - can cause significant overhead, so only enable it in non-production - environments. + Enable VM maple tree debugging information and extra validations. If unsure, say N. @@ -971,6 +983,7 @@ config DEBUG_STACKOVERFLOW source "lib/Kconfig.kasan" source "lib/Kconfig.kfence" +source "lib/Kconfig.kmsan" endmenu # "Memory Debugging" @@ -1635,6 +1648,14 @@ config BUG_ON_DATA_CORRUPTION If unsure, say N. +config DEBUG_MAPLE_TREE + bool "Debug maple trees" + depends on DEBUG_KERNEL + help + Enable maple tree debugging information and extra validations. + + If unsure, say N. + endmenu config DEBUG_CREDENTIALS @@ -1696,6 +1717,16 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. +config DEBUG_CGROUP_REF + bool "Disable inlining of cgroup css reference count functions" + depends on DEBUG_KERNEL + depends on CGROUPS + depends on KPROBES + default n + help + Force cgroup css reference count functions to not be inlined so + that they can be kprobed for debugging. + source "kernel/trace/Kconfig" config PROVIDE_OHCI1394_DMA_INIT @@ -1854,8 +1885,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of theses functions. This is useful to test error paths of code. + + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" @@ -2029,13 +2066,16 @@ config LKDTM Documentation on how to use the module can be found in Documentation/fault-injection/provoke-crashes.rst -config TEST_CPUMASK - tristate "cpumask tests" if !KUNIT_ALL_TESTS +config CPUMASK_KUNIT_TEST + tristate "KUnit test for cpumask" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS help Enable to turn on cpumask tests, running at boot or module load time. + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + If unsure, say N. config TEST_LIST_SORT @@ -2084,6 +2124,7 @@ config KPROBES_SANITY_TEST depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on @@ -2193,9 +2234,6 @@ config STRING_SELFTEST config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" -config TEST_STRSCPY - tristate "Test strscpy*() family of functions at runtime" - config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" @@ -2218,6 +2256,11 @@ config TEST_UUID config TEST_XARRAY tristate "Test the XArray code at runtime" +config TEST_MAPLE_TREE + depends on DEBUG_KERNEL + select DEBUG_MAPLE_TREE + tristate "Test the Maple Tree code at runtime" + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" help @@ -2225,15 +2268,6 @@ config TEST_RHASHTABLE If unsure, say N. -config TEST_SIPHASH - tristate "Perform selftest on siphash functions" - help - Enable this option to test the kernel's siphash (<linux/siphash.h>) hash - functions on boot (or module load). - - This is intended to help people writing architecture-specific - optimized versions. If unsure, say N. - config TEST_IDA tristate "Perform selftest on IDA functions" @@ -2506,6 +2540,18 @@ config MEMCPY_KUNIT_TEST If unsure, say N. +config IS_SIGNED_TYPE_KUNIT_TEST + tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Builds unit tests for the is_signed_type() macro. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config OVERFLOW_KUNIT_TEST tristate "Test check_*_overflow() functions at runtime" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2530,6 +2576,41 @@ config STACKINIT_KUNIT_TEST CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF, or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL. +config FORTIFY_KUNIT_TEST + tristate "Test fortified str*() and mem*() function internals at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT && FORTIFY_SOURCE + default KUNIT_ALL_TESTS + help + Builds unit tests for checking internals of FORTIFY_SOURCE as used + by the str*() and mem*() family of functions. For testing runtime + traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests. + +config HW_BREAKPOINT_KUNIT_TEST + bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS + depends on HAVE_HW_BREAKPOINT + depends on KUNIT=y + default KUNIT_ALL_TESTS + help + Tests for hw_breakpoint constraints accounting. + + If unsure, say N. + +config STRSCPY_KUNIT_TEST + tristate "Test strscpy*() family of functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + +config SIPHASH_KUNIT_TEST + tristate "Perform selftest on siphash functions" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the kernel's siphash (<linux/siphash.h>) hash + functions on boot (or module load). + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N. + config TEST_UDELAY tristate "udelay test driver" help @@ -2546,6 +2627,16 @@ config TEST_STATIC_KEYS If unsure, say N. +config TEST_DYNAMIC_DEBUG + tristate "Test DYNAMIC_DEBUG" + depends on DYNAMIC_DEBUG + help + This module registers a tracer callback to count enabled + pr_debugs in a 'do_debugging' function, then alters their + enablements, calls the function, and compares counts. + + If unsure, say N. + config TEST_KMOD tristate "kmod stress tester" depends on m @@ -2705,6 +2796,56 @@ config HYPERV_TESTING endmenu # "Kernel Testing and Coverage" +menu "Rust hacking" + +config RUST_DEBUG_ASSERTIONS + bool "Debug assertions" + depends on RUST + help + Enables rustc's `-Cdebug-assertions` codegen option. + + This flag lets you turn `cfg(debug_assertions)` conditional + compilation on or off. This can be used to enable extra debugging + code in development but not in production. For example, it controls + the behavior of the standard library's `debug_assert!` macro. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say N. + +config RUST_OVERFLOW_CHECKS + bool "Overflow checks" + default y + depends on RUST + help + Enables rustc's `-Coverflow-checks` codegen option. + + This flag allows you to control the behavior of runtime integer + overflow. When overflow-checks are enabled, a Rust panic will occur + on overflow. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say Y. + +config RUST_BUILD_ASSERT_ALLOW + bool "Allow unoptimized build-time assertions" + depends on RUST + help + Controls how are `build_error!` and `build_assert!` handled during build. + + If calls to them exist in the binary, it may indicate a violated invariant + or that the optimizer failed to verify the invariant during compilation. + + This should not happen, thus by default the build is aborted. However, + as an escape hatch, you can choose Y here to ignore them during build + and let the check be carried at runtime (with `panic!` being called if + the check fails). + + If unsure, say N. + +endmenu # "Rust" + source "Documentation/Kconfig" endmenu # Kernel hacking diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index f0973da583e0..be6ee6020290 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -37,7 +37,7 @@ menuconfig KASAN (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \ CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ HAVE_ARCH_KASAN_HW_TAGS - depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) + depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB) select STACKDEPOT_ALWAYS_INIT help Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety @@ -167,14 +167,6 @@ config KASAN_STACK as well, as it adds inline-style instrumentation that is run unconditionally. -config KASAN_TAGS_IDENTIFY - bool "Memory corruption type identification" - depends on KASAN_SW_TAGS || KASAN_HW_TAGS - help - Enables best-effort identification of the bug types (use-after-free - or out-of-bounds) at the cost of increased memory consumption. - Only applicable for the tag-based KASAN modes. - config KASAN_VMALLOC bool "Check accesses to vmalloc allocations" depends on HAVE_ARCH_KASAN_VMALLOC @@ -189,7 +181,7 @@ config KASAN_VMALLOC config KASAN_KUNIT_TEST tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS - depends on KASAN && KUNIT + depends on KASAN && KUNIT && TRACEPOINTS default KUNIT_ALL_TESTS help A KUnit-based KASAN test suite. Triggers different kinds of diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 47a693c45864..375575a5a0e3 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -125,7 +125,7 @@ config KCSAN_SKIP_WATCH default 4000 help The number of per-CPU memory operations to skip, before another - watchpoint is set up, i.e. one in KCSAN_WATCH_SKIP per-CPU + watchpoint is set up, i.e. one in KCSAN_SKIP_WATCH per-CPU memory operations are used to set up a watchpoint. A smaller value results in more aggressive race detection, whereas a larger value improves system performance at the cost of missing some races. @@ -135,8 +135,8 @@ config KCSAN_SKIP_WATCH_RANDOMIZE default y help If instruction skip count should be randomized, where the maximum is - KCSAN_WATCH_SKIP. If false, the chosen value is always - KCSAN_WATCH_SKIP. + KCSAN_SKIP_WATCH. If false, the chosen value is always + KCSAN_SKIP_WATCH. config KCSAN_INTERRUPT_WATCHER bool "Interruptible watchers" if !KCSAN_STRICT diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 05dae05b6cc9..3b9a44008433 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -121,7 +121,7 @@ config KDB_DEFAULT_ENABLE config KDB_KEYBOARD bool "KGDB_KDB: keyboard as input device" - depends on VT && KGDB_KDB + depends on VT && KGDB_KDB && !PARISC default n help KDB can use a PS/2 type keyboard for an input device diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan new file mode 100644 index 000000000000..ef2c8f256c57 --- /dev/null +++ b/lib/Kconfig.kmsan @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only +config HAVE_ARCH_KMSAN + bool + +config HAVE_KMSAN_COMPILER + # Clang versions <14.0.0 also support -fsanitize=kernel-memory, but not + # all the features necessary to build the kernel with KMSAN. + depends on CC_IS_CLANG && CLANG_VERSION >= 140000 + def_bool $(cc-option,-fsanitize=kernel-memory -mllvm -msan-disable-checks=1) + +config KMSAN + bool "KMSAN: detector of uninitialized values use" + depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER + depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN + depends on !PREEMPT_RT + select STACKDEPOT + select STACKDEPOT_ALWAYS_INIT + help + KernelMemorySanitizer (KMSAN) is a dynamic detector of uses of + uninitialized values in the kernel. It is based on compiler + instrumentation provided by Clang and thus requires Clang to build. + + An important note is that KMSAN is not intended for production use, + because it drastically increases kernel memory footprint and slows + the whole system down. + + See <file:Documentation/dev-tools/kmsan.rst> for more details. + +if KMSAN + +config HAVE_KMSAN_PARAM_RETVAL + # -fsanitize-memory-param-retval is supported only by Clang >= 14. + depends on HAVE_KMSAN_COMPILER + def_bool $(cc-option,-fsanitize=kernel-memory -fsanitize-memory-param-retval) + +config KMSAN_CHECK_PARAM_RETVAL + bool "Check for uninitialized values passed to and returned from functions" + default y + depends on HAVE_KMSAN_PARAM_RETVAL + help + If the compiler supports -fsanitize-memory-param-retval, KMSAN will + eagerly check every function parameter passed by value and every + function return value. + + Disabling KMSAN_CHECK_PARAM_RETVAL will result in tracking shadow for + function parameters and return values across function borders. This + is a more relaxed mode, but it generates more instrumentation code and + may potentially report errors in corner cases when non-instrumented + functions call instrumented ones. + +config KMSAN_KUNIT_TEST + tristate "KMSAN integration test suite" if !KUNIT_ALL_TESTS + default KUNIT_ALL_TESTS + depends on TRACEPOINTS && KUNIT + help + Test suite for KMSAN, testing various error detection scenarios, + and checking that reports are correctly output to console. + + Say Y here if you want the test to be built into the kernel and run + during boot; say M if you want the test to build as a module; say N + if you are unsure. + +endif diff --git a/lib/Makefile b/lib/Makefile index c95212141928..4d9461bfea42 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,14 +29,15 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ - idr.o extable.o irq_regs.o argv_split.o \ + maple_tree.o idr.o extable.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o cpumask.o + buildid.o lib-$(CONFIG_PRINTK) += dump_stack.o +lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -59,15 +60,10 @@ obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror +obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o -obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o obj-$(CONFIG_TEST_IDA) += test_ida.o -obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o -CFLAGS_test_kasan.o += -fno-builtin -CFLAGS_test_kasan.o += $(call cc-disable-warning, vla) -obj-$(CONFIG_KASAN_MODULE_TEST) += test_kasan_module.o -CFLAGS_test_kasan_module.o += -fno-builtin obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla) UBSAN_SANITIZE_test_ubsan.o := y @@ -81,12 +77,13 @@ obj-$(CONFIG_TEST_SORT) += test_sort.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o +obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_SCANF) += test_scanf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o -obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o +obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o @@ -99,7 +96,6 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o -obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o # @@ -253,7 +249,6 @@ obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o -lib-$(CONFIG_LIB_MEMNEQ) += memneq.o obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o @@ -274,6 +269,9 @@ obj-$(CONFIG_POLYNOMIAL) += polynomial.o CFLAGS_stackdepot.o += -fno-builtin obj-$(CONFIG_STACKDEPOT) += stackdepot.o KASAN_SANITIZE_stackdepot.o := n +# In particular, instrumenting stackdepot.c with KMSAN will result in infinite +# recursion. +KMSAN_SANITIZE_stackdepot.o := n KCOV_INSTRUMENT_stackdepot.o := n obj-$(CONFIG_REF_TRACKER) += ref_tracker.o @@ -376,9 +374,16 @@ obj-$(CONFIG_BITS_TEST) += test_bits.o obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o +obj-$(CONFIG_IS_SIGNED_TYPE_KUNIT_TEST) += is_signed_type_kunit.o +CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-of-range-compare) obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o +CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced) +CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) +obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o +obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o +obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/bitmap.c b/lib/bitmap.c index 488e6c3e5acc..1c81413c51f8 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -333,20 +333,32 @@ bool __bitmap_subset(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_subset); +#define BITMAP_WEIGHT(FETCH, bits) \ +({ \ + unsigned int __bits = (bits), idx, w = 0; \ + \ + for (idx = 0; idx < __bits / BITS_PER_LONG; idx++) \ + w += hweight_long(FETCH); \ + \ + if (__bits % BITS_PER_LONG) \ + w += hweight_long((FETCH) & BITMAP_LAST_WORD_MASK(__bits)); \ + \ + w; \ +}) + unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) { - unsigned int k, lim = bits/BITS_PER_LONG, w = 0; - - for (k = 0; k < lim; k++) - w += hweight_long(bitmap[k]); - - if (bits % BITS_PER_LONG) - w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits)); - - return w; + return BITMAP_WEIGHT(bitmap[idx], bits); } EXPORT_SYMBOL(__bitmap_weight); +unsigned int __bitmap_weight_and(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + return BITMAP_WEIGHT(bitmap1[idx] & bitmap2[idx], bits); +} +EXPORT_SYMBOL(__bitmap_weight_and); + void __bitmap_set(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); @@ -953,37 +965,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigne if (pos >= nbits || !test_bit(pos, buf)) return -1; - return __bitmap_weight(buf, pos); -} - -/** - * bitmap_ord_to_pos - find position of n-th set bit in bitmap - * @buf: pointer to bitmap - * @ord: ordinal bit position (n-th set bit, n >= 0) - * @nbits: number of valid bit positions in @buf - * - * Map the ordinal offset of bit @ord in @buf to its position in @buf. - * Value of @ord should be in range 0 <= @ord < weight(buf). If @ord - * >= weight(buf), returns @nbits. - * - * If for example, just bits 4 through 7 are set in @buf, then @ord - * values 0 through 3 will get mapped to 4 through 7, respectively, - * and all other @ord values returns @nbits. When @ord value 3 - * gets mapped to (returns) @pos value 7 in this example, that means - * that the 3rd set bit (starting with 0th) is at position 7 in @buf. - * - * The bit positions 0 through @nbits-1 are valid positions in @buf. - */ -unsigned int bitmap_ord_to_pos(const unsigned long *buf, unsigned int ord, unsigned int nbits) -{ - unsigned int pos; - - for (pos = find_first_bit(buf, nbits); - pos < nbits && ord; - pos = find_next_bit(buf, nbits, pos + 1)) - ord--; - - return pos; + return bitmap_weight(buf, pos); } /** @@ -1035,7 +1017,7 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src, if (n < 0 || w == 0) set_bit(oldbit, dst); /* identity map */ else - set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst); + set_bit(find_nth_bit(new, nbits, n % w), dst); } } EXPORT_SYMBOL(bitmap_remap); @@ -1074,7 +1056,7 @@ int bitmap_bitremap(int oldbit, const unsigned long *old, if (n < 0 || w == 0) return oldbit; else - return bitmap_ord_to_pos(new, n % w, bits); + return find_nth_bit(new, bits, n % w); } EXPORT_SYMBOL(bitmap_bitremap); @@ -1198,7 +1180,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, * The following code is a more efficient, but less * obvious, equivalent to the loop: * for (m = 0; m < bitmap_weight(relmap, bits); m++) { - * n = bitmap_ord_to_pos(orig, m, bits); + * n = find_nth_bit(orig, bits, m); * if (test_bit(m, orig)) * set_bit(n, dst); * } diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c index 8be59f84eaea..bfd53972a4d8 100644 --- a/lib/bust_spinlocks.c +++ b/lib/bust_spinlocks.c @@ -22,9 +22,6 @@ void bust_spinlocks(int yes) if (yes) { ++oops_in_progress; } else { -#ifdef CONFIG_VT - unblank_screen(); -#endif console_unblank(); if (--oops_in_progress == 0) wake_up_klogd(); diff --git a/lib/cmdline.c b/lib/cmdline.c index 5546bf588780..90ed997d9570 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -260,7 +260,7 @@ char *next_arg(char *args, char **param, char **val) args[i-1] = '\0'; } } - if (quoted && args[i-1] == '"') + if (quoted && i > 0 && args[i-1] == '"') args[i-1] = '\0'; if (args[i]) { diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c index a72a2c16066e..d4572dbc9145 100644 --- a/lib/cmdline_kunit.c +++ b/lib/cmdline_kunit.c @@ -76,7 +76,7 @@ static void cmdline_test_lead_int(struct kunit *test) int rc = cmdline_test_values[i]; int offset; - sprintf(in, "%u%s", get_random_int() % 256, str); + sprintf(in, "%u%s", get_random_u8(), str); /* Only first '-' after the number will advance the pointer */ offset = strlen(in) - strlen(str) + !!(rc == 2); cmdline_do_one_test(test, in, rc, offset); @@ -94,7 +94,7 @@ static void cmdline_test_tail_int(struct kunit *test) int rc = strcmp(str, "") ? (strcmp(str, "-") ? 0 : 1) : 1; int offset; - sprintf(in, "%s%u", str, get_random_int() % 256); + sprintf(in, "%s%u", str, get_random_u8()); /* * Only first and leading '-' not followed by integer * will advance the pointer. diff --git a/lib/cpumask.c b/lib/cpumask.c index 8baeb37e23d3..c7c392514fd3 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -109,7 +109,6 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) } #endif -#if NR_CPUS > 1 /** * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number @@ -129,23 +128,21 @@ unsigned int cpumask_local_spread(unsigned int i, int node) i %= num_online_cpus(); if (node == NUMA_NO_NODE) { - for_each_cpu(cpu, cpu_online_mask) - if (i-- == 0) - return cpu; + cpu = cpumask_nth(i, cpu_online_mask); + if (cpu < nr_cpu_ids) + return cpu; } else { /* NUMA first. */ - for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) - if (i-- == 0) - return cpu; - - for_each_cpu(cpu, cpu_online_mask) { - /* Skip NUMA nodes, done above. */ - if (cpumask_test_cpu(cpu, cpumask_of_node(node))) - continue; - - if (i-- == 0) - return cpu; - } + cpu = cpumask_nth_and(i, cpu_online_mask, cpumask_of_node(node)); + if (cpu < nr_cpu_ids) + return cpu; + + i -= cpumask_weight_and(cpu_online_mask, cpumask_of_node(node)); + + /* Skip NUMA nodes, done above. */ + cpu = cpumask_nth_andnot(i, cpu_online_mask, cpumask_of_node(node)); + if (cpu < nr_cpu_ids) + return cpu; } BUG(); } @@ -169,10 +166,8 @@ unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, /* NOTE: our first selection will skip 0. */ prev = __this_cpu_read(distribute_cpu_mask_prev); - next = cpumask_next_and(prev, src1p, src2p); - if (next >= nr_cpu_ids) - next = cpumask_first_and(src1p, src2p); - + next = find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits, prev + 1); if (next < nr_cpu_ids) __this_cpu_write(distribute_cpu_mask_prev, next); @@ -186,15 +181,10 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp) /* NOTE: our first selection will skip 0. */ prev = __this_cpu_read(distribute_cpu_mask_prev); - - next = cpumask_next(prev, srcp); - if (next >= nr_cpu_ids) - next = cpumask_first(srcp); - + next = find_next_bit_wrap(cpumask_bits(srcp), nr_cpumask_bits, prev + 1); if (next < nr_cpu_ids) __this_cpu_write(distribute_cpu_mask_prev, next); return next; } EXPORT_SYMBOL(cpumask_any_distribute); -#endif /* NR_CPUS */ diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c new file mode 100644 index 000000000000..d1fc6ece21f3 --- /dev/null +++ b/lib/cpumask_kunit.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for cpumask. + * + * Author: Sander Vanheule <sander@svanheule.net> + */ + +#include <kunit/test.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> + +#define MASK_MSG(m) \ + "%s contains %sCPUs %*pbl", #m, (cpumask_weight(m) ? "" : "no "), \ + nr_cpumask_bits, cpumask_bits(m) + +#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu(cpu, m) \ + iter++; \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu_not(cpu, m) \ + iter++; \ + KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask)); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_OP_EQ(test, op, mask1, mask2) \ + do { \ + const cpumask_t *m1 = (mask1); \ + const cpumask_t *m2 = (mask2); \ + int weight; \ + int cpu, iter = 0; \ + cpumask_##op(&mask_tmp, m1, m2); \ + weight = cpumask_weight(&mask_tmp); \ + for_each_cpu_##op(cpu, mask1, mask2) \ + iter++; \ + KUNIT_EXPECT_EQ((test), weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ + iter++; \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ + do { \ + int mask_weight = num_##name##_cpus(); \ + int cpu, iter = 0; \ + for_each_##name##_cpu(cpu) \ + iter++; \ + KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(cpu_##name##_mask)); \ + } while (0) + +static cpumask_t mask_empty; +static cpumask_t mask_all; +static cpumask_t mask_tmp; + +static void test_cpumask_weight(struct kunit *test) +{ + KUNIT_EXPECT_TRUE_MSG(test, cpumask_empty(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_TRUE_MSG(test, cpumask_full(&mask_all), MASK_MSG(&mask_all)); + + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_weight(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpumask_bits, cpumask_weight(&mask_all), MASK_MSG(&mask_all)); +} + +static void test_cpumask_first(struct kunit *test) +{ + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_first(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_first(cpu_possible_mask), MASK_MSG(cpu_possible_mask)); + + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_first_zero(&mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); +} + +static void test_cpumask_last(struct kunit *test) +{ + KUNIT_EXPECT_LE_MSG(test, nr_cpumask_bits, cpumask_last(&mask_empty), + MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids - 1, cpumask_last(cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); +} + +static void test_cpumask_next(struct kunit *test) +{ + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_next_zero(-1, &mask_empty), MASK_MSG(&mask_empty)); + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); + + KUNIT_EXPECT_LE_MSG(test, nr_cpu_ids, cpumask_next(-1, &mask_empty), + MASK_MSG(&mask_empty)); + KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_next(-1, cpu_possible_mask), + MASK_MSG(cpu_possible_mask)); +} + +static void test_cpumask_iterators(struct kunit *test) +{ + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_OP_EQ(test, and, &mask_empty, &mask_empty); + EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, &mask_empty); + EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, &mask_empty, &mask_empty); + + EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, cpu_possible_mask, &mask_empty); +} + +static void test_cpumask_iterators_builtin(struct kunit *test) +{ + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible); + + /* Ensure the dynamic masks are stable while running the tests */ + cpu_hotplug_disable(); + + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online); + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present); + + cpu_hotplug_enable(); +} + +static int test_cpumask_init(struct kunit *test) +{ + cpumask_clear(&mask_empty); + cpumask_setall(&mask_all); + + return 0; +} + +static struct kunit_case test_cpumask_cases[] = { + KUNIT_CASE(test_cpumask_weight), + KUNIT_CASE(test_cpumask_first), + KUNIT_CASE(test_cpumask_last), + KUNIT_CASE(test_cpumask_next), + KUNIT_CASE(test_cpumask_iterators), + KUNIT_CASE(test_cpumask_iterators_builtin), + {} +}; + +static struct kunit_suite test_cpumask_suite = { + .name = "cpumask", + .init = test_cpumask_init, + .test_cases = test_cpumask_cases, +}; +kunit_test_suite(test_cpumask_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9ff549f63540..45436bfc6dff 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -2,12 +2,24 @@ menu "Crypto library routines" +config CRYPTO_LIB_UTILS + tristate + config CRYPTO_LIB_AES tristate +config CRYPTO_LIB_AESGCM + tristate + select CRYPTO_LIB_AES + select CRYPTO_LIB_GF128MUL + select CRYPTO_LIB_UTILS + config CRYPTO_LIB_ARC4 tristate +config CRYPTO_LIB_GF128MUL + tristate + config CRYPTO_ARCH_HAVE_LIB_BLAKE2S bool help @@ -33,7 +45,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select XOR_BLOCKS + select CRYPTO_LIB_UTILS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a @@ -43,7 +55,6 @@ config CRYPTO_LIB_CHACHA_GENERIC config CRYPTO_LIB_CHACHA tristate "ChaCha library interface" - depends on CRYPTO depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -71,7 +82,7 @@ config CRYPTO_LIB_CURVE25519 tristate "Curve25519 scalar multiplication library" depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n - select LIB_MEMNEQ + select CRYPTO_LIB_UTILS help Enable the Curve25519 library interface. This interface may be fulfilled by either the generic implementation or an arch-specific diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 919cbb2c220d..6ec2d4543d9c 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o +libcryptoutils-y := memneq.o utils.o + # chacha is used by the /dev/random driver which is always builtin obj-y += chacha.o obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o @@ -7,9 +10,14 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o +obj-$(CONFIG_CRYPTO_LIB_AESGCM) += libaesgcm.o +libaesgcm-y := aesgcm.o + obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o +obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o + # blake2s is used by the /dev/random driver which is always builtin obj-y += libblake2s.o libblake2s-y := blake2s.o diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c new file mode 100644 index 000000000000..c632d6e17af8 --- /dev/null +++ b/lib/crypto/aesgcm.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Minimal library implementation of GCM + * + * Copyright 2022 Google LLC + */ + +#include <linux/module.h> + +#include <crypto/algapi.h> +#include <crypto/gcm.h> +#include <crypto/ghash.h> + +#include <asm/irqflags.h> + +static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst, + const void *src) +{ + unsigned long flags; + + /* + * In AES-GCM, both the GHASH key derivation and the CTR mode + * encryption operate on known plaintext, making them susceptible to + * timing attacks on the encryption key. The AES library already + * mitigates this risk to some extent by pulling the entire S-box into + * the caches before doing any substitutions, but this strategy is more + * effective when running with interrupts disabled. + */ + local_irq_save(flags); + aes_encrypt(ctx, dst, src); + local_irq_restore(flags); +} + +/** + * aesgcm_expandkey - Expands the AES and GHASH keys for the AES-GCM key + * schedule + * + * @ctx: The data structure that will hold the AES-GCM key schedule + * @key: The AES encryption input key + * @keysize: The length in bytes of the input key + * @authsize: The size in bytes of the GCM authentication tag + * + * Returns: 0 on success, or -EINVAL if @keysize or @authsize contain values + * that are not permitted by the GCM specification. + */ +int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key, + unsigned int keysize, unsigned int authsize) +{ + u8 kin[AES_BLOCK_SIZE] = {}; + int ret; + + ret = crypto_gcm_check_authsize(authsize) ?: + aes_expandkey(&ctx->aes_ctx, key, keysize); + if (ret) + return ret; + + ctx->authsize = authsize; + aesgcm_encrypt_block(&ctx->aes_ctx, &ctx->ghash_key, kin); + + return 0; +} +EXPORT_SYMBOL(aesgcm_expandkey); + +static void aesgcm_ghash(be128 *ghash, const be128 *key, const void *src, + int len) +{ + while (len > 0) { + crypto_xor((u8 *)ghash, src, min(len, GHASH_BLOCK_SIZE)); + gf128mul_lle(ghash, key); + + src += GHASH_BLOCK_SIZE; + len -= GHASH_BLOCK_SIZE; + } +} + +static void aesgcm_mac(const struct aesgcm_ctx *ctx, const u8 *src, int src_len, + const u8 *assoc, int assoc_len, __be32 *ctr, u8 *authtag) +{ + be128 tail = { cpu_to_be64(assoc_len * 8), cpu_to_be64(src_len * 8) }; + u8 buf[AES_BLOCK_SIZE]; + be128 ghash = {}; + + aesgcm_ghash(&ghash, &ctx->ghash_key, assoc, assoc_len); + aesgcm_ghash(&ghash, &ctx->ghash_key, src, src_len); + aesgcm_ghash(&ghash, &ctx->ghash_key, &tail, sizeof(tail)); + + ctr[3] = cpu_to_be32(1); + aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr); + crypto_xor_cpy(authtag, buf, (u8 *)&ghash, ctx->authsize); + + memzero_explicit(&ghash, sizeof(ghash)); + memzero_explicit(buf, sizeof(buf)); +} + +static void aesgcm_crypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src, + int len, __be32 *ctr) +{ + u8 buf[AES_BLOCK_SIZE]; + unsigned int n = 2; + + while (len > 0) { + /* + * The counter increment below must not result in overflow or + * carry into the next 32-bit word, as this could result in + * inadvertent IV reuse, which must be avoided at all cost for + * stream ciphers such as AES-CTR. Given the range of 'int + * len', this cannot happen, so no explicit test is necessary. + */ + ctr[3] = cpu_to_be32(n++); + aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr); + crypto_xor_cpy(dst, src, buf, min(len, AES_BLOCK_SIZE)); + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } + memzero_explicit(buf, sizeof(buf)); +} + +/** + * aesgcm_encrypt - Perform AES-GCM encryption on a block of data + * + * @ctx: The AES-GCM key schedule + * @dst: Pointer to the ciphertext output buffer + * @src: Pointer the plaintext (may equal @dst for encryption in place) + * @crypt_len: The size in bytes of the plaintext and ciphertext. + * @assoc: Pointer to the associated data, + * @assoc_len: The size in bytes of the associated data + * @iv: The initialization vector (IV) to use for this block of data + * (must be 12 bytes in size as per the GCM spec recommendation) + * @authtag: The address of the buffer in memory where the authentication + * tag should be stored. The buffer is assumed to have space for + * @ctx->authsize bytes. + */ +void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src, + int crypt_len, const u8 *assoc, int assoc_len, + const u8 iv[GCM_AES_IV_SIZE], u8 *authtag) +{ + __be32 ctr[4]; + + memcpy(ctr, iv, GCM_AES_IV_SIZE); + + aesgcm_crypt(ctx, dst, src, crypt_len, ctr); + aesgcm_mac(ctx, dst, crypt_len, assoc, assoc_len, ctr, authtag); +} +EXPORT_SYMBOL(aesgcm_encrypt); + +/** + * aesgcm_decrypt - Perform AES-GCM decryption on a block of data + * + * @ctx: The AES-GCM key schedule + * @dst: Pointer to the plaintext output buffer + * @src: Pointer the ciphertext (may equal @dst for decryption in place) + * @crypt_len: The size in bytes of the plaintext and ciphertext. + * @assoc: Pointer to the associated data, + * @assoc_len: The size in bytes of the associated data + * @iv: The initialization vector (IV) to use for this block of data + * (must be 12 bytes in size as per the GCM spec recommendation) + * @authtag: The address of the buffer in memory where the authentication + * tag is stored. + * + * Returns: true on success, or false if the ciphertext failed authentication. + * On failure, no plaintext will be returned. + */ +bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst, + const u8 *src, int crypt_len, const u8 *assoc, + int assoc_len, const u8 iv[GCM_AES_IV_SIZE], + const u8 *authtag) +{ + u8 tagbuf[AES_BLOCK_SIZE]; + __be32 ctr[4]; + + memcpy(ctr, iv, GCM_AES_IV_SIZE); + + aesgcm_mac(ctx, src, crypt_len, assoc, assoc_len, ctr, tagbuf); + if (crypto_memneq(authtag, tagbuf, ctx->authsize)) { + memzero_explicit(tagbuf, sizeof(tagbuf)); + return false; + } + aesgcm_crypt(ctx, dst, src, crypt_len, ctr); + return true; +} +EXPORT_SYMBOL(aesgcm_decrypt); + +MODULE_DESCRIPTION("Generic AES-GCM library"); +MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>"); +MODULE_LICENSE("GPL"); + +#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS + +/* + * Test code below. Vectors taken from crypto/testmgr.h + */ + +static const u8 __initconst ctext0[16] = + "\x58\xe2\xfc\xce\xfa\x7e\x30\x61" + "\x36\x7f\x1d\x57\xa4\xe7\x45\x5a"; + +static const u8 __initconst ptext1[16]; + +static const u8 __initconst ctext1[32] = + "\x03\x88\xda\xce\x60\xb6\xa3\x92" + "\xf3\x28\xc2\xb9\x71\xb2\xfe\x78" + "\xab\x6e\x47\xd4\x2c\xec\x13\xbd" + "\xf5\x3a\x67\xb2\x12\x57\xbd\xdf"; + +static const u8 __initconst ptext2[64] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; + +static const u8 __initconst ctext2[80] = + "\x42\x83\x1e\xc2\x21\x77\x74\x24" + "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" + "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" + "\x35\xc1\x7e\x23\x29\xac\xa1\x2e" + "\x21\xd5\x14\xb2\x54\x66\x93\x1c" + "\x7d\x8f\x6a\x5a\xac\x84\xaa\x05" + "\x1b\xa3\x0b\x39\x6a\x0a\xac\x97" + "\x3d\x58\xe0\x91\x47\x3f\x59\x85" + "\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6" + "\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4"; + +static const u8 __initconst ptext3[60] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39"; + +static const u8 __initconst ctext3[76] = + "\x42\x83\x1e\xc2\x21\x77\x74\x24" + "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" + "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" + "\x35\xc1\x7e\x23\x29\xac\xa1\x2e" + "\x21\xd5\x14\xb2\x54\x66\x93\x1c" + "\x7d\x8f\x6a\x5a\xac\x84\xaa\x05" + "\x1b\xa3\x0b\x39\x6a\x0a\xac\x97" + "\x3d\x58\xe0\x91" + "\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb" + "\x94\xfa\xe9\x5a\xe7\x12\x1a\x47"; + +static const u8 __initconst ctext4[16] = + "\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b" + "\xa0\x0e\xd1\xf3\x12\x57\x24\x35"; + +static const u8 __initconst ctext5[32] = + "\x98\xe7\x24\x7c\x07\xf0\xfe\x41" + "\x1c\x26\x7e\x43\x84\xb0\xf6\x00" + "\x2f\xf5\x8d\x80\x03\x39\x27\xab" + "\x8e\xf4\xd4\x58\x75\x14\xf0\xfb"; + +static const u8 __initconst ptext6[64] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; + +static const u8 __initconst ctext6[80] = + "\x39\x80\xca\x0b\x3c\x00\xe8\x41" + "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" + "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" + "\x62\x85\x93\xb4\x0c\xa1\xe1\x9c" + "\x7d\x77\x3d\x00\xc1\x44\xc5\x25" + "\xac\x61\x9d\x18\xc8\x4a\x3f\x47" + "\x18\xe2\x44\x8b\x2f\xe3\x24\xd9" + "\xcc\xda\x27\x10\xac\xad\xe2\x56" + "\x99\x24\xa7\xc8\x58\x73\x36\xbf" + "\xb1\x18\x02\x4d\xb8\x67\x4a\x14"; + +static const u8 __initconst ctext7[16] = + "\x53\x0f\x8a\xfb\xc7\x45\x36\xb9" + "\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b"; + +static const u8 __initconst ctext8[32] = + "\xce\xa7\x40\x3d\x4d\x60\x6b\x6e" + "\x07\x4e\xc5\xd3\xba\xf3\x9d\x18" + "\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0" + "\x26\x5b\x98\xb5\xd4\x8a\xb9\x19"; + +static const u8 __initconst ptext9[64] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; + +static const u8 __initconst ctext9[80] = + "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" + "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" + "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" + "\x75\x98\xa2\xbd\x25\x55\xd1\xaa" + "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d" + "\xa7\xb0\x8b\x10\x56\x82\x88\x38" + "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a" + "\xbc\xc9\xf6\x62\x89\x80\x15\xad" + "\xb0\x94\xda\xc5\xd9\x34\x71\xbd" + "\xec\x1a\x50\x22\x70\xe3\xcc\x6c"; + +static const u8 __initconst ptext10[60] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39"; + +static const u8 __initconst ctext10[76] = + "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" + "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" + "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" + "\x75\x98\xa2\xbd\x25\x55\xd1\xaa" + "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d" + "\xa7\xb0\x8b\x10\x56\x82\x88\x38" + "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a" + "\xbc\xc9\xf6\x62" + "\x76\xfc\x6e\xce\x0f\x4e\x17\x68" + "\xcd\xdf\x88\x53\xbb\x2d\x55\x1b"; + +static const u8 __initconst ptext11[60] = + "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39"; + +static const u8 __initconst ctext11[76] = + "\x39\x80\xca\x0b\x3c\x00\xe8\x41" + "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" + "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" + "\x62\x85\x93\xb4\x0c\xa1\xe1\x9c" + "\x7d\x77\x3d\x00\xc1\x44\xc5\x25" + "\xac\x61\x9d\x18\xc8\x4a\x3f\x47" + "\x18\xe2\x44\x8b\x2f\xe3\x24\xd9" + "\xcc\xda\x27\x10" + "\x25\x19\x49\x8e\x80\xf1\x47\x8f" + "\x37\xba\x55\xbd\x6d\x27\x61\x8c"; + +static const u8 __initconst ptext12[719] = + "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" + "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" + "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" + "\x3f\xec\x1d\x25\x3c\x51\xd2\x03" + "\xcf\x59\x74\x1f\xb2\x85\xb4\x07" + "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb" + "\xaf\x08\x44\xbd\x6f\x91\x15\xe1" + "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50" + "\x59\xa9\x97\xab\xbb\x0e\x74\x5c" + "\x00\xa4\x43\x54\x04\x54\x9b\x3b" + "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08" + "\xae\xe6\x10\x3f\x32\x65\xd1\xfc" + "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3" + "\x35\x23\xf4\x20\x41\xd4\xad\x82" + "\x8b\xa4\xad\x96\x1c\x20\x53\xbe" + "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72" + "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7" + "\xad\x49\x3a\xae\x98\xce\xa6\x66" + "\x10\x30\x90\x8c\x55\x83\xd7\x7c" + "\x8b\xe6\x53\xde\xd2\x6e\x18\x21" + "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73" + "\x57\xcc\x89\x09\x75\x9b\x78\x70" + "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5" + "\xfa\x70\x04\x70\xc6\x96\x1c\x7d" + "\x54\x41\x77\xa8\xe3\xb0\x7e\x96" + "\x82\xd9\xec\xa2\x87\x68\x55\xf9" + "\x8f\x9e\x73\x43\x47\x6a\x08\x36" + "\x93\x67\xa8\x2d\xde\xac\x41\xa9" + "\x5c\x4d\x73\x97\x0f\x70\x68\xfa" + "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9" + "\x78\x1f\x51\x07\xe3\x9a\x13\x4e" + "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7" + "\xab\x19\x37\xd9\xba\x76\x5e\xd2" + "\xf2\x53\x15\x17\x4c\x6b\x16\x9f" + "\x02\x66\x49\xca\x7c\x91\x05\xf2" + "\x45\x36\x1e\xf5\x77\xad\x1f\x46" + "\xa8\x13\xfb\x63\xb6\x08\x99\x63" + "\x82\xa2\xed\xb3\xac\xdf\x43\x19" + "\x45\xea\x78\x73\xd9\xb7\x39\x11" + "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81" + "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79" + "\xa4\x47\x7d\x80\x20\x26\xfd\x63" + "\x0a\xc7\x7e\x6d\x75\x47\xff\x76" + "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b" + "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1" + "\x54\x03\xa4\x09\x0c\x37\x7a\x15" + "\x23\x27\x5b\x8b\x4b\xa5\x64\x97" + "\xae\x4a\x50\x73\x1f\x66\x1c\x5c" + "\x03\x25\x3c\x8d\x48\x58\x71\x34" + "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5" + "\xb6\x19\x2b\x84\x2a\x20\xd1\xea" + "\x80\x6f\x96\x0e\x05\x62\xc7\x78" + "\x87\x79\x60\x38\x46\xb4\x25\x57" + "\x6e\x16\x63\xf8\xad\x6e\xd7\x42" + "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a" + "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22" + "\x86\x5c\x74\x3a\xeb\x24\x26\xc7" + "\x09\xfc\x91\x96\x47\x87\x4f\x1a" + "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24" + "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a" + "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5" + "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb" + "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe" + "\x0b\x63\xde\x87\x42\x79\x8a\x68" + "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f" + "\x9d\xd1\xc7\x45\x90\x08\xc9\x83" + "\xe9\x83\x84\xcb\x28\x69\x09\x69" + "\xce\x99\x46\x00\x54\xcb\xd8\x38" + "\xf9\x53\x4a\xbf\x31\xce\x57\x15" + "\x33\xfa\x96\x04\x33\x42\xe3\xc0" + "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6" + "\x19\x95\xd0\x0e\x82\x07\x63\xf9" + "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9" + "\xb5\x9f\x23\x28\x60\xe7\x20\x51" + "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2" + "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb" + "\x78\xc6\x91\x22\x40\x91\x80\xbe" + "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9" + "\x67\x10\xa4\x83\x98\x79\x23\xe7" + "\x92\xda\xa9\x22\x16\xb1\xe7\x78" + "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37" + "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9" + "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d" + "\x48\x11\x06\xbb\x2d\xf2\x63\x88" + "\x3f\x73\x09\xe2\x45\x56\x31\x51" + "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9" + "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66" + "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23" + "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" + "\xa4\x78\xdb\x74\x3d\x8b\xb5"; + +static const u8 __initconst ctext12[735] = + "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20" + "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0" + "\xa2\xb4\x37\x19\x11\x58\xb6\x0b" + "\x4c\x1d\x38\x05\x54\xd1\x16\x73" + "\x8e\x1c\x20\x90\xa2\x9a\xb7\x74" + "\x47\xe6\xd8\xfc\x18\x3a\xb4\xea" + "\xd5\x16\x5a\x2c\x53\x01\x46\xb3" + "\x18\x33\x74\x6c\x50\xf2\xe8\xc0" + "\x73\xda\x60\x22\xeb\xe3\xe5\x9b" + "\x20\x93\x6c\x4b\x37\x99\xb8\x23" + "\x3b\x4e\xac\xe8\x5b\xe8\x0f\xb7" + "\xc3\x8f\xfb\x4a\x37\xd9\x39\x95" + "\x34\xf1\xdb\x8f\x71\xd9\xc7\x0b" + "\x02\xf1\x63\xfc\x9b\xfc\xc5\xab" + "\xb9\x14\x13\x21\xdf\xce\xaa\x88" + "\x44\x30\x1e\xce\x26\x01\x92\xf8" + "\x9f\x00\x4b\x0c\x4b\xf7\x5f\xe0" + "\x89\xca\x94\x66\x11\x21\x97\xca" + "\x3e\x83\x74\x2d\xdb\x4d\x11\xeb" + "\x97\xc2\x14\xff\x9e\x1e\xa0\x6b" + "\x08\xb4\x31\x2b\x85\xc6\x85\x6c" + "\x90\xec\x39\xc0\xec\xb3\xb5\x4e" + "\xf3\x9c\xe7\x83\x3a\x77\x0a\xf4" + "\x56\xfe\xce\x18\x33\x6d\x0b\x2d" + "\x33\xda\xc8\x05\x5c\xb4\x09\x2a" + "\xde\x6b\x52\x98\x01\xef\x36\x3d" + "\xbd\xf9\x8f\xa8\x3e\xaa\xcd\xd1" + "\x01\x2d\x42\x49\xc3\xb6\x84\xbb" + "\x48\x96\xe0\x90\x93\x6c\x48\x64" + "\xd4\xfa\x7f\x93\x2c\xa6\x21\xc8" + "\x7a\x23\x7b\xaa\x20\x56\x12\xae" + "\x16\x9d\x94\x0f\x54\xa1\xec\xca" + "\x51\x4e\xf2\x39\xf4\xf8\x5f\x04" + "\x5a\x0d\xbf\xf5\x83\xa1\x15\xe1" + "\xf5\x3c\xd8\x62\xa3\xed\x47\x89" + "\x85\x4c\xe5\xdb\xac\x9e\x17\x1d" + "\x0c\x09\xe3\x3e\x39\x5b\x4d\x74" + "\x0e\xf5\x34\xee\x70\x11\x4c\xfd" + "\xdb\x34\xb1\xb5\x10\x3f\x73\xb7" + "\xf5\xfa\xed\xb0\x1f\xa5\xcd\x3c" + "\x8d\x35\x83\xd4\x11\x44\x6e\x6c" + "\x5b\xe0\x0e\x69\xa5\x39\xe5\xbb" + "\xa9\x57\x24\x37\xe6\x1f\xdd\xcf" + "\x16\x2a\x13\xf9\x6a\x2d\x90\xa0" + "\x03\x60\x7a\xed\x69\xd5\x00\x8b" + "\x7e\x4f\xcb\xb9\xfa\x91\xb9\x37" + "\xc1\x26\xce\x90\x97\x22\x64\x64" + "\xc1\x72\x43\x1b\xf6\xac\xc1\x54" + "\x8a\x10\x9c\xdd\x8d\xd5\x8e\xb2" + "\xe4\x85\xda\xe0\x20\x5f\xf4\xb4" + "\x15\xb5\xa0\x8d\x12\x74\x49\x23" + "\x3a\xdf\x4a\xd3\xf0\x3b\x89\xeb" + "\xf8\xcc\x62\x7b\xfb\x93\x07\x41" + "\x61\x26\x94\x58\x70\xa6\x3c\xe4" + "\xff\x58\xc4\x13\x3d\xcb\x36\x6b" + "\x32\xe5\xb2\x6d\x03\x74\x6f\x76" + "\x93\x77\xde\x48\xc4\xfa\x30\x4a" + "\xda\x49\x80\x77\x0f\x1c\xbe\x11" + "\xc8\x48\xb1\xe5\xbb\xf2\x8a\xe1" + "\x96\x2f\x9f\xd1\x8e\x8a\x5c\xe2" + "\xf7\xd7\xd8\x54\xf3\x3f\xc4\x91" + "\xb8\xfb\x86\xdc\x46\x24\x91\x60" + "\x6c\x2f\xc9\x41\x37\x51\x49\x54" + "\x09\x81\x21\xf3\x03\x9f\x2b\xe3" + "\x1f\x39\x63\xaf\xf4\xd7\x53\x60" + "\xa7\xc7\x54\xf9\xee\xb1\xb1\x7d" + "\x75\x54\x65\x93\xfe\xb1\x68\x6b" + "\x57\x02\xf9\xbb\x0e\xf9\xf8\xbf" + "\x01\x12\x27\xb4\xfe\xe4\x79\x7a" + "\x40\x5b\x51\x4b\xdf\x38\xec\xb1" + "\x6a\x56\xff\x35\x4d\x42\x33\xaa" + "\x6f\x1b\xe4\xdc\xe0\xdb\x85\x35" + "\x62\x10\xd4\xec\xeb\xc5\x7e\x45" + "\x1c\x6f\x17\xca\x3b\x8e\x2d\x66" + "\x4f\x4b\x36\x56\xcd\x1b\x59\xaa" + "\xd2\x9b\x17\xb9\x58\xdf\x7b\x64" + "\x8a\xff\x3b\x9c\xa6\xb5\x48\x9e" + "\xaa\xe2\x5d\x09\x71\x32\x5f\xb6" + "\x29\xbe\xe7\xc7\x52\x7e\x91\x82" + "\x6b\x6d\x33\xe1\x34\x06\x36\x21" + "\x5e\xbe\x1e\x2f\x3e\xc1\xfb\xea" + "\x49\x2c\xb5\xca\xf7\xb0\x37\xea" + "\x1f\xed\x10\x04\xd9\x48\x0d\x1a" + "\x1c\xfb\xe7\x84\x0e\x83\x53\x74" + "\xc7\x65\xe2\x5c\xe5\xba\x73\x4c" + "\x0e\xe1\xb5\x11\x45\x61\x43\x46" + "\xaa\x25\x8f\xbd\x85\x08\xfa\x4c" + "\x15\xc1\xc0\xd8\xf5\xdc\x16\xbb" + "\x7b\x1d\xe3\x87\x57\xa7\x2a\x1d" + "\x38\x58\x9e\x8a\x43\xdc\x57" + "\xd1\x81\x7d\x2b\xe9\xff\x99\x3a" + "\x4b\x24\x52\x58\x55\xe1\x49\x14"; + +static struct { + const u8 *ptext; + const u8 *ctext; + + u8 key[AES_MAX_KEY_SIZE]; + u8 iv[GCM_AES_IV_SIZE]; + u8 assoc[20]; + + int klen; + int clen; + int plen; + int alen; +} const aesgcm_tv[] __initconst = { + { /* From McGrew & Viega - http://citeseer.ist.psu.edu/656989.html */ + .klen = 16, + .ctext = ctext0, + .clen = sizeof(ctext0), + }, { + .klen = 16, + .ptext = ptext1, + .plen = sizeof(ptext1), + .ctext = ctext1, + .clen = sizeof(ctext1), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext2, + .plen = sizeof(ptext2), + .ctext = ctext2, + .clen = sizeof(ctext2), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext3, + .plen = sizeof(ptext3), + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = ctext3, + .clen = sizeof(ctext3), + }, { + .klen = 24, + .ctext = ctext4, + .clen = sizeof(ctext4), + }, { + .klen = 24, + .ptext = ptext1, + .plen = sizeof(ptext1), + .ctext = ctext5, + .clen = sizeof(ctext5), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c", + .klen = 24, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext6, + .plen = sizeof(ptext6), + .ctext = ctext6, + .clen = sizeof(ctext6), + }, { + .klen = 32, + .ctext = ctext7, + .clen = sizeof(ctext7), + }, { + .klen = 32, + .ptext = ptext1, + .plen = sizeof(ptext1), + .ctext = ctext8, + .clen = sizeof(ctext8), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 32, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext9, + .plen = sizeof(ptext9), + .ctext = ctext9, + .clen = sizeof(ctext9), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 32, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext10, + .plen = sizeof(ptext10), + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = ctext10, + .clen = sizeof(ctext10), + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c", + .klen = 24, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = ptext11, + .plen = sizeof(ptext11), + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = ctext11, + .clen = sizeof(ctext11), + }, { + .key = "\x62\x35\xf8\x95\xfc\xa5\xeb\xf6" + "\x0e\x92\x12\x04\xd3\xa1\x3f\x2e" + "\x8b\x32\xcf\xe7\x44\xed\x13\x59" + "\x04\x38\x77\xb0\xb9\xad\xb4\x38", + .klen = 32, + .iv = "\x00\xff\xff\xff\xff\x00\x00\xff" + "\xff\xff\x00\xff", + .ptext = ptext12, + .plen = sizeof(ptext12), + .ctext = ctext12, + .clen = sizeof(ctext12), + } +}; + +static int __init libaesgcm_init(void) +{ + for (int i = 0; i < ARRAY_SIZE(aesgcm_tv); i++) { + u8 tagbuf[AES_BLOCK_SIZE]; + int plen = aesgcm_tv[i].plen; + struct aesgcm_ctx ctx; + u8 buf[sizeof(ptext12)]; + + if (aesgcm_expandkey(&ctx, aesgcm_tv[i].key, aesgcm_tv[i].klen, + aesgcm_tv[i].clen - plen)) { + pr_err("aesgcm_expandkey() failed on vector %d\n", i); + return -ENODEV; + } + + if (!aesgcm_decrypt(&ctx, buf, aesgcm_tv[i].ctext, plen, + aesgcm_tv[i].assoc, aesgcm_tv[i].alen, + aesgcm_tv[i].iv, aesgcm_tv[i].ctext + plen) + || memcmp(buf, aesgcm_tv[i].ptext, plen)) { + pr_err("aesgcm_decrypt() #1 failed on vector %d\n", i); + return -ENODEV; + } + + /* encrypt in place */ + aesgcm_encrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc, + aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf); + if (memcmp(buf, aesgcm_tv[i].ctext, plen)) { + pr_err("aesgcm_encrypt() failed on vector %d\n", i); + return -ENODEV; + } + + /* decrypt in place */ + if (!aesgcm_decrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc, + aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf) + || memcmp(buf, aesgcm_tv[i].ptext, plen)) { + pr_err("aesgcm_decrypt() #2 failed on vector %d\n", i); + return -ENODEV; + } + } + return 0; +} +module_init(libaesgcm_init); + +static void __exit libaesgcm_exit(void) +{ +} +module_exit(libaesgcm_exit); +#endif diff --git a/lib/crypto/gf128mul.c b/lib/crypto/gf128mul.c new file mode 100644 index 000000000000..8f8c45e0cdcf --- /dev/null +++ b/lib/crypto/gf128mul.c @@ -0,0 +1,436 @@ +/* gf128mul.c - GF(2^128) multiplication functions + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org> + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 31/01/2006 + + This file provides fast multiplication in GF(2^128) as required by several + cryptographic authentication modes +*/ + +#include <crypto/gf128mul.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> + +#define gf128mul_dat(q) { \ + q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\ + q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\ + q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\ + q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\ + q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\ + q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\ + q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\ + q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\ + q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\ + q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\ + q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\ + q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\ + q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\ + q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\ + q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\ + q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\ + q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\ + q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\ + q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\ + q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\ + q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\ + q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\ + q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\ + q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\ + q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\ + q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\ + q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\ + q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\ + q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\ + q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\ + q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\ + q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ +} + +/* + * Given a value i in 0..255 as the byte overflow when a field element + * in GF(2^128) is multiplied by x^8, the following macro returns the + * 16-bit value that must be XOR-ed into the low-degree end of the + * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1. + * + * There are two versions of the macro, and hence two tables: one for + * the "be" convention where the highest-order bit is the coefficient of + * the highest-degree polynomial term, and one for the "le" convention + * where the highest-order bit is the coefficient of the lowest-degree + * polynomial term. In both cases the values are stored in CPU byte + * endianness such that the coefficients are ordered consistently across + * bytes, i.e. in the "be" table bits 15..0 of the stored value + * correspond to the coefficients of x^15..x^0, and in the "le" table + * bits 15..0 correspond to the coefficients of x^0..x^15. + * + * Therefore, provided that the appropriate byte endianness conversions + * are done by the multiplication functions (and these must be in place + * anyway to support both little endian and big endian CPUs), the "be" + * table can be used for multiplications of both "bbe" and "ble" + * elements, and the "le" table can be used for multiplications of both + * "lle" and "lbe" elements. + */ + +#define xda_be(i) ( \ + (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \ + (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \ + (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \ + (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \ +) + +#define xda_le(i) ( \ + (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \ + (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \ + (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \ + (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \ +) + +static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le); +static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be); + +/* + * The following functions multiply a field element by x^8 in + * the polynomial field representation. They use 64-bit word operations + * to gain speed but compensate for machine endianness and hence work + * correctly on both styles of machine. + */ + +static void gf128mul_x8_lle(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_le[b & 0xff]; + + x->b = cpu_to_be64((b >> 8) | (a << 56)); + x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); +} + +/* time invariant version of gf128mul_x8_lle */ +static void gf128mul_x8_lle_ti(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = xda_le(b & 0xff); /* avoid table lookup */ + + x->b = cpu_to_be64((b >> 8) | (a << 56)); + x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); +} + +static void gf128mul_x8_bbe(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_be[a >> 56]; + + x->a = cpu_to_be64((a << 8) | (b >> 56)); + x->b = cpu_to_be64((b << 8) ^ _tt); +} + +void gf128mul_x8_ble(le128 *r, const le128 *x) +{ + u64 a = le64_to_cpu(x->a); + u64 b = le64_to_cpu(x->b); + u64 _tt = gf128mul_table_be[a >> 56]; + + r->a = cpu_to_le64((a << 8) | (b >> 56)); + r->b = cpu_to_le64((b << 8) ^ _tt); +} +EXPORT_SYMBOL(gf128mul_x8_ble); + +void gf128mul_lle(be128 *r, const be128 *b) +{ + /* + * The p array should be aligned to twice the size of its element type, + * so that every even/odd pair is guaranteed to share a cacheline + * (assuming a cacheline size of 32 bytes or more, which is by far the + * most common). This ensures that each be128_xor() call in the loop + * takes the same amount of time regardless of the value of 'ch', which + * is derived from function parameter 'b', which is commonly used as a + * key, e.g., for GHASH. The odd array elements are all set to zero, + * making each be128_xor() a NOP if its associated bit in 'ch' is not + * set, and this is equivalent to calling be128_xor() conditionally. + * This approach aims to avoid leaking information about such keys + * through execution time variances. + * + * Unfortunately, __aligned(16) or higher does not work on x86 for + * variables on the stack so we need to perform the alignment by hand. + */ + be128 array[16 + 3] = {}; + be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128)); + int i; + + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]); + + memset(r, 0, sizeof(*r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[15 - i]; + + be128_xor(r, r, &p[ 0 + !(ch & 0x80)]); + be128_xor(r, r, &p[ 2 + !(ch & 0x40)]); + be128_xor(r, r, &p[ 4 + !(ch & 0x20)]); + be128_xor(r, r, &p[ 6 + !(ch & 0x10)]); + be128_xor(r, r, &p[ 8 + !(ch & 0x08)]); + be128_xor(r, r, &p[10 + !(ch & 0x04)]); + be128_xor(r, r, &p[12 + !(ch & 0x02)]); + be128_xor(r, r, &p[14 + !(ch & 0x01)]); + + if (++i >= 16) + break; + + gf128mul_x8_lle_ti(r); /* use the time invariant version */ + } +} +EXPORT_SYMBOL(gf128mul_lle); + +void gf128mul_bbe(be128 *r, const be128 *b) +{ + be128 p[8]; + int i; + + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_bbe(&p[i + 1], &p[i]); + + memset(r, 0, sizeof(*r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[i]; + + if (ch & 0x80) + be128_xor(r, r, &p[7]); + if (ch & 0x40) + be128_xor(r, r, &p[6]); + if (ch & 0x20) + be128_xor(r, r, &p[5]); + if (ch & 0x10) + be128_xor(r, r, &p[4]); + if (ch & 0x08) + be128_xor(r, r, &p[3]); + if (ch & 0x04) + be128_xor(r, r, &p[2]); + if (ch & 0x02) + be128_xor(r, r, &p[1]); + if (ch & 0x01) + be128_xor(r, r, &p[0]); + + if (++i >= 16) + break; + + gf128mul_x8_bbe(r); + } +} +EXPORT_SYMBOL(gf128mul_bbe); + +/* This version uses 64k bytes of table space. + A 16 byte buffer has to be multiplied by a 16 byte key + value in GF(2^128). If we consider a GF(2^128) value in + the buffer's lowest byte, we can construct a table of + the 256 16 byte values that result from the 256 values + of this byte. This requires 4096 bytes. But we also + need tables for each of the 16 higher bytes in the + buffer as well, which makes 64 kbytes in total. +*/ +/* additional explanation + * t[0][BYTE] contains g*BYTE + * t[1][BYTE] contains g*x^8*BYTE + * .. + * t[15][BYTE] contains g*x^120*BYTE */ +struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g) +{ + struct gf128mul_64k *t; + int i, j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + for (i = 0; i < 16; i++) { + t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); + if (!t->t[i]) { + gf128mul_free_64k(t); + t = NULL; + goto out; + } + } + + t->t[0]->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]); + + for (i = 0;;) { + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[i]->t[j + k], + &t->t[i]->t[j], &t->t[i]->t[k]); + + if (++i >= 16) + break; + + for (j = 128; j > 0; j >>= 1) { + t->t[i]->t[j] = t->t[i - 1]->t[j]; + gf128mul_x8_bbe(&t->t[i]->t[j]); + } + } + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_64k_bbe); + +void gf128mul_free_64k(struct gf128mul_64k *t) +{ + int i; + + for (i = 0; i < 16; i++) + kfree_sensitive(t->t[i]); + kfree_sensitive(t); +} +EXPORT_SYMBOL(gf128mul_free_64k); + +void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i; + + *r = t->t[0]->t[ap[15]]; + for (i = 1; i < 16; ++i) + be128_xor(r, r, &t->t[i]->t[ap[15 - i]]); + *a = *r; +} +EXPORT_SYMBOL(gf128mul_64k_bbe); + +/* This version uses 4k bytes of table space. + A 16 byte buffer has to be multiplied by a 16 byte key + value in GF(2^128). If we consider a GF(2^128) value in a + single byte, we can construct a table of the 256 16 byte + values that result from the 256 values of this byte. + This requires 4096 bytes. If we take the highest byte in + the buffer and use this table to get the result, we then + have to multiply by x^120 to get the final value. For the + next highest byte the result has to be multiplied by x^112 + and so on. But we can do this by accumulating the result + in an accumulator starting with the result for the top + byte. We repeatedly multiply the accumulator value by + x^8 and then add in (i.e. xor) the 16 bytes of the next + lower byte in the buffer, stopping when we reach the + lowest byte. This requires a 4096 byte table. +*/ +struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[128] = *g; + for (j = 64; j > 0; j >>= 1) + gf128mul_x_lle(&t->t[j], &t->t[j+j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_lle); + +struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_bbe(&t->t[j + j], &t->t[j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_bbe); + +void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 15; + + *r = t->t[ap[15]]; + while (i--) { + gf128mul_x8_lle(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_lle); + +void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 0; + + *r = t->t[ap[0]]; + while (++i < 16) { + gf128mul_x8_bbe(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_bbe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); diff --git a/lib/memneq.c b/lib/crypto/memneq.c index fb11608b1ec1..243d8677cc51 100644 --- a/lib/memneq.c +++ b/lib/crypto/memneq.c @@ -59,10 +59,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <crypto/algapi.h> #include <asm/unaligned.h> - -#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ +#include <crypto/algapi.h> +#include <linux/module.h> /* Generic path for arbitrary size */ static inline unsigned long @@ -172,5 +171,3 @@ noinline unsigned long __crypto_memneq(const void *a, const void *b, } } EXPORT_SYMBOL(__crypto_memneq); - -#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */ diff --git a/lib/crypto/utils.c b/lib/crypto/utils.c new file mode 100644 index 000000000000..53230ab1b195 --- /dev/null +++ b/lib/crypto/utils.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Crypto library utility functions + * + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <linux/module.h> + +/* + * XOR @len bytes from @src1 and @src2 together, writing the result to @dst + * (which may alias one of the sources). Don't call this directly; call + * crypto_xor() or crypto_xor_cpy() instead. + */ +void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len) +{ + int relalign = 0; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + int size = sizeof(unsigned long); + int d = (((unsigned long)dst ^ (unsigned long)src1) | + ((unsigned long)dst ^ (unsigned long)src2)) & + (size - 1); + + relalign = d ? 1 << __ffs(d) : size; + + /* + * If we care about alignment, process as many bytes as + * needed to advance dst and src to values whose alignments + * equal their relative alignment. This will allow us to + * process the remainder of the input using optimal strides. + */ + while (((unsigned long)dst & (relalign - 1)) && len > 0) { + *dst++ = *src1++ ^ *src2++; + len--; + } + } + + while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) { + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + u64 l = get_unaligned((u64 *)src1) ^ + get_unaligned((u64 *)src2); + put_unaligned(l, (u64 *)dst); + } else { + *(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2; + } + dst += 8; + src1 += 8; + src2 += 8; + len -= 8; + } + + while (len >= 4 && !(relalign & 3)) { + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + u32 l = get_unaligned((u32 *)src1) ^ + get_unaligned((u32 *)src2); + put_unaligned(l, (u32 *)dst); + } else { + *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2; + } + dst += 4; + src1 += 4; + src2 += 4; + len -= 4; + } + + while (len >= 2 && !(relalign & 1)) { + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + u16 l = get_unaligned((u16 *)src1) ^ + get_unaligned((u16 *)src2); + put_unaligned(l, (u16 *)dst); + } else { + *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2; + } + dst += 2; + src1 += 2; + src2 += 2; + len -= 2; + } + + while (len--) + *dst++ = *src1++ ^ *src2++; +} +EXPORT_SYMBOL_GPL(__crypto_xor); + +MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 337d797a7141..df86e649d8be 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -437,6 +437,7 @@ static int object_cpu_offline(unsigned int cpu) struct debug_percpu_free *percpu_pool; struct hlist_node *tmp; struct debug_obj *obj; + unsigned long flags; /* Remote access is safe as the CPU is dead already */ percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); @@ -444,6 +445,12 @@ static int object_cpu_offline(unsigned int cpu) hlist_del(&obj->node); kmem_cache_free(obj_cache, obj); } + + raw_spin_lock_irqsave(&pool_lock, flags); + obj_pool_used -= percpu_pool->obj_free; + debug_objects_freed += percpu_pool->obj_free; + raw_spin_unlock_irqrestore(&pool_lock, flags); + percpu_pool->obj_free = 0; return 0; @@ -500,9 +507,9 @@ static void debug_print_object(struct debug_obj *obj, char *msg) descr->debug_hint(obj->object) : NULL; limit++; WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " - "object type: %s hint: %pS\n", + "object: %p object type: %s hint: %pS\n", msg, obj_states[obj->state], obj->astate, - descr->name, hint); + obj->object, descr->name, hint); } debug_objects_warnings++; } @@ -1318,6 +1325,8 @@ static int __init debug_objects_replace_static_objects(void) hlist_add_head(&obj->node, &objects); } + debug_objects_allocated += i; + /* * debug_objects_mem_init() is now called early that only one CPU is up * and interrupts have been disabled, so it is safe to replace the @@ -1386,6 +1395,7 @@ void __init debug_objects_mem_init(void) debug_objects_enabled = 0; kmem_cache_destroy(obj_cache); pr_warn("out of memory.\n"); + return; } else debug_objects_selftest(); diff --git a/lib/devres.c b/lib/devres.c index 55eb07e80cbb..6baf43902ead 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -104,21 +104,6 @@ void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, EXPORT_SYMBOL(devm_ioremap_wc); /** - * devm_ioremap_np - Managed ioremap_np() - * @dev: Generic device to remap IO address for - * @offset: Resource address to map - * @size: Size of map - * - * Managed ioremap_np(). Map is automatically unmapped on driver detach. - */ -void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset, - resource_size_t size) -{ - return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NP); -} -EXPORT_SYMBOL(devm_ioremap_np); - -/** * devm_iounmap - Managed iounmap() * @dev: Generic device to unmap for * @addr: Address to unmap diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dd7f56af9aed..009f2ead09c1 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -41,9 +41,11 @@ extern struct _ddebug __start___dyndbg[]; extern struct _ddebug __stop___dyndbg[]; +extern struct ddebug_class_map __start___dyndbg_classes[]; +extern struct ddebug_class_map __stop___dyndbg_classes[]; struct ddebug_table { - struct list_head link; + struct list_head link, maps; const char *mod_name; unsigned int num_ddebugs; struct _ddebug *ddebugs; @@ -54,12 +56,13 @@ struct ddebug_query { const char *module; const char *function; const char *format; + const char *class_string; unsigned int first_lineno, last_lineno; }; struct ddebug_iter { struct ddebug_table *table; - unsigned int idx; + int idx; }; struct flag_settings { @@ -134,15 +137,33 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg) fmtlen--; } - v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n", - msg, - query->function ?: "", - query->filename ?: "", - query->module ?: "", - fmtlen, query->format ?: "", - query->first_lineno, query->last_lineno); + v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s\n", + msg, + query->function ?: "", + query->filename ?: "", + query->module ?: "", + fmtlen, query->format ?: "", + query->first_lineno, query->last_lineno, query->class_string); } +static struct ddebug_class_map *ddebug_find_valid_class(struct ddebug_table const *dt, + const char *class_string, int *class_id) +{ + struct ddebug_class_map *map; + int idx; + + list_for_each_entry(map, &dt->maps, link) { + idx = match_string(map->class_names, map->length, class_string); + if (idx >= 0) { + *class_id = idx + map->base; + return map; + } + } + *class_id = -ENOENT; + return NULL; +} + +#define __outvar /* filled by callee */ /* * Search the tables for _ddebug's which match the given `query' and * apply the `flags' and `mask' to them. Returns number of matching @@ -156,7 +177,9 @@ static int ddebug_change(const struct ddebug_query *query, struct ddebug_table *dt; unsigned int newflags; unsigned int nfound = 0; - struct flagsbuf fbuf; + struct flagsbuf fbuf, nbuf; + struct ddebug_class_map *map = NULL; + int __outvar valid_class; /* search for matching ddebugs */ mutex_lock(&ddebug_lock); @@ -167,9 +190,22 @@ static int ddebug_change(const struct ddebug_query *query, !match_wildcard(query->module, dt->mod_name)) continue; + if (query->class_string) { + map = ddebug_find_valid_class(dt, query->class_string, &valid_class); + if (!map) + continue; + } else { + /* constrain query, do not touch class'd callsites */ + valid_class = _DPRINTK_CLASS_DFLT; + } + for (i = 0; i < dt->num_ddebugs; i++) { struct _ddebug *dp = &dt->ddebugs[i]; + /* match site against query-class */ + if (dp->class_id != valid_class) + continue; + /* match against the source filename */ if (query->filename && !match_wildcard(query->filename, dp->filename) && @@ -211,16 +247,18 @@ static int ddebug_change(const struct ddebug_query *query, continue; #ifdef CONFIG_JUMP_LABEL if (dp->flags & _DPRINTK_FLAGS_PRINT) { - if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT)) + if (!(newflags & _DPRINTK_FLAGS_PRINT)) static_branch_disable(&dp->key.dd_key_true); - } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT) + } else if (newflags & _DPRINTK_FLAGS_PRINT) { static_branch_enable(&dp->key.dd_key_true); + } #endif + v4pr_info("changed %s:%d [%s]%s %s => %s\n", + trim_prefix(dp->filename), dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp->flags, &fbuf), + ddebug_describe_flags(newflags, &nbuf)); dp->flags = newflags; - v4pr_info("changed %s:%d [%s]%s =%s\n", - trim_prefix(dp->filename), dp->lineno, - dt->mod_name, dp->function, - ddebug_describe_flags(dp->flags, &fbuf)); } } mutex_unlock(&ddebug_lock); @@ -383,10 +421,6 @@ static int ddebug_parse_query(char *words[], int nwords, return -EINVAL; } - if (modname) - /* support $modname.dyndbg=<multiple queries> */ - query->module = modname; - for (i = 0; i < nwords; i += 2) { char *keyword = words[i]; char *arg = words[i+1]; @@ -420,6 +454,8 @@ static int ddebug_parse_query(char *words[], int nwords, } else if (!strcmp(keyword, "line")) { if (parse_linerange(query, arg)) return -EINVAL; + } else if (!strcmp(keyword, "class")) { + rc = check_set(&query->class_string, arg, "class"); } else { pr_err("unknown keyword \"%s\"\n", keyword); return -EINVAL; @@ -427,6 +463,13 @@ static int ddebug_parse_query(char *words[], int nwords, if (rc) return rc; } + if (!query->module && modname) + /* + * support $modname.dyndbg=<multiple queries>, when + * not given in the query itself + */ + query->module = modname; + vpr_info_dq(query, "parsed"); return 0; } @@ -553,34 +596,217 @@ static int ddebug_exec_queries(char *query, const char *modname) return nfound; } +/* apply a new bitmap to the sys-knob's current bit-state */ +static int ddebug_apply_class_bitmap(const struct ddebug_class_param *dcp, + unsigned long *new_bits, unsigned long *old_bits) +{ +#define QUERY_SIZE 128 + char query[QUERY_SIZE]; + const struct ddebug_class_map *map = dcp->map; + int matches = 0; + int bi, ct; + + v2pr_info("apply: 0x%lx to: 0x%lx\n", *new_bits, *old_bits); + + for (bi = 0; bi < map->length; bi++) { + if (test_bit(bi, new_bits) == test_bit(bi, old_bits)) + continue; + + snprintf(query, QUERY_SIZE, "class %s %c%s", map->class_names[bi], + test_bit(bi, new_bits) ? '+' : '-', dcp->flags); + + ct = ddebug_exec_queries(query, NULL); + matches += ct; + + v2pr_info("bit_%d: %d matches on class: %s -> 0x%lx\n", bi, + ct, map->class_names[bi], *new_bits); + } + return matches; +} + +/* stub to later conditionally add "$module." prefix where not already done */ +#define KP_NAME(kp) kp->name + +#define CLASSMAP_BITMASK(width) ((1UL << (width)) - 1) + +/* accept comma-separated-list of [+-] classnames */ +static int param_set_dyndbg_classnames(const char *instr, const struct kernel_param *kp) +{ + const struct ddebug_class_param *dcp = kp->arg; + const struct ddebug_class_map *map = dcp->map; + unsigned long curr_bits, old_bits; + char *cl_str, *p, *tmp; + int cls_id, totct = 0; + bool wanted; + + cl_str = tmp = kstrdup(instr, GFP_KERNEL); + p = strchr(cl_str, '\n'); + if (p) + *p = '\0'; + + /* start with previously set state-bits, then modify */ + curr_bits = old_bits = *dcp->bits; + vpr_info("\"%s\" > %s:0x%lx\n", cl_str, KP_NAME(kp), curr_bits); + + for (; cl_str; cl_str = p) { + p = strchr(cl_str, ','); + if (p) + *p++ = '\0'; + + if (*cl_str == '-') { + wanted = false; + cl_str++; + } else { + wanted = true; + if (*cl_str == '+') + cl_str++; + } + cls_id = match_string(map->class_names, map->length, cl_str); + if (cls_id < 0) { + pr_err("%s unknown to %s\n", cl_str, KP_NAME(kp)); + continue; + } + + /* have one or more valid class_ids of one *_NAMES type */ + switch (map->map_type) { + case DD_CLASS_TYPE_DISJOINT_NAMES: + /* the +/- pertains to a single bit */ + if (test_bit(cls_id, &curr_bits) == wanted) { + v3pr_info("no change on %s\n", cl_str); + continue; + } + curr_bits ^= BIT(cls_id); + totct += ddebug_apply_class_bitmap(dcp, &curr_bits, dcp->bits); + *dcp->bits = curr_bits; + v2pr_info("%s: changed bit %d:%s\n", KP_NAME(kp), cls_id, + map->class_names[cls_id]); + break; + case DD_CLASS_TYPE_LEVEL_NAMES: + /* cls_id = N in 0..max. wanted +/- determines N or N-1 */ + old_bits = CLASSMAP_BITMASK(*dcp->lvl); + curr_bits = CLASSMAP_BITMASK(cls_id + (wanted ? 1 : 0 )); + + totct += ddebug_apply_class_bitmap(dcp, &curr_bits, &old_bits); + *dcp->lvl = (cls_id + (wanted ? 1 : 0)); + v2pr_info("%s: changed bit-%d: \"%s\" %lx->%lx\n", KP_NAME(kp), cls_id, + map->class_names[cls_id], old_bits, curr_bits); + break; + default: + pr_err("illegal map-type value %d\n", map->map_type); + } + } + kfree(tmp); + vpr_info("total matches: %d\n", totct); + return 0; +} + /** - * dynamic_debug_exec_queries - select and change dynamic-debug prints - * @query: query-string described in admin-guide/dynamic-debug-howto - * @modname: string containing module name, usually &module.mod_name + * param_set_dyndbg_classes - class FOO >control + * @instr: string echo>d to sysfs, input depends on map_type + * @kp: kp->arg has state: bits/lvl, map, map_type + * + * Enable/disable prdbgs by their class, as given in the arguments to + * DECLARE_DYNDBG_CLASSMAP. For LEVEL map-types, enforce relative + * levels by bitpos. * - * This uses the >/proc/dynamic_debug/control reader, allowing module - * authors to modify their dynamic-debug callsites. The modname is - * canonically struct module.mod_name, but can also be null or a - * module-wildcard, for example: "drm*". + * Returns: 0 or <0 if error. */ -int dynamic_debug_exec_queries(const char *query, const char *modname) +int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp) { - int rc; - char *qry; /* writable copy of query */ - - if (!query) { - pr_err("non-null query/command string expected\n"); + const struct ddebug_class_param *dcp = kp->arg; + const struct ddebug_class_map *map = dcp->map; + unsigned long inrep, new_bits, old_bits; + int rc, totct = 0; + + switch (map->map_type) { + + case DD_CLASS_TYPE_DISJOINT_NAMES: + case DD_CLASS_TYPE_LEVEL_NAMES: + /* handle [+-]classnames list separately, we are done here */ + return param_set_dyndbg_classnames(instr, kp); + + case DD_CLASS_TYPE_DISJOINT_BITS: + case DD_CLASS_TYPE_LEVEL_NUM: + /* numeric input, accept and fall-thru */ + rc = kstrtoul(instr, 0, &inrep); + if (rc) { + pr_err("expecting numeric input: %s > %s\n", instr, KP_NAME(kp)); + return -EINVAL; + } + break; + default: + pr_err("%s: bad map type: %d\n", KP_NAME(kp), map->map_type); return -EINVAL; } - qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); - if (!qry) - return -ENOMEM; - rc = ddebug_exec_queries(qry, modname); - kfree(qry); - return rc; + /* only _BITS,_NUM (numeric) map-types get here */ + switch (map->map_type) { + case DD_CLASS_TYPE_DISJOINT_BITS: + /* expect bits. mask and warn if too many */ + if (inrep & ~CLASSMAP_BITMASK(map->length)) { + pr_warn("%s: input: 0x%lx exceeds mask: 0x%lx, masking\n", + KP_NAME(kp), inrep, CLASSMAP_BITMASK(map->length)); + inrep &= CLASSMAP_BITMASK(map->length); + } + v2pr_info("bits:%lx > %s\n", inrep, KP_NAME(kp)); + totct += ddebug_apply_class_bitmap(dcp, &inrep, dcp->bits); + *dcp->bits = inrep; + break; + case DD_CLASS_TYPE_LEVEL_NUM: + /* input is bitpos, of highest verbosity to be enabled */ + if (inrep > map->length) { + pr_warn("%s: level:%ld exceeds max:%d, clamping\n", + KP_NAME(kp), inrep, map->length); + inrep = map->length; + } + old_bits = CLASSMAP_BITMASK(*dcp->lvl); + new_bits = CLASSMAP_BITMASK(inrep); + v2pr_info("lvl:%ld bits:0x%lx > %s\n", inrep, new_bits, KP_NAME(kp)); + totct += ddebug_apply_class_bitmap(dcp, &new_bits, &old_bits); + *dcp->lvl = inrep; + break; + default: + pr_warn("%s: bad map type: %d\n", KP_NAME(kp), map->map_type); + } + vpr_info("%s: total matches: %d\n", KP_NAME(kp), totct); + return 0; +} +EXPORT_SYMBOL(param_set_dyndbg_classes); + +/** + * param_get_dyndbg_classes - classes reader + * @buffer: string description of controlled bits -> classes + * @kp: kp->arg has state: bits, map + * + * Reads last written state, underlying prdbg state may have been + * altered by direct >control. Displays 0x for DISJOINT, 0-N for + * LEVEL Returns: #chars written or <0 on error + */ +int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp) +{ + const struct ddebug_class_param *dcp = kp->arg; + const struct ddebug_class_map *map = dcp->map; + + switch (map->map_type) { + + case DD_CLASS_TYPE_DISJOINT_NAMES: + case DD_CLASS_TYPE_DISJOINT_BITS: + return scnprintf(buffer, PAGE_SIZE, "0x%lx\n", *dcp->bits); + + case DD_CLASS_TYPE_LEVEL_NAMES: + case DD_CLASS_TYPE_LEVEL_NUM: + return scnprintf(buffer, PAGE_SIZE, "%d\n", *dcp->lvl); + default: + return -1; + } } -EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries); +EXPORT_SYMBOL(param_get_dyndbg_classes); + +const struct kernel_param_ops param_ops_dyndbg_classes = { + .set = param_set_dyndbg_classes, + .get = param_get_dyndbg_classes, +}; +EXPORT_SYMBOL(param_ops_dyndbg_classes); #define PREFIX_SIZE 64 @@ -803,13 +1029,12 @@ static struct _ddebug *ddebug_iter_first(struct ddebug_iter *iter) { if (list_empty(&ddebug_tables)) { iter->table = NULL; - iter->idx = 0; return NULL; } iter->table = list_entry(ddebug_tables.next, struct ddebug_table, link); - iter->idx = 0; - return &iter->table->ddebugs[iter->idx]; + iter->idx = iter->table->num_ddebugs; + return &iter->table->ddebugs[--iter->idx]; } /* @@ -822,15 +1047,16 @@ static struct _ddebug *ddebug_iter_next(struct ddebug_iter *iter) { if (iter->table == NULL) return NULL; - if (++iter->idx == iter->table->num_ddebugs) { + if (--iter->idx < 0) { /* iterate to next table */ - iter->idx = 0; if (list_is_last(&iter->table->link, &ddebug_tables)) { iter->table = NULL; return NULL; } iter->table = list_entry(iter->table->link.next, struct ddebug_table, link); + iter->idx = iter->table->num_ddebugs; + --iter->idx; } return &iter->table->ddebugs[iter->idx]; } @@ -876,6 +1102,20 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) return dp; } +#define class_in_range(class_id, map) \ + (class_id >= map->base && class_id < map->base + map->length) + +static const char *ddebug_class_name(struct ddebug_iter *iter, struct _ddebug *dp) +{ + struct ddebug_class_map *map; + + list_for_each_entry(map, &iter->table->maps, link) + if (class_in_range(dp->class_id, map)) + return map->class_names[dp->class_id - map->base]; + + return NULL; +} + /* * Seq_ops show method. Called several times within a read() * call from userspace, with ddebug_lock held. Formats the @@ -887,6 +1127,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) struct ddebug_iter *iter = m->private; struct _ddebug *dp = p; struct flagsbuf flags; + char const *class; if (p == SEQ_START_TOKEN) { seq_puts(m, @@ -898,8 +1139,17 @@ static int ddebug_proc_show(struct seq_file *m, void *p) trim_prefix(dp->filename), dp->lineno, iter->table->mod_name, dp->function, ddebug_describe_flags(dp->flags, &flags)); - seq_escape(m, dp->format, "\t\r\n\""); - seq_puts(m, "\"\n"); + seq_escape_str(m, dp->format, ESCAPE_SPACE, "\t\r\n\""); + seq_puts(m, "\""); + + if (dp->class_id != _DPRINTK_CLASS_DFLT) { + class = ddebug_class_name(iter, dp); + if (class) + seq_printf(m, " class:%s", class); + else + seq_printf(m, " class unknown, _id:%d", dp->class_id); + } + seq_puts(m, "\n"); return 0; } @@ -943,18 +1193,50 @@ static const struct proc_ops proc_fops = { .proc_write = ddebug_proc_write }; +static void ddebug_attach_module_classes(struct ddebug_table *dt, + struct ddebug_class_map *classes, + int num_classes) +{ + struct ddebug_class_map *cm; + int i, j, ct = 0; + + for (cm = classes, i = 0; i < num_classes; i++, cm++) { + + if (!strcmp(cm->mod_name, dt->mod_name)) { + + v2pr_info("class[%d]: module:%s base:%d len:%d ty:%d\n", i, + cm->mod_name, cm->base, cm->length, cm->map_type); + + for (j = 0; j < cm->length; j++) + v3pr_info(" %d: %d %s\n", j + cm->base, j, + cm->class_names[j]); + + list_add(&cm->link, &dt->maps); + ct++; + } + } + if (ct) + vpr_info("module:%s attached %d classes\n", dt->mod_name, ct); +} + /* * Allocate a new ddebug_table for the given module * and add it to the global list. */ -int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *name) +static int __ddebug_add_module(struct _ddebug_info *di, unsigned int base, + const char *modname) { struct ddebug_table *dt; + v3pr_info("add-module: %s.%d sites\n", modname, di->num_descs); + if (!di->num_descs) { + v3pr_info(" skip %s\n", modname); + return 0; + } + dt = kzalloc(sizeof(*dt), GFP_KERNEL); if (dt == NULL) { - pr_err("error adding module: %s\n", name); + pr_err("error adding module: %s\n", modname); return -ENOMEM; } /* @@ -963,18 +1245,29 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, * member of struct module, which lives at least as long as * this struct ddebug_table. */ - dt->mod_name = name; - dt->num_ddebugs = n; - dt->ddebugs = tab; + dt->mod_name = modname; + dt->ddebugs = di->descs; + dt->num_ddebugs = di->num_descs; + + INIT_LIST_HEAD(&dt->link); + INIT_LIST_HEAD(&dt->maps); + + if (di->classes && di->num_classes) + ddebug_attach_module_classes(dt, di->classes, di->num_classes); mutex_lock(&ddebug_lock); - list_add(&dt->link, &ddebug_tables); + list_add_tail(&dt->link, &ddebug_tables); mutex_unlock(&ddebug_lock); - vpr_info("%3u debug prints in module %s\n", n, dt->mod_name); + vpr_info("%3u debug prints in module %s\n", di->num_descs, modname); return 0; } +int ddebug_add_module(struct _ddebug_info *di, const char *modname) +{ + return __ddebug_add_module(di, 0, modname); +} + /* helper for ddebug_dyndbg_(boot|module)_param_cb */ static int ddebug_dyndbg_param_cb(char *param, char *val, const char *modname, int on_err) @@ -1083,11 +1376,17 @@ static int __init dynamic_debug_init_control(void) static int __init dynamic_debug_init(void) { - struct _ddebug *iter, *iter_start; - const char *modname = NULL; + struct _ddebug *iter, *iter_mod_start; + int ret, i, mod_sites, mod_ct; + const char *modname; char *cmdline; - int ret = 0; - int n = 0, entries = 0, modct = 0; + + struct _ddebug_info di = { + .descs = __start___dyndbg, + .classes = __start___dyndbg_classes, + .num_descs = __stop___dyndbg - __start___dyndbg, + .num_classes = __stop___dyndbg_classes - __start___dyndbg_classes, + }; if (&__start___dyndbg == &__stop___dyndbg) { if (IS_ENABLED(CONFIG_DYNAMIC_DEBUG)) { @@ -1098,30 +1397,39 @@ static int __init dynamic_debug_init(void) ddebug_init_success = 1; return 0; } - iter = __start___dyndbg; + + iter = iter_mod_start = __start___dyndbg; modname = iter->modname; - iter_start = iter; - for (; iter < __stop___dyndbg; iter++) { - entries++; + i = mod_sites = mod_ct = 0; + + for (; iter < __stop___dyndbg; iter++, i++, mod_sites++) { + if (strcmp(modname, iter->modname)) { - modct++; - ret = ddebug_add_module(iter_start, n, modname); + mod_ct++; + di.num_descs = mod_sites; + di.descs = iter_mod_start; + ret = __ddebug_add_module(&di, i - mod_sites, modname); if (ret) goto out_err; - n = 0; + + mod_sites = 0; modname = iter->modname; - iter_start = iter; + iter_mod_start = iter; } - n++; } - ret = ddebug_add_module(iter_start, n, modname); + di.num_descs = mod_sites; + di.descs = iter_mod_start; + ret = __ddebug_add_module(&di, i - mod_sites, modname); if (ret) goto out_err; ddebug_init_success = 1; vpr_info("%d prdebugs in %d modules, %d KiB in ddebug tables, %d kiB in __dyndbg section\n", - entries, modct, (int)((modct * sizeof(struct ddebug_table)) >> 10), - (int)((entries * sizeof(struct _ddebug)) >> 10)); + i, mod_ct, (int)((mod_ct * sizeof(struct ddebug_table)) >> 10), + (int)((i * sizeof(struct _ddebug)) >> 10)); + + if (di.num_classes) + v2pr_info(" %d builtin ddebug class-maps\n", di.num_classes); /* now that ddebug tables are loaded, process all boot args * again to find and activate queries given in dyndbg params. diff --git a/lib/earlycpio.c b/lib/earlycpio.c index 7921193f0424..d2c37d64fd0c 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -126,7 +126,7 @@ struct cpio_data find_cpio_data(const char *path, void *data, "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", p, MAX_CPIO_FILE_NAME); } - strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); + strscpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); cd.data = (void *)dptr; cd.size = ch[C_FILESIZE]; diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 423784d9c058..1421818c9ef7 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); @@ -139,20 +136,26 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; } - if (attr->probability <= prandom_u32() % 100) + if (attr->probability <= get_random_u32_below(100)) return false; if (!fail_stacktrace(attr)) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/find_bit.c b/lib/find_bit.c index 1b8e4b2a9cba..18bc0a7ac8ee 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -19,57 +19,78 @@ #include <linux/minmax.h> #include <linux/swab.h> -#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ - !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \ - !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit, find_next_zero_bit, and - * find_next_and_bit. The differences are: - * - The "invert" argument, which is XORed with each fetched word before - * searching it for one bits. - * - The optional "addr2", which is anded with "addr1" if present. + * Common helper for find_bit() function family + * @FETCH: The expression that fetches and pre-processes each word of bitmap(s) + * @MUNGE: The expression that post-processes a word containing found bit (may be empty) + * @size: The bitmap size in bits */ -unsigned long _find_next_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert, unsigned long le) -{ - unsigned long tmp, mask; - - if (unlikely(start >= nbits)) - return nbits; - - tmp = addr1[start / BITS_PER_LONG]; - if (addr2) - tmp &= addr2[start / BITS_PER_LONG]; - tmp ^= invert; - - /* Handle 1st word. */ - mask = BITMAP_FIRST_WORD_MASK(start); - if (le) - mask = swab(mask); - - tmp &= mask; - - start = round_down(start, BITS_PER_LONG); - - while (!tmp) { - start += BITS_PER_LONG; - if (start >= nbits) - return nbits; - - tmp = addr1[start / BITS_PER_LONG]; - if (addr2) - tmp &= addr2[start / BITS_PER_LONG]; - tmp ^= invert; - } +#define FIND_FIRST_BIT(FETCH, MUNGE, size) \ +({ \ + unsigned long idx, val, sz = (size); \ + \ + for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { \ + val = (FETCH); \ + if (val) { \ + sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); \ + break; \ + } \ + } \ + \ + sz; \ +}) - if (le) - tmp = swab(tmp); - - return min(start + __ffs(tmp), nbits); -} -EXPORT_SYMBOL(_find_next_bit); -#endif +/* + * Common helper for find_next_bit() function family + * @FETCH: The expression that fetches and pre-processes each word of bitmap(s) + * @MUNGE: The expression that post-processes a word containing found bit (may be empty) + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + */ +#define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \ +({ \ + unsigned long mask, idx, tmp, sz = (size), __start = (start); \ + \ + if (unlikely(__start >= sz)) \ + goto out; \ + \ + mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \ + idx = __start / BITS_PER_LONG; \ + \ + for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \ + if ((idx + 1) * BITS_PER_LONG >= sz) \ + goto out; \ + idx++; \ + } \ + \ + sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \ +out: \ + sz; \ +}) + +#define FIND_NTH_BIT(FETCH, size, num) \ +({ \ + unsigned long sz = (size), nr = (num), idx, w, tmp; \ + \ + for (idx = 0; (idx + 1) * BITS_PER_LONG <= sz; idx++) { \ + if (idx * BITS_PER_LONG + nr >= sz) \ + goto out; \ + \ + tmp = (FETCH); \ + w = hweight_long(tmp); \ + if (w > nr) \ + goto found; \ + \ + nr -= w; \ + } \ + \ + if (sz % BITS_PER_LONG) \ + tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz); \ +found: \ + sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz); \ +out: \ + sz; \ +}) #ifndef find_first_bit /* @@ -77,14 +98,7 @@ EXPORT_SYMBOL(_find_next_bit); */ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) { - unsigned long idx; - - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - if (addr[idx]) - return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); - } - - return size; + return FIND_FIRST_BIT(addr[idx], /* nop */, size); } EXPORT_SYMBOL(_find_first_bit); #endif @@ -97,15 +111,7 @@ unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) { - unsigned long idx, val; - - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - val = addr1[idx] & addr2[idx]; - if (val) - return min(idx * BITS_PER_LONG + __ffs(val), size); - } - - return size; + return FIND_FIRST_BIT(addr1[idx] & addr2[idx], /* nop */, size); } EXPORT_SYMBOL(_find_first_and_bit); #endif @@ -116,16 +122,64 @@ EXPORT_SYMBOL(_find_first_and_bit); */ unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size) { - unsigned long idx; + return FIND_FIRST_BIT(~addr[idx], /* nop */, size); +} +EXPORT_SYMBOL(_find_first_zero_bit); +#endif - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - if (addr[idx] != ~0UL) - return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); - } +#ifndef find_next_bit +unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_bit); +#endif - return size; +unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) +{ + return FIND_NTH_BIT(addr[idx], size, n); } -EXPORT_SYMBOL(_find_first_zero_bit); +EXPORT_SYMBOL(__find_nth_bit); + +unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n) +{ + return FIND_NTH_BIT(addr1[idx] & addr2[idx], size, n); +} +EXPORT_SYMBOL(__find_nth_and_bit); + +unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n) +{ + return FIND_NTH_BIT(addr1[idx] & ~addr2[idx], size, n); +} +EXPORT_SYMBOL(__find_nth_andnot_bit); + +#ifndef find_next_and_bit +unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_and_bit); +#endif + +#ifndef find_next_andnot_bit +unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] & ~addr2[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_andnot_bit); +#endif + +#ifndef find_next_zero_bit +unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, + unsigned long start) +{ + return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_zero_bit); #endif #ifndef find_last_bit @@ -161,3 +215,38 @@ unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr, return offset; } EXPORT_SYMBOL(find_next_clump8); + +#ifdef __BIG_ENDIAN + +#ifndef find_first_zero_bit_le +/* + * Find the first cleared bit in an LE memory region. + */ +unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size) +{ + return FIND_FIRST_BIT(~addr[idx], swab, size); +} +EXPORT_SYMBOL(_find_first_zero_bit_le); + +#endif + +#ifndef find_next_zero_bit_le +unsigned long _find_next_zero_bit_le(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + return FIND_NEXT_BIT(~addr[idx], swab, size, offset); +} +EXPORT_SYMBOL(_find_next_zero_bit_le); +#endif + +#ifndef find_next_bit_le +unsigned long _find_next_bit_le(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + return FIND_NEXT_BIT(addr[idx], swab, size, offset); +} +EXPORT_SYMBOL(_find_next_bit_le); + +#endif + +#endif /* __BIG_ENDIAN */ diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index db904b57d4b8..d3fb09e6eff1 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -115,6 +115,22 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len) return 0; } +static int __init test_find_nth_bit(const unsigned long *bitmap, unsigned long len) +{ + unsigned long l, n, w = bitmap_weight(bitmap, len); + ktime_t time; + + time = ktime_get(); + for (n = 0; n < w; n++) { + l = find_nth_bit(bitmap, len, n); + WARN_ON(l >= len); + } + time = ktime_get() - time; + pr_err("find_nth_bit: %18llu ns, %6ld iterations\n", time, w); + + return 0; +} + static int __init test_find_next_and_bit(const void *bitmap, const void *bitmap2, unsigned long len) { @@ -142,6 +158,7 @@ static int __init find_bit_test(void) test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); + test_find_nth_bit(bitmap, BITMAP_LEN / 10); /* * test_find_first_bit() may take some time, so @@ -157,13 +174,14 @@ static int __init find_bit_test(void) bitmap_zero(bitmap2, BITMAP_LEN); while (nbits--) { - __set_bit(prandom_u32() % BITMAP_LEN, bitmap); - __set_bit(prandom_u32() % BITMAP_LEN, bitmap2); + __set_bit(get_random_u32_below(BITMAP_LEN), bitmap); + __set_bit(get_random_u32_below(BITMAP_LEN), bitmap2); } test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); + test_find_nth_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 05cccbcf1661..83332fefa6f4 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -70,6 +70,7 @@ bool fprop_new_period(struct fprop_global *p, int periods) */ if (events <= 1) return false; + preempt_disable_nested(); write_seqcount_begin(&p->sequence); if (periods < 64) events -= events >> periods; @@ -77,6 +78,7 @@ bool fprop_new_period(struct fprop_global *p, int periods) percpu_counter_add(&p->events, -events); p->period += periods; write_seqcount_end(&p->sequence); + preempt_enable_nested(); return true; } diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c index 5f4b07b56cd9..973866438608 100644 --- a/lib/fonts/fonts.c +++ b/lib/fonts/fonts.c @@ -135,8 +135,8 @@ const struct font_desc *get_default_font(int xres, int yres, u32 font_w, if (res > 20) c += 20 - res; - if ((font_w & (1 << (f->width - 1))) && - (font_h & (1 << (f->height - 1)))) + if ((font_w & (1U << (f->width - 1))) && + (font_h & (1U << (f->height - 1)))) c += 1000; if (c > cc) { diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c new file mode 100644 index 000000000000..c8c33cbaae9e --- /dev/null +++ b/lib/fortify_kunit.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Runtime test cases for CONFIG_FORTIFY_SOURCE that aren't expected to + * Oops the kernel on success. (For those, see drivers/misc/lkdtm/fortify.c) + * + * For corner cases with UBSAN, try testing with: + * + * ./tools/testing/kunit/kunit.py run --arch=x86_64 \ + * --kconfig_add CONFIG_FORTIFY_SOURCE=y \ + * --kconfig_add CONFIG_UBSAN=y \ + * --kconfig_add CONFIG_UBSAN_TRAP=y \ + * --kconfig_add CONFIG_UBSAN_BOUNDS=y \ + * --kconfig_add CONFIG_UBSAN_LOCAL_BOUNDS=y \ + * --make_options LLVM=1 fortify + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/vmalloc.h> + +static const char array_of_10[] = "this is 10"; +static const char *ptr_of_11 = "this is 11!"; +static char array_unknown[] = "compiler thinks I might change"; + +static void known_sizes_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, __compiletime_strlen("88888888"), 8); + KUNIT_EXPECT_EQ(test, __compiletime_strlen(array_of_10), 10); + KUNIT_EXPECT_EQ(test, __compiletime_strlen(ptr_of_11), 11); + + KUNIT_EXPECT_EQ(test, __compiletime_strlen(array_unknown), SIZE_MAX); + /* Externally defined and dynamically sized string pointer: */ + KUNIT_EXPECT_EQ(test, __compiletime_strlen(test->name), SIZE_MAX); +} + +/* This is volatile so the optimizer can't perform DCE below. */ +static volatile int pick; + +/* Not inline to keep optimizer from figuring out which string we want. */ +static noinline size_t want_minus_one(int pick) +{ + const char *str; + + switch (pick) { + case 1: + str = "4444"; + break; + case 2: + str = "333"; + break; + default: + str = "1"; + break; + } + return __compiletime_strlen(str); +} + +static void control_flow_split_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, want_minus_one(pick), SIZE_MAX); +} + +#define KUNIT_EXPECT_BOS(test, p, expected, name) \ + KUNIT_EXPECT_EQ_MSG(test, __builtin_object_size(p, 1), \ + expected, \ + "__alloc_size() not working with __bos on " name "\n") + +#if !__has_builtin(__builtin_dynamic_object_size) +#define KUNIT_EXPECT_BDOS(test, p, expected, name) \ + /* Silence "unused variable 'expected'" warning. */ \ + KUNIT_EXPECT_EQ(test, expected, expected) +#else +#define KUNIT_EXPECT_BDOS(test, p, expected, name) \ + KUNIT_EXPECT_EQ_MSG(test, __builtin_dynamic_object_size(p, 1), \ + expected, \ + "__alloc_size() not working with __bdos on " name "\n") +#endif + +/* If the execpted size is a constant value, __bos can see it. */ +#define check_const(_expected, alloc, free) do { \ + size_t expected = (_expected); \ + void *p = alloc; \ + KUNIT_EXPECT_TRUE_MSG(test, p != NULL, #alloc " failed?!\n"); \ + KUNIT_EXPECT_BOS(test, p, expected, #alloc); \ + KUNIT_EXPECT_BDOS(test, p, expected, #alloc); \ + free; \ +} while (0) + +/* If the execpted size is NOT a constant value, __bos CANNOT see it. */ +#define check_dynamic(_expected, alloc, free) do { \ + size_t expected = (_expected); \ + void *p = alloc; \ + KUNIT_EXPECT_TRUE_MSG(test, p != NULL, #alloc " failed?!\n"); \ + KUNIT_EXPECT_BOS(test, p, SIZE_MAX, #alloc); \ + KUNIT_EXPECT_BDOS(test, p, expected, #alloc); \ + free; \ +} while (0) + +/* Assortment of constant-value kinda-edge cases. */ +#define CONST_TEST_BODY(TEST_alloc) do { \ + /* Special-case vmalloc()-family to skip 0-sized allocs. */ \ + if (strcmp(#TEST_alloc, "TEST_vmalloc") != 0) \ + TEST_alloc(check_const, 0, 0); \ + TEST_alloc(check_const, 1, 1); \ + TEST_alloc(check_const, 128, 128); \ + TEST_alloc(check_const, 1023, 1023); \ + TEST_alloc(check_const, 1025, 1025); \ + TEST_alloc(check_const, 4096, 4096); \ + TEST_alloc(check_const, 4097, 4097); \ +} while (0) + +static volatile size_t zero_size; +static volatile size_t unknown_size = 50; + +#if !__has_builtin(__builtin_dynamic_object_size) +#define DYNAMIC_TEST_BODY(TEST_alloc) \ + kunit_skip(test, "Compiler is missing __builtin_dynamic_object_size() support\n") +#else +#define DYNAMIC_TEST_BODY(TEST_alloc) do { \ + size_t size = unknown_size; \ + \ + /* \ + * Expected size is "size" in each test, before it is then \ + * internally incremented in each test. Requires we disable \ + * -Wunsequenced. \ + */ \ + TEST_alloc(check_dynamic, size, size++); \ + /* Make sure incrementing actually happened. */ \ + KUNIT_EXPECT_NE(test, size, unknown_size); \ +} while (0) +#endif + +#define DEFINE_ALLOC_SIZE_TEST_PAIR(allocator) \ +static void alloc_size_##allocator##_const_test(struct kunit *test) \ +{ \ + CONST_TEST_BODY(TEST_##allocator); \ +} \ +static void alloc_size_##allocator##_dynamic_test(struct kunit *test) \ +{ \ + DYNAMIC_TEST_BODY(TEST_##allocator); \ +} + +#define TEST_kmalloc(checker, expected_size, alloc_size) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + void *orig; \ + size_t len; \ + \ + checker(expected_size, kmalloc(alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kmalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, kzalloc(alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kzalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, kcalloc(1, alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, kcalloc(alloc_size, 1, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kcalloc_node(1, alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, \ + kcalloc_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, kmalloc_array(1, alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, kmalloc_array(alloc_size, 1, gfp), \ + kfree(p)); \ + checker(expected_size, \ + kmalloc_array_node(1, alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, \ + kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + checker(expected_size, __kmalloc(alloc_size, gfp), \ + kfree(p)); \ + checker(expected_size, \ + __kmalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ + kfree(p)); \ + \ + orig = kmalloc(alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + krealloc(orig, (alloc_size) * 2, gfp), \ + kfree(p)); \ + orig = kmalloc(alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + krealloc_array(orig, 1, (alloc_size) * 2, gfp), \ + kfree(p)); \ + orig = kmalloc(alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + krealloc_array(orig, (alloc_size) * 2, 1, gfp), \ + kfree(p)); \ + \ + len = 11; \ + /* Using memdup() with fixed size, so force unknown length. */ \ + if (!__builtin_constant_p(expected_size)) \ + len += zero_size; \ + checker(len, kmemdup("hello there", len, gfp), kfree(p)); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(kmalloc) + +/* Sizes are in pages, not bytes. */ +#define TEST_vmalloc(checker, expected_pages, alloc_pages) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + checker((expected_pages) * PAGE_SIZE, \ + vmalloc((alloc_pages) * PAGE_SIZE), vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + vzalloc((alloc_pages) * PAGE_SIZE), vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + __vmalloc((alloc_pages) * PAGE_SIZE, gfp), vfree(p)); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(vmalloc) + +/* Sizes are in pages (and open-coded for side-effects), not bytes. */ +#define TEST_kvmalloc(checker, expected_pages, alloc_pages) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + size_t prev_size; \ + void *orig; \ + \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc((alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc_node((alloc_pages) * PAGE_SIZE, gfp, NUMA_NO_NODE), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvzalloc((alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvzalloc_node((alloc_pages) * PAGE_SIZE, gfp, NUMA_NO_NODE), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvcalloc(1, (alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvcalloc((alloc_pages) * PAGE_SIZE, 1, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc_array(1, (alloc_pages) * PAGE_SIZE, gfp), \ + vfree(p)); \ + checker((expected_pages) * PAGE_SIZE, \ + kvmalloc_array((alloc_pages) * PAGE_SIZE, 1, gfp), \ + vfree(p)); \ + \ + prev_size = (expected_pages) * PAGE_SIZE; \ + orig = kvmalloc(prev_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker(((expected_pages) * PAGE_SIZE) * 2, \ + kvrealloc(orig, prev_size, \ + ((alloc_pages) * PAGE_SIZE) * 2, gfp), \ + kvfree(p)); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(kvmalloc) + +#define TEST_devm_kmalloc(checker, expected_size, alloc_size) do { \ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; \ + const char dev_name[] = "fortify-test"; \ + struct device *dev; \ + void *orig; \ + size_t len; \ + \ + /* Create dummy device for devm_kmalloc()-family tests. */ \ + dev = root_device_register(dev_name); \ + KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), \ + "Cannot register test device\n"); \ + \ + checker(expected_size, devm_kmalloc(dev, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, devm_kzalloc(dev, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kmalloc_array(dev, 1, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kmalloc_array(dev, alloc_size, 1, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kcalloc(dev, 1, alloc_size, gfp), \ + devm_kfree(dev, p)); \ + checker(expected_size, \ + devm_kcalloc(dev, alloc_size, 1, gfp), \ + devm_kfree(dev, p)); \ + \ + orig = devm_kmalloc(dev, alloc_size, gfp); \ + KUNIT_EXPECT_TRUE(test, orig != NULL); \ + checker((expected_size) * 2, \ + devm_krealloc(dev, orig, (alloc_size) * 2, gfp), \ + devm_kfree(dev, p)); \ + \ + len = 4; \ + /* Using memdup() with fixed size, so force unknown length. */ \ + if (!__builtin_constant_p(expected_size)) \ + len += zero_size; \ + checker(len, devm_kmemdup(dev, "Ohai", len, gfp), \ + devm_kfree(dev, p)); \ + \ + device_unregister(dev); \ +} while (0) +DEFINE_ALLOC_SIZE_TEST_PAIR(devm_kmalloc) + +static struct kunit_case fortify_test_cases[] = { + KUNIT_CASE(known_sizes_test), + KUNIT_CASE(control_flow_split_test), + KUNIT_CASE(alloc_size_kmalloc_const_test), + KUNIT_CASE(alloc_size_kmalloc_dynamic_test), + KUNIT_CASE(alloc_size_vmalloc_const_test), + KUNIT_CASE(alloc_size_vmalloc_dynamic_test), + KUNIT_CASE(alloc_size_kvmalloc_const_test), + KUNIT_CASE(alloc_size_kvmalloc_dynamic_test), + KUNIT_CASE(alloc_size_devm_kmalloc_const_test), + KUNIT_CASE(alloc_size_devm_kmalloc_dynamic_test), + {} +}; + +static struct kunit_suite fortify_test_suite = { + .name = "fortify", + .test_cases = fortify_test_cases, +}; + +kunit_test_suite(fortify_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 593ce56ece50..3412737ff365 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -15,3 +15,135 @@ EXPORT_SYMBOL_GPL(interval_tree_insert); EXPORT_SYMBOL_GPL(interval_tree_remove); EXPORT_SYMBOL_GPL(interval_tree_iter_first); EXPORT_SYMBOL_GPL(interval_tree_iter_next); + +#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER +/* + * Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous + * span of nodes. This makes nodes[0]->last the end of that contiguous used span + * indexes that started at the original nodes[1]->start. nodes[1] is now the + * first node starting the next used span. A hole span is between nodes[0]->last + * and nodes[1]->start. nodes[1] must be !NULL. + */ +static void +interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state) +{ + struct interval_tree_node *cur = state->nodes[1]; + + state->nodes[0] = cur; + do { + if (cur->last > state->nodes[0]->last) + state->nodes[0] = cur; + cur = interval_tree_iter_next(cur, state->first_index, + state->last_index); + } while (cur && (state->nodes[0]->last >= cur->start || + state->nodes[0]->last + 1 == cur->start)); + state->nodes[1] = cur; +} + +void interval_tree_span_iter_first(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long first_index, + unsigned long last_index) +{ + iter->first_index = first_index; + iter->last_index = last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = + interval_tree_iter_first(itree, first_index, last_index); + if (!iter->nodes[1]) { + /* No nodes intersect the span, whole span is hole */ + iter->start_hole = first_index; + iter->last_hole = last_index; + iter->is_hole = 1; + return; + } + if (iter->nodes[1]->start > first_index) { + /* Leading hole on first iteration */ + iter->start_hole = first_index; + iter->last_hole = iter->nodes[1]->start - 1; + iter->is_hole = 1; + interval_tree_span_iter_next_gap(iter); + return; + } + + /* Starting inside a used */ + iter->start_used = first_index; + iter->is_hole = 0; + interval_tree_span_iter_next_gap(iter); + iter->last_used = iter->nodes[0]->last; + if (iter->last_used >= last_index) { + iter->last_used = last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = NULL; + } +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_first); + +void interval_tree_span_iter_next(struct interval_tree_span_iter *iter) +{ + if (!iter->nodes[0] && !iter->nodes[1]) { + iter->is_hole = -1; + return; + } + + if (iter->is_hole) { + iter->start_used = iter->last_hole + 1; + iter->last_used = iter->nodes[0]->last; + if (iter->last_used >= iter->last_index) { + iter->last_used = iter->last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = NULL; + } + iter->is_hole = 0; + return; + } + + if (!iter->nodes[1]) { + /* Trailing hole */ + iter->start_hole = iter->nodes[0]->last + 1; + iter->last_hole = iter->last_index; + iter->nodes[0] = NULL; + iter->is_hole = 1; + return; + } + + /* must have both nodes[0] and [1], interior hole */ + iter->start_hole = iter->nodes[0]->last + 1; + iter->last_hole = iter->nodes[1]->start - 1; + iter->is_hole = 1; + interval_tree_span_iter_next_gap(iter); +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_next); + +/* + * Advance the iterator index to a specific position. The returned used/hole is + * updated to start at new_index. This is faster than calling + * interval_tree_span_iter_first() as it can avoid full searches in several + * cases where the iterator is already set. + */ +void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long new_index) +{ + if (iter->is_hole == -1) + return; + + iter->first_index = new_index; + if (new_index > iter->last_index) { + iter->is_hole = -1; + return; + } + + /* Rely on the union aliasing hole/used */ + if (iter->start_hole <= new_index && new_index <= iter->last_hole) { + iter->start_hole = new_index; + return; + } + if (new_index == iter->last_hole + 1) + interval_tree_span_iter_next(iter); + else + interval_tree_span_iter_first(iter, itree, new_index, + iter->last_index); +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_advance); +#endif diff --git a/lib/iomap.c b/lib/iomap.c index fbaa3e8f19d6..4f8b31baa575 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -6,6 +6,7 @@ */ #include <linux/pci.h> #include <linux/io.h> +#include <linux/kmsan-checks.h> #include <linux/export.h> @@ -70,26 +71,35 @@ static void bad_io_access(unsigned long port, const char *access) #define mmio_read64be(addr) swab64(readq(addr)) #endif +/* + * Here and below, we apply __no_kmsan_checks to functions reading data from + * hardware, to ensure that KMSAN marks their return values as initialized. + */ +__no_kmsan_checks unsigned int ioread8(const void __iomem *addr) { IO_COND(addr, return inb(port), return readb(addr)); return 0xff; } +__no_kmsan_checks unsigned int ioread16(const void __iomem *addr) { IO_COND(addr, return inw(port), return readw(addr)); return 0xffff; } +__no_kmsan_checks unsigned int ioread16be(const void __iomem *addr) { IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); return 0xffff; } +__no_kmsan_checks unsigned int ioread32(const void __iomem *addr) { IO_COND(addr, return inl(port), return readl(addr)); return 0xffffffff; } +__no_kmsan_checks unsigned int ioread32be(const void __iomem *addr) { IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); @@ -142,18 +152,21 @@ static u64 pio_read64be_hi_lo(unsigned long port) return lo | (hi << 32); } +__no_kmsan_checks u64 ioread64_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); return 0xffffffffffffffffULL; } +__no_kmsan_checks u64 ioread64_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr)); return 0xffffffffffffffffULL; } +__no_kmsan_checks u64 ioread64be_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64be_lo_hi(port), @@ -161,6 +174,7 @@ u64 ioread64be_lo_hi(const void __iomem *addr) return 0xffffffffffffffffULL; } +__no_kmsan_checks u64 ioread64be_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64be_hi_lo(port), @@ -188,22 +202,32 @@ EXPORT_SYMBOL(ioread64be_hi_lo); void iowrite8(u8 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, outb(val,port), writeb(val, addr)); } void iowrite16(u16 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, outw(val,port), writew(val, addr)); } void iowrite16be(u16 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr)); } void iowrite32(u32 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, outl(val,port), writel(val, addr)); } void iowrite32be(u32 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr)); } EXPORT_SYMBOL(iowrite8); @@ -239,24 +263,32 @@ static void pio_write64be_hi_lo(u64 val, unsigned long port) void iowrite64_lo_hi(u64 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, pio_write64_lo_hi(val, port), writeq(val, addr)); } void iowrite64_hi_lo(u64 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, pio_write64_hi_lo(val, port), writeq(val, addr)); } void iowrite64be_lo_hi(u64 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, pio_write64be_lo_hi(val, port), mmio_write64be(val, addr)); } void iowrite64be_hi_lo(u64 val, void __iomem *addr) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(&val, sizeof(val)); IO_COND(addr, pio_write64be_hi_lo(val, port), mmio_write64be(val, addr)); } @@ -328,14 +360,20 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count)); + /* KMSAN must treat values read from devices as initialized. */ + kmsan_unpoison_memory(dst, count); } void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count)); + /* KMSAN must treat values read from devices as initialized. */ + kmsan_unpoison_memory(dst, count * 2); } void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count)); + /* KMSAN must treat values read from devices as initialized. */ + kmsan_unpoison_memory(dst, count * 4); } EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); @@ -343,14 +381,20 @@ EXPORT_SYMBOL(ioread32_rep); void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(src, count); IO_COND(addr, outsb(port, src, count), mmio_outsb(addr, src, count)); } void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(src, count * 2); IO_COND(addr, outsw(port, src, count), mmio_outsw(addr, src, count)); } void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) { + /* Make sure uninitialized memory isn't copied to devices. */ + kmsan_check_memory(src, count * 4); IO_COND(addr, outsl(port, src,count), mmio_outsl(addr, src, count)); } EXPORT_SYMBOL(iowrite8_rep); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 4b7fce72e3e5..f9a3ff37ecd1 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -174,13 +174,16 @@ static int copyout(void __user *to, const void *from, size_t n) static int copyin(void *to, const void __user *from, size_t n) { + size_t res = n; + if (should_fail_usercopy()) return n; if (access_ok(from, n)) { - instrument_copy_from_user(to, from, n); - n = raw_copy_from_user(to, from, n); + instrument_copy_from_user_before(to, from, n); + res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); } - return n; + return res; } static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, @@ -517,6 +520,8 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { + if (WARN_ON_ONCE(i->data_source)) + return 0; if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); if (user_backed_iter(i)) @@ -603,6 +608,8 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, */ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { + if (WARN_ON_ONCE(i->data_source)) + return 0; if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); if (user_backed_iter(i)) @@ -619,10 +626,9 @@ EXPORT_SYMBOL_GPL(_copy_mc_to_iter); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(iov_iter_is_pipe(i))) { - WARN_ON(1); + if (WARN_ON_ONCE(!i->data_source)) return 0; - } + if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, @@ -636,10 +642,9 @@ EXPORT_SYMBOL(_copy_from_iter); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(iov_iter_is_pipe(i))) { - WARN_ON(1); + if (WARN_ON_ONCE(!i->data_source)) return 0; - } + iterate_and_advance(i, bytes, base, len, off, __copy_from_user_inatomic_nocache(addr + off, base, len), memcpy(addr + off, base, len) @@ -668,10 +673,9 @@ EXPORT_SYMBOL(_copy_from_iter_nocache); */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(iov_iter_is_pipe(i))) { - WARN_ON(1); + if (WARN_ON_ONCE(!i->data_source)) return 0; - } + iterate_and_advance(i, bytes, base, len, off, __copy_from_user_flushcache(addr + off, base, len), memcpy_flushcache(addr + off, base, len) @@ -700,17 +704,18 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) head = compound_head(page); v += (page - head) << PAGE_SHIFT; - if (likely(n <= v && v <= (page_size(head)))) - return true; - WARN_ON(1); - return false; + if (WARN_ON(n > v || v > page_size(head))) + return false; + return true; } size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; - if (unlikely(!page_copy_sane(page, offset, bytes))) + if (!page_copy_sane(page, offset, bytes)) + return 0; + if (WARN_ON_ONCE(i->data_source)) return 0; if (unlikely(iov_iter_is_pipe(i))) return copy_page_to_iter_pipe(page, offset, bytes, i); @@ -805,13 +810,12 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt struct iov_iter *i) { char *kaddr = kmap_atomic(page), *p = kaddr + offset; - if (unlikely(!page_copy_sane(page, offset, bytes))) { + if (!page_copy_sane(page, offset, bytes)) { kunmap_atomic(kaddr); return 0; } - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { + if (WARN_ON_ONCE(!i->data_source)) { kunmap_atomic(kaddr); - WARN_ON(1); return 0; } iterate_and_advance(i, bytes, base, len, off, @@ -1427,7 +1431,8 @@ static struct page *first_bvec_segment(const struct iov_iter *i, static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, - unsigned int maxpages, size_t *start) + unsigned int maxpages, size_t *start, + unsigned int gup_flags) { unsigned int n; @@ -1439,7 +1444,6 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, maxsize = MAX_RW_COUNT; if (likely(user_backed_iter(i))) { - unsigned int gup_flags = 0; unsigned long addr; int res; @@ -1489,33 +1493,49 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return -EFAULT; } -ssize_t iov_iter_get_pages2(struct iov_iter *i, +ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, - size_t *start) + size_t *start, unsigned gup_flags) { if (!maxpages) return 0; BUG_ON(!pages); - return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); + return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, + start, gup_flags); +} +EXPORT_SYMBOL_GPL(iov_iter_get_pages); + +ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, + size_t maxsize, unsigned maxpages, size_t *start) +{ + return iov_iter_get_pages(i, pages, maxsize, maxpages, start, 0); } EXPORT_SYMBOL(iov_iter_get_pages2); -ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, - size_t *start) + size_t *start, unsigned gup_flags) { ssize_t len; *pages = NULL; - len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); + len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start, + gup_flags); if (len <= 0) { kvfree(*pages); *pages = NULL; } return len; } +EXPORT_SYMBOL_GPL(iov_iter_get_pages_alloc); + +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, + struct page ***pages, size_t maxsize, size_t *start) +{ + return iov_iter_get_pages_alloc(i, pages, maxsize, start, 0); +} EXPORT_SYMBOL(iov_iter_get_pages_alloc2); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, @@ -1523,10 +1543,9 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, { __wsum sum, next; sum = *csum; - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { - WARN_ON(1); + if (WARN_ON_ONCE(!i->data_source)) return 0; - } + iterate_and_advance(i, bytes, base, len, off, ({ next = csum_and_copy_from_user(base, addr + off, len); sum = csum_block_add(sum, next, off); @@ -1546,9 +1565,15 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct csum_state *csstate = _csstate; __wsum sum, next; - if (unlikely(iov_iter_is_discard(i))) { - WARN_ON(1); /* for now */ + if (WARN_ON_ONCE(i->data_source)) return 0; + if (unlikely(iov_iter_is_discard(i))) { + // can't use csum_memcpy() for that one - data is not copied + csstate->csum = csum_block_add(csstate->csum, + csum_partial(addr, bytes, 0), + csstate->off); + csstate->off += bytes; + return bytes; } sum = csum_shift(csstate->csum, csstate->off); diff --git a/lib/is_signed_type_kunit.c b/lib/is_signed_type_kunit.c new file mode 100644 index 000000000000..0a7f6ae62839 --- /dev/null +++ b/lib/is_signed_type_kunit.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * ./tools/testing/kunit/kunit.py run is_signed_type [--raw_output] + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/compiler.h> + +enum unsigned_enum { + constant_a = 3, +}; + +enum signed_enum { + constant_b = -1, + constant_c = 2, +}; + +static void is_signed_type_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, is_signed_type(bool), false); + KUNIT_EXPECT_EQ(test, is_signed_type(signed char), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned char), false); + KUNIT_EXPECT_EQ(test, is_signed_type(char), false); + KUNIT_EXPECT_EQ(test, is_signed_type(int), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned int), false); + KUNIT_EXPECT_EQ(test, is_signed_type(long), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned long), false); + KUNIT_EXPECT_EQ(test, is_signed_type(long long), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned long long), false); + KUNIT_EXPECT_EQ(test, is_signed_type(enum unsigned_enum), false); + KUNIT_EXPECT_EQ(test, is_signed_type(enum signed_enum), true); + KUNIT_EXPECT_EQ(test, is_signed_type(void *), false); + KUNIT_EXPECT_EQ(test, is_signed_type(const char *), false); +} + +static struct kunit_case is_signed_type_test_cases[] = { + KUNIT_CASE(is_signed_type_test), + {} +}; + +static struct kunit_suite is_signed_type_test_suite = { + .name = "is_signed_type", + .test_cases = is_signed_type_test_cases, +}; + +kunit_test_suite(is_signed_type_test_suite); + +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/kobject.c b/lib/kobject.c index 5f0e71ab292c..af1f5f2954d4 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -694,7 +694,7 @@ static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - unsigned long delay = HZ + HZ * (get_random_int() & 0x3); + unsigned long delay = HZ + HZ * get_random_u32_below(4); pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 0b5dfb001bac..626719b95bad 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -59,4 +59,15 @@ config KUNIT_ALL_TESTS If unsure, say N. +config KUNIT_DEFAULT_ENABLED + bool "Default value of kunit.enable" + default y + help + Sets the default value of kunit.enable. If set to N then KUnit + tests will not execute unless kunit.enable=1 is passed to the + kernel command line. + + In most cases this should be left as Y. Only if additional opt-in + behavior is needed should this be set to N. + endif # KUNIT diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index d00d6d181ee8..f5b50babe38d 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -127,13 +127,15 @@ void kunit_binary_assert_format(const struct kunit_assert *assert, binary_assert->text->right_text); if (!is_literal(stream->test, binary_assert->text->left_text, binary_assert->left_value, stream->gfp)) - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld\n", + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld (0x%llx)\n", binary_assert->text->left_text, + binary_assert->left_value, binary_assert->left_value); if (!is_literal(stream->test, binary_assert->text->right_text, binary_assert->right_value, stream->gfp)) - string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld", + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s == %lld (0x%llx)", binary_assert->text->right_text, + binary_assert->right_value, binary_assert->right_value); kunit_assert_print_msg(message, stream); } @@ -204,3 +206,59 @@ void kunit_binary_str_assert_format(const struct kunit_assert *assert, kunit_assert_print_msg(message, stream); } EXPORT_SYMBOL_GPL(kunit_binary_str_assert_format); + +/* Adds a hexdump of a buffer to a string_stream comparing it with + * a second buffer. The different bytes are marked with <>. + */ +static void kunit_assert_hexdump(struct string_stream *stream, + const void *buf, + const void *compared_buf, + const size_t len) +{ + size_t i; + const u8 *buf1 = buf; + const u8 *buf2 = compared_buf; + + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT); + + for (i = 0; i < len; ++i) { + if (!(i % 16) && i) + string_stream_add(stream, "\n" KUNIT_SUBSUBTEST_INDENT); + + if (buf1[i] != buf2[i]) + string_stream_add(stream, "<%02x>", buf1[i]); + else + string_stream_add(stream, " %02x ", buf1[i]); + } +} + +void kunit_mem_assert_format(const struct kunit_assert *assert, + const struct va_format *message, + struct string_stream *stream) +{ + struct kunit_mem_assert *mem_assert; + + mem_assert = container_of(assert, struct kunit_mem_assert, + assert); + + string_stream_add(stream, + KUNIT_SUBTEST_INDENT "Expected %s %s %s, but\n", + mem_assert->text->left_text, + mem_assert->text->operation, + mem_assert->text->right_text); + + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n", + mem_assert->text->left_text); + kunit_assert_hexdump(stream, mem_assert->left_value, + mem_assert->right_value, mem_assert->size); + + string_stream_add(stream, "\n"); + + string_stream_add(stream, KUNIT_SUBSUBTEST_INDENT "%s ==\n", + mem_assert->text->right_text); + kunit_assert_hexdump(stream, mem_assert->right_value, + mem_assert->left_value, mem_assert->size); + + kunit_assert_print_msg(message, stream); +} +EXPORT_SYMBOL_GPL(kunit_mem_assert_format); diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c index 1048ef1b8d6e..de0ee2e03ed6 100644 --- a/lib/kunit/debugfs.c +++ b/lib/kunit/debugfs.c @@ -63,7 +63,7 @@ static int debugfs_print_results(struct seq_file *seq, void *v) kunit_suite_for_each_test_case(suite, test_case) debugfs_print_result(seq, suite, test_case); - seq_printf(seq, "%s %d - %s\n", + seq_printf(seq, "%s %d %s\n", kunit_status_to_ok_not_ok(success), 1, suite->name); return 0; } diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 5e223327196a..74982b83707c 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -166,7 +166,7 @@ static void kunit_exec_run_tests(struct suite_set *suite_set) { size_t num_suites = suite_set->end - suite_set->start; - pr_info("TAP version 14\n"); + pr_info("KTAP version 1\n"); pr_info("1..%zu\n", num_suites); __kunit_test_suites_init(suite_set->start, num_suites); @@ -177,8 +177,8 @@ static void kunit_exec_list_tests(struct suite_set *suite_set) struct kunit_suite * const *suites; struct kunit_case *test_case; - /* Hack: print a tap header so kunit.py can find the start of KUnit output. */ - pr_info("TAP version 14\n"); + /* Hack: print a ktap header so kunit.py can find the start of KUnit output. */ + pr_info("KTAP version 1\n"); for (suites = suite_set->start; suites < suite_set->end; suites++) kunit_suite_for_each_test_case((*suites), test_case) { @@ -190,6 +190,10 @@ int kunit_run_all_tests(void) { struct suite_set suite_set = {__kunit_suites_start, __kunit_suites_end}; int err = 0; + if (!kunit_enabled()) { + pr_info("kunit: disabled\n"); + goto out; + } if (filter_glob_param) { suite_set = kunit_filter_suites(&suite_set, filter_glob_param, &err); diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c index f8fe582c9e36..66cc4e2365ec 100644 --- a/lib/kunit/kunit-example-test.c +++ b/lib/kunit/kunit-example-test.c @@ -86,6 +86,9 @@ static void example_mark_skipped_test(struct kunit *test) */ static void example_all_expect_macros_test(struct kunit *test) { + const u32 array1[] = { 0x0F, 0xFF }; + const u32 array2[] = { 0x1F, 0xFF }; + /* Boolean assertions */ KUNIT_EXPECT_TRUE(test, true); KUNIT_EXPECT_FALSE(test, false); @@ -109,6 +112,10 @@ static void example_all_expect_macros_test(struct kunit *test) KUNIT_EXPECT_STREQ(test, "hi", "hi"); KUNIT_EXPECT_STRNEQ(test, "hi", "bye"); + /* Memory block assertions */ + KUNIT_EXPECT_MEMEQ(test, array1, array1, sizeof(array1)); + KUNIT_EXPECT_MEMNEQ(test, array1, array2, sizeof(array1)); + /* * There are also ASSERT variants of all of the above that abort test * execution if they fail. Useful for memory allocations, etc. diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 13d0bd8b07a9..4df0335d0d06 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -161,6 +161,13 @@ static void kunit_resource_test_alloc_resource(struct kunit *test) kunit_put_resource(res); } +static inline bool kunit_resource_instance_match(struct kunit *test, + struct kunit_resource *res, + void *match_data) +{ + return res->data == match_data; +} + /* * Note: tests below use kunit_alloc_and_get_resource(), so as a consequence * they have a reference to the associated resource that they must release diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c index 141789ca8949..f5f51166d8c2 100644 --- a/lib/kunit/string-stream.c +++ b/lib/kunit/string-stream.c @@ -12,62 +12,29 @@ #include "string-stream.h" -struct string_stream_fragment_alloc_context { - struct kunit *test; - int len; - gfp_t gfp; -}; -static int string_stream_fragment_init(struct kunit_resource *res, - void *context) +static struct string_stream_fragment *alloc_string_stream_fragment( + struct kunit *test, int len, gfp_t gfp) { - struct string_stream_fragment_alloc_context *ctx = context; struct string_stream_fragment *frag; - frag = kunit_kzalloc(ctx->test, sizeof(*frag), ctx->gfp); + frag = kunit_kzalloc(test, sizeof(*frag), gfp); if (!frag) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - frag->test = ctx->test; - frag->fragment = kunit_kmalloc(ctx->test, ctx->len, ctx->gfp); + frag->fragment = kunit_kmalloc(test, len, gfp); if (!frag->fragment) - return -ENOMEM; - - res->data = frag; + return ERR_PTR(-ENOMEM); - return 0; + return frag; } -static void string_stream_fragment_free(struct kunit_resource *res) +static void string_stream_fragment_destroy(struct kunit *test, + struct string_stream_fragment *frag) { - struct string_stream_fragment *frag = res->data; - list_del(&frag->node); - kunit_kfree(frag->test, frag->fragment); - kunit_kfree(frag->test, frag); -} - -static struct string_stream_fragment *alloc_string_stream_fragment( - struct kunit *test, int len, gfp_t gfp) -{ - struct string_stream_fragment_alloc_context context = { - .test = test, - .len = len, - .gfp = gfp - }; - - return kunit_alloc_resource(test, - string_stream_fragment_init, - string_stream_fragment_free, - gfp, - &context); -} - -static int string_stream_fragment_destroy(struct string_stream_fragment *frag) -{ - return kunit_destroy_resource(frag->test, - kunit_resource_instance_match, - frag); + kunit_kfree(test, frag->fragment); + kunit_kfree(test, frag); } int string_stream_vadd(struct string_stream *stream, @@ -89,8 +56,8 @@ int string_stream_vadd(struct string_stream *stream, frag_container = alloc_string_stream_fragment(stream->test, len, stream->gfp); - if (!frag_container) - return -ENOMEM; + if (IS_ERR(frag_container)) + return PTR_ERR(frag_container); len = vsnprintf(frag_container->fragment, len, fmt, args); spin_lock(&stream->lock); @@ -122,7 +89,7 @@ static void string_stream_clear(struct string_stream *stream) frag_container_safe, &stream->fragments, node) { - string_stream_fragment_destroy(frag_container); + string_stream_fragment_destroy(stream->test, frag_container); } stream->length = 0; spin_unlock(&stream->lock); @@ -164,53 +131,23 @@ bool string_stream_is_empty(struct string_stream *stream) return list_empty(&stream->fragments); } -struct string_stream_alloc_context { - struct kunit *test; - gfp_t gfp; -}; - -static int string_stream_init(struct kunit_resource *res, void *context) +struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp) { struct string_stream *stream; - struct string_stream_alloc_context *ctx = context; - stream = kunit_kzalloc(ctx->test, sizeof(*stream), ctx->gfp); + stream = kunit_kzalloc(test, sizeof(*stream), gfp); if (!stream) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - res->data = stream; - stream->gfp = ctx->gfp; - stream->test = ctx->test; + stream->gfp = gfp; + stream->test = test; INIT_LIST_HEAD(&stream->fragments); spin_lock_init(&stream->lock); - return 0; + return stream; } -static void string_stream_free(struct kunit_resource *res) +void string_stream_destroy(struct string_stream *stream) { - struct string_stream *stream = res->data; - string_stream_clear(stream); } - -struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp) -{ - struct string_stream_alloc_context context = { - .test = test, - .gfp = gfp - }; - - return kunit_alloc_resource(test, - string_stream_init, - string_stream_free, - gfp, - &context); -} - -int string_stream_destroy(struct string_stream *stream) -{ - return kunit_destroy_resource(stream->test, - kunit_resource_instance_match, - stream); -} diff --git a/lib/kunit/string-stream.h b/lib/kunit/string-stream.h index 43f9508a55b4..b669f9a75a94 100644 --- a/lib/kunit/string-stream.h +++ b/lib/kunit/string-stream.h @@ -14,7 +14,6 @@ #include <linux/stdarg.h> struct string_stream_fragment { - struct kunit *test; struct list_head node; char *fragment; }; @@ -46,6 +45,6 @@ int string_stream_append(struct string_stream *stream, bool string_stream_is_empty(struct string_stream *stream); -int string_stream_destroy(struct string_stream *stream); +void string_stream_destroy(struct string_stream *stream); #endif /* _KUNIT_STRING_STREAM_H */ diff --git a/lib/kunit/test.c b/lib/kunit/test.c index b73d5bb5c473..c9ebf975e56b 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -20,6 +20,8 @@ #include "string-stream.h" #include "try-catch-impl.h" +DEFINE_STATIC_KEY_FALSE(kunit_running); + #if IS_BUILTIN(CONFIG_KUNIT) /* * Fail the current test and print an error message to the log. @@ -55,6 +57,17 @@ EXPORT_SYMBOL_GPL(__kunit_fail_current_test); #endif /* + * Enable KUnit tests to run. + */ +#ifdef CONFIG_KUNIT_DEFAULT_ENABLED +static bool enable_param = true; +#else +static bool enable_param; +#endif +module_param_named(enable, enable_param, bool, 0); +MODULE_PARM_DESC(enable, "Enable KUnit tests"); + +/* * KUnit statistic mode: * 0 - disabled * 1 - only when there is more than one subtest @@ -138,6 +151,7 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases); static void kunit_print_suite_start(struct kunit_suite *suite) { + kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "KTAP version 1\n"); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s", suite->name); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd", @@ -164,13 +178,13 @@ static void kunit_print_ok_not_ok(void *test_or_suite, * representation. */ if (suite) - pr_info("%s %zd - %s%s%s\n", + pr_info("%s %zd %s%s%s\n", kunit_status_to_ok_not_ok(status), test_number, description, directive_header, (status == KUNIT_SKIPPED) ? directive : ""); else kunit_log(KERN_INFO, test, - KUNIT_SUBTEST_INDENT "%s %zd - %s%s%s", + KUNIT_SUBTEST_INDENT "%s %zd %s%s%s", kunit_status_to_ok_not_ok(status), test_number, description, directive_header, (status == KUNIT_SKIPPED) ? directive : ""); @@ -247,14 +261,14 @@ static void kunit_print_string_stream(struct kunit *test, static void kunit_fail(struct kunit *test, const struct kunit_loc *loc, enum kunit_assert_type type, const struct kunit_assert *assert, - const struct va_format *message) + assert_format_t assert_format, const struct va_format *message) { struct string_stream *stream; kunit_set_failure(test); stream = alloc_string_stream(test, GFP_KERNEL); - if (!stream) { + if (IS_ERR(stream)) { WARN(true, "Could not allocate stream to print failed assertion in %s:%d\n", loc->file, @@ -263,11 +277,11 @@ static void kunit_fail(struct kunit *test, const struct kunit_loc *loc, } kunit_assert_prologue(loc, type, stream); - assert->format(assert, message, stream); + assert_format(assert, message, stream); kunit_print_string_stream(test, stream); - WARN_ON(string_stream_destroy(stream)); + string_stream_destroy(stream); } static void __noreturn kunit_abort(struct kunit *test) @@ -287,6 +301,7 @@ void kunit_do_failed_assertion(struct kunit *test, const struct kunit_loc *loc, enum kunit_assert_type type, const struct kunit_assert *assert, + assert_format_t assert_format, const char *fmt, ...) { va_list args; @@ -296,7 +311,7 @@ void kunit_do_failed_assertion(struct kunit *test, message.fmt = fmt; message.va = &args; - kunit_fail(test, loc, type, assert, &message); + kunit_fail(test, loc, type, assert, assert_format, &message); va_end(args); @@ -531,6 +546,8 @@ int kunit_run_tests(struct kunit_suite *suite) param_desc[0] = '\0'; test.param_value = test_case->generate_params(NULL, param_desc); kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT + "KTAP version 1\n"); + kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT "# Subtest: %s", test_case->name); while (test.param_value) { @@ -543,7 +560,7 @@ int kunit_run_tests(struct kunit_suite *suite) kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "%s %d - %s", + "%s %d %s", kunit_status_to_ok_not_ok(test.status), test.param_index + 1, param_desc); @@ -586,14 +603,28 @@ static void kunit_init_suite(struct kunit_suite *suite) suite->suite_init_err = 0; } +bool kunit_enabled(void) +{ + return enable_param; +} + int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_suites) { unsigned int i; + if (!kunit_enabled() && num_suites > 0) { + pr_info("kunit: disabled\n"); + return 0; + } + + static_branch_inc(&kunit_running); + for (i = 0; i < num_suites; i++) { kunit_init_suite(suites[i]); kunit_run_tests(suites[i]); } + + static_branch_dec(&kunit_running); return 0; } EXPORT_SYMBOL_GPL(__kunit_test_suites_init); @@ -607,6 +638,9 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites) { unsigned int i; + if (!kunit_enabled()) + return; + for (i = 0; i < num_suites; i++) kunit_exit_suite(suites[i]); @@ -689,21 +723,20 @@ void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp) } EXPORT_SYMBOL_GPL(kunit_kmalloc_array); -void kunit_kfree(struct kunit *test, const void *ptr) +static inline bool kunit_kfree_match(struct kunit *test, + struct kunit_resource *res, void *match_data) { - struct kunit_resource *res; - - res = kunit_find_resource(test, kunit_resource_instance_match, - (void *)ptr); - - /* - * Removing the resource from the list of resources drops the - * reference count to 1; the final put will trigger the free. - */ - kunit_remove_resource(test, res); + /* Only match resources allocated with kunit_kmalloc() and friends. */ + return res->free == kunit_kmalloc_array_free && res->data == match_data; +} - kunit_put_resource(res); +void kunit_kfree(struct kunit *test, const void *ptr) +{ + if (!ptr) + return; + if (kunit_destroy_resource(test, kunit_kfree_match, (void *)ptr)) + KUNIT_FAIL(test, "kunit_kfree: %px already freed or not allocated by kunit", ptr); } EXPORT_SYMBOL_GPL(kunit_kfree); diff --git a/lib/llist.c b/lib/llist.c index 611ce4881a87..6e668fa5a2c6 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -26,11 +26,11 @@ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head) { - struct llist_node *first; + struct llist_node *first = READ_ONCE(head->first); do { - new_last->next = first = READ_ONCE(head->first); - } while (cmpxchg(&head->first, first, new_first) != first); + new_last->next = first; + } while (!try_cmpxchg(&head->first, &first, new_first)); return !first; } @@ -52,18 +52,14 @@ EXPORT_SYMBOL_GPL(llist_add_batch); */ struct llist_node *llist_del_first(struct llist_head *head) { - struct llist_node *entry, *old_entry, *next; + struct llist_node *entry, *next; entry = smp_load_acquire(&head->first); - for (;;) { + do { if (entry == NULL) return NULL; - old_entry = entry; next = READ_ONCE(entry->next); - entry = cmpxchg(&head->first, old_entry, next); - if (entry == old_entry) - break; - } + } while (!try_cmpxchg(&head->first, &entry, next)); return entry; } diff --git a/lib/lru_cache.c b/lib/lru_cache.c index dc35464216d3..b3d9187611de 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -60,17 +60,6 @@ int lc_try_lock(struct lru_cache *lc) } while (unlikely (val == LC_PARANOIA)); /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ return 0 == val; -#if 0 - /* Alternative approach, spin in case someone enters or leaves a - * PARANOIA_ENTRY()/RETURN() section. */ - unsigned long old, new, val; - do { - old = lc->flags & LC_PARANOIA; - new = old | LC_LOCKED; - val = cmpxchg(&lc->flags, old, new); - } while (unlikely (val == (old ^ LC_PARANOIA))); - return old == val; -#endif } /** @@ -364,7 +353,7 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsig struct lc_element *e; PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { + if (test_bit(__LC_STARVING, &lc->flags)) { ++lc->starving; RETURN(NULL); } @@ -417,7 +406,7 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsig * the LRU element, we have to wait ... */ if (!lc_unused_element_available(lc)) { - __set_bit(__LC_STARVING, &lc->flags); + set_bit(__LC_STARVING, &lc->flags); RETURN(NULL); } @@ -586,48 +575,6 @@ struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) } /** - * lc_index_of - * @lc: the lru cache to operate on - * @e: the element to query for its index position in lc->element - */ -unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) -{ - PARANOIA_LC_ELEMENT(lc, e); - return e->lc_index; -} - -/** - * lc_set - associate index with label - * @lc: the lru cache to operate on - * @enr: the label to set - * @index: the element index to associate label with. - * - * Used to initialize the active set to some previously recorded state. - */ -void lc_set(struct lru_cache *lc, unsigned int enr, int index) -{ - struct lc_element *e; - struct list_head *lh; - - if (index < 0 || index >= lc->nr_elements) - return; - - e = lc_element_by_index(lc, index); - BUG_ON(e->lc_number != e->lc_new_number); - BUG_ON(e->refcnt != 0); - - e->lc_number = e->lc_new_number = enr; - hlist_del_init(&e->colision); - if (enr == LC_FREE) - lh = &lc->free; - else { - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - lh = &lc->lru; - } - list_move(&e->list, lh); -} - -/** * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form. * @lc: the lru cache to operate on * @seq: the &struct seq_file pointer to seq_printf into @@ -661,7 +608,6 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext EXPORT_SYMBOL(lc_create); EXPORT_SYMBOL(lc_reset); EXPORT_SYMBOL(lc_destroy); -EXPORT_SYMBOL(lc_set); EXPORT_SYMBOL(lc_del); EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); @@ -669,7 +615,6 @@ EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); -EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); EXPORT_SYMBOL(lc_try_lock); diff --git a/lib/maple_tree.c b/lib/maple_tree.c new file mode 100644 index 000000000000..3fe1491d2bf9 --- /dev/null +++ b/lib/maple_tree.c @@ -0,0 +1,7157 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Maple Tree implementation + * Copyright (c) 2018-2022 Oracle Corporation + * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> + * Matthew Wilcox <willy@infradead.org> + */ + +/* + * DOC: Interesting implementation details of the Maple Tree + * + * Each node type has a number of slots for entries and a number of slots for + * pivots. In the case of dense nodes, the pivots are implied by the position + * and are simply the slot index + the minimum of the node. + * + * In regular B-Tree terms, pivots are called keys. The term pivot is used to + * indicate that the tree is specifying ranges, Pivots may appear in the + * subtree with an entry attached to the value where as keys are unique to a + * specific position of a B-tree. Pivot values are inclusive of the slot with + * the same index. + * + * + * The following illustrates the layout of a range64 nodes slots and pivots. + * + * + * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | + * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ + * │ │ │ │ │ │ │ │ └─ Implied maximum + * │ │ │ │ │ │ │ └─ Pivot 14 + * │ │ │ │ │ │ └─ Pivot 13 + * │ │ │ │ │ └─ Pivot 12 + * │ │ │ │ └─ Pivot 11 + * │ │ │ └─ Pivot 2 + * │ │ └─ Pivot 1 + * │ └─ Pivot 0 + * └─ Implied minimum + * + * Slot contents: + * Internal (non-leaf) nodes contain pointers to other nodes. + * Leaf nodes contain entries. + * + * The location of interest is often referred to as an offset. All offsets have + * a slot, but the last offset has an implied pivot from the node above (or + * UINT_MAX for the root node. + * + * Ranges complicate certain write activities. When modifying any of + * the B-tree variants, it is known that one entry will either be added or + * deleted. When modifying the Maple Tree, one store operation may overwrite + * the entire data set, or one half of the tree, or the middle half of the tree. + * + */ + + +#include <linux/maple_tree.h> +#include <linux/xarray.h> +#include <linux/types.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/limits.h> +#include <asm/barrier.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/maple_tree.h> + +#define MA_ROOT_PARENT 1 + +/* + * Maple state flags + * * MA_STATE_BULK - Bulk insert mode + * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert + * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation + */ +#define MA_STATE_BULK 1 +#define MA_STATE_REBALANCE 2 +#define MA_STATE_PREALLOC 4 + +#define ma_parent_ptr(x) ((struct maple_pnode *)(x)) +#define ma_mnode_ptr(x) ((struct maple_node *)(x)) +#define ma_enode_ptr(x) ((struct maple_enode *)(x)) +static struct kmem_cache *maple_node_cache; + +#ifdef CONFIG_DEBUG_MAPLE_TREE +static const unsigned long mt_max[] = { + [maple_dense] = MAPLE_NODE_SLOTS, + [maple_leaf_64] = ULONG_MAX, + [maple_range_64] = ULONG_MAX, + [maple_arange_64] = ULONG_MAX, +}; +#define mt_node_max(x) mt_max[mte_node_type(x)] +#endif + +static const unsigned char mt_slots[] = { + [maple_dense] = MAPLE_NODE_SLOTS, + [maple_leaf_64] = MAPLE_RANGE64_SLOTS, + [maple_range_64] = MAPLE_RANGE64_SLOTS, + [maple_arange_64] = MAPLE_ARANGE64_SLOTS, +}; +#define mt_slot_count(x) mt_slots[mte_node_type(x)] + +static const unsigned char mt_pivots[] = { + [maple_dense] = 0, + [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1, + [maple_range_64] = MAPLE_RANGE64_SLOTS - 1, + [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1, +}; +#define mt_pivot_count(x) mt_pivots[mte_node_type(x)] + +static const unsigned char mt_min_slots[] = { + [maple_dense] = MAPLE_NODE_SLOTS / 2, + [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, + [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2, + [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1, +}; +#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)] + +#define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2) +#define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1) + +struct maple_big_node { + struct maple_pnode *parent; + unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1]; + union { + struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS]; + struct { + unsigned long padding[MAPLE_BIG_NODE_GAPS]; + unsigned long gap[MAPLE_BIG_NODE_GAPS]; + }; + }; + unsigned char b_end; + enum maple_type type; +}; + +/* + * The maple_subtree_state is used to build a tree to replace a segment of an + * existing tree in a more atomic way. Any walkers of the older tree will hit a + * dead node and restart on updates. + */ +struct maple_subtree_state { + struct ma_state *orig_l; /* Original left side of subtree */ + struct ma_state *orig_r; /* Original right side of subtree */ + struct ma_state *l; /* New left side of subtree */ + struct ma_state *m; /* New middle of subtree (rare) */ + struct ma_state *r; /* New right side of subtree */ + struct ma_topiary *free; /* nodes to be freed */ + struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ + struct maple_big_node *bn; +}; + +/* Functions */ +static inline struct maple_node *mt_alloc_one(gfp_t gfp) +{ + return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO); +} + +static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) +{ + return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size, + nodes); +} + +static inline void mt_free_bulk(size_t size, void __rcu **nodes) +{ + kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); +} + +static void mt_free_rcu(struct rcu_head *head) +{ + struct maple_node *node = container_of(head, struct maple_node, rcu); + + kmem_cache_free(maple_node_cache, node); +} + +/* + * ma_free_rcu() - Use rcu callback to free a maple node + * @node: The node to free + * + * The maple tree uses the parent pointer to indicate this node is no longer in + * use and will be freed. + */ +static void ma_free_rcu(struct maple_node *node) +{ + node->parent = ma_parent_ptr(node); + call_rcu(&node->rcu, mt_free_rcu); +} + + +static void mas_set_height(struct ma_state *mas) +{ + unsigned int new_flags = mas->tree->ma_flags; + + new_flags &= ~MT_FLAGS_HEIGHT_MASK; + BUG_ON(mas->depth > MAPLE_HEIGHT_MAX); + new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; + mas->tree->ma_flags = new_flags; +} + +static unsigned int mas_mt_height(struct ma_state *mas) +{ + return mt_height(mas->tree); +} + +static inline enum maple_type mte_node_type(const struct maple_enode *entry) +{ + return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & + MAPLE_NODE_TYPE_MASK; +} + +static inline bool ma_is_dense(const enum maple_type type) +{ + return type < maple_leaf_64; +} + +static inline bool ma_is_leaf(const enum maple_type type) +{ + return type < maple_range_64; +} + +static inline bool mte_is_leaf(const struct maple_enode *entry) +{ + return ma_is_leaf(mte_node_type(entry)); +} + +/* + * We also reserve values with the bottom two bits set to '10' which are + * below 4096 + */ +static inline bool mt_is_reserved(const void *entry) +{ + return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && + xa_is_internal(entry); +} + +static inline void mas_set_err(struct ma_state *mas, long err) +{ + mas->node = MA_ERROR(err); +} + +static inline bool mas_is_ptr(struct ma_state *mas) +{ + return mas->node == MAS_ROOT; +} + +static inline bool mas_is_start(struct ma_state *mas) +{ + return mas->node == MAS_START; +} + +bool mas_is_err(struct ma_state *mas) +{ + return xa_is_err(mas->node); +} + +static inline bool mas_searchable(struct ma_state *mas) +{ + if (mas_is_none(mas)) + return false; + + if (mas_is_ptr(mas)) + return false; + + return true; +} + +static inline struct maple_node *mte_to_node(const struct maple_enode *entry) +{ + return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); +} + +/* + * mte_to_mat() - Convert a maple encoded node to a maple topiary node. + * @entry: The maple encoded node + * + * Return: a maple topiary pointer + */ +static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) +{ + return (struct maple_topiary *) + ((unsigned long)entry & ~MAPLE_NODE_MASK); +} + +/* + * mas_mn() - Get the maple state node. + * @mas: The maple state + * + * Return: the maple node (not encoded - bare pointer). + */ +static inline struct maple_node *mas_mn(const struct ma_state *mas) +{ + return mte_to_node(mas->node); +} + +/* + * mte_set_node_dead() - Set a maple encoded node as dead. + * @mn: The maple encoded node. + */ +static inline void mte_set_node_dead(struct maple_enode *mn) +{ + mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); + smp_wmb(); /* Needed for RCU */ +} + +/* Bit 1 indicates the root is a node */ +#define MAPLE_ROOT_NODE 0x02 +/* maple_type stored bit 3-6 */ +#define MAPLE_ENODE_TYPE_SHIFT 0x03 +/* Bit 2 means a NULL somewhere below */ +#define MAPLE_ENODE_NULL 0x04 + +static inline struct maple_enode *mt_mk_node(const struct maple_node *node, + enum maple_type type) +{ + return (void *)((unsigned long)node | + (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); +} + +static inline void *mte_mk_root(const struct maple_enode *node) +{ + return (void *)((unsigned long)node | MAPLE_ROOT_NODE); +} + +static inline void *mte_safe_root(const struct maple_enode *node) +{ + return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); +} + +static inline void *mte_set_full(const struct maple_enode *node) +{ + return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); +} + +static inline void *mte_clear_full(const struct maple_enode *node) +{ + return (void *)((unsigned long)node | MAPLE_ENODE_NULL); +} + +static inline bool mte_has_null(const struct maple_enode *node) +{ + return (unsigned long)node & MAPLE_ENODE_NULL; +} + +static inline bool ma_is_root(struct maple_node *node) +{ + return ((unsigned long)node->parent & MA_ROOT_PARENT); +} + +static inline bool mte_is_root(const struct maple_enode *node) +{ + return ma_is_root(mte_to_node(node)); +} + +static inline bool mas_is_root_limits(const struct ma_state *mas) +{ + return !mas->min && mas->max == ULONG_MAX; +} + +static inline bool mt_is_alloc(struct maple_tree *mt) +{ + return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); +} + +/* + * The Parent Pointer + * Excluding root, the parent pointer is 256B aligned like all other tree nodes. + * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 + * bit values need an extra bit to store the offset. This extra bit comes from + * a reuse of the last bit in the node type. This is possible by using bit 1 to + * indicate if bit 2 is part of the type or the slot. + * + * Note types: + * 0x??1 = Root + * 0x?00 = 16 bit nodes + * 0x010 = 32 bit nodes + * 0x110 = 64 bit nodes + * + * Slot size and alignment + * 0b??1 : Root + * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 + * 0b010 : 32 bit values, type in 0-2, slot in 3-7 + * 0b110 : 64 bit values, type in 0-2, slot in 3-7 + */ + +#define MAPLE_PARENT_ROOT 0x01 + +#define MAPLE_PARENT_SLOT_SHIFT 0x03 +#define MAPLE_PARENT_SLOT_MASK 0xF8 + +#define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 +#define MAPLE_PARENT_16B_SLOT_MASK 0xFC + +#define MAPLE_PARENT_RANGE64 0x06 +#define MAPLE_PARENT_RANGE32 0x04 +#define MAPLE_PARENT_NOT_RANGE16 0x02 + +/* + * mte_parent_shift() - Get the parent shift for the slot storage. + * @parent: The parent pointer cast as an unsigned long + * Return: The shift into that pointer to the star to of the slot + */ +static inline unsigned long mte_parent_shift(unsigned long parent) +{ + /* Note bit 1 == 0 means 16B */ + if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) + return MAPLE_PARENT_SLOT_SHIFT; + + return MAPLE_PARENT_16B_SLOT_SHIFT; +} + +/* + * mte_parent_slot_mask() - Get the slot mask for the parent. + * @parent: The parent pointer cast as an unsigned long. + * Return: The slot mask for that parent. + */ +static inline unsigned long mte_parent_slot_mask(unsigned long parent) +{ + /* Note bit 1 == 0 means 16B */ + if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) + return MAPLE_PARENT_SLOT_MASK; + + return MAPLE_PARENT_16B_SLOT_MASK; +} + +/* + * mas_parent_enum() - Return the maple_type of the parent from the stored + * parent type. + * @mas: The maple state + * @node: The maple_enode to extract the parent's enum + * Return: The node->parent maple_type + */ +static inline +enum maple_type mte_parent_enum(struct maple_enode *p_enode, + struct maple_tree *mt) +{ + unsigned long p_type; + + p_type = (unsigned long)p_enode; + if (p_type & MAPLE_PARENT_ROOT) + return 0; /* Validated in the caller. */ + + p_type &= MAPLE_NODE_MASK; + p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type)); + + switch (p_type) { + case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ + if (mt_is_alloc(mt)) + return maple_arange_64; + return maple_range_64; + } + + return 0; +} + +static inline +enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode) +{ + return mte_parent_enum(ma_enode_ptr(mte_to_node(enode)->parent), mas->tree); +} + +/* + * mte_set_parent() - Set the parent node and encode the slot + * @enode: The encoded maple node. + * @parent: The encoded maple node that is the parent of @enode. + * @slot: The slot that @enode resides in @parent. + * + * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the + * parent type. + */ +static inline +void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent, + unsigned char slot) +{ + unsigned long val = (unsigned long) parent; + unsigned long shift; + unsigned long type; + enum maple_type p_type = mte_node_type(parent); + + BUG_ON(p_type == maple_dense); + BUG_ON(p_type == maple_leaf_64); + + switch (p_type) { + case maple_range_64: + case maple_arange_64: + shift = MAPLE_PARENT_SLOT_SHIFT; + type = MAPLE_PARENT_RANGE64; + break; + default: + case maple_dense: + case maple_leaf_64: + shift = type = 0; + break; + } + + val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ + val |= (slot << shift) | type; + mte_to_node(enode)->parent = ma_parent_ptr(val); +} + +/* + * mte_parent_slot() - get the parent slot of @enode. + * @enode: The encoded maple node. + * + * Return: The slot in the parent node where @enode resides. + */ +static inline unsigned int mte_parent_slot(const struct maple_enode *enode) +{ + unsigned long val = (unsigned long) mte_to_node(enode)->parent; + + /* Root. */ + if (val & 1) + return 0; + + /* + * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost + * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT + */ + return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); +} + +/* + * mte_parent() - Get the parent of @node. + * @node: The encoded maple node. + * + * Return: The parent maple node. + */ +static inline struct maple_node *mte_parent(const struct maple_enode *enode) +{ + return (void *)((unsigned long) + (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); +} + +/* + * ma_dead_node() - check if the @enode is dead. + * @enode: The encoded maple node + * + * Return: true if dead, false otherwise. + */ +static inline bool ma_dead_node(const struct maple_node *node) +{ + struct maple_node *parent = (void *)((unsigned long) + node->parent & ~MAPLE_NODE_MASK); + + return (parent == node); +} +/* + * mte_dead_node() - check if the @enode is dead. + * @enode: The encoded maple node + * + * Return: true if dead, false otherwise. + */ +static inline bool mte_dead_node(const struct maple_enode *enode) +{ + struct maple_node *parent, *node; + + node = mte_to_node(enode); + parent = mte_parent(enode); + return (parent == node); +} + +/* + * mas_allocated() - Get the number of nodes allocated in a maple state. + * @mas: The maple state + * + * The ma_state alloc member is overloaded to hold a pointer to the first + * allocated node or to the number of requested nodes to allocate. If bit 0 is + * set, then the alloc contains the number of requested nodes. If there is an + * allocated node, then the total allocated nodes is in that node. + * + * Return: The total number of nodes allocated + */ +static inline unsigned long mas_allocated(const struct ma_state *mas) +{ + if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) + return 0; + + return mas->alloc->total; +} + +/* + * mas_set_alloc_req() - Set the requested number of allocations. + * @mas: the maple state + * @count: the number of allocations. + * + * The requested number of allocations is either in the first allocated node, + * located in @mas->alloc->request_count, or directly in @mas->alloc if there is + * no allocated node. Set the request either in the node or do the necessary + * encoding to store in @mas->alloc directly. + */ +static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) +{ + if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { + if (!count) + mas->alloc = NULL; + else + mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); + return; + } + + mas->alloc->request_count = count; +} + +/* + * mas_alloc_req() - get the requested number of allocations. + * @mas: The maple state + * + * The alloc count is either stored directly in @mas, or in + * @mas->alloc->request_count if there is at least one node allocated. Decode + * the request count if it's stored directly in @mas->alloc. + * + * Return: The allocation request count. + */ +static inline unsigned int mas_alloc_req(const struct ma_state *mas) +{ + if ((unsigned long)mas->alloc & 0x1) + return (unsigned long)(mas->alloc) >> 1; + else if (mas->alloc) + return mas->alloc->request_count; + return 0; +} + +/* + * ma_pivots() - Get a pointer to the maple node pivots. + * @node - the maple node + * @type - the node type + * + * Return: A pointer to the maple node pivots + */ +static inline unsigned long *ma_pivots(struct maple_node *node, + enum maple_type type) +{ + switch (type) { + case maple_arange_64: + return node->ma64.pivot; + case maple_range_64: + case maple_leaf_64: + return node->mr64.pivot; + case maple_dense: + return NULL; + } + return NULL; +} + +/* + * ma_gaps() - Get a pointer to the maple node gaps. + * @node - the maple node + * @type - the node type + * + * Return: A pointer to the maple node gaps + */ +static inline unsigned long *ma_gaps(struct maple_node *node, + enum maple_type type) +{ + switch (type) { + case maple_arange_64: + return node->ma64.gap; + case maple_range_64: + case maple_leaf_64: + case maple_dense: + return NULL; + } + return NULL; +} + +/* + * mte_pivot() - Get the pivot at @piv of the maple encoded node. + * @mn: The maple encoded node. + * @piv: The pivot. + * + * Return: the pivot at @piv of @mn. + */ +static inline unsigned long mte_pivot(const struct maple_enode *mn, + unsigned char piv) +{ + struct maple_node *node = mte_to_node(mn); + + if (piv >= mt_pivots[piv]) { + WARN_ON(1); + return 0; + } + switch (mte_node_type(mn)) { + case maple_arange_64: + return node->ma64.pivot[piv]; + case maple_range_64: + case maple_leaf_64: + return node->mr64.pivot[piv]; + case maple_dense: + return 0; + } + return 0; +} + +/* + * mas_safe_pivot() - get the pivot at @piv or mas->max. + * @mas: The maple state + * @pivots: The pointer to the maple node pivots + * @piv: The pivot to fetch + * @type: The maple node type + * + * Return: The pivot at @piv within the limit of the @pivots array, @mas->max + * otherwise. + */ +static inline unsigned long +mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, + unsigned char piv, enum maple_type type) +{ + if (piv >= mt_pivots[type]) + return mas->max; + + return pivots[piv]; +} + +/* + * mas_safe_min() - Return the minimum for a given offset. + * @mas: The maple state + * @pivots: The pointer to the maple node pivots + * @offset: The offset into the pivot array + * + * Return: The minimum range value that is contained in @offset. + */ +static inline unsigned long +mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) +{ + if (likely(offset)) + return pivots[offset - 1] + 1; + + return mas->min; +} + +/* + * mas_logical_pivot() - Get the logical pivot of a given offset. + * @mas: The maple state + * @pivots: The pointer to the maple node pivots + * @offset: The offset into the pivot array + * @type: The maple node type + * + * When there is no value at a pivot (beyond the end of the data), then the + * pivot is actually @mas->max. + * + * Return: the logical pivot of a given @offset. + */ +static inline unsigned long +mas_logical_pivot(struct ma_state *mas, unsigned long *pivots, + unsigned char offset, enum maple_type type) +{ + unsigned long lpiv = mas_safe_pivot(mas, pivots, offset, type); + + if (likely(lpiv)) + return lpiv; + + if (likely(offset)) + return mas->max; + + return lpiv; +} + +/* + * mte_set_pivot() - Set a pivot to a value in an encoded maple node. + * @mn: The encoded maple node + * @piv: The pivot offset + * @val: The value of the pivot + */ +static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, + unsigned long val) +{ + struct maple_node *node = mte_to_node(mn); + enum maple_type type = mte_node_type(mn); + + BUG_ON(piv >= mt_pivots[type]); + switch (type) { + default: + case maple_range_64: + case maple_leaf_64: + node->mr64.pivot[piv] = val; + break; + case maple_arange_64: + node->ma64.pivot[piv] = val; + break; + case maple_dense: + break; + } + +} + +/* + * ma_slots() - Get a pointer to the maple node slots. + * @mn: The maple node + * @mt: The maple node type + * + * Return: A pointer to the maple node slots + */ +static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) +{ + switch (mt) { + default: + case maple_arange_64: + return mn->ma64.slot; + case maple_range_64: + case maple_leaf_64: + return mn->mr64.slot; + case maple_dense: + return mn->slot; + } +} + +static inline bool mt_locked(const struct maple_tree *mt) +{ + return mt_external_lock(mt) ? mt_lock_is_held(mt) : + lockdep_is_held(&mt->ma_lock); +} + +static inline void *mt_slot(const struct maple_tree *mt, + void __rcu **slots, unsigned char offset) +{ + return rcu_dereference_check(slots[offset], mt_locked(mt)); +} + +/* + * mas_slot_locked() - Get the slot value when holding the maple tree lock. + * @mas: The maple state + * @slots: The pointer to the slots + * @offset: The offset into the slots array to fetch + * + * Return: The entry stored in @slots at the @offset. + */ +static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, + unsigned char offset) +{ + return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); +} + +/* + * mas_slot() - Get the slot value when not holding the maple tree lock. + * @mas: The maple state + * @slots: The pointer to the slots + * @offset: The offset into the slots array to fetch + * + * Return: The entry stored in @slots at the @offset + */ +static inline void *mas_slot(struct ma_state *mas, void __rcu **slots, + unsigned char offset) +{ + return mt_slot(mas->tree, slots, offset); +} + +/* + * mas_root() - Get the maple tree root. + * @mas: The maple state. + * + * Return: The pointer to the root of the tree + */ +static inline void *mas_root(struct ma_state *mas) +{ + return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); +} + +static inline void *mt_root_locked(struct maple_tree *mt) +{ + return rcu_dereference_protected(mt->ma_root, mt_locked(mt)); +} + +/* + * mas_root_locked() - Get the maple tree root when holding the maple tree lock. + * @mas: The maple state. + * + * Return: The pointer to the root of the tree + */ +static inline void *mas_root_locked(struct ma_state *mas) +{ + return mt_root_locked(mas->tree); +} + +static inline struct maple_metadata *ma_meta(struct maple_node *mn, + enum maple_type mt) +{ + switch (mt) { + case maple_arange_64: + return &mn->ma64.meta; + default: + return &mn->mr64.meta; + } +} + +/* + * ma_set_meta() - Set the metadata information of a node. + * @mn: The maple node + * @mt: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, + unsigned char offset, unsigned char end) +{ + struct maple_metadata *meta = ma_meta(mn, mt); + + meta->gap = offset; + meta->end = end; +} + +/* + * ma_meta_end() - Get the data end of a node from the metadata + * @mn: The maple node + * @mt: The maple node type + */ +static inline unsigned char ma_meta_end(struct maple_node *mn, + enum maple_type mt) +{ + struct maple_metadata *meta = ma_meta(mn, mt); + + return meta->end; +} + +/* + * ma_meta_gap() - Get the largest gap location of a node from the metadata + * @mn: The maple node + * @mt: The maple node type + */ +static inline unsigned char ma_meta_gap(struct maple_node *mn, + enum maple_type mt) +{ + BUG_ON(mt != maple_arange_64); + + return mn->ma64.meta.gap; +} + +/* + * ma_set_meta_gap() - Set the largest gap location in a nodes metadata + * @mn: The maple node + * @mn: The maple node type + * @offset: The location of the largest gap. + */ +static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, + unsigned char offset) +{ + + struct maple_metadata *meta = ma_meta(mn, mt); + + meta->gap = offset; +} + +/* + * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. + * @mat - the ma_topiary, a linked list of dead nodes. + * @dead_enode - the node to be marked as dead and added to the tail of the list + * + * Add the @dead_enode to the linked list in @mat. + */ +static inline void mat_add(struct ma_topiary *mat, + struct maple_enode *dead_enode) +{ + mte_set_node_dead(dead_enode); + mte_to_mat(dead_enode)->next = NULL; + if (!mat->tail) { + mat->tail = mat->head = dead_enode; + return; + } + + mte_to_mat(mat->tail)->next = dead_enode; + mat->tail = dead_enode; +} + +static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); +static inline void mas_free(struct ma_state *mas, struct maple_enode *used); + +/* + * mas_mat_free() - Free all nodes in a dead list. + * @mas - the maple state + * @mat - the ma_topiary linked list of dead nodes to free. + * + * Free walk a dead list. + */ +static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat) +{ + struct maple_enode *next; + + while (mat->head) { + next = mte_to_mat(mat->head)->next; + mas_free(mas, mat->head); + mat->head = next; + } +} + +/* + * mas_mat_destroy() - Free all nodes and subtrees in a dead list. + * @mas - the maple state + * @mat - the ma_topiary linked list of dead nodes to free. + * + * Destroy walk a dead list. + */ +static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) +{ + struct maple_enode *next; + + while (mat->head) { + next = mte_to_mat(mat->head)->next; + mte_destroy_walk(mat->head, mat->mtree); + mat->head = next; + } +} +/* + * mas_descend() - Descend into the slot stored in the ma_state. + * @mas - the maple state. + * + * Note: Not RCU safe, only use in write side or debug code. + */ +static inline void mas_descend(struct ma_state *mas) +{ + enum maple_type type; + unsigned long *pivots; + struct maple_node *node; + void __rcu **slots; + + node = mas_mn(mas); + type = mte_node_type(mas->node); + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + + if (mas->offset) + mas->min = pivots[mas->offset - 1] + 1; + mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); + mas->node = mas_slot(mas, slots, mas->offset); +} + +/* + * mte_set_gap() - Set a maple node gap. + * @mn: The encoded maple node + * @gap: The offset of the gap to set + * @val: The gap value + */ +static inline void mte_set_gap(const struct maple_enode *mn, + unsigned char gap, unsigned long val) +{ + switch (mte_node_type(mn)) { + default: + break; + case maple_arange_64: + mte_to_node(mn)->ma64.gap[gap] = val; + break; + } +} + +/* + * mas_ascend() - Walk up a level of the tree. + * @mas: The maple state + * + * Sets the @mas->max and @mas->min to the correct values when walking up. This + * may cause several levels of walking up to find the correct min and max. + * May find a dead node which will cause a premature return. + * Return: 1 on dead node, 0 otherwise + */ +static int mas_ascend(struct ma_state *mas) +{ + struct maple_enode *p_enode; /* parent enode. */ + struct maple_enode *a_enode; /* ancestor enode. */ + struct maple_node *a_node; /* ancestor node. */ + struct maple_node *p_node; /* parent node. */ + unsigned char a_slot; + enum maple_type a_type; + unsigned long min, max; + unsigned long *pivots; + unsigned char offset; + bool set_max = false, set_min = false; + + a_node = mas_mn(mas); + if (ma_is_root(a_node)) { + mas->offset = 0; + return 0; + } + + p_node = mte_parent(mas->node); + if (unlikely(a_node == p_node)) + return 1; + a_type = mas_parent_enum(mas, mas->node); + offset = mte_parent_slot(mas->node); + a_enode = mt_mk_node(p_node, a_type); + + /* Check to make sure all parent information is still accurate */ + if (p_node != mte_parent(mas->node)) + return 1; + + mas->node = a_enode; + mas->offset = offset; + + if (mte_is_root(a_enode)) { + mas->max = ULONG_MAX; + mas->min = 0; + return 0; + } + + min = 0; + max = ULONG_MAX; + do { + p_enode = a_enode; + a_type = mas_parent_enum(mas, p_enode); + a_node = mte_parent(p_enode); + a_slot = mte_parent_slot(p_enode); + pivots = ma_pivots(a_node, a_type); + a_enode = mt_mk_node(a_node, a_type); + + if (!set_min && a_slot) { + set_min = true; + min = pivots[a_slot - 1] + 1; + } + + if (!set_max && a_slot < mt_pivots[a_type]) { + set_max = true; + max = pivots[a_slot]; + } + + if (unlikely(ma_dead_node(a_node))) + return 1; + + if (unlikely(ma_is_root(a_node))) + break; + + } while (!set_min || !set_max); + + mas->max = max; + mas->min = min; + return 0; +} + +/* + * mas_pop_node() - Get a previously allocated maple node from the maple state. + * @mas: The maple state + * + * Return: A pointer to a maple node. + */ +static inline struct maple_node *mas_pop_node(struct ma_state *mas) +{ + struct maple_alloc *ret, *node = mas->alloc; + unsigned long total = mas_allocated(mas); + + /* nothing or a request pending. */ + if (unlikely(!total)) + return NULL; + + if (total == 1) { + /* single allocation in this ma_state */ + mas->alloc = NULL; + ret = node; + goto single_node; + } + + if (!node->node_count) { + /* Single allocation in this node. */ + mas->alloc = node->slot[0]; + node->slot[0] = NULL; + mas->alloc->total = node->total - 1; + ret = node; + goto new_head; + } + + node->total--; + ret = node->slot[node->node_count]; + node->slot[node->node_count--] = NULL; + +single_node: +new_head: + ret->total = 0; + ret->node_count = 0; + if (ret->request_count) { + mas_set_alloc_req(mas, ret->request_count + 1); + ret->request_count = 0; + } + return (struct maple_node *)ret; +} + +/* + * mas_push_node() - Push a node back on the maple state allocation. + * @mas: The maple state + * @used: The used maple node + * + * Stores the maple node back into @mas->alloc for reuse. Updates allocated and + * requested node count as necessary. + */ +static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) +{ + struct maple_alloc *reuse = (struct maple_alloc *)used; + struct maple_alloc *head = mas->alloc; + unsigned long count; + unsigned int requested = mas_alloc_req(mas); + + memset(reuse, 0, sizeof(*reuse)); + count = mas_allocated(mas); + + if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) { + if (head->slot[0]) + head->node_count++; + head->slot[head->node_count] = reuse; + head->total++; + goto done; + } + + reuse->total = 1; + if ((head) && !((unsigned long)head & 0x1)) { + head->request_count = 0; + reuse->slot[0] = head; + reuse->total += head->total; + } + + mas->alloc = reuse; +done: + if (requested > 1) + mas_set_alloc_req(mas, requested - 1); +} + +/* + * mas_alloc_nodes() - Allocate nodes into a maple state + * @mas: The maple state + * @gfp: The GFP Flags + */ +static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) +{ + struct maple_alloc *node; + unsigned long allocated = mas_allocated(mas); + unsigned long success = allocated; + unsigned int requested = mas_alloc_req(mas); + unsigned int count; + void **slots = NULL; + unsigned int max_req = 0; + + if (!requested) + return; + + mas_set_alloc_req(mas, 0); + if (mas->mas_flags & MA_STATE_PREALLOC) { + if (allocated) + return; + WARN_ON(!allocated); + } + + if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) { + node = (struct maple_alloc *)mt_alloc_one(gfp); + if (!node) + goto nomem_one; + + if (allocated) + node->slot[0] = mas->alloc; + + success++; + mas->alloc = node; + requested--; + } + + node = mas->alloc; + while (requested) { + max_req = MAPLE_ALLOC_SLOTS; + if (node->slot[0]) { + unsigned int offset = node->node_count + 1; + + slots = (void **)&node->slot[offset]; + max_req -= offset; + } else { + slots = (void **)&node->slot; + } + + max_req = min(requested, max_req); + count = mt_alloc_bulk(gfp, max_req, slots); + if (!count) + goto nomem_bulk; + + node->node_count += count; + /* zero indexed. */ + if (slots == (void **)&node->slot) + node->node_count--; + + success += count; + node = node->slot[0]; + requested -= count; + } + mas->alloc->total = success; + return; + +nomem_bulk: + /* Clean up potential freed allocations on bulk failure */ + memset(slots, 0, max_req * sizeof(unsigned long)); +nomem_one: + mas_set_alloc_req(mas, requested); + if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) + mas->alloc->total = success; + mas_set_err(mas, -ENOMEM); + return; + +} + +/* + * mas_free() - Free an encoded maple node + * @mas: The maple state + * @used: The encoded maple node to free. + * + * Uses rcu free if necessary, pushes @used back on the maple state allocations + * otherwise. + */ +static inline void mas_free(struct ma_state *mas, struct maple_enode *used) +{ + struct maple_node *tmp = mte_to_node(used); + + if (mt_in_rcu(mas->tree)) + ma_free_rcu(tmp); + else + mas_push_node(mas, tmp); +} + +/* + * mas_node_count() - Check if enough nodes are allocated and request more if + * there is not enough nodes. + * @mas: The maple state + * @count: The number of nodes needed + * @gfp: the gfp flags + */ +static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) +{ + unsigned long allocated = mas_allocated(mas); + + if (allocated < count) { + mas_set_alloc_req(mas, count - allocated); + mas_alloc_nodes(mas, gfp); + } +} + +/* + * mas_node_count() - Check if enough nodes are allocated and request more if + * there is not enough nodes. + * @mas: The maple state + * @count: The number of nodes needed + * + * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. + */ +static void mas_node_count(struct ma_state *mas, int count) +{ + return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); +} + +/* + * mas_start() - Sets up maple state for operations. + * @mas: The maple state. + * + * If mas->node == MAS_START, then set the min, max, depth, and offset to + * defaults. + * + * Return: + * - If mas->node is an error or not MAS_START, return NULL. + * - If it's an empty tree: NULL & mas->node == MAS_NONE + * - If it's a single entry: The entry & mas->node == MAS_ROOT + * - If it's a tree: NULL & mas->node == safe root node. + */ +static inline struct maple_enode *mas_start(struct ma_state *mas) +{ + if (likely(mas_is_start(mas))) { + struct maple_enode *root; + + mas->node = MAS_NONE; + mas->min = 0; + mas->max = ULONG_MAX; + mas->depth = 0; + mas->offset = 0; + + root = mas_root(mas); + /* Tree with nodes */ + if (likely(xa_is_node(root))) { + mas->depth = 1; + mas->node = mte_safe_root(root); + return NULL; + } + + /* empty tree */ + if (unlikely(!root)) { + mas->offset = MAPLE_NODE_SLOTS; + return NULL; + } + + /* Single entry tree */ + mas->node = MAS_ROOT; + mas->offset = MAPLE_NODE_SLOTS; + + /* Single entry tree. */ + if (mas->index > 0) + return NULL; + + return root; + } + + return NULL; +} + +/* + * ma_data_end() - Find the end of the data in a node. + * @node: The maple node + * @type: The maple node type + * @pivots: The array of pivots in the node + * @max: The maximum value in the node + * + * Uses metadata to find the end of the data when possible. + * Return: The zero indexed last slot with data (may be null). + */ +static inline unsigned char ma_data_end(struct maple_node *node, + enum maple_type type, + unsigned long *pivots, + unsigned long max) +{ + unsigned char offset; + + if (type == maple_arange_64) + return ma_meta_end(node, type); + + offset = mt_pivots[type] - 1; + if (likely(!pivots[offset])) + return ma_meta_end(node, type); + + if (likely(pivots[offset] == max)) + return offset; + + return mt_pivots[type]; +} + +/* + * mas_data_end() - Find the end of the data (slot). + * @mas: the maple state + * + * This method is optimized to check the metadata of a node if the node type + * supports data end metadata. + * + * Return: The zero indexed last slot with data (may be null). + */ +static inline unsigned char mas_data_end(struct ma_state *mas) +{ + enum maple_type type; + struct maple_node *node; + unsigned char offset; + unsigned long *pivots; + + type = mte_node_type(mas->node); + node = mas_mn(mas); + if (type == maple_arange_64) + return ma_meta_end(node, type); + + pivots = ma_pivots(node, type); + offset = mt_pivots[type] - 1; + if (likely(!pivots[offset])) + return ma_meta_end(node, type); + + if (likely(pivots[offset] == mas->max)) + return offset; + + return mt_pivots[type]; +} + +/* + * mas_leaf_max_gap() - Returns the largest gap in a leaf node + * @mas - the maple state + * + * Return: The maximum gap in the leaf. + */ +static unsigned long mas_leaf_max_gap(struct ma_state *mas) +{ + enum maple_type mt; + unsigned long pstart, gap, max_gap; + struct maple_node *mn; + unsigned long *pivots; + void __rcu **slots; + unsigned char i; + unsigned char max_piv; + + mt = mte_node_type(mas->node); + mn = mas_mn(mas); + slots = ma_slots(mn, mt); + max_gap = 0; + if (unlikely(ma_is_dense(mt))) { + gap = 0; + for (i = 0; i < mt_slots[mt]; i++) { + if (slots[i]) { + if (gap > max_gap) + max_gap = gap; + gap = 0; + } else { + gap++; + } + } + if (gap > max_gap) + max_gap = gap; + return max_gap; + } + + /* + * Check the first implied pivot optimizes the loop below and slot 1 may + * be skipped if there is a gap in slot 0. + */ + pivots = ma_pivots(mn, mt); + if (likely(!slots[0])) { + max_gap = pivots[0] - mas->min + 1; + i = 2; + } else { + i = 1; + } + + /* reduce max_piv as the special case is checked before the loop */ + max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; + /* + * Check end implied pivot which can only be a gap on the right most + * node. + */ + if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { + gap = ULONG_MAX - pivots[max_piv]; + if (gap > max_gap) + max_gap = gap; + } + + for (; i <= max_piv; i++) { + /* data == no gap. */ + if (likely(slots[i])) + continue; + + pstart = pivots[i - 1]; + gap = pivots[i] - pstart; + if (gap > max_gap) + max_gap = gap; + + /* There cannot be two gaps in a row. */ + i++; + } + return max_gap; +} + +/* + * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) + * @node: The maple node + * @gaps: The pointer to the gaps + * @mt: The maple node type + * @*off: Pointer to store the offset location of the gap. + * + * Uses the metadata data end to scan backwards across set gaps. + * + * Return: The maximum gap value + */ +static inline unsigned long +ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, + unsigned char *off) +{ + unsigned char offset, i; + unsigned long max_gap = 0; + + i = offset = ma_meta_end(node, mt); + do { + if (gaps[i] > max_gap) { + max_gap = gaps[i]; + offset = i; + } + } while (i--); + + *off = offset; + return max_gap; +} + +/* + * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. + * @mas: The maple state. + * + * If the metadata gap is set to MAPLE_ARANGE64_META_MAX, there is no gap. + * + * Return: The gap value. + */ +static inline unsigned long mas_max_gap(struct ma_state *mas) +{ + unsigned long *gaps; + unsigned char offset; + enum maple_type mt; + struct maple_node *node; + + mt = mte_node_type(mas->node); + if (ma_is_leaf(mt)) + return mas_leaf_max_gap(mas); + + node = mas_mn(mas); + offset = ma_meta_gap(node, mt); + if (offset == MAPLE_ARANGE64_META_MAX) + return 0; + + gaps = ma_gaps(node, mt); + return gaps[offset]; +} + +/* + * mas_parent_gap() - Set the parent gap and any gaps above, as needed + * @mas: The maple state + * @offset: The gap offset in the parent to set + * @new: The new gap value. + * + * Set the parent gap then continue to set the gap upwards, using the metadata + * of the parent to see if it is necessary to check the node above. + */ +static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, + unsigned long new) +{ + unsigned long meta_gap = 0; + struct maple_node *pnode; + struct maple_enode *penode; + unsigned long *pgaps; + unsigned char meta_offset; + enum maple_type pmt; + + pnode = mte_parent(mas->node); + pmt = mas_parent_enum(mas, mas->node); + penode = mt_mk_node(pnode, pmt); + pgaps = ma_gaps(pnode, pmt); + +ascend: + meta_offset = ma_meta_gap(pnode, pmt); + if (meta_offset == MAPLE_ARANGE64_META_MAX) + meta_gap = 0; + else + meta_gap = pgaps[meta_offset]; + + pgaps[offset] = new; + + if (meta_gap == new) + return; + + if (offset != meta_offset) { + if (meta_gap > new) + return; + + ma_set_meta_gap(pnode, pmt, offset); + } else if (new < meta_gap) { + meta_offset = 15; + new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); + ma_set_meta_gap(pnode, pmt, meta_offset); + } + + if (ma_is_root(pnode)) + return; + + /* Go to the parent node. */ + pnode = mte_parent(penode); + pmt = mas_parent_enum(mas, penode); + pgaps = ma_gaps(pnode, pmt); + offset = mte_parent_slot(penode); + penode = mt_mk_node(pnode, pmt); + goto ascend; +} + +/* + * mas_update_gap() - Update a nodes gaps and propagate up if necessary. + * @mas - the maple state. + */ +static inline void mas_update_gap(struct ma_state *mas) +{ + unsigned char pslot; + unsigned long p_gap; + unsigned long max_gap; + + if (!mt_is_alloc(mas->tree)) + return; + + if (mte_is_root(mas->node)) + return; + + max_gap = mas_max_gap(mas); + + pslot = mte_parent_slot(mas->node); + p_gap = ma_gaps(mte_parent(mas->node), + mas_parent_enum(mas, mas->node))[pslot]; + + if (p_gap != max_gap) + mas_parent_gap(mas, pslot, max_gap); +} + +/* + * mas_adopt_children() - Set the parent pointer of all nodes in @parent to + * @parent with the slot encoded. + * @mas - the maple state (for the tree) + * @parent - the maple encoded node containing the children. + */ +static inline void mas_adopt_children(struct ma_state *mas, + struct maple_enode *parent) +{ + enum maple_type type = mte_node_type(parent); + struct maple_node *node = mas_mn(mas); + void __rcu **slots = ma_slots(node, type); + unsigned long *pivots = ma_pivots(node, type); + struct maple_enode *child; + unsigned char offset; + + offset = ma_data_end(node, type, pivots, mas->max); + do { + child = mas_slot_locked(mas, slots, offset); + mte_set_parent(child, parent, offset); + } while (offset--); +} + +/* + * mas_replace() - Replace a maple node in the tree with mas->node. Uses the + * parent encoding to locate the maple node in the tree. + * @mas - the ma_state to use for operations. + * @advanced - boolean to adopt the child nodes and free the old node (false) or + * leave the node (true) and handle the adoption and free elsewhere. + */ +static inline void mas_replace(struct ma_state *mas, bool advanced) + __must_hold(mas->tree->lock) +{ + struct maple_node *mn = mas_mn(mas); + struct maple_enode *old_enode; + unsigned char offset = 0; + void __rcu **slots = NULL; + + if (ma_is_root(mn)) { + old_enode = mas_root_locked(mas); + } else { + offset = mte_parent_slot(mas->node); + slots = ma_slots(mte_parent(mas->node), + mas_parent_enum(mas, mas->node)); + old_enode = mas_slot_locked(mas, slots, offset); + } + + if (!advanced && !mte_is_leaf(mas->node)) + mas_adopt_children(mas, mas->node); + + if (mte_is_root(mas->node)) { + mn->parent = ma_parent_ptr( + ((unsigned long)mas->tree | MA_ROOT_PARENT)); + rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); + mas_set_height(mas); + } else { + rcu_assign_pointer(slots[offset], mas->node); + } + + if (!advanced) + mas_free(mas, old_enode); +} + +/* + * mas_new_child() - Find the new child of a node. + * @mas: the maple state + * @child: the maple state to store the child. + */ +static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child) + __must_hold(mas->tree->lock) +{ + enum maple_type mt; + unsigned char offset; + unsigned char end; + unsigned long *pivots; + struct maple_enode *entry; + struct maple_node *node; + void __rcu **slots; + + mt = mte_node_type(mas->node); + node = mas_mn(mas); + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); + end = ma_data_end(node, mt, pivots, mas->max); + for (offset = mas->offset; offset <= end; offset++) { + entry = mas_slot_locked(mas, slots, offset); + if (mte_parent(entry) == node) { + *child = *mas; + mas->offset = offset + 1; + child->offset = offset; + mas_descend(child); + child->offset = 0; + return true; + } + } + return false; +} + +/* + * mab_shift_right() - Shift the data in mab right. Note, does not clean out the + * old data or set b_node->b_end. + * @b_node: the maple_big_node + * @shift: the shift count + */ +static inline void mab_shift_right(struct maple_big_node *b_node, + unsigned char shift) +{ + unsigned long size = b_node->b_end * sizeof(unsigned long); + + memmove(b_node->pivot + shift, b_node->pivot, size); + memmove(b_node->slot + shift, b_node->slot, size); + if (b_node->type == maple_arange_64) + memmove(b_node->gap + shift, b_node->gap, size); +} + +/* + * mab_middle_node() - Check if a middle node is needed (unlikely) + * @b_node: the maple_big_node that contains the data. + * @size: the amount of data in the b_node + * @split: the potential split location + * @slot_count: the size that can be stored in a single node being considered. + * + * Return: true if a middle node is required. + */ +static inline bool mab_middle_node(struct maple_big_node *b_node, int split, + unsigned char slot_count) +{ + unsigned char size = b_node->b_end; + + if (size >= 2 * slot_count) + return true; + + if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) + return true; + + return false; +} + +/* + * mab_no_null_split() - ensure the split doesn't fall on a NULL + * @b_node: the maple_big_node with the data + * @split: the suggested split location + * @slot_count: the number of slots in the node being considered. + * + * Return: the split location. + */ +static inline int mab_no_null_split(struct maple_big_node *b_node, + unsigned char split, unsigned char slot_count) +{ + if (!b_node->slot[split]) { + /* + * If the split is less than the max slot && the right side will + * still be sufficient, then increment the split on NULL. + */ + if ((split < slot_count - 1) && + (b_node->b_end - split) > (mt_min_slots[b_node->type])) + split++; + else + split--; + } + return split; +} + +/* + * mab_calc_split() - Calculate the split location and if there needs to be two + * splits. + * @bn: The maple_big_node with the data + * @mid_split: The second split, if required. 0 otherwise. + * + * Return: The first split location. The middle split is set in @mid_split. + */ +static inline int mab_calc_split(struct ma_state *mas, + struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) +{ + unsigned char b_end = bn->b_end; + int split = b_end / 2; /* Assume equal split. */ + unsigned char slot_min, slot_count = mt_slots[bn->type]; + + /* + * To support gap tracking, all NULL entries are kept together and a node cannot + * end on a NULL entry, with the exception of the left-most leaf. The + * limitation means that the split of a node must be checked for this condition + * and be able to put more data in one direction or the other. + */ + if (unlikely((mas->mas_flags & MA_STATE_BULK))) { + *mid_split = 0; + split = b_end - mt_min_slots[bn->type]; + + if (!ma_is_leaf(bn->type)) + return split; + + mas->mas_flags |= MA_STATE_REBALANCE; + if (!bn->slot[split]) + split--; + return split; + } + + /* + * Although extremely rare, it is possible to enter what is known as the 3-way + * split scenario. The 3-way split comes about by means of a store of a range + * that overwrites the end and beginning of two full nodes. The result is a set + * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can + * also be located in different parent nodes which are also full. This can + * carry upwards all the way to the root in the worst case. + */ + if (unlikely(mab_middle_node(bn, split, slot_count))) { + split = b_end / 3; + *mid_split = split * 2; + } else { + slot_min = mt_min_slots[bn->type]; + + *mid_split = 0; + /* + * Avoid having a range less than the slot count unless it + * causes one node to be deficient. + * NOTE: mt_min_slots is 1 based, b_end and split are zero. + */ + while (((bn->pivot[split] - min) < slot_count - 1) && + (split < slot_count - 1) && (b_end - split > slot_min)) + split++; + } + + /* Avoid ending a node on a NULL entry */ + split = mab_no_null_split(bn, split, slot_count); + if (!(*mid_split)) + return split; + + *mid_split = mab_no_null_split(bn, *mid_split, slot_count); + + return split; +} + +/* + * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node + * and set @b_node->b_end to the next free slot. + * @mas: The maple state + * @mas_start: The starting slot to copy + * @mas_end: The end slot to copy (inclusively) + * @b_node: The maple_big_node to place the data + * @mab_start: The starting location in maple_big_node to store the data. + */ +static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, + unsigned char mas_end, struct maple_big_node *b_node, + unsigned char mab_start) +{ + enum maple_type mt; + struct maple_node *node; + void __rcu **slots; + unsigned long *pivots, *gaps; + int i = mas_start, j = mab_start; + unsigned char piv_end; + + node = mas_mn(mas); + mt = mte_node_type(mas->node); + pivots = ma_pivots(node, mt); + if (!i) { + b_node->pivot[j] = pivots[i++]; + if (unlikely(i > mas_end)) + goto complete; + j++; + } + + piv_end = min(mas_end, mt_pivots[mt]); + for (; i < piv_end; i++, j++) { + b_node->pivot[j] = pivots[i]; + if (unlikely(!b_node->pivot[j])) + break; + + if (unlikely(mas->max == b_node->pivot[j])) + goto complete; + } + + if (likely(i <= mas_end)) + b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); + +complete: + b_node->b_end = ++j; + j -= mab_start; + slots = ma_slots(node, mt); + memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); + if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { + gaps = ma_gaps(node, mt); + memcpy(b_node->gap + mab_start, gaps + mas_start, + sizeof(unsigned long) * j); + } +} + +/* + * mas_leaf_set_meta() - Set the metadata of a leaf if possible. + * @mas: The maple state + * @node: The maple node + * @pivots: pointer to the maple node pivots + * @mt: The maple type + * @end: The assumed end + * + * Note, end may be incremented within this function but not modified at the + * source. This is fine since the metadata is the last thing to be stored in a + * node during a write. + */ +static inline void mas_leaf_set_meta(struct ma_state *mas, + struct maple_node *node, unsigned long *pivots, + enum maple_type mt, unsigned char end) +{ + /* There is no room for metadata already */ + if (mt_pivots[mt] <= end) + return; + + if (pivots[end] && pivots[end] < mas->max) + end++; + + if (end < mt_slots[mt] - 1) + ma_set_meta(node, mt, 0, end); +} + +/* + * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. + * @b_node: the maple_big_node that has the data + * @mab_start: the start location in @b_node. + * @mab_end: The end location in @b_node (inclusively) + * @mas: The maple state with the maple encoded node. + */ +static inline void mab_mas_cp(struct maple_big_node *b_node, + unsigned char mab_start, unsigned char mab_end, + struct ma_state *mas, bool new_max) +{ + int i, j = 0; + enum maple_type mt = mte_node_type(mas->node); + struct maple_node *node = mte_to_node(mas->node); + void __rcu **slots = ma_slots(node, mt); + unsigned long *pivots = ma_pivots(node, mt); + unsigned long *gaps = NULL; + unsigned char end; + + if (mab_end - mab_start > mt_pivots[mt]) + mab_end--; + + if (!pivots[mt_pivots[mt] - 1]) + slots[mt_pivots[mt]] = NULL; + + i = mab_start; + do { + pivots[j++] = b_node->pivot[i++]; + } while (i <= mab_end && likely(b_node->pivot[i])); + + memcpy(slots, b_node->slot + mab_start, + sizeof(void *) * (i - mab_start)); + + if (new_max) + mas->max = b_node->pivot[i - 1]; + + end = j - 1; + if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { + unsigned long max_gap = 0; + unsigned char offset = 15; + + gaps = ma_gaps(node, mt); + do { + gaps[--j] = b_node->gap[--i]; + if (gaps[j] > max_gap) { + offset = j; + max_gap = gaps[j]; + } + } while (j); + + ma_set_meta(node, mt, offset, end); + } else { + mas_leaf_set_meta(mas, node, pivots, mt, end); + } +} + +/* + * mas_descend_adopt() - Descend through a sub-tree and adopt children. + * @mas: the maple state with the maple encoded node of the sub-tree. + * + * Descend through a sub-tree and adopt children who do not have the correct + * parents set. Follow the parents which have the correct parents as they are + * the new entries which need to be followed to find other incorrectly set + * parents. + */ +static inline void mas_descend_adopt(struct ma_state *mas) +{ + struct ma_state list[3], next[3]; + int i, n; + + /* + * At each level there may be up to 3 correct parent pointers which indicates + * the new nodes which need to be walked to find any new nodes at a lower level. + */ + + for (i = 0; i < 3; i++) { + list[i] = *mas; + list[i].offset = 0; + next[i].offset = 0; + } + next[0] = *mas; + + while (!mte_is_leaf(list[0].node)) { + n = 0; + for (i = 0; i < 3; i++) { + if (mas_is_none(&list[i])) + continue; + + if (i && list[i-1].node == list[i].node) + continue; + + while ((n < 3) && (mas_new_child(&list[i], &next[n]))) + n++; + + mas_adopt_children(&list[i], list[i].node); + } + + while (n < 3) + next[n++].node = MAS_NONE; + + /* descend by setting the list to the children */ + for (i = 0; i < 3; i++) + list[i] = next[i]; + } +} + +/* + * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. + * @mas: The maple state + * @end: The maple node end + * @mt: The maple node type + */ +static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, + enum maple_type mt) +{ + if (!(mas->mas_flags & MA_STATE_BULK)) + return; + + if (mte_is_root(mas->node)) + return; + + if (end > mt_min_slots[mt]) { + mas->mas_flags &= ~MA_STATE_REBALANCE; + return; + } +} + +/* + * mas_store_b_node() - Store an @entry into the b_node while also copying the + * data from a maple encoded node. + * @wr_mas: the maple write state + * @b_node: the maple_big_node to fill with data + * @offset_end: the offset to end copying + * + * Return: The actual end of the data stored in @b_node + */ +static inline void mas_store_b_node(struct ma_wr_state *wr_mas, + struct maple_big_node *b_node, unsigned char offset_end) +{ + unsigned char slot; + unsigned char b_end; + /* Possible underflow of piv will wrap back to 0 before use. */ + unsigned long piv; + struct ma_state *mas = wr_mas->mas; + + b_node->type = wr_mas->type; + b_end = 0; + slot = mas->offset; + if (slot) { + /* Copy start data up to insert. */ + mas_mab_cp(mas, 0, slot - 1, b_node, 0); + b_end = b_node->b_end; + piv = b_node->pivot[b_end - 1]; + } else + piv = mas->min - 1; + + if (piv + 1 < mas->index) { + /* Handle range starting after old range */ + b_node->slot[b_end] = wr_mas->content; + if (!wr_mas->content) + b_node->gap[b_end] = mas->index - 1 - piv; + b_node->pivot[b_end++] = mas->index - 1; + } + + /* Store the new entry. */ + mas->offset = b_end; + b_node->slot[b_end] = wr_mas->entry; + b_node->pivot[b_end] = mas->last; + + /* Appended. */ + if (mas->last >= mas->max) + goto b_end; + + /* Handle new range ending before old range ends */ + piv = mas_logical_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); + if (piv > mas->last) { + if (piv == ULONG_MAX) + mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); + + if (offset_end != slot) + wr_mas->content = mas_slot_locked(mas, wr_mas->slots, + offset_end); + + b_node->slot[++b_end] = wr_mas->content; + if (!wr_mas->content) + b_node->gap[b_end] = piv - mas->last + 1; + b_node->pivot[b_end] = piv; + } + + slot = offset_end + 1; + if (slot > wr_mas->node_end) + goto b_end; + + /* Copy end data to the end of the node. */ + mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end); + b_node->b_end--; + return; + +b_end: + b_node->b_end = b_end; +} + +/* + * mas_prev_sibling() - Find the previous node with the same parent. + * @mas: the maple state + * + * Return: True if there is a previous sibling, false otherwise. + */ +static inline bool mas_prev_sibling(struct ma_state *mas) +{ + unsigned int p_slot = mte_parent_slot(mas->node); + + if (mte_is_root(mas->node)) + return false; + + if (!p_slot) + return false; + + mas_ascend(mas); + mas->offset = p_slot - 1; + mas_descend(mas); + return true; +} + +/* + * mas_next_sibling() - Find the next node with the same parent. + * @mas: the maple state + * + * Return: true if there is a next sibling, false otherwise. + */ +static inline bool mas_next_sibling(struct ma_state *mas) +{ + MA_STATE(parent, mas->tree, mas->index, mas->last); + + if (mte_is_root(mas->node)) + return false; + + parent = *mas; + mas_ascend(&parent); + parent.offset = mte_parent_slot(mas->node) + 1; + if (parent.offset > mas_data_end(&parent)) + return false; + + *mas = parent; + mas_descend(mas); + return true; +} + +/* + * mte_node_or_node() - Return the encoded node or MAS_NONE. + * @enode: The encoded maple node. + * + * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state. + * + * Return: @enode or MAS_NONE + */ +static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) +{ + if (enode) + return enode; + + return ma_enode_ptr(MAS_NONE); +} + +/* + * mas_wr_node_walk() - Find the correct offset for the index in the @mas. + * @wr_mas: The maple write state + * + * Uses mas_slot_locked() and does not need to worry about dead nodes. + */ +static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + unsigned char count; + unsigned char offset; + unsigned long index, min, max; + + if (unlikely(ma_is_dense(wr_mas->type))) { + wr_mas->r_max = wr_mas->r_min = mas->index; + mas->offset = mas->index = mas->min; + return; + } + + wr_mas->node = mas_mn(wr_mas->mas); + wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); + count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type, + wr_mas->pivots, mas->max); + offset = mas->offset; + min = mas_safe_min(mas, wr_mas->pivots, offset); + if (unlikely(offset == count)) + goto max; + + max = wr_mas->pivots[offset]; + index = mas->index; + if (unlikely(index <= max)) + goto done; + + if (unlikely(!max && offset)) + goto max; + + min = max + 1; + while (++offset < count) { + max = wr_mas->pivots[offset]; + if (index <= max) + goto done; + else if (unlikely(!max)) + break; + + min = max + 1; + } + +max: + max = mas->max; +done: + wr_mas->r_max = max; + wr_mas->r_min = min; + wr_mas->offset_end = mas->offset = offset; +} + +/* + * mas_topiary_range() - Add a range of slots to the topiary. + * @mas: The maple state + * @destroy: The topiary to add the slots (usually destroy) + * @start: The starting slot inclusively + * @end: The end slot inclusively + */ +static inline void mas_topiary_range(struct ma_state *mas, + struct ma_topiary *destroy, unsigned char start, unsigned char end) +{ + void __rcu **slots; + unsigned char offset; + + MT_BUG_ON(mas->tree, mte_is_leaf(mas->node)); + slots = ma_slots(mas_mn(mas), mte_node_type(mas->node)); + for (offset = start; offset <= end; offset++) { + struct maple_enode *enode = mas_slot_locked(mas, slots, offset); + + if (mte_dead_node(enode)) + continue; + + mat_add(destroy, enode); + } +} + +/* + * mast_topiary() - Add the portions of the tree to the removal list; either to + * be freed or discarded (destroy walk). + * @mast: The maple_subtree_state. + */ +static inline void mast_topiary(struct maple_subtree_state *mast) +{ + MA_WR_STATE(wr_mas, mast->orig_l, NULL); + unsigned char r_start, r_end; + unsigned char l_start, l_end; + void __rcu **l_slots, **r_slots; + + wr_mas.type = mte_node_type(mast->orig_l->node); + mast->orig_l->index = mast->orig_l->last; + mas_wr_node_walk(&wr_mas); + l_start = mast->orig_l->offset + 1; + l_end = mas_data_end(mast->orig_l); + r_start = 0; + r_end = mast->orig_r->offset; + + if (r_end) + r_end--; + + l_slots = ma_slots(mas_mn(mast->orig_l), + mte_node_type(mast->orig_l->node)); + + r_slots = ma_slots(mas_mn(mast->orig_r), + mte_node_type(mast->orig_r->node)); + + if ((l_start < l_end) && + mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) { + l_start++; + } + + if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) { + if (r_end) + r_end--; + } + + if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node)) + return; + + /* At the node where left and right sides meet, add the parts between */ + if (mast->orig_l->node == mast->orig_r->node) { + return mas_topiary_range(mast->orig_l, mast->destroy, + l_start, r_end); + } + + /* mast->orig_r is different and consumed. */ + if (mte_is_leaf(mast->orig_r->node)) + return; + + if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end))) + l_end--; + + + if (l_start <= l_end) + mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end); + + if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start))) + r_start++; + + if (r_start <= r_end) + mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end); +} + +/* + * mast_rebalance_next() - Rebalance against the next node + * @mast: The maple subtree state + * @old_r: The encoded maple node to the right (next node). + */ +static inline void mast_rebalance_next(struct maple_subtree_state *mast) +{ + unsigned char b_end = mast->bn->b_end; + + mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), + mast->bn, b_end); + mast->orig_r->last = mast->orig_r->max; +} + +/* + * mast_rebalance_prev() - Rebalance against the previous node + * @mast: The maple subtree state + * @old_l: The encoded maple node to the left (previous node) + */ +static inline void mast_rebalance_prev(struct maple_subtree_state *mast) +{ + unsigned char end = mas_data_end(mast->orig_l) + 1; + unsigned char b_end = mast->bn->b_end; + + mab_shift_right(mast->bn, end); + mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); + mast->l->min = mast->orig_l->min; + mast->orig_l->index = mast->orig_l->min; + mast->bn->b_end = end + b_end; + mast->l->offset += end; +} + +/* + * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring + * the node to the right. Checking the nodes to the right then the left at each + * level upwards until root is reached. Free and destroy as needed. + * Data is copied into the @mast->bn. + * @mast: The maple_subtree_state. + */ +static inline +bool mast_spanning_rebalance(struct maple_subtree_state *mast) +{ + struct ma_state r_tmp = *mast->orig_r; + struct ma_state l_tmp = *mast->orig_l; + struct maple_enode *ancestor = NULL; + unsigned char start, end; + unsigned char depth = 0; + + r_tmp = *mast->orig_r; + l_tmp = *mast->orig_l; + do { + mas_ascend(mast->orig_r); + mas_ascend(mast->orig_l); + depth++; + if (!ancestor && + (mast->orig_r->node == mast->orig_l->node)) { + ancestor = mast->orig_r->node; + end = mast->orig_r->offset - 1; + start = mast->orig_l->offset + 1; + } + + if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { + if (!ancestor) { + ancestor = mast->orig_r->node; + start = 0; + } + + mast->orig_r->offset++; + do { + mas_descend(mast->orig_r); + mast->orig_r->offset = 0; + depth--; + } while (depth); + + mast_rebalance_next(mast); + do { + unsigned char l_off = 0; + struct maple_enode *child = r_tmp.node; + + mas_ascend(&r_tmp); + if (ancestor == r_tmp.node) + l_off = start; + + if (r_tmp.offset) + r_tmp.offset--; + + if (l_off < r_tmp.offset) + mas_topiary_range(&r_tmp, mast->destroy, + l_off, r_tmp.offset); + + if (l_tmp.node != child) + mat_add(mast->free, child); + + } while (r_tmp.node != ancestor); + + *mast->orig_l = l_tmp; + return true; + + } else if (mast->orig_l->offset != 0) { + if (!ancestor) { + ancestor = mast->orig_l->node; + end = mas_data_end(mast->orig_l); + } + + mast->orig_l->offset--; + do { + mas_descend(mast->orig_l); + mast->orig_l->offset = + mas_data_end(mast->orig_l); + depth--; + } while (depth); + + mast_rebalance_prev(mast); + do { + unsigned char r_off; + struct maple_enode *child = l_tmp.node; + + mas_ascend(&l_tmp); + if (ancestor == l_tmp.node) + r_off = end; + else + r_off = mas_data_end(&l_tmp); + + if (l_tmp.offset < r_off) + l_tmp.offset++; + + if (l_tmp.offset < r_off) + mas_topiary_range(&l_tmp, mast->destroy, + l_tmp.offset, r_off); + + if (r_tmp.node != child) + mat_add(mast->free, child); + + } while (l_tmp.node != ancestor); + + *mast->orig_r = r_tmp; + return true; + } + } while (!mte_is_root(mast->orig_r->node)); + + *mast->orig_r = r_tmp; + *mast->orig_l = l_tmp; + return false; +} + +/* + * mast_ascend_free() - Add current original maple state nodes to the free list + * and ascend. + * @mast: the maple subtree state. + * + * Ascend the original left and right sides and add the previous nodes to the + * free list. Set the slots to point to the correct location in the new nodes. + */ +static inline void +mast_ascend_free(struct maple_subtree_state *mast) +{ + MA_WR_STATE(wr_mas, mast->orig_r, NULL); + struct maple_enode *left = mast->orig_l->node; + struct maple_enode *right = mast->orig_r->node; + + mas_ascend(mast->orig_l); + mas_ascend(mast->orig_r); + mat_add(mast->free, left); + + if (left != right) + mat_add(mast->free, right); + + mast->orig_r->offset = 0; + mast->orig_r->index = mast->r->max; + /* last should be larger than or equal to index */ + if (mast->orig_r->last < mast->orig_r->index) + mast->orig_r->last = mast->orig_r->index; + /* + * The node may not contain the value so set slot to ensure all + * of the nodes contents are freed or destroyed. + */ + wr_mas.type = mte_node_type(mast->orig_r->node); + mas_wr_node_walk(&wr_mas); + /* Set up the left side of things */ + mast->orig_l->offset = 0; + mast->orig_l->index = mast->l->min; + wr_mas.mas = mast->orig_l; + wr_mas.type = mte_node_type(mast->orig_l->node); + mas_wr_node_walk(&wr_mas); + + mast->bn->type = wr_mas.type; +} + +/* + * mas_new_ma_node() - Create and return a new maple node. Helper function. + * @mas: the maple state with the allocations. + * @b_node: the maple_big_node with the type encoding. + * + * Use the node type from the maple_big_node to allocate a new node from the + * ma_state. This function exists mainly for code readability. + * + * Return: A new maple encoded node + */ +static inline struct maple_enode +*mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) +{ + return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); +} + +/* + * mas_mab_to_node() - Set up right and middle nodes + * + * @mas: the maple state that contains the allocations. + * @b_node: the node which contains the data. + * @left: The pointer which will have the left node + * @right: The pointer which may have the right node + * @middle: the pointer which may have the middle node (rare) + * @mid_split: the split location for the middle node + * + * Return: the split of left. + */ +static inline unsigned char mas_mab_to_node(struct ma_state *mas, + struct maple_big_node *b_node, struct maple_enode **left, + struct maple_enode **right, struct maple_enode **middle, + unsigned char *mid_split, unsigned long min) +{ + unsigned char split = 0; + unsigned char slot_count = mt_slots[b_node->type]; + + *left = mas_new_ma_node(mas, b_node); + *right = NULL; + *middle = NULL; + *mid_split = 0; + + if (b_node->b_end < slot_count) { + split = b_node->b_end; + } else { + split = mab_calc_split(mas, b_node, mid_split, min); + *right = mas_new_ma_node(mas, b_node); + } + + if (*mid_split) + *middle = mas_new_ma_node(mas, b_node); + + return split; + +} + +/* + * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end + * pointer. + * @b_node - the big node to add the entry + * @mas - the maple state to get the pivot (mas->max) + * @entry - the entry to add, if NULL nothing happens. + */ +static inline void mab_set_b_end(struct maple_big_node *b_node, + struct ma_state *mas, + void *entry) +{ + if (!entry) + return; + + b_node->slot[b_node->b_end] = entry; + if (mt_is_alloc(mas->tree)) + b_node->gap[b_node->b_end] = mas_max_gap(mas); + b_node->pivot[b_node->b_end++] = mas->max; +} + +/* + * mas_set_split_parent() - combine_then_separate helper function. Sets the parent + * of @mas->node to either @left or @right, depending on @slot and @split + * + * @mas - the maple state with the node that needs a parent + * @left - possible parent 1 + * @right - possible parent 2 + * @slot - the slot the mas->node was placed + * @split - the split location between @left and @right + */ +static inline void mas_set_split_parent(struct ma_state *mas, + struct maple_enode *left, + struct maple_enode *right, + unsigned char *slot, unsigned char split) +{ + if (mas_is_none(mas)) + return; + + if ((*slot) <= split) + mte_set_parent(mas->node, left, *slot); + else if (right) + mte_set_parent(mas->node, right, (*slot) - split - 1); + + (*slot)++; +} + +/* + * mte_mid_split_check() - Check if the next node passes the mid-split + * @**l: Pointer to left encoded maple node. + * @**m: Pointer to middle encoded maple node. + * @**r: Pointer to right encoded maple node. + * @slot: The offset + * @*split: The split location. + * @mid_split: The middle split. + */ +static inline void mte_mid_split_check(struct maple_enode **l, + struct maple_enode **r, + struct maple_enode *right, + unsigned char slot, + unsigned char *split, + unsigned char mid_split) +{ + if (*r == right) + return; + + if (slot < mid_split) + return; + + *l = *r; + *r = right; + *split = mid_split; +} + +/* + * mast_set_split_parents() - Helper function to set three nodes parents. Slot + * is taken from @mast->l. + * @mast - the maple subtree state + * @left - the left node + * @right - the right node + * @split - the split location. + */ +static inline void mast_set_split_parents(struct maple_subtree_state *mast, + struct maple_enode *left, + struct maple_enode *middle, + struct maple_enode *right, + unsigned char split, + unsigned char mid_split) +{ + unsigned char slot; + struct maple_enode *l = left; + struct maple_enode *r = right; + + if (mas_is_none(mast->l)) + return; + + if (middle) + r = middle; + + slot = mast->l->offset; + + mte_mid_split_check(&l, &r, right, slot, &split, mid_split); + mas_set_split_parent(mast->l, l, r, &slot, split); + + mte_mid_split_check(&l, &r, right, slot, &split, mid_split); + mas_set_split_parent(mast->m, l, r, &slot, split); + + mte_mid_split_check(&l, &r, right, slot, &split, mid_split); + mas_set_split_parent(mast->r, l, r, &slot, split); +} + +/* + * mas_wmb_replace() - Write memory barrier and replace + * @mas: The maple state + * @free: the maple topiary list of nodes to free + * @destroy: The maple topiary list of nodes to destroy (walk and free) + * + * Updates gap as necessary. + */ +static inline void mas_wmb_replace(struct ma_state *mas, + struct ma_topiary *free, + struct ma_topiary *destroy) +{ + /* All nodes must see old data as dead prior to replacing that data */ + smp_wmb(); /* Needed for RCU */ + + /* Insert the new data in the tree */ + mas_replace(mas, true); + + if (!mte_is_leaf(mas->node)) + mas_descend_adopt(mas); + + mas_mat_free(mas, free); + + if (destroy) + mas_mat_destroy(mas, destroy); + + if (mte_is_leaf(mas->node)) + return; + + mas_update_gap(mas); +} + +/* + * mast_new_root() - Set a new tree root during subtree creation + * @mast: The maple subtree state + * @mas: The maple state + */ +static inline void mast_new_root(struct maple_subtree_state *mast, + struct ma_state *mas) +{ + mas_mn(mast->l)->parent = + ma_parent_ptr(((unsigned long)mas->tree | MA_ROOT_PARENT)); + if (!mte_dead_node(mast->orig_l->node) && + !mte_is_root(mast->orig_l->node)) { + do { + mast_ascend_free(mast); + mast_topiary(mast); + } while (!mte_is_root(mast->orig_l->node)); + } + if ((mast->orig_l->node != mas->node) && + (mast->l->depth > mas_mt_height(mas))) { + mat_add(mast->free, mas->node); + } +} + +/* + * mast_cp_to_nodes() - Copy data out to nodes. + * @mast: The maple subtree state + * @left: The left encoded maple node + * @middle: The middle encoded maple node + * @right: The right encoded maple node + * @split: The location to split between left and (middle ? middle : right) + * @mid_split: The location to split between middle and right. + */ +static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, + struct maple_enode *left, struct maple_enode *middle, + struct maple_enode *right, unsigned char split, unsigned char mid_split) +{ + bool new_lmax = true; + + mast->l->node = mte_node_or_none(left); + mast->m->node = mte_node_or_none(middle); + mast->r->node = mte_node_or_none(right); + + mast->l->min = mast->orig_l->min; + if (split == mast->bn->b_end) { + mast->l->max = mast->orig_r->max; + new_lmax = false; + } + + mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); + + if (middle) { + mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); + mast->m->min = mast->bn->pivot[split] + 1; + split = mid_split; + } + + mast->r->max = mast->orig_r->max; + if (right) { + mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); + mast->r->min = mast->bn->pivot[split] + 1; + } +} + +/* + * mast_combine_cp_left - Copy in the original left side of the tree into the + * combined data set in the maple subtree state big node. + * @mast: The maple subtree state + */ +static inline void mast_combine_cp_left(struct maple_subtree_state *mast) +{ + unsigned char l_slot = mast->orig_l->offset; + + if (!l_slot) + return; + + mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); +} + +/* + * mast_combine_cp_right: Copy in the original right side of the tree into the + * combined data set in the maple subtree state big node. + * @mast: The maple subtree state + */ +static inline void mast_combine_cp_right(struct maple_subtree_state *mast) +{ + if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) + return; + + mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, + mt_slot_count(mast->orig_r->node), mast->bn, + mast->bn->b_end); + mast->orig_r->last = mast->orig_r->max; +} + +/* + * mast_sufficient: Check if the maple subtree state has enough data in the big + * node to create at least one sufficient node + * @mast: the maple subtree state + */ +static inline bool mast_sufficient(struct maple_subtree_state *mast) +{ + if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) + return true; + + return false; +} + +/* + * mast_overflow: Check if there is too much data in the subtree state for a + * single node. + * @mast: The maple subtree state + */ +static inline bool mast_overflow(struct maple_subtree_state *mast) +{ + if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) + return true; + + return false; +} + +static inline void *mtree_range_walk(struct ma_state *mas) +{ + unsigned long *pivots; + unsigned char offset; + struct maple_node *node; + struct maple_enode *next, *last; + enum maple_type type; + void __rcu **slots; + unsigned char end; + unsigned long max, min; + unsigned long prev_max, prev_min; + + next = mas->node; + min = mas->min; + max = mas->max; + do { + offset = 0; + last = next; + node = mte_to_node(next); + type = mte_node_type(next); + pivots = ma_pivots(node, type); + end = ma_data_end(node, type, pivots, max); + if (unlikely(ma_dead_node(node))) + goto dead_node; + + if (pivots[offset] >= mas->index) { + prev_max = max; + prev_min = min; + max = pivots[offset]; + goto next; + } + + do { + offset++; + } while ((offset < end) && (pivots[offset] < mas->index)); + + prev_min = min; + min = pivots[offset - 1] + 1; + prev_max = max; + if (likely(offset < end && pivots[offset])) + max = pivots[offset]; + +next: + slots = ma_slots(node, type); + next = mt_slot(mas->tree, slots, offset); + if (unlikely(ma_dead_node(node))) + goto dead_node; + } while (!ma_is_leaf(type)); + + mas->offset = offset; + mas->index = min; + mas->last = max; + mas->min = prev_min; + mas->max = prev_max; + mas->node = last; + return (void *) next; + +dead_node: + mas_reset(mas); + return NULL; +} + +/* + * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. + * @mas: The starting maple state + * @mast: The maple_subtree_state, keeps track of 4 maple states. + * @count: The estimated count of iterations needed. + * + * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root + * is hit. First @b_node is split into two entries which are inserted into the + * next iteration of the loop. @b_node is returned populated with the final + * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the + * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last + * to account of what has been copied into the new sub-tree. The update of + * orig_l_mas->last is used in mas_consume to find the slots that will need to + * be either freed or destroyed. orig_l_mas->depth keeps track of the height of + * the new sub-tree in case the sub-tree becomes the full tree. + * + * Return: the number of elements in b_node during the last loop. + */ +static int mas_spanning_rebalance(struct ma_state *mas, + struct maple_subtree_state *mast, unsigned char count) +{ + unsigned char split, mid_split; + unsigned char slot = 0; + struct maple_enode *left = NULL, *middle = NULL, *right = NULL; + + MA_STATE(l_mas, mas->tree, mas->index, mas->index); + MA_STATE(r_mas, mas->tree, mas->index, mas->last); + MA_STATE(m_mas, mas->tree, mas->index, mas->index); + MA_TOPIARY(free, mas->tree); + MA_TOPIARY(destroy, mas->tree); + + /* + * The tree needs to be rebalanced and leaves need to be kept at the same level. + * Rebalancing is done by use of the ``struct maple_topiary``. + */ + mast->l = &l_mas; + mast->m = &m_mas; + mast->r = &r_mas; + mast->free = &free; + mast->destroy = &destroy; + l_mas.node = r_mas.node = m_mas.node = MAS_NONE; + if (!(mast->orig_l->min && mast->orig_r->max == ULONG_MAX) && + unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) + mast_spanning_rebalance(mast); + + mast->orig_l->depth = 0; + + /* + * Each level of the tree is examined and balanced, pushing data to the left or + * right, or rebalancing against left or right nodes is employed to avoid + * rippling up the tree to limit the amount of churn. Once a new sub-section of + * the tree is created, there may be a mix of new and old nodes. The old nodes + * will have the incorrect parent pointers and currently be in two trees: the + * original tree and the partially new tree. To remedy the parent pointers in + * the old tree, the new data is swapped into the active tree and a walk down + * the tree is performed and the parent pointers are updated. + * See mas_descend_adopt() for more information.. + */ + while (count--) { + mast->bn->b_end--; + mast->bn->type = mte_node_type(mast->orig_l->node); + split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, + &mid_split, mast->orig_l->min); + mast_set_split_parents(mast, left, middle, right, split, + mid_split); + mast_cp_to_nodes(mast, left, middle, right, split, mid_split); + + /* + * Copy data from next level in the tree to mast->bn from next + * iteration + */ + memset(mast->bn, 0, sizeof(struct maple_big_node)); + mast->bn->type = mte_node_type(left); + mast->orig_l->depth++; + + /* Root already stored in l->node. */ + if (mas_is_root_limits(mast->l)) + goto new_root; + + mast_ascend_free(mast); + mast_combine_cp_left(mast); + l_mas.offset = mast->bn->b_end; + mab_set_b_end(mast->bn, &l_mas, left); + mab_set_b_end(mast->bn, &m_mas, middle); + mab_set_b_end(mast->bn, &r_mas, right); + + /* Copy anything necessary out of the right node. */ + mast_combine_cp_right(mast); + mast_topiary(mast); + mast->orig_l->last = mast->orig_l->max; + + if (mast_sufficient(mast)) + continue; + + if (mast_overflow(mast)) + continue; + + /* May be a new root stored in mast->bn */ + if (mas_is_root_limits(mast->orig_l)) + break; + + mast_spanning_rebalance(mast); + + /* rebalancing from other nodes may require another loop. */ + if (!count) + count++; + } + + l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), + mte_node_type(mast->orig_l->node)); + mast->orig_l->depth++; + mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); + mte_set_parent(left, l_mas.node, slot); + if (middle) + mte_set_parent(middle, l_mas.node, ++slot); + + if (right) + mte_set_parent(right, l_mas.node, ++slot); + + if (mas_is_root_limits(mast->l)) { +new_root: + mast_new_root(mast, mas); + } else { + mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; + } + + if (!mte_dead_node(mast->orig_l->node)) + mat_add(&free, mast->orig_l->node); + + mas->depth = mast->orig_l->depth; + *mast->orig_l = l_mas; + mte_set_node_dead(mas->node); + + /* Set up mas for insertion. */ + mast->orig_l->depth = mas->depth; + mast->orig_l->alloc = mas->alloc; + *mas = *mast->orig_l; + mas_wmb_replace(mas, &free, &destroy); + mtree_range_walk(mas); + return mast->bn->b_end; +} + +/* + * mas_rebalance() - Rebalance a given node. + * @mas: The maple state + * @b_node: The big maple node. + * + * Rebalance two nodes into a single node or two new nodes that are sufficient. + * Continue upwards until tree is sufficient. + * + * Return: the number of elements in b_node during the last loop. + */ +static inline int mas_rebalance(struct ma_state *mas, + struct maple_big_node *b_node) +{ + char empty_count = mas_mt_height(mas); + struct maple_subtree_state mast; + unsigned char shift, b_end = ++b_node->b_end; + + MA_STATE(l_mas, mas->tree, mas->index, mas->last); + MA_STATE(r_mas, mas->tree, mas->index, mas->last); + + trace_ma_op(__func__, mas); + + /* + * Rebalancing occurs if a node is insufficient. Data is rebalanced + * against the node to the right if it exists, otherwise the node to the + * left of this node is rebalanced against this node. If rebalancing + * causes just one node to be produced instead of two, then the parent + * is also examined and rebalanced if it is insufficient. Every level + * tries to combine the data in the same way. If one node contains the + * entire range of the tree, then that node is used as a new root node. + */ + mas_node_count(mas, 1 + empty_count * 3); + if (mas_is_err(mas)) + return 0; + + mast.orig_l = &l_mas; + mast.orig_r = &r_mas; + mast.bn = b_node; + mast.bn->type = mte_node_type(mas->node); + + l_mas = r_mas = *mas; + + if (mas_next_sibling(&r_mas)) { + mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); + r_mas.last = r_mas.index = r_mas.max; + } else { + mas_prev_sibling(&l_mas); + shift = mas_data_end(&l_mas) + 1; + mab_shift_right(b_node, shift); + mas->offset += shift; + mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); + b_node->b_end = shift + b_end; + l_mas.index = l_mas.last = l_mas.min; + } + + return mas_spanning_rebalance(mas, &mast, empty_count); +} + +/* + * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple + * state. + * @mas: The maple state + * @end: The end of the left-most node. + * + * During a mass-insert event (such as forking), it may be necessary to + * rebalance the left-most node when it is not sufficient. + */ +static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) +{ + enum maple_type mt = mte_node_type(mas->node); + struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; + struct maple_enode *eparent; + unsigned char offset, tmp, split = mt_slots[mt] / 2; + void __rcu **l_slots, **slots; + unsigned long *l_pivs, *pivs, gap; + bool in_rcu = mt_in_rcu(mas->tree); + + MA_STATE(l_mas, mas->tree, mas->index, mas->last); + + l_mas = *mas; + mas_prev_sibling(&l_mas); + + /* set up node. */ + if (in_rcu) { + /* Allocate for both left and right as well as parent. */ + mas_node_count(mas, 3); + if (mas_is_err(mas)) + return; + + newnode = mas_pop_node(mas); + } else { + newnode = &reuse; + } + + node = mas_mn(mas); + newnode->parent = node->parent; + slots = ma_slots(newnode, mt); + pivs = ma_pivots(newnode, mt); + left = mas_mn(&l_mas); + l_slots = ma_slots(left, mt); + l_pivs = ma_pivots(left, mt); + if (!l_slots[split]) + split++; + tmp = mas_data_end(&l_mas) - split; + + memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); + memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); + pivs[tmp] = l_mas.max; + memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); + memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); + + l_mas.max = l_pivs[split]; + mas->min = l_mas.max + 1; + eparent = mt_mk_node(mte_parent(l_mas.node), + mas_parent_enum(&l_mas, l_mas.node)); + tmp += end; + if (!in_rcu) { + unsigned char max_p = mt_pivots[mt]; + unsigned char max_s = mt_slots[mt]; + + if (tmp < max_p) + memset(pivs + tmp, 0, + sizeof(unsigned long *) * (max_p - tmp)); + + if (tmp < mt_slots[mt]) + memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); + + memcpy(node, newnode, sizeof(struct maple_node)); + ma_set_meta(node, mt, 0, tmp - 1); + mte_set_pivot(eparent, mte_parent_slot(l_mas.node), + l_pivs[split]); + + /* Remove data from l_pivs. */ + tmp = split + 1; + memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); + memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); + ma_set_meta(left, mt, 0, split); + + goto done; + } + + /* RCU requires replacing both l_mas, mas, and parent. */ + mas->node = mt_mk_node(newnode, mt); + ma_set_meta(newnode, mt, 0, tmp); + + new_left = mas_pop_node(mas); + new_left->parent = left->parent; + mt = mte_node_type(l_mas.node); + slots = ma_slots(new_left, mt); + pivs = ma_pivots(new_left, mt); + memcpy(slots, l_slots, sizeof(void *) * split); + memcpy(pivs, l_pivs, sizeof(unsigned long) * split); + ma_set_meta(new_left, mt, 0, split); + l_mas.node = mt_mk_node(new_left, mt); + + /* replace parent. */ + offset = mte_parent_slot(mas->node); + mt = mas_parent_enum(&l_mas, l_mas.node); + parent = mas_pop_node(mas); + slots = ma_slots(parent, mt); + pivs = ma_pivots(parent, mt); + memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node)); + rcu_assign_pointer(slots[offset], mas->node); + rcu_assign_pointer(slots[offset - 1], l_mas.node); + pivs[offset - 1] = l_mas.max; + eparent = mt_mk_node(parent, mt); +done: + gap = mas_leaf_max_gap(mas); + mte_set_gap(eparent, mte_parent_slot(mas->node), gap); + gap = mas_leaf_max_gap(&l_mas); + mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); + mas_ascend(mas); + + if (in_rcu) + mas_replace(mas, false); + + mas_update_gap(mas); +} + +/* + * mas_split_final_node() - Split the final node in a subtree operation. + * @mast: the maple subtree state + * @mas: The maple state + * @height: The height of the tree in case it's a new root. + */ +static inline bool mas_split_final_node(struct maple_subtree_state *mast, + struct ma_state *mas, int height) +{ + struct maple_enode *ancestor; + + if (mte_is_root(mas->node)) { + if (mt_is_alloc(mas->tree)) + mast->bn->type = maple_arange_64; + else + mast->bn->type = maple_range_64; + mas->depth = height; + } + /* + * Only a single node is used here, could be root. + * The Big_node data should just fit in a single node. + */ + ancestor = mas_new_ma_node(mas, mast->bn); + mte_set_parent(mast->l->node, ancestor, mast->l->offset); + mte_set_parent(mast->r->node, ancestor, mast->r->offset); + mte_to_node(ancestor)->parent = mas_mn(mas)->parent; + + mast->l->node = ancestor; + mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); + mas->offset = mast->bn->b_end - 1; + return true; +} + +/* + * mast_fill_bnode() - Copy data into the big node in the subtree state + * @mast: The maple subtree state + * @mas: the maple state + * @skip: The number of entries to skip for new nodes insertion. + */ +static inline void mast_fill_bnode(struct maple_subtree_state *mast, + struct ma_state *mas, + unsigned char skip) +{ + bool cp = true; + struct maple_enode *old = mas->node; + unsigned char split; + + memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); + memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); + memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); + mast->bn->b_end = 0; + + if (mte_is_root(mas->node)) { + cp = false; + } else { + mas_ascend(mas); + mat_add(mast->free, old); + mas->offset = mte_parent_slot(mas->node); + } + + if (cp && mast->l->offset) + mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); + + split = mast->bn->b_end; + mab_set_b_end(mast->bn, mast->l, mast->l->node); + mast->r->offset = mast->bn->b_end; + mab_set_b_end(mast->bn, mast->r, mast->r->node); + if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) + cp = false; + + if (cp) + mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, + mast->bn, mast->bn->b_end); + + mast->bn->b_end--; + mast->bn->type = mte_node_type(mas->node); +} + +/* + * mast_split_data() - Split the data in the subtree state big node into regular + * nodes. + * @mast: The maple subtree state + * @mas: The maple state + * @split: The location to split the big node + */ +static inline void mast_split_data(struct maple_subtree_state *mast, + struct ma_state *mas, unsigned char split) +{ + unsigned char p_slot; + + mab_mas_cp(mast->bn, 0, split, mast->l, true); + mte_set_pivot(mast->r->node, 0, mast->r->max); + mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); + mast->l->offset = mte_parent_slot(mas->node); + mast->l->max = mast->bn->pivot[split]; + mast->r->min = mast->l->max + 1; + if (mte_is_leaf(mas->node)) + return; + + p_slot = mast->orig_l->offset; + mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, + &p_slot, split); + mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, + &p_slot, split); +} + +/* + * mas_push_data() - Instead of splitting a node, it is beneficial to push the + * data to the right or left node if there is room. + * @mas: The maple state + * @height: The current height of the maple state + * @mast: The maple subtree state + * @left: Push left or not. + * + * Keeping the height of the tree low means faster lookups. + * + * Return: True if pushed, false otherwise. + */ +static inline bool mas_push_data(struct ma_state *mas, int height, + struct maple_subtree_state *mast, bool left) +{ + unsigned char slot_total = mast->bn->b_end; + unsigned char end, space, split; + + MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); + tmp_mas = *mas; + tmp_mas.depth = mast->l->depth; + + if (left && !mas_prev_sibling(&tmp_mas)) + return false; + else if (!left && !mas_next_sibling(&tmp_mas)) + return false; + + end = mas_data_end(&tmp_mas); + slot_total += end; + space = 2 * mt_slot_count(mas->node) - 2; + /* -2 instead of -1 to ensure there isn't a triple split */ + if (ma_is_leaf(mast->bn->type)) + space--; + + if (mas->max == ULONG_MAX) + space--; + + if (slot_total >= space) + return false; + + /* Get the data; Fill mast->bn */ + mast->bn->b_end++; + if (left) { + mab_shift_right(mast->bn, end + 1); + mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); + mast->bn->b_end = slot_total + 1; + } else { + mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); + } + + /* Configure mast for splitting of mast->bn */ + split = mt_slots[mast->bn->type] - 2; + if (left) { + /* Switch mas to prev node */ + mat_add(mast->free, mas->node); + *mas = tmp_mas; + /* Start using mast->l for the left side. */ + tmp_mas.node = mast->l->node; + *mast->l = tmp_mas; + } else { + mat_add(mast->free, tmp_mas.node); + tmp_mas.node = mast->r->node; + *mast->r = tmp_mas; + split = slot_total - split; + } + split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); + /* Update parent slot for split calculation. */ + if (left) + mast->orig_l->offset += end + 1; + + mast_split_data(mast, mas, split); + mast_fill_bnode(mast, mas, 2); + mas_split_final_node(mast, mas, height + 1); + return true; +} + +/* + * mas_split() - Split data that is too big for one node into two. + * @mas: The maple state + * @b_node: The maple big node + * Return: 1 on success, 0 on failure. + */ +static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) +{ + + struct maple_subtree_state mast; + int height = 0; + unsigned char mid_split, split = 0; + + /* + * Splitting is handled differently from any other B-tree; the Maple + * Tree splits upwards. Splitting up means that the split operation + * occurs when the walk of the tree hits the leaves and not on the way + * down. The reason for splitting up is that it is impossible to know + * how much space will be needed until the leaf is (or leaves are) + * reached. Since overwriting data is allowed and a range could + * overwrite more than one range or result in changing one entry into 3 + * entries, it is impossible to know if a split is required until the + * data is examined. + * + * Splitting is a balancing act between keeping allocations to a minimum + * and avoiding a 'jitter' event where a tree is expanded to make room + * for an entry followed by a contraction when the entry is removed. To + * accomplish the balance, there are empty slots remaining in both left + * and right nodes after a split. + */ + MA_STATE(l_mas, mas->tree, mas->index, mas->last); + MA_STATE(r_mas, mas->tree, mas->index, mas->last); + MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); + MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); + MA_TOPIARY(mat, mas->tree); + + trace_ma_op(__func__, mas); + mas->depth = mas_mt_height(mas); + /* Allocation failures will happen early. */ + mas_node_count(mas, 1 + mas->depth * 2); + if (mas_is_err(mas)) + return 0; + + mast.l = &l_mas; + mast.r = &r_mas; + mast.orig_l = &prev_l_mas; + mast.orig_r = &prev_r_mas; + mast.free = &mat; + mast.bn = b_node; + + while (height++ <= mas->depth) { + if (mt_slots[b_node->type] > b_node->b_end) { + mas_split_final_node(&mast, mas, height); + break; + } + + l_mas = r_mas = *mas; + l_mas.node = mas_new_ma_node(mas, b_node); + r_mas.node = mas_new_ma_node(mas, b_node); + /* + * Another way that 'jitter' is avoided is to terminate a split up early if the + * left or right node has space to spare. This is referred to as "pushing left" + * or "pushing right" and is similar to the B* tree, except the nodes left or + * right can rarely be reused due to RCU, but the ripple upwards is halted which + * is a significant savings. + */ + /* Try to push left. */ + if (mas_push_data(mas, height, &mast, true)) + break; + + /* Try to push right. */ + if (mas_push_data(mas, height, &mast, false)) + break; + + split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); + mast_split_data(&mast, mas, split); + /* + * Usually correct, mab_mas_cp in the above call overwrites + * r->max. + */ + mast.r->max = mas->max; + mast_fill_bnode(&mast, mas, 1); + prev_l_mas = *mast.l; + prev_r_mas = *mast.r; + } + + /* Set the original node as dead */ + mat_add(mast.free, mas->node); + mas->node = l_mas.node; + mas_wmb_replace(mas, mast.free, NULL); + mtree_range_walk(mas); + return 1; +} + +/* + * mas_reuse_node() - Reuse the node to store the data. + * @wr_mas: The maple write state + * @bn: The maple big node + * @end: The end of the data. + * + * Will always return false in RCU mode. + * + * Return: True if node was reused, false otherwise. + */ +static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, + struct maple_big_node *bn, unsigned char end) +{ + /* Need to be rcu safe. */ + if (mt_in_rcu(wr_mas->mas->tree)) + return false; + + if (end > bn->b_end) { + int clear = mt_slots[wr_mas->type] - bn->b_end; + + memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); + memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); + } + mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); + return true; +} + +/* + * mas_commit_b_node() - Commit the big node into the tree. + * @wr_mas: The maple write state + * @b_node: The maple big node + * @end: The end of the data. + */ +static inline int mas_commit_b_node(struct ma_wr_state *wr_mas, + struct maple_big_node *b_node, unsigned char end) +{ + struct maple_node *node; + unsigned char b_end = b_node->b_end; + enum maple_type b_type = b_node->type; + + if ((b_end < mt_min_slots[b_type]) && + (!mte_is_root(wr_mas->mas->node)) && + (mas_mt_height(wr_mas->mas) > 1)) + return mas_rebalance(wr_mas->mas, b_node); + + if (b_end >= mt_slots[b_type]) + return mas_split(wr_mas->mas, b_node); + + if (mas_reuse_node(wr_mas, b_node, end)) + goto reuse_node; + + mas_node_count(wr_mas->mas, 1); + if (mas_is_err(wr_mas->mas)) + return 0; + + node = mas_pop_node(wr_mas->mas); + node->parent = mas_mn(wr_mas->mas)->parent; + wr_mas->mas->node = mt_mk_node(node, b_type); + mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); + mas_replace(wr_mas->mas, false); +reuse_node: + mas_update_gap(wr_mas->mas); + return 1; +} + +/* + * mas_root_expand() - Expand a root to a node + * @mas: The maple state + * @entry: The entry to store into the tree + */ +static inline int mas_root_expand(struct ma_state *mas, void *entry) +{ + void *contents = mas_root_locked(mas); + enum maple_type type = maple_leaf_64; + struct maple_node *node; + void __rcu **slots; + unsigned long *pivots; + int slot = 0; + + mas_node_count(mas, 1); + if (unlikely(mas_is_err(mas))) + return 0; + + node = mas_pop_node(mas); + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + node->parent = ma_parent_ptr( + ((unsigned long)mas->tree | MA_ROOT_PARENT)); + mas->node = mt_mk_node(node, type); + + if (mas->index) { + if (contents) { + rcu_assign_pointer(slots[slot], contents); + if (likely(mas->index > 1)) + slot++; + } + pivots[slot++] = mas->index - 1; + } + + rcu_assign_pointer(slots[slot], entry); + mas->offset = slot; + pivots[slot] = mas->last; + if (mas->last != ULONG_MAX) + slot++; + mas->depth = 1; + mas_set_height(mas); + + /* swap the new root into the tree */ + rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); + ma_set_meta(node, maple_leaf_64, 0, slot); + return slot; +} + +static inline void mas_store_root(struct ma_state *mas, void *entry) +{ + if (likely((mas->last != 0) || (mas->index != 0))) + mas_root_expand(mas, entry); + else if (((unsigned long) (entry) & 3) == 2) + mas_root_expand(mas, entry); + else { + rcu_assign_pointer(mas->tree->ma_root, entry); + mas->node = MAS_START; + } +} + +/* + * mas_is_span_wr() - Check if the write needs to be treated as a write that + * spans the node. + * @mas: The maple state + * @piv: The pivot value being written + * @type: The maple node type + * @entry: The data to write + * + * Spanning writes are writes that start in one node and end in another OR if + * the write of a %NULL will cause the node to end with a %NULL. + * + * Return: True if this is a spanning write, false otherwise. + */ +static bool mas_is_span_wr(struct ma_wr_state *wr_mas) +{ + unsigned long max; + unsigned long last = wr_mas->mas->last; + unsigned long piv = wr_mas->r_max; + enum maple_type type = wr_mas->type; + void *entry = wr_mas->entry; + + /* Contained in this pivot */ + if (piv > last) + return false; + + max = wr_mas->mas->max; + if (unlikely(ma_is_leaf(type))) { + /* Fits in the node, but may span slots. */ + if (last < max) + return false; + + /* Writes to the end of the node but not null. */ + if ((last == max) && entry) + return false; + + /* + * Writing ULONG_MAX is not a spanning write regardless of the + * value being written as long as the range fits in the node. + */ + if ((last == ULONG_MAX) && (last == max)) + return false; + } else if (piv == last) { + if (entry) + return false; + + /* Detect spanning store wr walk */ + if (last == ULONG_MAX) + return false; + } + + trace_ma_write(__func__, wr_mas->mas, piv, entry); + + return true; +} + +static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) +{ + wr_mas->type = mte_node_type(wr_mas->mas->node); + mas_wr_node_walk(wr_mas); + wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); +} + +static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) +{ + wr_mas->mas->max = wr_mas->r_max; + wr_mas->mas->min = wr_mas->r_min; + wr_mas->mas->node = wr_mas->content; + wr_mas->mas->offset = 0; + wr_mas->mas->depth++; +} +/* + * mas_wr_walk() - Walk the tree for a write. + * @wr_mas: The maple write state + * + * Uses mas_slot_locked() and does not need to worry about dead nodes. + * + * Return: True if it's contained in a node, false on spanning write. + */ +static bool mas_wr_walk(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + + while (true) { + mas_wr_walk_descend(wr_mas); + if (unlikely(mas_is_span_wr(wr_mas))) + return false; + + wr_mas->content = mas_slot_locked(mas, wr_mas->slots, + mas->offset); + if (ma_is_leaf(wr_mas->type)) + return true; + + mas_wr_walk_traverse(wr_mas); + } + + return true; +} + +static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + + while (true) { + mas_wr_walk_descend(wr_mas); + wr_mas->content = mas_slot_locked(mas, wr_mas->slots, + mas->offset); + if (ma_is_leaf(wr_mas->type)) + return true; + mas_wr_walk_traverse(wr_mas); + + } + return true; +} +/* + * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. + * @l_wr_mas: The left maple write state + * @r_wr_mas: The right maple write state + */ +static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, + struct ma_wr_state *r_wr_mas) +{ + struct ma_state *r_mas = r_wr_mas->mas; + struct ma_state *l_mas = l_wr_mas->mas; + unsigned char l_slot; + + l_slot = l_mas->offset; + if (!l_wr_mas->content) + l_mas->index = l_wr_mas->r_min; + + if ((l_mas->index == l_wr_mas->r_min) && + (l_slot && + !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { + if (l_slot > 1) + l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; + else + l_mas->index = l_mas->min; + + l_mas->offset = l_slot - 1; + } + + if (!r_wr_mas->content) { + if (r_mas->last < r_wr_mas->r_max) + r_mas->last = r_wr_mas->r_max; + r_mas->offset++; + } else if ((r_mas->last == r_wr_mas->r_max) && + (r_mas->last < r_mas->max) && + !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { + r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, + r_wr_mas->type, r_mas->offset + 1); + r_mas->offset++; + } +} + +static inline void *mas_state_walk(struct ma_state *mas) +{ + void *entry; + + entry = mas_start(mas); + if (mas_is_none(mas)) + return NULL; + + if (mas_is_ptr(mas)) + return entry; + + return mtree_range_walk(mas); +} + +/* + * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up + * to date. + * + * @mas: The maple state. + * + * Note: Leaves mas in undesirable state. + * Return: The entry for @mas->index or %NULL on dead node. + */ +static inline void *mtree_lookup_walk(struct ma_state *mas) +{ + unsigned long *pivots; + unsigned char offset; + struct maple_node *node; + struct maple_enode *next; + enum maple_type type; + void __rcu **slots; + unsigned char end; + unsigned long max; + + next = mas->node; + max = ULONG_MAX; + do { + offset = 0; + node = mte_to_node(next); + type = mte_node_type(next); + pivots = ma_pivots(node, type); + end = ma_data_end(node, type, pivots, max); + if (unlikely(ma_dead_node(node))) + goto dead_node; + + if (pivots[offset] >= mas->index) + goto next; + + do { + offset++; + } while ((offset < end) && (pivots[offset] < mas->index)); + + if (likely(offset > end)) + max = pivots[offset]; + +next: + slots = ma_slots(node, type); + next = mt_slot(mas->tree, slots, offset); + if (unlikely(ma_dead_node(node))) + goto dead_node; + } while (!ma_is_leaf(type)); + + return (void *) next; + +dead_node: + mas_reset(mas); + return NULL; +} + +/* + * mas_new_root() - Create a new root node that only contains the entry passed + * in. + * @mas: The maple state + * @entry: The entry to store. + * + * Only valid when the index == 0 and the last == ULONG_MAX + * + * Return 0 on error, 1 on success. + */ +static inline int mas_new_root(struct ma_state *mas, void *entry) +{ + struct maple_enode *root = mas_root_locked(mas); + enum maple_type type = maple_leaf_64; + struct maple_node *node; + void __rcu **slots; + unsigned long *pivots; + + if (!entry && !mas->index && mas->last == ULONG_MAX) { + mas->depth = 0; + mas_set_height(mas); + rcu_assign_pointer(mas->tree->ma_root, entry); + mas->node = MAS_START; + goto done; + } + + mas_node_count(mas, 1); + if (mas_is_err(mas)) + return 0; + + node = mas_pop_node(mas); + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + node->parent = ma_parent_ptr( + ((unsigned long)mas->tree | MA_ROOT_PARENT)); + mas->node = mt_mk_node(node, type); + rcu_assign_pointer(slots[0], entry); + pivots[0] = mas->last; + mas->depth = 1; + mas_set_height(mas); + rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); + +done: + if (xa_is_node(root)) + mte_destroy_walk(root, mas->tree); + + return 1; +} +/* + * mas_wr_spanning_store() - Create a subtree with the store operation completed + * and new nodes where necessary, then place the sub-tree in the actual tree. + * Note that mas is expected to point to the node which caused the store to + * span. + * @wr_mas: The maple write state + * + * Return: 0 on error, positive on success. + */ +static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) +{ + struct maple_subtree_state mast; + struct maple_big_node b_node; + struct ma_state *mas; + unsigned char height; + + /* Left and Right side of spanning store */ + MA_STATE(l_mas, NULL, 0, 0); + MA_STATE(r_mas, NULL, 0, 0); + + MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); + MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); + + /* + * A store operation that spans multiple nodes is called a spanning + * store and is handled early in the store call stack by the function + * mas_is_span_wr(). When a spanning store is identified, the maple + * state is duplicated. The first maple state walks the left tree path + * to ``index``, the duplicate walks the right tree path to ``last``. + * The data in the two nodes are combined into a single node, two nodes, + * or possibly three nodes (see the 3-way split above). A ``NULL`` + * written to the last entry of a node is considered a spanning store as + * a rebalance is required for the operation to complete and an overflow + * of data may happen. + */ + mas = wr_mas->mas; + trace_ma_op(__func__, mas); + + if (unlikely(!mas->index && mas->last == ULONG_MAX)) + return mas_new_root(mas, wr_mas->entry); + /* + * Node rebalancing may occur due to this store, so there may be three new + * entries per level plus a new root. + */ + height = mas_mt_height(mas); + mas_node_count(mas, 1 + height * 3); + if (mas_is_err(mas)) + return 0; + + /* + * Set up right side. Need to get to the next offset after the spanning + * store to ensure it's not NULL and to combine both the next node and + * the node with the start together. + */ + r_mas = *mas; + /* Avoid overflow, walk to next slot in the tree. */ + if (r_mas.last + 1) + r_mas.last++; + + r_mas.index = r_mas.last; + mas_wr_walk_index(&r_wr_mas); + r_mas.last = r_mas.index = mas->last; + + /* Set up left side. */ + l_mas = *mas; + mas_wr_walk_index(&l_wr_mas); + + if (!wr_mas->entry) { + mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); + mas->offset = l_mas.offset; + mas->index = l_mas.index; + mas->last = l_mas.last = r_mas.last; + } + + /* expanding NULLs may make this cover the entire range */ + if (!l_mas.index && r_mas.last == ULONG_MAX) { + mas_set_range(mas, 0, ULONG_MAX); + return mas_new_root(mas, wr_mas->entry); + } + + memset(&b_node, 0, sizeof(struct maple_big_node)); + /* Copy l_mas and store the value in b_node. */ + mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end); + /* Copy r_mas into b_node. */ + if (r_mas.offset <= r_wr_mas.node_end) + mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end, + &b_node, b_node.b_end + 1); + else + b_node.b_end++; + + /* Stop spanning searches by searching for just index. */ + l_mas.index = l_mas.last = mas->index; + + mast.bn = &b_node; + mast.orig_l = &l_mas; + mast.orig_r = &r_mas; + /* Combine l_mas and r_mas and split them up evenly again. */ + return mas_spanning_rebalance(mas, &mast, height + 1); +} + +/* + * mas_wr_node_store() - Attempt to store the value in a node + * @wr_mas: The maple write state + * + * Attempts to reuse the node, but may allocate. + * + * Return: True if stored, false otherwise + */ +static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + void __rcu **dst_slots; + unsigned long *dst_pivots; + unsigned char dst_offset; + unsigned char new_end = wr_mas->node_end; + unsigned char offset; + unsigned char node_slots = mt_slots[wr_mas->type]; + struct maple_node reuse, *newnode; + unsigned char copy_size, max_piv = mt_pivots[wr_mas->type]; + bool in_rcu = mt_in_rcu(mas->tree); + + offset = mas->offset; + if (mas->last == wr_mas->r_max) { + /* runs right to the end of the node */ + if (mas->last == mas->max) + new_end = offset; + /* don't copy this offset */ + wr_mas->offset_end++; + } else if (mas->last < wr_mas->r_max) { + /* new range ends in this range */ + if (unlikely(wr_mas->r_max == ULONG_MAX)) + mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type); + + new_end++; + } else { + if (wr_mas->end_piv == mas->last) + wr_mas->offset_end++; + + new_end -= wr_mas->offset_end - offset - 1; + } + + /* new range starts within a range */ + if (wr_mas->r_min < mas->index) + new_end++; + + /* Not enough room */ + if (new_end >= node_slots) + return false; + + /* Not enough data. */ + if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && + !(mas->mas_flags & MA_STATE_BULK)) + return false; + + /* set up node. */ + if (in_rcu) { + mas_node_count(mas, 1); + if (mas_is_err(mas)) + return false; + + newnode = mas_pop_node(mas); + } else { + memset(&reuse, 0, sizeof(struct maple_node)); + newnode = &reuse; + } + + newnode->parent = mas_mn(mas)->parent; + dst_pivots = ma_pivots(newnode, wr_mas->type); + dst_slots = ma_slots(newnode, wr_mas->type); + /* Copy from start to insert point */ + memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * (offset + 1)); + memcpy(dst_slots, wr_mas->slots, sizeof(void *) * (offset + 1)); + dst_offset = offset; + + /* Handle insert of new range starting after old range */ + if (wr_mas->r_min < mas->index) { + mas->offset++; + rcu_assign_pointer(dst_slots[dst_offset], wr_mas->content); + dst_pivots[dst_offset++] = mas->index - 1; + } + + /* Store the new entry and range end. */ + if (dst_offset < max_piv) + dst_pivots[dst_offset] = mas->last; + mas->offset = dst_offset; + rcu_assign_pointer(dst_slots[dst_offset], wr_mas->entry); + + /* + * this range wrote to the end of the node or it overwrote the rest of + * the data + */ + if (wr_mas->offset_end > wr_mas->node_end || mas->last >= mas->max) { + new_end = dst_offset; + goto done; + } + + dst_offset++; + /* Copy to the end of node if necessary. */ + copy_size = wr_mas->node_end - wr_mas->offset_end + 1; + memcpy(dst_slots + dst_offset, wr_mas->slots + wr_mas->offset_end, + sizeof(void *) * copy_size); + if (dst_offset < max_piv) { + if (copy_size > max_piv - dst_offset) + copy_size = max_piv - dst_offset; + + memcpy(dst_pivots + dst_offset, + wr_mas->pivots + wr_mas->offset_end, + sizeof(unsigned long) * copy_size); + } + + if ((wr_mas->node_end == node_slots - 1) && (new_end < node_slots - 1)) + dst_pivots[new_end] = mas->max; + +done: + mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); + if (in_rcu) { + mas->node = mt_mk_node(newnode, wr_mas->type); + mas_replace(mas, false); + } else { + memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); + } + trace_ma_write(__func__, mas, 0, wr_mas->entry); + mas_update_gap(mas); + return true; +} + +/* + * mas_wr_slot_store: Attempt to store a value in a slot. + * @wr_mas: the maple write state + * + * Return: True if stored, false otherwise + */ +static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + unsigned long lmax; /* Logical max. */ + unsigned char offset = mas->offset; + + if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) || + (offset != wr_mas->node_end))) + return false; + + if (offset == wr_mas->node_end - 1) + lmax = mas->max; + else + lmax = wr_mas->pivots[offset + 1]; + + /* going to overwrite too many slots. */ + if (lmax < mas->last) + return false; + + if (wr_mas->r_min == mas->index) { + /* overwriting two or more ranges with one. */ + if (lmax == mas->last) + return false; + + /* Overwriting all of offset and a portion of offset + 1. */ + rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry); + wr_mas->pivots[offset] = mas->last; + goto done; + } + + /* Doesn't end on the next range end. */ + if (lmax != mas->last) + return false; + + /* Overwriting a portion of offset and all of offset + 1 */ + if ((offset + 1 < mt_pivots[wr_mas->type]) && + (wr_mas->entry || wr_mas->pivots[offset + 1])) + wr_mas->pivots[offset + 1] = mas->last; + + rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry); + wr_mas->pivots[offset] = mas->index - 1; + mas->offset++; /* Keep mas accurate. */ + +done: + trace_ma_write(__func__, mas, 0, wr_mas->entry); + mas_update_gap(mas); + return true; +} + +static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) +{ + while ((wr_mas->mas->last > wr_mas->end_piv) && + (wr_mas->offset_end < wr_mas->node_end)) + wr_mas->end_piv = wr_mas->pivots[++wr_mas->offset_end]; + + if (wr_mas->mas->last > wr_mas->end_piv) + wr_mas->end_piv = wr_mas->mas->max; +} + +static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + + if (mas->last < wr_mas->end_piv && !wr_mas->slots[wr_mas->offset_end]) + mas->last = wr_mas->end_piv; + + /* Check next slot(s) if we are overwriting the end */ + if ((mas->last == wr_mas->end_piv) && + (wr_mas->node_end != wr_mas->offset_end) && + !wr_mas->slots[wr_mas->offset_end + 1]) { + wr_mas->offset_end++; + if (wr_mas->offset_end == wr_mas->node_end) + mas->last = mas->max; + else + mas->last = wr_mas->pivots[wr_mas->offset_end]; + wr_mas->end_piv = mas->last; + } + + if (!wr_mas->content) { + /* If this one is null, the next and prev are not */ + mas->index = wr_mas->r_min; + } else { + /* Check prev slot if we are overwriting the start */ + if (mas->index == wr_mas->r_min && mas->offset && + !wr_mas->slots[mas->offset - 1]) { + mas->offset--; + wr_mas->r_min = mas->index = + mas_safe_min(mas, wr_mas->pivots, mas->offset); + wr_mas->r_max = wr_mas->pivots[mas->offset]; + } + } +} + +static inline bool mas_wr_append(struct ma_wr_state *wr_mas) +{ + unsigned char end = wr_mas->node_end; + unsigned char new_end = end + 1; + struct ma_state *mas = wr_mas->mas; + unsigned char node_pivots = mt_pivots[wr_mas->type]; + + if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) { + if (new_end < node_pivots) + wr_mas->pivots[new_end] = wr_mas->pivots[end]; + + if (new_end < node_pivots) + ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end); + + rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->entry); + mas->offset = new_end; + wr_mas->pivots[end] = mas->index - 1; + + return true; + } + + if ((mas->index == wr_mas->r_min) && (mas->last < wr_mas->r_max)) { + if (new_end < node_pivots) + wr_mas->pivots[new_end] = wr_mas->pivots[end]; + + rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content); + if (new_end < node_pivots) + ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end); + + wr_mas->pivots[end] = mas->last; + rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry); + return true; + } + + return false; +} + +/* + * mas_wr_bnode() - Slow path for a modification. + * @wr_mas: The write maple state + * + * This is where split, rebalance end up. + */ +static void mas_wr_bnode(struct ma_wr_state *wr_mas) +{ + struct maple_big_node b_node; + + trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); + memset(&b_node, 0, sizeof(struct maple_big_node)); + mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); + mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end); +} + +static inline void mas_wr_modify(struct ma_wr_state *wr_mas) +{ + unsigned char node_slots; + unsigned char node_size; + struct ma_state *mas = wr_mas->mas; + + /* Direct replacement */ + if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { + rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); + if (!!wr_mas->entry ^ !!wr_mas->content) + mas_update_gap(mas); + return; + } + + /* Attempt to append */ + node_slots = mt_slots[wr_mas->type]; + node_size = wr_mas->node_end - wr_mas->offset_end + mas->offset + 2; + if (mas->max == ULONG_MAX) + node_size++; + + /* slot and node store will not fit, go to the slow path */ + if (unlikely(node_size >= node_slots)) + goto slow_path; + + if (wr_mas->entry && (wr_mas->node_end < node_slots - 1) && + (mas->offset == wr_mas->node_end) && mas_wr_append(wr_mas)) { + if (!wr_mas->content || !wr_mas->entry) + mas_update_gap(mas); + return; + } + + if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas)) + return; + else if (mas_wr_node_store(wr_mas)) + return; + + if (mas_is_err(mas)) + return; + +slow_path: + mas_wr_bnode(wr_mas); +} + +/* + * mas_wr_store_entry() - Internal call to store a value + * @mas: The maple state + * @entry: The entry to store. + * + * Return: The contents that was stored at the index. + */ +static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + + wr_mas->content = mas_start(mas); + if (mas_is_none(mas) || mas_is_ptr(mas)) { + mas_store_root(mas, wr_mas->entry); + return wr_mas->content; + } + + if (unlikely(!mas_wr_walk(wr_mas))) { + mas_wr_spanning_store(wr_mas); + return wr_mas->content; + } + + /* At this point, we are at the leaf node that needs to be altered. */ + wr_mas->end_piv = wr_mas->r_max; + mas_wr_end_piv(wr_mas); + + if (!wr_mas->entry) + mas_wr_extend_null(wr_mas); + + /* New root for a single pointer */ + if (unlikely(!mas->index && mas->last == ULONG_MAX)) { + mas_new_root(mas, wr_mas->entry); + return wr_mas->content; + } + + mas_wr_modify(wr_mas); + return wr_mas->content; +} + +/** + * mas_insert() - Internal call to insert a value + * @mas: The maple state + * @entry: The entry to store + * + * Return: %NULL or the contents that already exists at the requested index + * otherwise. The maple state needs to be checked for error conditions. + */ +static inline void *mas_insert(struct ma_state *mas, void *entry) +{ + MA_WR_STATE(wr_mas, mas, entry); + + /* + * Inserting a new range inserts either 0, 1, or 2 pivots within the + * tree. If the insert fits exactly into an existing gap with a value + * of NULL, then the slot only needs to be written with the new value. + * If the range being inserted is adjacent to another range, then only a + * single pivot needs to be inserted (as well as writing the entry). If + * the new range is within a gap but does not touch any other ranges, + * then two pivots need to be inserted: the start - 1, and the end. As + * usual, the entry must be written. Most operations require a new node + * to be allocated and replace an existing node to ensure RCU safety, + * when in RCU mode. The exception to requiring a newly allocated node + * is when inserting at the end of a node (appending). When done + * carefully, appending can reuse the node in place. + */ + wr_mas.content = mas_start(mas); + if (wr_mas.content) + goto exists; + + if (mas_is_none(mas) || mas_is_ptr(mas)) { + mas_store_root(mas, entry); + return NULL; + } + + /* spanning writes always overwrite something */ + if (!mas_wr_walk(&wr_mas)) + goto exists; + + /* At this point, we are at the leaf node that needs to be altered. */ + wr_mas.offset_end = mas->offset; + wr_mas.end_piv = wr_mas.r_max; + + if (wr_mas.content || (mas->last > wr_mas.r_max)) + goto exists; + + if (!entry) + return NULL; + + mas_wr_modify(&wr_mas); + return wr_mas.content; + +exists: + mas_set_err(mas, -EEXIST); + return wr_mas.content; + +} + +/* + * mas_prev_node() - Find the prev non-null entry at the same level in the + * tree. The prev value will be mas->node[mas->offset] or MAS_NONE. + * @mas: The maple state + * @min: The lower limit to search + * + * The prev node value will be mas->node[mas->offset] or MAS_NONE. + * Return: 1 if the node is dead, 0 otherwise. + */ +static inline int mas_prev_node(struct ma_state *mas, unsigned long min) +{ + enum maple_type mt; + int offset, level; + void __rcu **slots; + struct maple_node *node; + struct maple_enode *enode; + unsigned long *pivots; + + if (mas_is_none(mas)) + return 0; + + level = 0; + do { + node = mas_mn(mas); + if (ma_is_root(node)) + goto no_entry; + + /* Walk up. */ + if (unlikely(mas_ascend(mas))) + return 1; + offset = mas->offset; + level++; + } while (!offset); + + offset--; + mt = mte_node_type(mas->node); + node = mas_mn(mas); + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); + mas->max = pivots[offset]; + if (offset) + mas->min = pivots[offset - 1] + 1; + if (unlikely(ma_dead_node(node))) + return 1; + + if (mas->max < min) + goto no_entry_min; + + while (level > 1) { + level--; + enode = mas_slot(mas, slots, offset); + if (unlikely(ma_dead_node(node))) + return 1; + + mas->node = enode; + mt = mte_node_type(mas->node); + node = mas_mn(mas); + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); + offset = ma_data_end(node, mt, pivots, mas->max); + if (offset) + mas->min = pivots[offset - 1] + 1; + + if (offset < mt_pivots[mt]) + mas->max = pivots[offset]; + + if (mas->max < min) + goto no_entry; + } + + mas->node = mas_slot(mas, slots, offset); + if (unlikely(ma_dead_node(node))) + return 1; + + mas->offset = mas_data_end(mas); + if (unlikely(mte_dead_node(mas->node))) + return 1; + + return 0; + +no_entry_min: + mas->offset = offset; + if (offset) + mas->min = pivots[offset - 1] + 1; +no_entry: + if (unlikely(ma_dead_node(node))) + return 1; + + mas->node = MAS_NONE; + return 0; +} + +/* + * mas_next_node() - Get the next node at the same level in the tree. + * @mas: The maple state + * @max: The maximum pivot value to check. + * + * The next value will be mas->node[mas->offset] or MAS_NONE. + * Return: 1 on dead node, 0 otherwise. + */ +static inline int mas_next_node(struct ma_state *mas, struct maple_node *node, + unsigned long max) +{ + unsigned long min, pivot; + unsigned long *pivots; + struct maple_enode *enode; + int level = 0; + unsigned char offset; + enum maple_type mt; + void __rcu **slots; + + if (mas->max >= max) + goto no_entry; + + level = 0; + do { + if (ma_is_root(node)) + goto no_entry; + + min = mas->max + 1; + if (min > max) + goto no_entry; + + if (unlikely(mas_ascend(mas))) + return 1; + + offset = mas->offset; + level++; + node = mas_mn(mas); + mt = mte_node_type(mas->node); + pivots = ma_pivots(node, mt); + } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max))); + + slots = ma_slots(node, mt); + pivot = mas_safe_pivot(mas, pivots, ++offset, mt); + while (unlikely(level > 1)) { + /* Descend, if necessary */ + enode = mas_slot(mas, slots, offset); + if (unlikely(ma_dead_node(node))) + return 1; + + mas->node = enode; + level--; + node = mas_mn(mas); + mt = mte_node_type(mas->node); + slots = ma_slots(node, mt); + pivots = ma_pivots(node, mt); + offset = 0; + pivot = pivots[0]; + } + + enode = mas_slot(mas, slots, offset); + if (unlikely(ma_dead_node(node))) + return 1; + + mas->node = enode; + mas->min = min; + mas->max = pivot; + return 0; + +no_entry: + if (unlikely(ma_dead_node(node))) + return 1; + + mas->node = MAS_NONE; + return 0; +} + +/* + * mas_next_nentry() - Get the next node entry + * @mas: The maple state + * @max: The maximum value to check + * @*range_start: Pointer to store the start of the range. + * + * Sets @mas->offset to the offset of the next node entry, @mas->last to the + * pivot of the entry. + * + * Return: The next entry, %NULL otherwise + */ +static inline void *mas_next_nentry(struct ma_state *mas, + struct maple_node *node, unsigned long max, enum maple_type type) +{ + unsigned char count; + unsigned long pivot; + unsigned long *pivots; + void __rcu **slots; + void *entry; + + if (mas->last == mas->max) { + mas->index = mas->max; + return NULL; + } + + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + mas->index = mas_safe_min(mas, pivots, mas->offset); + if (ma_dead_node(node)) + return NULL; + + if (mas->index > max) + return NULL; + + count = ma_data_end(node, type, pivots, mas->max); + if (mas->offset > count) + return NULL; + + while (mas->offset < count) { + pivot = pivots[mas->offset]; + entry = mas_slot(mas, slots, mas->offset); + if (ma_dead_node(node)) + return NULL; + + if (entry) + goto found; + + if (pivot >= max) + return NULL; + + mas->index = pivot + 1; + mas->offset++; + } + + if (mas->index > mas->max) { + mas->index = mas->last; + return NULL; + } + + pivot = mas_safe_pivot(mas, pivots, mas->offset, type); + entry = mas_slot(mas, slots, mas->offset); + if (ma_dead_node(node)) + return NULL; + + if (!pivot) + return NULL; + + if (!entry) + return NULL; + +found: + mas->last = pivot; + return entry; +} + +static inline void mas_rewalk(struct ma_state *mas, unsigned long index) +{ + +retry: + mas_set(mas, index); + mas_state_walk(mas); + if (mas_is_start(mas)) + goto retry; + + return; + +} + +/* + * mas_next_entry() - Internal function to get the next entry. + * @mas: The maple state + * @limit: The maximum range start. + * + * Set the @mas->node to the next entry and the range_start to + * the beginning value for the entry. Does not check beyond @limit. + * Sets @mas->index and @mas->last to the limit if it is hit. + * Restarts on dead nodes. + * + * Return: the next entry or %NULL. + */ +static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) +{ + void *entry = NULL; + struct maple_enode *prev_node; + struct maple_node *node; + unsigned char offset; + unsigned long last; + enum maple_type mt; + + last = mas->last; +retry: + offset = mas->offset; + prev_node = mas->node; + node = mas_mn(mas); + mt = mte_node_type(mas->node); + mas->offset++; + if (unlikely(mas->offset >= mt_slots[mt])) { + mas->offset = mt_slots[mt] - 1; + goto next_node; + } + + while (!mas_is_none(mas)) { + entry = mas_next_nentry(mas, node, limit, mt); + if (unlikely(ma_dead_node(node))) { + mas_rewalk(mas, last); + goto retry; + } + + if (likely(entry)) + return entry; + + if (unlikely((mas->index > limit))) + break; + +next_node: + prev_node = mas->node; + offset = mas->offset; + if (unlikely(mas_next_node(mas, node, limit))) { + mas_rewalk(mas, last); + goto retry; + } + mas->offset = 0; + node = mas_mn(mas); + mt = mte_node_type(mas->node); + } + + mas->index = mas->last = limit; + mas->offset = offset; + mas->node = prev_node; + return NULL; +} + +/* + * mas_prev_nentry() - Get the previous node entry. + * @mas: The maple state. + * @limit: The lower limit to check for a value. + * + * Return: the entry, %NULL otherwise. + */ +static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit, + unsigned long index) +{ + unsigned long pivot, min; + unsigned char offset; + struct maple_node *mn; + enum maple_type mt; + unsigned long *pivots; + void __rcu **slots; + void *entry; + +retry: + if (!mas->offset) + return NULL; + + mn = mas_mn(mas); + mt = mte_node_type(mas->node); + offset = mas->offset - 1; + if (offset >= mt_slots[mt]) + offset = mt_slots[mt] - 1; + + slots = ma_slots(mn, mt); + pivots = ma_pivots(mn, mt); + if (offset == mt_pivots[mt]) + pivot = mas->max; + else + pivot = pivots[offset]; + + if (unlikely(ma_dead_node(mn))) { + mas_rewalk(mas, index); + goto retry; + } + + while (offset && ((!mas_slot(mas, slots, offset) && pivot >= limit) || + !pivot)) + pivot = pivots[--offset]; + + min = mas_safe_min(mas, pivots, offset); + entry = mas_slot(mas, slots, offset); + if (unlikely(ma_dead_node(mn))) { + mas_rewalk(mas, index); + goto retry; + } + + if (likely(entry)) { + mas->offset = offset; + mas->last = pivot; + mas->index = min; + } + return entry; +} + +static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min) +{ + void *entry; + +retry: + while (likely(!mas_is_none(mas))) { + entry = mas_prev_nentry(mas, min, mas->index); + if (unlikely(mas->last < min)) + goto not_found; + + if (likely(entry)) + return entry; + + if (unlikely(mas_prev_node(mas, min))) { + mas_rewalk(mas, mas->index); + goto retry; + } + + mas->offset++; + } + + mas->offset--; +not_found: + mas->index = mas->last = min; + return NULL; +} + +/* + * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the + * highest gap address of a given size in a given node and descend. + * @mas: The maple state + * @size: The needed size. + * + * Return: True if found in a leaf, false otherwise. + * + */ +static bool mas_rev_awalk(struct ma_state *mas, unsigned long size) +{ + enum maple_type type = mte_node_type(mas->node); + struct maple_node *node = mas_mn(mas); + unsigned long *pivots, *gaps; + void __rcu **slots; + unsigned long gap = 0; + unsigned long max, min, index; + unsigned char offset; + + if (unlikely(mas_is_err(mas))) + return true; + + if (ma_is_dense(type)) { + /* dense nodes. */ + mas->offset = (unsigned char)(mas->index - mas->min); + return true; + } + + pivots = ma_pivots(node, type); + slots = ma_slots(node, type); + gaps = ma_gaps(node, type); + offset = mas->offset; + min = mas_safe_min(mas, pivots, offset); + /* Skip out of bounds. */ + while (mas->last < min) + min = mas_safe_min(mas, pivots, --offset); + + max = mas_safe_pivot(mas, pivots, offset, type); + index = mas->index; + while (index <= max) { + gap = 0; + if (gaps) + gap = gaps[offset]; + else if (!mas_slot(mas, slots, offset)) + gap = max - min + 1; + + if (gap) { + if ((size <= gap) && (size <= mas->last - min + 1)) + break; + + if (!gaps) { + /* Skip the next slot, it cannot be a gap. */ + if (offset < 2) + goto ascend; + + offset -= 2; + max = pivots[offset]; + min = mas_safe_min(mas, pivots, offset); + continue; + } + } + + if (!offset) + goto ascend; + + offset--; + max = min - 1; + min = mas_safe_min(mas, pivots, offset); + } + + if (unlikely(index > max)) { + mas_set_err(mas, -EBUSY); + return false; + } + + if (unlikely(ma_is_leaf(type))) { + mas->offset = offset; + mas->min = min; + mas->max = min + gap - 1; + return true; + } + + /* descend, only happens under lock. */ + mas->node = mas_slot(mas, slots, offset); + mas->min = min; + mas->max = max; + mas->offset = mas_data_end(mas); + return false; + +ascend: + if (mte_is_root(mas->node)) + mas_set_err(mas, -EBUSY); + + return false; +} + +static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) +{ + enum maple_type type = mte_node_type(mas->node); + unsigned long pivot, min, gap = 0; + unsigned char offset; + unsigned long *gaps; + unsigned long *pivots = ma_pivots(mas_mn(mas), type); + void __rcu **slots = ma_slots(mas_mn(mas), type); + bool found = false; + + if (ma_is_dense(type)) { + mas->offset = (unsigned char)(mas->index - mas->min); + return true; + } + + gaps = ma_gaps(mte_to_node(mas->node), type); + offset = mas->offset; + min = mas_safe_min(mas, pivots, offset); + for (; offset < mt_slots[type]; offset++) { + pivot = mas_safe_pivot(mas, pivots, offset, type); + if (offset && !pivot) + break; + + /* Not within lower bounds */ + if (mas->index > pivot) + goto next_slot; + + if (gaps) + gap = gaps[offset]; + else if (!mas_slot(mas, slots, offset)) + gap = min(pivot, mas->last) - max(mas->index, min) + 1; + else + goto next_slot; + + if (gap >= size) { + if (ma_is_leaf(type)) { + found = true; + goto done; + } + if (mas->index <= pivot) { + mas->node = mas_slot(mas, slots, offset); + mas->min = min; + mas->max = pivot; + offset = 0; + break; + } + } +next_slot: + min = pivot + 1; + if (mas->last <= pivot) { + mas_set_err(mas, -EBUSY); + return true; + } + } + + if (mte_is_root(mas->node)) + found = true; +done: + mas->offset = offset; + return found; +} + +/** + * mas_walk() - Search for @mas->index in the tree. + * @mas: The maple state. + * + * mas->index and mas->last will be set to the range if there is a value. If + * mas->node is MAS_NONE, reset to MAS_START. + * + * Return: the entry at the location or %NULL. + */ +void *mas_walk(struct ma_state *mas) +{ + void *entry; + +retry: + entry = mas_state_walk(mas); + if (mas_is_start(mas)) + goto retry; + + if (mas_is_ptr(mas)) { + if (!mas->index) { + mas->last = 0; + } else { + mas->index = 1; + mas->last = ULONG_MAX; + } + return entry; + } + + if (mas_is_none(mas)) { + mas->index = 0; + mas->last = ULONG_MAX; + } + + return entry; +} +EXPORT_SYMBOL_GPL(mas_walk); + +static inline bool mas_rewind_node(struct ma_state *mas) +{ + unsigned char slot; + + do { + if (mte_is_root(mas->node)) { + slot = mas->offset; + if (!slot) + return false; + } else { + mas_ascend(mas); + slot = mas->offset; + } + } while (!slot); + + mas->offset = --slot; + return true; +} + +/* + * mas_skip_node() - Internal function. Skip over a node. + * @mas: The maple state. + * + * Return: true if there is another node, false otherwise. + */ +static inline bool mas_skip_node(struct ma_state *mas) +{ + unsigned char slot, slot_count; + unsigned long *pivots; + enum maple_type mt; + + mt = mte_node_type(mas->node); + slot_count = mt_slots[mt] - 1; + do { + if (mte_is_root(mas->node)) { + slot = mas->offset; + if (slot > slot_count) { + mas_set_err(mas, -EBUSY); + return false; + } + } else { + mas_ascend(mas); + slot = mas->offset; + mt = mte_node_type(mas->node); + slot_count = mt_slots[mt] - 1; + } + } while (slot > slot_count); + + mas->offset = ++slot; + pivots = ma_pivots(mas_mn(mas), mt); + if (slot > 0) + mas->min = pivots[slot - 1] + 1; + + if (slot <= slot_count) + mas->max = pivots[slot]; + + return true; +} + +/* + * mas_awalk() - Allocation walk. Search from low address to high, for a gap of + * @size + * @mas: The maple state + * @size: The size of the gap required + * + * Search between @mas->index and @mas->last for a gap of @size. + */ +static inline void mas_awalk(struct ma_state *mas, unsigned long size) +{ + struct maple_enode *last = NULL; + + /* + * There are 4 options: + * go to child (descend) + * go back to parent (ascend) + * no gap found. (return, slot == MAPLE_NODE_SLOTS) + * found the gap. (return, slot != MAPLE_NODE_SLOTS) + */ + while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { + if (last == mas->node) + mas_skip_node(mas); + else + last = mas->node; + } +} + +/* + * mas_fill_gap() - Fill a located gap with @entry. + * @mas: The maple state + * @entry: The value to store + * @slot: The offset into the node to store the @entry + * @size: The size of the entry + * @index: The start location + */ +static inline void mas_fill_gap(struct ma_state *mas, void *entry, + unsigned char slot, unsigned long size, unsigned long *index) +{ + MA_WR_STATE(wr_mas, mas, entry); + unsigned char pslot = mte_parent_slot(mas->node); + struct maple_enode *mn = mas->node; + unsigned long *pivots; + enum maple_type ptype; + /* + * mas->index is the start address for the search + * which may no longer be needed. + * mas->last is the end address for the search + */ + + *index = mas->index; + mas->last = mas->index + size - 1; + + /* + * It is possible that using mas->max and mas->min to correctly + * calculate the index and last will cause an issue in the gap + * calculation, so fix the ma_state here + */ + mas_ascend(mas); + ptype = mte_node_type(mas->node); + pivots = ma_pivots(mas_mn(mas), ptype); + mas->max = mas_safe_pivot(mas, pivots, pslot, ptype); + mas->min = mas_safe_min(mas, pivots, pslot); + mas->node = mn; + mas->offset = slot; + mas_wr_store_entry(&wr_mas); +} + +/* + * mas_sparse_area() - Internal function. Return upper or lower limit when + * searching for a gap in an empty tree. + * @mas: The maple state + * @min: the minimum range + * @max: The maximum range + * @size: The size of the gap + * @fwd: Searching forward or back + */ +static inline void mas_sparse_area(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size, bool fwd) +{ + unsigned long start = 0; + + if (!unlikely(mas_is_none(mas))) + start++; + /* mas_is_ptr */ + + if (start < min) + start = min; + + if (fwd) { + mas->index = start; + mas->last = start + size - 1; + return; + } + + mas->index = max; +} + +/* + * mas_empty_area() - Get the lowest address within the range that is + * sufficient for the size requested. + * @mas: The maple state + * @min: The lowest value of the range + * @max: The highest value of the range + * @size: The size needed + */ +int mas_empty_area(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size) +{ + unsigned char offset; + unsigned long *pivots; + enum maple_type mt; + + if (mas_is_start(mas)) + mas_start(mas); + else if (mas->offset >= 2) + mas->offset -= 2; + else if (!mas_skip_node(mas)) + return -EBUSY; + + /* Empty set */ + if (mas_is_none(mas) || mas_is_ptr(mas)) { + mas_sparse_area(mas, min, max, size, true); + return 0; + } + + /* The start of the window can only be within these values */ + mas->index = min; + mas->last = max; + mas_awalk(mas, size); + + if (unlikely(mas_is_err(mas))) + return xa_err(mas->node); + + offset = mas->offset; + if (unlikely(offset == MAPLE_NODE_SLOTS)) + return -EBUSY; + + mt = mte_node_type(mas->node); + pivots = ma_pivots(mas_mn(mas), mt); + if (offset) + mas->min = pivots[offset - 1] + 1; + + if (offset < mt_pivots[mt]) + mas->max = pivots[offset]; + + if (mas->index < mas->min) + mas->index = mas->min; + + mas->last = mas->index + size - 1; + return 0; +} +EXPORT_SYMBOL_GPL(mas_empty_area); + +/* + * mas_empty_area_rev() - Get the highest address within the range that is + * sufficient for the size requested. + * @mas: The maple state + * @min: The lowest value of the range + * @max: The highest value of the range + * @size: The size needed + */ +int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size) +{ + struct maple_enode *last = mas->node; + + if (mas_is_start(mas)) { + mas_start(mas); + mas->offset = mas_data_end(mas); + } else if (mas->offset >= 2) { + mas->offset -= 2; + } else if (!mas_rewind_node(mas)) { + return -EBUSY; + } + + /* Empty set. */ + if (mas_is_none(mas) || mas_is_ptr(mas)) { + mas_sparse_area(mas, min, max, size, false); + return 0; + } + + /* The start of the window can only be within these values. */ + mas->index = min; + mas->last = max; + + while (!mas_rev_awalk(mas, size)) { + if (last == mas->node) { + if (!mas_rewind_node(mas)) + return -EBUSY; + } else { + last = mas->node; + } + } + + if (mas_is_err(mas)) + return xa_err(mas->node); + + if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) + return -EBUSY; + + /* + * mas_rev_awalk() has set mas->min and mas->max to the gap values. If + * the maximum is outside the window we are searching, then use the last + * location in the search. + * mas->max and mas->min is the range of the gap. + * mas->index and mas->last are currently set to the search range. + */ + + /* Trim the upper limit to the max. */ + if (mas->max <= mas->last) + mas->last = mas->max; + + mas->index = mas->last - size + 1; + return 0; +} +EXPORT_SYMBOL_GPL(mas_empty_area_rev); + +static inline int mas_alloc(struct ma_state *mas, void *entry, + unsigned long size, unsigned long *index) +{ + unsigned long min; + + mas_start(mas); + if (mas_is_none(mas) || mas_is_ptr(mas)) { + mas_root_expand(mas, entry); + if (mas_is_err(mas)) + return xa_err(mas->node); + + if (!mas->index) + return mte_pivot(mas->node, 0); + return mte_pivot(mas->node, 1); + } + + /* Must be walking a tree. */ + mas_awalk(mas, size); + if (mas_is_err(mas)) + return xa_err(mas->node); + + if (mas->offset == MAPLE_NODE_SLOTS) + goto no_gap; + + /* + * At this point, mas->node points to the right node and we have an + * offset that has a sufficient gap. + */ + min = mas->min; + if (mas->offset) + min = mte_pivot(mas->node, mas->offset - 1) + 1; + + if (mas->index < min) + mas->index = min; + + mas_fill_gap(mas, entry, mas->offset, size, index); + return 0; + +no_gap: + return -EBUSY; +} + +static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min, + unsigned long max, void *entry, + unsigned long size, unsigned long *index) +{ + int ret = 0; + + ret = mas_empty_area_rev(mas, min, max, size); + if (ret) + return ret; + + if (mas_is_err(mas)) + return xa_err(mas->node); + + if (mas->offset == MAPLE_NODE_SLOTS) + goto no_gap; + + mas_fill_gap(mas, entry, mas->offset, size, index); + return 0; + +no_gap: + return -EBUSY; +} + +/* + * mas_dead_leaves() - Mark all leaves of a node as dead. + * @mas: The maple state + * @slots: Pointer to the slot array + * + * Must hold the write lock. + * + * Return: The number of leaves marked as dead. + */ +static inline +unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) +{ + struct maple_node *node; + enum maple_type type; + void *entry; + int offset; + + for (offset = 0; offset < mt_slot_count(mas->node); offset++) { + entry = mas_slot_locked(mas, slots, offset); + type = mte_node_type(entry); + node = mte_to_node(entry); + /* Use both node and type to catch LE & BE metadata */ + if (!node || !type) + break; + + mte_set_node_dead(entry); + smp_wmb(); /* Needed for RCU */ + node->type = type; + rcu_assign_pointer(slots[offset], node); + } + + return offset; +} + +static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) +{ + struct maple_node *node, *next; + void __rcu **slots = NULL; + + next = mas_mn(mas); + do { + mas->node = ma_enode_ptr(next); + node = mas_mn(mas); + slots = ma_slots(node, node->type); + next = mas_slot_locked(mas, slots, offset); + offset = 0; + } while (!ma_is_leaf(next->type)); + + return slots; +} + +static void mt_free_walk(struct rcu_head *head) +{ + void __rcu **slots; + struct maple_node *node, *start; + struct maple_tree mt; + unsigned char offset; + enum maple_type type; + MA_STATE(mas, &mt, 0, 0); + + node = container_of(head, struct maple_node, rcu); + + if (ma_is_leaf(node->type)) + goto free_leaf; + + mt_init_flags(&mt, node->ma_flags); + mas_lock(&mas); + start = node; + mas.node = mt_mk_node(node, node->type); + slots = mas_dead_walk(&mas, 0); + node = mas_mn(&mas); + do { + mt_free_bulk(node->slot_len, slots); + offset = node->parent_slot + 1; + mas.node = node->piv_parent; + if (mas_mn(&mas) == node) + goto start_slots_free; + + type = mte_node_type(mas.node); + slots = ma_slots(mte_to_node(mas.node), type); + if ((offset < mt_slots[type]) && (slots[offset])) + slots = mas_dead_walk(&mas, offset); + + node = mas_mn(&mas); + } while ((node != start) || (node->slot_len < offset)); + + slots = ma_slots(node, node->type); + mt_free_bulk(node->slot_len, slots); + +start_slots_free: + mas_unlock(&mas); +free_leaf: + mt_free_rcu(&node->rcu); +} + +static inline void __rcu **mas_destroy_descend(struct ma_state *mas, + struct maple_enode *prev, unsigned char offset) +{ + struct maple_node *node; + struct maple_enode *next = mas->node; + void __rcu **slots = NULL; + + do { + mas->node = next; + node = mas_mn(mas); + slots = ma_slots(node, mte_node_type(mas->node)); + next = mas_slot_locked(mas, slots, 0); + if ((mte_dead_node(next))) + next = mas_slot_locked(mas, slots, 1); + + mte_set_node_dead(mas->node); + node->type = mte_node_type(mas->node); + node->piv_parent = prev; + node->parent_slot = offset; + offset = 0; + prev = mas->node; + } while (!mte_is_leaf(next)); + + return slots; +} + +static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, + bool free) +{ + void __rcu **slots; + struct maple_node *node = mte_to_node(enode); + struct maple_enode *start; + struct maple_tree mt; + + MA_STATE(mas, &mt, 0, 0); + + if (mte_is_leaf(enode)) + goto free_leaf; + + mt_init_flags(&mt, ma_flags); + mas_lock(&mas); + + mas.node = start = enode; + slots = mas_destroy_descend(&mas, start, 0); + node = mas_mn(&mas); + do { + enum maple_type type; + unsigned char offset; + struct maple_enode *parent, *tmp; + + node->slot_len = mas_dead_leaves(&mas, slots); + if (free) + mt_free_bulk(node->slot_len, slots); + offset = node->parent_slot + 1; + mas.node = node->piv_parent; + if (mas_mn(&mas) == node) + goto start_slots_free; + + type = mte_node_type(mas.node); + slots = ma_slots(mte_to_node(mas.node), type); + if (offset >= mt_slots[type]) + goto next; + + tmp = mas_slot_locked(&mas, slots, offset); + if (mte_node_type(tmp) && mte_to_node(tmp)) { + parent = mas.node; + mas.node = tmp; + slots = mas_destroy_descend(&mas, parent, offset); + } +next: + node = mas_mn(&mas); + } while (start != mas.node); + + node = mas_mn(&mas); + node->slot_len = mas_dead_leaves(&mas, slots); + if (free) + mt_free_bulk(node->slot_len, slots); + +start_slots_free: + mas_unlock(&mas); + +free_leaf: + if (free) + mt_free_rcu(&node->rcu); +} + +/* + * mte_destroy_walk() - Free a tree or sub-tree. + * @enode - the encoded maple node (maple_enode) to start + * @mn - the tree to free - needed for node types. + * + * Must hold the write lock. + */ +static inline void mte_destroy_walk(struct maple_enode *enode, + struct maple_tree *mt) +{ + struct maple_node *node = mte_to_node(enode); + + if (mt_in_rcu(mt)) { + mt_destroy_walk(enode, mt->ma_flags, false); + call_rcu(&node->rcu, mt_free_walk); + } else { + mt_destroy_walk(enode, mt->ma_flags, true); + } +} + +static void mas_wr_store_setup(struct ma_wr_state *wr_mas) +{ + if (!mas_is_start(wr_mas->mas)) { + if (mas_is_none(wr_mas->mas)) { + mas_reset(wr_mas->mas); + } else { + wr_mas->r_max = wr_mas->mas->max; + wr_mas->type = mte_node_type(wr_mas->mas->node); + if (mas_is_span_wr(wr_mas)) + mas_reset(wr_mas->mas); + } + } + +} + +/* Interface */ + +/** + * mas_store() - Store an @entry. + * @mas: The maple state. + * @entry: The entry to store. + * + * The @mas->index and @mas->last is used to set the range for the @entry. + * Note: The @mas should have pre-allocated entries to ensure there is memory to + * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. + * + * Return: the first entry between mas->index and mas->last or %NULL. + */ +void *mas_store(struct ma_state *mas, void *entry) +{ + MA_WR_STATE(wr_mas, mas, entry); + + trace_ma_write(__func__, mas, 0, entry); +#ifdef CONFIG_DEBUG_MAPLE_TREE + if (mas->index > mas->last) + pr_err("Error %lu > %lu %p\n", mas->index, mas->last, entry); + MT_BUG_ON(mas->tree, mas->index > mas->last); + if (mas->index > mas->last) { + mas_set_err(mas, -EINVAL); + return NULL; + } + +#endif + + /* + * Storing is the same operation as insert with the added caveat that it + * can overwrite entries. Although this seems simple enough, one may + * want to examine what happens if a single store operation was to + * overwrite multiple entries within a self-balancing B-Tree. + */ + mas_wr_store_setup(&wr_mas); + mas_wr_store_entry(&wr_mas); + return wr_mas.content; +} +EXPORT_SYMBOL_GPL(mas_store); + +/** + * mas_store_gfp() - Store a value into the tree. + * @mas: The maple state + * @entry: The entry to store + * @gfp: The GFP_FLAGS to use for allocations if necessary. + * + * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not + * be allocated. + */ +int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) +{ + MA_WR_STATE(wr_mas, mas, entry); + + mas_wr_store_setup(&wr_mas); + trace_ma_write(__func__, mas, 0, entry); +retry: + mas_wr_store_entry(&wr_mas); + if (unlikely(mas_nomem(mas, gfp))) + goto retry; + + if (unlikely(mas_is_err(mas))) + return xa_err(mas->node); + + return 0; +} +EXPORT_SYMBOL_GPL(mas_store_gfp); + +/** + * mas_store_prealloc() - Store a value into the tree using memory + * preallocated in the maple state. + * @mas: The maple state + * @entry: The entry to store. + */ +void mas_store_prealloc(struct ma_state *mas, void *entry) +{ + MA_WR_STATE(wr_mas, mas, entry); + + mas_wr_store_setup(&wr_mas); + trace_ma_write(__func__, mas, 0, entry); + mas_wr_store_entry(&wr_mas); + BUG_ON(mas_is_err(mas)); + mas_destroy(mas); +} +EXPORT_SYMBOL_GPL(mas_store_prealloc); + +/** + * mas_preallocate() - Preallocate enough nodes for a store operation + * @mas: The maple state + * @entry: The entry that will be stored + * @gfp: The GFP_FLAGS to use for allocations. + * + * Return: 0 on success, -ENOMEM if memory could not be allocated. + */ +int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) +{ + int ret; + + mas_node_count_gfp(mas, 1 + mas_mt_height(mas) * 3, gfp); + mas->mas_flags |= MA_STATE_PREALLOC; + if (likely(!mas_is_err(mas))) + return 0; + + mas_set_alloc_req(mas, 0); + ret = xa_err(mas->node); + mas_reset(mas); + mas_destroy(mas); + mas_reset(mas); + return ret; +} + +/* + * mas_destroy() - destroy a maple state. + * @mas: The maple state + * + * Upon completion, check the left-most node and rebalance against the node to + * the right if necessary. Frees any allocated nodes associated with this maple + * state. + */ +void mas_destroy(struct ma_state *mas) +{ + struct maple_alloc *node; + + /* + * When using mas_for_each() to insert an expected number of elements, + * it is possible that the number inserted is less than the expected + * number. To fix an invalid final node, a check is performed here to + * rebalance the previous node with the final node. + */ + if (mas->mas_flags & MA_STATE_REBALANCE) { + unsigned char end; + + if (mas_is_start(mas)) + mas_start(mas); + + mtree_range_walk(mas); + end = mas_data_end(mas) + 1; + if (end < mt_min_slot_count(mas->node) - 1) + mas_destroy_rebalance(mas, end); + + mas->mas_flags &= ~MA_STATE_REBALANCE; + } + mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); + + while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) { + node = mas->alloc; + mas->alloc = node->slot[0]; + if (node->node_count > 0) + mt_free_bulk(node->node_count, + (void __rcu **)&node->slot[1]); + kmem_cache_free(maple_node_cache, node); + } + mas->alloc = NULL; +} +EXPORT_SYMBOL_GPL(mas_destroy); + +/* + * mas_expected_entries() - Set the expected number of entries that will be inserted. + * @mas: The maple state + * @nr_entries: The number of expected entries. + * + * This will attempt to pre-allocate enough nodes to store the expected number + * of entries. The allocations will occur using the bulk allocator interface + * for speed. Please call mas_destroy() on the @mas after inserting the entries + * to ensure any unused nodes are freed. + * + * Return: 0 on success, -ENOMEM if memory could not be allocated. + */ +int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) +{ + int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; + struct maple_enode *enode = mas->node; + int nr_nodes; + int ret; + + /* + * Sometimes it is necessary to duplicate a tree to a new tree, such as + * forking a process and duplicating the VMAs from one tree to a new + * tree. When such a situation arises, it is known that the new tree is + * not going to be used until the entire tree is populated. For + * performance reasons, it is best to use a bulk load with RCU disabled. + * This allows for optimistic splitting that favours the left and reuse + * of nodes during the operation. + */ + + /* Optimize splitting for bulk insert in-order */ + mas->mas_flags |= MA_STATE_BULK; + + /* + * Avoid overflow, assume a gap between each entry and a trailing null. + * If this is wrong, it just means allocation can happen during + * insertion of entries. + */ + nr_nodes = max(nr_entries, nr_entries * 2 + 1); + if (!mt_is_alloc(mas->tree)) + nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; + + /* Leaves; reduce slots to keep space for expansion */ + nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); + /* Internal nodes */ + nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); + /* Add working room for split (2 nodes) + new parents */ + mas_node_count(mas, nr_nodes + 3); + + /* Detect if allocations run out */ + mas->mas_flags |= MA_STATE_PREALLOC; + + if (!mas_is_err(mas)) + return 0; + + ret = xa_err(mas->node); + mas->node = enode; + mas_destroy(mas); + return ret; + +} +EXPORT_SYMBOL_GPL(mas_expected_entries); + +/** + * mas_next() - Get the next entry. + * @mas: The maple state + * @max: The maximum index to check. + * + * Returns the next entry after @mas->index. + * Must hold rcu_read_lock or the write lock. + * Can return the zero entry. + * + * Return: The next entry or %NULL + */ +void *mas_next(struct ma_state *mas, unsigned long max) +{ + if (mas_is_none(mas) || mas_is_paused(mas)) + mas->node = MAS_START; + + if (mas_is_start(mas)) + mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ + + if (mas_is_ptr(mas)) { + if (!mas->index) { + mas->index = 1; + mas->last = ULONG_MAX; + } + return NULL; + } + + if (mas->last == ULONG_MAX) + return NULL; + + /* Retries on dead nodes handled by mas_next_entry */ + return mas_next_entry(mas, max); +} +EXPORT_SYMBOL_GPL(mas_next); + +/** + * mt_next() - get the next value in the maple tree + * @mt: The maple tree + * @index: The start index + * @max: The maximum index to check + * + * Return: The entry at @index or higher, or %NULL if nothing is found. + */ +void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) +{ + void *entry = NULL; + MA_STATE(mas, mt, index, index); + + rcu_read_lock(); + entry = mas_next(&mas, max); + rcu_read_unlock(); + return entry; +} +EXPORT_SYMBOL_GPL(mt_next); + +/** + * mas_prev() - Get the previous entry + * @mas: The maple state + * @min: The minimum value to check. + * + * Must hold rcu_read_lock or the write lock. + * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not + * searchable nodes. + * + * Return: the previous value or %NULL. + */ +void *mas_prev(struct ma_state *mas, unsigned long min) +{ + if (!mas->index) { + /* Nothing comes before 0 */ + mas->last = 0; + return NULL; + } + + if (unlikely(mas_is_ptr(mas))) + return NULL; + + if (mas_is_none(mas) || mas_is_paused(mas)) + mas->node = MAS_START; + + if (mas_is_start(mas)) { + mas_walk(mas); + if (!mas->index) + return NULL; + } + + if (mas_is_ptr(mas)) { + if (!mas->index) { + mas->last = 0; + return NULL; + } + + mas->index = mas->last = 0; + return mas_root_locked(mas); + } + return mas_prev_entry(mas, min); +} +EXPORT_SYMBOL_GPL(mas_prev); + +/** + * mt_prev() - get the previous value in the maple tree + * @mt: The maple tree + * @index: The start index + * @min: The minimum index to check + * + * Return: The entry at @index or lower, or %NULL if nothing is found. + */ +void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) +{ + void *entry = NULL; + MA_STATE(mas, mt, index, index); + + rcu_read_lock(); + entry = mas_prev(&mas, min); + rcu_read_unlock(); + return entry; +} +EXPORT_SYMBOL_GPL(mt_prev); + +/** + * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. + * @mas: The maple state to pause + * + * Some users need to pause a walk and drop the lock they're holding in + * order to yield to a higher priority thread or carry out an operation + * on an entry. Those users should call this function before they drop + * the lock. It resets the @mas to be suitable for the next iteration + * of the loop after the user has reacquired the lock. If most entries + * found during a walk require you to call mas_pause(), the mt_for_each() + * iterator may be more appropriate. + * + */ +void mas_pause(struct ma_state *mas) +{ + mas->node = MAS_PAUSE; +} +EXPORT_SYMBOL_GPL(mas_pause); + +/** + * mas_find() - On the first call, find the entry at or after mas->index up to + * %max. Otherwise, find the entry after mas->index. + * @mas: The maple state + * @max: The maximum value to check. + * + * Must hold rcu_read_lock or the write lock. + * If an entry exists, last and index are updated accordingly. + * May set @mas->node to MAS_NONE. + * + * Return: The entry or %NULL. + */ +void *mas_find(struct ma_state *mas, unsigned long max) +{ + if (unlikely(mas_is_paused(mas))) { + if (unlikely(mas->last == ULONG_MAX)) { + mas->node = MAS_NONE; + return NULL; + } + mas->node = MAS_START; + mas->index = ++mas->last; + } + + if (unlikely(mas_is_start(mas))) { + /* First run or continue */ + void *entry; + + if (mas->index > max) + return NULL; + + entry = mas_walk(mas); + if (entry) + return entry; + } + + if (unlikely(!mas_searchable(mas))) + return NULL; + + /* Retries on dead nodes handled by mas_next_entry */ + return mas_next_entry(mas, max); +} +EXPORT_SYMBOL_GPL(mas_find); + +/** + * mas_find_rev: On the first call, find the first non-null entry at or below + * mas->index down to %min. Otherwise find the first non-null entry below + * mas->index down to %min. + * @mas: The maple state + * @min: The minimum value to check. + * + * Must hold rcu_read_lock or the write lock. + * If an entry exists, last and index are updated accordingly. + * May set @mas->node to MAS_NONE. + * + * Return: The entry or %NULL. + */ +void *mas_find_rev(struct ma_state *mas, unsigned long min) +{ + if (unlikely(mas_is_paused(mas))) { + if (unlikely(mas->last == ULONG_MAX)) { + mas->node = MAS_NONE; + return NULL; + } + mas->node = MAS_START; + mas->last = --mas->index; + } + + if (unlikely(mas_is_start(mas))) { + /* First run or continue */ + void *entry; + + if (mas->index < min) + return NULL; + + entry = mas_walk(mas); + if (entry) + return entry; + } + + if (unlikely(!mas_searchable(mas))) + return NULL; + + if (mas->index < min) + return NULL; + + /* Retries on dead nodes handled by mas_next_entry */ + return mas_prev_entry(mas, min); +} +EXPORT_SYMBOL_GPL(mas_find_rev); + +/** + * mas_erase() - Find the range in which index resides and erase the entire + * range. + * @mas: The maple state + * + * Must hold the write lock. + * Searches for @mas->index, sets @mas->index and @mas->last to the range and + * erases that range. + * + * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. + */ +void *mas_erase(struct ma_state *mas) +{ + void *entry; + MA_WR_STATE(wr_mas, mas, NULL); + + if (mas_is_none(mas) || mas_is_paused(mas)) + mas->node = MAS_START; + + /* Retry unnecessary when holding the write lock. */ + entry = mas_state_walk(mas); + if (!entry) + return NULL; + +write_retry: + /* Must reset to ensure spanning writes of last slot are detected */ + mas_reset(mas); + mas_wr_store_setup(&wr_mas); + mas_wr_store_entry(&wr_mas); + if (mas_nomem(mas, GFP_KERNEL)) + goto write_retry; + + return entry; +} +EXPORT_SYMBOL_GPL(mas_erase); + +/** + * mas_nomem() - Check if there was an error allocating and do the allocation + * if necessary If there are allocations, then free them. + * @mas: The maple state + * @gfp: The GFP_FLAGS to use for allocations + * Return: true on allocation, false otherwise. + */ +bool mas_nomem(struct ma_state *mas, gfp_t gfp) + __must_hold(mas->tree->lock) +{ + if (likely(mas->node != MA_ERROR(-ENOMEM))) { + mas_destroy(mas); + return false; + } + + if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { + mtree_unlock(mas->tree); + mas_alloc_nodes(mas, gfp); + mtree_lock(mas->tree); + } else { + mas_alloc_nodes(mas, gfp); + } + + if (!mas_allocated(mas)) + return false; + + mas->node = MAS_START; + return true; +} + +void __init maple_tree_init(void) +{ + maple_node_cache = kmem_cache_create("maple_node", + sizeof(struct maple_node), sizeof(struct maple_node), + SLAB_PANIC, NULL); +} + +/** + * mtree_load() - Load a value stored in a maple tree + * @mt: The maple tree + * @index: The index to load + * + * Return: the entry or %NULL + */ +void *mtree_load(struct maple_tree *mt, unsigned long index) +{ + MA_STATE(mas, mt, index, index); + void *entry; + + trace_ma_read(__func__, &mas); + rcu_read_lock(); +retry: + entry = mas_start(&mas); + if (unlikely(mas_is_none(&mas))) + goto unlock; + + if (unlikely(mas_is_ptr(&mas))) { + if (index) + entry = NULL; + + goto unlock; + } + + entry = mtree_lookup_walk(&mas); + if (!entry && unlikely(mas_is_start(&mas))) + goto retry; +unlock: + rcu_read_unlock(); + if (xa_is_zero(entry)) + return NULL; + + return entry; +} +EXPORT_SYMBOL(mtree_load); + +/** + * mtree_store_range() - Store an entry at a given range. + * @mt: The maple tree + * @index: The start of the range + * @last: The end of the range + * @entry: The entry to store + * @gfp: The GFP_FLAGS to use for allocations + * + * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not + * be allocated. + */ +int mtree_store_range(struct maple_tree *mt, unsigned long index, + unsigned long last, void *entry, gfp_t gfp) +{ + MA_STATE(mas, mt, index, last); + MA_WR_STATE(wr_mas, &mas, entry); + + trace_ma_write(__func__, &mas, 0, entry); + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return -EINVAL; + + if (index > last) + return -EINVAL; + + mtree_lock(mt); +retry: + mas_wr_store_entry(&wr_mas); + if (mas_nomem(&mas, gfp)) + goto retry; + + mtree_unlock(mt); + if (mas_is_err(&mas)) + return xa_err(mas.node); + + return 0; +} +EXPORT_SYMBOL(mtree_store_range); + +/** + * mtree_store() - Store an entry at a given index. + * @mt: The maple tree + * @index: The index to store the value + * @entry: The entry to store + * @gfp: The GFP_FLAGS to use for allocations + * + * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not + * be allocated. + */ +int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, + gfp_t gfp) +{ + return mtree_store_range(mt, index, index, entry, gfp); +} +EXPORT_SYMBOL(mtree_store); + +/** + * mtree_insert_range() - Insert an entry at a give range if there is no value. + * @mt: The maple tree + * @first: The start of the range + * @last: The end of the range + * @entry: The entry to store + * @gfp: The GFP_FLAGS to use for allocations. + * + * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid + * request, -ENOMEM if memory could not be allocated. + */ +int mtree_insert_range(struct maple_tree *mt, unsigned long first, + unsigned long last, void *entry, gfp_t gfp) +{ + MA_STATE(ms, mt, first, last); + + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return -EINVAL; + + if (first > last) + return -EINVAL; + + mtree_lock(mt); +retry: + mas_insert(&ms, entry); + if (mas_nomem(&ms, gfp)) + goto retry; + + mtree_unlock(mt); + if (mas_is_err(&ms)) + return xa_err(ms.node); + + return 0; +} +EXPORT_SYMBOL(mtree_insert_range); + +/** + * mtree_insert() - Insert an entry at a give index if there is no value. + * @mt: The maple tree + * @index : The index to store the value + * @entry: The entry to store + * @gfp: The FGP_FLAGS to use for allocations. + * + * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid + * request, -ENOMEM if memory could not be allocated. + */ +int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, + gfp_t gfp) +{ + return mtree_insert_range(mt, index, index, entry, gfp); +} +EXPORT_SYMBOL(mtree_insert); + +int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long size, unsigned long min, + unsigned long max, gfp_t gfp) +{ + int ret = 0; + + MA_STATE(mas, mt, min, max - size); + if (!mt_is_alloc(mt)) + return -EINVAL; + + if (WARN_ON_ONCE(mt_is_reserved(entry))) + return -EINVAL; + + if (min > max) + return -EINVAL; + + if (max < size) + return -EINVAL; + + if (!size) + return -EINVAL; + + mtree_lock(mt); +retry: + mas.offset = 0; + mas.index = min; + mas.last = max - size; + ret = mas_alloc(&mas, entry, size, startp); + if (mas_nomem(&mas, gfp)) + goto retry; + + mtree_unlock(mt); + return ret; +} +EXPORT_SYMBOL(mtree_alloc_range); + +int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long size, unsigned long min, + unsigned long max, gfp_t gfp) +{ + int ret = 0; + + MA_STATE(mas, mt, min, max - size); + if (!mt_is_alloc(mt)) + return -EINVAL; + + if (WARN_ON_ONCE(mt_is_reserved(entry))) + return -EINVAL; + + if (min >= max) + return -EINVAL; + + if (max < size - 1) + return -EINVAL; + + if (!size) + return -EINVAL; + + mtree_lock(mt); +retry: + ret = mas_rev_alloc(&mas, min, max, entry, size, startp); + if (mas_nomem(&mas, gfp)) + goto retry; + + mtree_unlock(mt); + return ret; +} +EXPORT_SYMBOL(mtree_alloc_rrange); + +/** + * mtree_erase() - Find an index and erase the entire range. + * @mt: The maple tree + * @index: The index to erase + * + * Erasing is the same as a walk to an entry then a store of a NULL to that + * ENTIRE range. In fact, it is implemented as such using the advanced API. + * + * Return: The entry stored at the @index or %NULL + */ +void *mtree_erase(struct maple_tree *mt, unsigned long index) +{ + void *entry = NULL; + + MA_STATE(mas, mt, index, index); + trace_ma_op(__func__, &mas); + + mtree_lock(mt); + entry = mas_erase(&mas); + mtree_unlock(mt); + + return entry; +} +EXPORT_SYMBOL(mtree_erase); + +/** + * __mt_destroy() - Walk and free all nodes of a locked maple tree. + * @mt: The maple tree + * + * Note: Does not handle locking. + */ +void __mt_destroy(struct maple_tree *mt) +{ + void *root = mt_root_locked(mt); + + rcu_assign_pointer(mt->ma_root, NULL); + if (xa_is_node(root)) + mte_destroy_walk(root, mt); + + mt->ma_flags = 0; +} +EXPORT_SYMBOL_GPL(__mt_destroy); + +/** + * mtree_destroy() - Destroy a maple tree + * @mt: The maple tree + * + * Frees all resources used by the tree. Handles locking. + */ +void mtree_destroy(struct maple_tree *mt) +{ + mtree_lock(mt); + __mt_destroy(mt); + mtree_unlock(mt); +} +EXPORT_SYMBOL(mtree_destroy); + +/** + * mt_find() - Search from the start up until an entry is found. + * @mt: The maple tree + * @index: Pointer which contains the start location of the search + * @max: The maximum value to check + * + * Handles locking. @index will be incremented to one beyond the range. + * + * Return: The entry at or after the @index or %NULL + */ +void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) +{ + MA_STATE(mas, mt, *index, *index); + void *entry; +#ifdef CONFIG_DEBUG_MAPLE_TREE + unsigned long copy = *index; +#endif + + trace_ma_read(__func__, &mas); + + if ((*index) > max) + return NULL; + + rcu_read_lock(); +retry: + entry = mas_state_walk(&mas); + if (mas_is_start(&mas)) + goto retry; + + if (unlikely(xa_is_zero(entry))) + entry = NULL; + + if (entry) + goto unlock; + + while (mas_searchable(&mas) && (mas.index < max)) { + entry = mas_next_entry(&mas, max); + if (likely(entry && !xa_is_zero(entry))) + break; + } + + if (unlikely(xa_is_zero(entry))) + entry = NULL; +unlock: + rcu_read_unlock(); + if (likely(entry)) { + *index = mas.last + 1; +#ifdef CONFIG_DEBUG_MAPLE_TREE + if ((*index) && (*index) <= copy) + pr_err("index not increased! %lx <= %lx\n", + *index, copy); + MT_BUG_ON(mt, (*index) && ((*index) <= copy)); +#endif + } + + return entry; +} +EXPORT_SYMBOL(mt_find); + +/** + * mt_find_after() - Search from the start up until an entry is found. + * @mt: The maple tree + * @index: Pointer which contains the start location of the search + * @max: The maximum value to check + * + * Handles locking, detects wrapping on index == 0 + * + * Return: The entry at or after the @index or %NULL + */ +void *mt_find_after(struct maple_tree *mt, unsigned long *index, + unsigned long max) +{ + if (!(*index)) + return NULL; + + return mt_find(mt, index, max); +} +EXPORT_SYMBOL(mt_find_after); + +#ifdef CONFIG_DEBUG_MAPLE_TREE +atomic_t maple_tree_tests_run; +EXPORT_SYMBOL_GPL(maple_tree_tests_run); +atomic_t maple_tree_tests_passed; +EXPORT_SYMBOL_GPL(maple_tree_tests_passed); + +#ifndef __KERNEL__ +extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); +void mt_set_non_kernel(unsigned int val) +{ + kmem_cache_set_non_kernel(maple_node_cache, val); +} + +extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); +unsigned long mt_get_alloc_size(void) +{ + return kmem_cache_get_alloc(maple_node_cache); +} + +extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); +void mt_zero_nr_tallocated(void) +{ + kmem_cache_zero_nr_tallocated(maple_node_cache); +} + +extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); +unsigned int mt_nr_tallocated(void) +{ + return kmem_cache_nr_tallocated(maple_node_cache); +} + +extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); +unsigned int mt_nr_allocated(void) +{ + return kmem_cache_nr_allocated(maple_node_cache); +} + +/* + * mas_dead_node() - Check if the maple state is pointing to a dead node. + * @mas: The maple state + * @index: The index to restore in @mas. + * + * Used in test code. + * Return: 1 if @mas has been reset to MAS_START, 0 otherwise. + */ +static inline int mas_dead_node(struct ma_state *mas, unsigned long index) +{ + if (unlikely(!mas_searchable(mas) || mas_is_start(mas))) + return 0; + + if (likely(!mte_dead_node(mas->node))) + return 0; + + mas_rewalk(mas, index); + return 1; +} + +void mt_cache_shrink(void) +{ +} +#else +/* + * mt_cache_shrink() - For testing, don't use this. + * + * Certain testcases can trigger an OOM when combined with other memory + * debugging configuration options. This function is used to reduce the + * possibility of an out of memory even due to kmem_cache objects remaining + * around for longer than usual. + */ +void mt_cache_shrink(void) +{ + kmem_cache_shrink(maple_node_cache); + +} +EXPORT_SYMBOL_GPL(mt_cache_shrink); + +#endif /* not defined __KERNEL__ */ +/* + * mas_get_slot() - Get the entry in the maple state node stored at @offset. + * @mas: The maple state + * @offset: The offset into the slot array to fetch. + * + * Return: The entry stored at @offset. + */ +static inline struct maple_enode *mas_get_slot(struct ma_state *mas, + unsigned char offset) +{ + return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), + offset); +} + + +/* + * mas_first_entry() - Go the first leaf and find the first entry. + * @mas: the maple state. + * @limit: the maximum index to check. + * @*r_start: Pointer to set to the range start. + * + * Sets mas->offset to the offset of the entry, r_start to the range minimum. + * + * Return: The first entry or MAS_NONE. + */ +static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn, + unsigned long limit, enum maple_type mt) + +{ + unsigned long max; + unsigned long *pivots; + void __rcu **slots; + void *entry = NULL; + + mas->index = mas->min; + if (mas->index > limit) + goto none; + + max = mas->max; + mas->offset = 0; + while (likely(!ma_is_leaf(mt))) { + MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); + slots = ma_slots(mn, mt); + pivots = ma_pivots(mn, mt); + max = pivots[0]; + entry = mas_slot(mas, slots, 0); + if (unlikely(ma_dead_node(mn))) + return NULL; + mas->node = entry; + mn = mas_mn(mas); + mt = mte_node_type(mas->node); + } + MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); + + mas->max = max; + slots = ma_slots(mn, mt); + entry = mas_slot(mas, slots, 0); + if (unlikely(ma_dead_node(mn))) + return NULL; + + /* Slot 0 or 1 must be set */ + if (mas->index > limit) + goto none; + + if (likely(entry)) + return entry; + + pivots = ma_pivots(mn, mt); + mas->index = pivots[0] + 1; + mas->offset = 1; + entry = mas_slot(mas, slots, 1); + if (unlikely(ma_dead_node(mn))) + return NULL; + + if (mas->index > limit) + goto none; + + if (likely(entry)) + return entry; + +none: + if (likely(!ma_dead_node(mn))) + mas->node = MAS_NONE; + return NULL; +} + +/* Depth first search, post-order */ +static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) +{ + + struct maple_enode *p = MAS_NONE, *mn = mas->node; + unsigned long p_min, p_max; + + mas_next_node(mas, mas_mn(mas), max); + if (!mas_is_none(mas)) + return; + + if (mte_is_root(mn)) + return; + + mas->node = mn; + mas_ascend(mas); + while (mas->node != MAS_NONE) { + p = mas->node; + p_min = mas->min; + p_max = mas->max; + mas_prev_node(mas, 0); + } + + if (p == MAS_NONE) + return; + + mas->node = p; + mas->max = p_max; + mas->min = p_min; +} + +/* Tree validations */ +static void mt_dump_node(const struct maple_tree *mt, void *entry, + unsigned long min, unsigned long max, unsigned int depth); +static void mt_dump_range(unsigned long min, unsigned long max, + unsigned int depth) +{ + static const char spaces[] = " "; + + if (min == max) + pr_info("%.*s%lu: ", depth * 2, spaces, min); + else + pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); +} + +static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, + unsigned int depth) +{ + mt_dump_range(min, max, depth); + + if (xa_is_value(entry)) + pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), + xa_to_value(entry), entry); + else if (xa_is_zero(entry)) + pr_cont("zero (%ld)\n", xa_to_internal(entry)); + else if (mt_is_reserved(entry)) + pr_cont("UNKNOWN ENTRY (%p)\n", entry); + else + pr_cont("%p\n", entry); +} + +static void mt_dump_range64(const struct maple_tree *mt, void *entry, + unsigned long min, unsigned long max, unsigned int depth) +{ + struct maple_range_64 *node = &mte_to_node(entry)->mr64; + bool leaf = mte_is_leaf(entry); + unsigned long first = min; + int i; + + pr_cont(" contents: "); + for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) + pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont("%p\n", node->slot[i]); + for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { + unsigned long last = max; + + if (i < (MAPLE_RANGE64_SLOTS - 1)) + last = node->pivot[i]; + else if (!node->slot[i] && max != mt_max[mte_node_type(entry)]) + break; + if (last == 0 && i > 0) + break; + if (leaf) + mt_dump_entry(mt_slot(mt, node->slot, i), + first, last, depth + 1); + else if (node->slot[i]) + mt_dump_node(mt, mt_slot(mt, node->slot, i), + first, last, depth + 1); + + if (last == max) + break; + if (last > max) { + pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + node, last, max, i); + break; + } + first = last + 1; + } +} + +static void mt_dump_arange64(const struct maple_tree *mt, void *entry, + unsigned long min, unsigned long max, unsigned int depth) +{ + struct maple_arange_64 *node = &mte_to_node(entry)->ma64; + bool leaf = mte_is_leaf(entry); + unsigned long first = min; + int i; + + pr_cont(" contents: "); + for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) + pr_cont("%lu ", node->gap[i]); + pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); + for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) + pr_cont("%p %lu ", node->slot[i], node->pivot[i]); + pr_cont("%p\n", node->slot[i]); + for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { + unsigned long last = max; + + if (i < (MAPLE_ARANGE64_SLOTS - 1)) + last = node->pivot[i]; + else if (!node->slot[i]) + break; + if (last == 0 && i > 0) + break; + if (leaf) + mt_dump_entry(mt_slot(mt, node->slot, i), + first, last, depth + 1); + else if (node->slot[i]) + mt_dump_node(mt, mt_slot(mt, node->slot, i), + first, last, depth + 1); + + if (last == max) + break; + if (last > max) { + pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", + node, last, max, i); + break; + } + first = last + 1; + } +} + +static void mt_dump_node(const struct maple_tree *mt, void *entry, + unsigned long min, unsigned long max, unsigned int depth) +{ + struct maple_node *node = mte_to_node(entry); + unsigned int type = mte_node_type(entry); + unsigned int i; + + mt_dump_range(min, max, depth); + + pr_cont("node %p depth %d type %d parent %p", node, depth, type, + node ? node->parent : NULL); + switch (type) { + case maple_dense: + pr_cont("\n"); + for (i = 0; i < MAPLE_NODE_SLOTS; i++) { + if (min + i > max) + pr_cont("OUT OF RANGE: "); + mt_dump_entry(mt_slot(mt, node->slot, i), + min + i, min + i, depth); + } + break; + case maple_leaf_64: + case maple_range_64: + mt_dump_range64(mt, entry, min, max, depth); + break; + case maple_arange_64: + mt_dump_arange64(mt, entry, min, max, depth); + break; + + default: + pr_cont(" UNKNOWN TYPE\n"); + } +} + +void mt_dump(const struct maple_tree *mt) +{ + void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); + + pr_info("maple_tree(%p) flags %X, height %u root %p\n", + mt, mt->ma_flags, mt_height(mt), entry); + if (!xa_is_node(entry)) + mt_dump_entry(entry, 0, 0, 0); + else if (entry) + mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0); +} +EXPORT_SYMBOL_GPL(mt_dump); + +/* + * Calculate the maximum gap in a node and check if that's what is reported in + * the parent (unless root). + */ +static void mas_validate_gaps(struct ma_state *mas) +{ + struct maple_enode *mte = mas->node; + struct maple_node *p_mn; + unsigned long gap = 0, max_gap = 0; + unsigned long p_end, p_start = mas->min; + unsigned char p_slot; + unsigned long *gaps = NULL; + unsigned long *pivots = ma_pivots(mte_to_node(mte), mte_node_type(mte)); + int i; + + if (ma_is_dense(mte_node_type(mte))) { + for (i = 0; i < mt_slot_count(mte); i++) { + if (mas_get_slot(mas, i)) { + if (gap > max_gap) + max_gap = gap; + gap = 0; + continue; + } + gap++; + } + goto counted; + } + + gaps = ma_gaps(mte_to_node(mte), mte_node_type(mte)); + for (i = 0; i < mt_slot_count(mte); i++) { + p_end = mas_logical_pivot(mas, pivots, i, mte_node_type(mte)); + + if (!gaps) { + if (mas_get_slot(mas, i)) { + gap = 0; + goto not_empty; + } + + gap += p_end - p_start + 1; + } else { + void *entry = mas_get_slot(mas, i); + + gap = gaps[i]; + if (!entry) { + if (gap != p_end - p_start + 1) { + pr_err("%p[%u] -> %p %lu != %lu - %lu + 1\n", + mas_mn(mas), i, + mas_get_slot(mas, i), gap, + p_end, p_start); + mt_dump(mas->tree); + + MT_BUG_ON(mas->tree, + gap != p_end - p_start + 1); + } + } else { + if (gap > p_end - p_start + 1) { + pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", + mas_mn(mas), i, gap, p_end, p_start, + p_end - p_start + 1); + MT_BUG_ON(mas->tree, + gap > p_end - p_start + 1); + } + } + } + + if (gap > max_gap) + max_gap = gap; +not_empty: + p_start = p_end + 1; + if (p_end >= mas->max) + break; + } + +counted: + if (mte_is_root(mte)) + return; + + p_slot = mte_parent_slot(mas->node); + p_mn = mte_parent(mte); + MT_BUG_ON(mas->tree, max_gap > mas->max); + if (ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap) { + pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); + mt_dump(mas->tree); + } + + MT_BUG_ON(mas->tree, + ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap); +} + +static void mas_validate_parent_slot(struct ma_state *mas) +{ + struct maple_node *parent; + struct maple_enode *node; + enum maple_type p_type = mas_parent_enum(mas, mas->node); + unsigned char p_slot = mte_parent_slot(mas->node); + void __rcu **slots; + int i; + + if (mte_is_root(mas->node)) + return; + + parent = mte_parent(mas->node); + slots = ma_slots(parent, p_type); + MT_BUG_ON(mas->tree, mas_mn(mas) == parent); + + /* Check prev/next parent slot for duplicate node entry */ + + for (i = 0; i < mt_slots[p_type]; i++) { + node = mas_slot(mas, slots, i); + if (i == p_slot) { + if (node != mas->node) + pr_err("parent %p[%u] does not have %p\n", + parent, i, mas_mn(mas)); + MT_BUG_ON(mas->tree, node != mas->node); + } else if (node == mas->node) { + pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", + mas_mn(mas), parent, i, p_slot); + MT_BUG_ON(mas->tree, node == mas->node); + } + } +} + +static void mas_validate_child_slot(struct ma_state *mas) +{ + enum maple_type type = mte_node_type(mas->node); + void __rcu **slots = ma_slots(mte_to_node(mas->node), type); + unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); + struct maple_enode *child; + unsigned char i; + + if (mte_is_leaf(mas->node)) + return; + + for (i = 0; i < mt_slots[type]; i++) { + child = mas_slot(mas, slots, i); + if (!pivots[i] || pivots[i] == mas->max) + break; + + if (!child) + break; + + if (mte_parent_slot(child) != i) { + pr_err("Slot error at %p[%u]: child %p has pslot %u\n", + mas_mn(mas), i, mte_to_node(child), + mte_parent_slot(child)); + MT_BUG_ON(mas->tree, 1); + } + + if (mte_parent(child) != mte_to_node(mas->node)) { + pr_err("child %p has parent %p not %p\n", + mte_to_node(child), mte_parent(child), + mte_to_node(mas->node)); + MT_BUG_ON(mas->tree, 1); + } + } +} + +/* + * Validate all pivots are within mas->min and mas->max. + */ +static void mas_validate_limits(struct ma_state *mas) +{ + int i; + unsigned long prev_piv = 0; + enum maple_type type = mte_node_type(mas->node); + void __rcu **slots = ma_slots(mte_to_node(mas->node), type); + unsigned long *pivots = ma_pivots(mas_mn(mas), type); + + /* all limits are fine here. */ + if (mte_is_root(mas->node)) + return; + + for (i = 0; i < mt_slots[type]; i++) { + unsigned long piv; + + piv = mas_safe_pivot(mas, pivots, i, type); + + if (!piv && (i != 0)) + break; + + if (!mte_is_leaf(mas->node)) { + void *entry = mas_slot(mas, slots, i); + + if (!entry) + pr_err("%p[%u] cannot be null\n", + mas_mn(mas), i); + + MT_BUG_ON(mas->tree, !entry); + } + + if (prev_piv > piv) { + pr_err("%p[%u] piv %lu < prev_piv %lu\n", + mas_mn(mas), i, piv, prev_piv); + MT_BUG_ON(mas->tree, piv < prev_piv); + } + + if (piv < mas->min) { + pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, + piv, mas->min); + MT_BUG_ON(mas->tree, piv < mas->min); + } + if (piv > mas->max) { + pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, + piv, mas->max); + MT_BUG_ON(mas->tree, piv > mas->max); + } + prev_piv = piv; + if (piv == mas->max) + break; + } + for (i += 1; i < mt_slots[type]; i++) { + void *entry = mas_slot(mas, slots, i); + + if (entry && (i != mt_slots[type] - 1)) { + pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), + i, entry); + MT_BUG_ON(mas->tree, entry != NULL); + } + + if (i < mt_pivots[type]) { + unsigned long piv = pivots[i]; + + if (!piv) + continue; + + pr_err("%p[%u] should not have piv %lu\n", + mas_mn(mas), i, piv); + MT_BUG_ON(mas->tree, i < mt_pivots[type] - 1); + } + } +} + +static void mt_validate_nulls(struct maple_tree *mt) +{ + void *entry, *last = (void *)1; + unsigned char offset = 0; + void __rcu **slots; + MA_STATE(mas, mt, 0, 0); + + mas_start(&mas); + if (mas_is_none(&mas) || (mas.node == MAS_ROOT)) + return; + + while (!mte_is_leaf(mas.node)) + mas_descend(&mas); + + slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); + do { + entry = mas_slot(&mas, slots, offset); + if (!last && !entry) { + pr_err("Sequential nulls end at %p[%u]\n", + mas_mn(&mas), offset); + } + MT_BUG_ON(mt, !last && !entry); + last = entry; + if (offset == mas_data_end(&mas)) { + mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); + if (mas_is_none(&mas)) + return; + offset = 0; + slots = ma_slots(mte_to_node(mas.node), + mte_node_type(mas.node)); + } else { + offset++; + } + + } while (!mas_is_none(&mas)); +} + +/* + * validate a maple tree by checking: + * 1. The limits (pivots are within mas->min to mas->max) + * 2. The gap is correctly set in the parents + */ +void mt_validate(struct maple_tree *mt) +{ + unsigned char end; + + MA_STATE(mas, mt, 0, 0); + rcu_read_lock(); + mas_start(&mas); + if (!mas_searchable(&mas)) + goto done; + + mas_first_entry(&mas, mas_mn(&mas), ULONG_MAX, mte_node_type(mas.node)); + while (!mas_is_none(&mas)) { + MT_BUG_ON(mas.tree, mte_dead_node(mas.node)); + if (!mte_is_root(mas.node)) { + end = mas_data_end(&mas); + if ((end < mt_min_slot_count(mas.node)) && + (mas.max != ULONG_MAX)) { + pr_err("Invalid size %u of %p\n", end, + mas_mn(&mas)); + MT_BUG_ON(mas.tree, 1); + } + + } + mas_validate_parent_slot(&mas); + mas_validate_child_slot(&mas); + mas_validate_limits(&mas); + if (mt_is_alloc(mt)) + mas_validate_gaps(&mas); + mas_dfs_postorder(&mas, ULONG_MAX); + } + mt_validate_nulls(mt); +done: + rcu_read_unlock(); + +} +EXPORT_SYMBOL_GPL(mt_validate); + +#endif /* CONFIG_DEBUG_MAPLE_TREE */ diff --git a/lib/math/div64.c b/lib/math/div64.c index 46866394fc84..55a81782e271 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -63,12 +63,6 @@ uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) EXPORT_SYMBOL(__div64_32); #endif -/** - * div_s64_rem - signed 64bit divide with 64bit divisor and remainder - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * @remainder: 64bit remainder - */ #ifndef div_s64_rem s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) { @@ -89,7 +83,7 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) EXPORT_SYMBOL(div_s64_rem); #endif -/** +/* * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder * @dividend: 64bit dividend * @divisor: 64bit divisor @@ -129,7 +123,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) EXPORT_SYMBOL(div64_u64_rem); #endif -/** +/* * div64_u64 - unsigned 64bit divide with 64bit divisor * @dividend: 64bit dividend * @divisor: 64bit divisor @@ -163,11 +157,6 @@ u64 div64_u64(u64 dividend, u64 divisor) EXPORT_SYMBOL(div64_u64); #endif -/** - * div64_s64 - signed 64bit divide with 64bit divisor - * @dividend: 64bit dividend - * @divisor: 64bit divisor - */ #ifndef div64_s64 s64 div64_s64(s64 dividend, s64 divisor) { diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 62f8ffcbbaa3..89128551448d 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -29,9 +29,8 @@ struct some_bytes { }; #define check(instance, v) do { \ - int i; \ BUILD_BUG_ON(sizeof(instance.data) != 32); \ - for (i = 0; i < sizeof(instance.data); i++) { \ + for (size_t i = 0; i < sizeof(instance.data); i++) { \ KUNIT_ASSERT_EQ_MSG(test, instance.data[i], v, \ "line %d: '%s' not initialized to 0x%02x @ %d (saw 0x%02x)\n", \ __LINE__, #instance, v, i, instance.data[i]); \ @@ -39,9 +38,8 @@ struct some_bytes { } while (0) #define compare(name, one, two) do { \ - int i; \ BUILD_BUG_ON(sizeof(one) != sizeof(two)); \ - for (i = 0; i < sizeof(one); i++) { \ + for (size_t i = 0; i < sizeof(one); i++) { \ KUNIT_EXPECT_EQ_MSG(test, one.data[i], two.data[i], \ "line %d: %s.data[%d] (0x%02x) != %s.data[%d] (0x%02x)\n", \ __LINE__, #one, i, one.data[i], #two, i, two.data[i]); \ @@ -107,6 +105,8 @@ static void memcpy_test(struct kunit *test) #undef TEST_OP } +static unsigned char larger_array [2048]; + static void memmove_test(struct kunit *test) { #define TEST_OP "memmove" @@ -181,6 +181,26 @@ static void memmove_test(struct kunit *test) ptr = &overlap.data[2]; memmove(ptr, overlap.data, 5); compare("overlapping write", overlap, overlap_expected); + + /* Verify larger overlapping moves. */ + larger_array[256] = 0xAAu; + /* + * Test a backwards overlapping memmove first. 256 and 1024 are + * important for i386 to use rep movsl. + */ + memmove(larger_array, larger_array + 256, 1024); + KUNIT_ASSERT_EQ(test, larger_array[0], 0xAAu); + KUNIT_ASSERT_EQ(test, larger_array[256], 0x00); + KUNIT_ASSERT_NULL(test, + memchr(larger_array + 1, 0xaa, ARRAY_SIZE(larger_array) - 1)); + /* Test a forwards overlapping memmove. */ + larger_array[0] = 0xBBu; + memmove(larger_array + 256, larger_array, 1024); + KUNIT_ASSERT_EQ(test, larger_array[0], 0xBBu); + KUNIT_ASSERT_EQ(test, larger_array[256], 0xBBu); + KUNIT_ASSERT_NULL(test, memchr(larger_array + 1, 0xBBu, 256 - 1)); + KUNIT_ASSERT_NULL(test, + memchr(larger_array + 257, 0xBBu, ARRAY_SIZE(larger_array) - 257)); #undef TEST_OP } @@ -272,10 +292,268 @@ static void memset_test(struct kunit *test) #undef TEST_OP } +static u8 large_src[1024]; +static u8 large_dst[2048]; +static const u8 large_zero[2048]; + +static void set_random_nonzero(struct kunit *test, u8 *byte) +{ + int failed_rng = 0; + + while (*byte == 0) { + get_random_bytes(byte, 1); + KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100, + "Is the RNG broken?"); + } +} + +static void init_large(struct kunit *test) +{ + + /* Get many bit patterns. */ + get_random_bytes(large_src, ARRAY_SIZE(large_src)); + + /* Make sure we have non-zero edges. */ + set_random_nonzero(test, &large_src[0]); + set_random_nonzero(test, &large_src[ARRAY_SIZE(large_src) - 1]); + + /* Explicitly zero the entire destination. */ + memset(large_dst, 0, ARRAY_SIZE(large_dst)); +} + +/* + * Instead of an indirect function call for "copy" or a giant macro, + * use a bool to pick memcpy or memmove. + */ +static void copy_large_test(struct kunit *test, bool use_memmove) +{ + init_large(test); + + /* Copy a growing number of non-overlapping bytes ... */ + for (int bytes = 1; bytes <= ARRAY_SIZE(large_src); bytes++) { + /* Over a shifting destination window ... */ + for (int offset = 0; offset < ARRAY_SIZE(large_src); offset++) { + int right_zero_pos = offset + bytes; + int right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos; + + /* Copy! */ + if (use_memmove) + memmove(large_dst + offset, large_src, bytes); + else + memcpy(large_dst + offset, large_src, bytes); + + /* Did we touch anything before the copy area? */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(large_dst, large_zero, offset), 0, + "with size %d at offset %d", bytes, offset); + /* Did we touch anything after the copy area? */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0, + "with size %d at offset %d", bytes, offset); + + /* Are we byte-for-byte exact across the copy? */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(large_dst + offset, large_src, bytes), 0, + "with size %d at offset %d", bytes, offset); + + /* Zero out what we copied for the next cycle. */ + memset(large_dst + offset, 0, bytes); + } + /* Avoid stall warnings if this loop gets slow. */ + cond_resched(); + } +} + +static void memcpy_large_test(struct kunit *test) +{ + copy_large_test(test, false); +} + +static void memmove_large_test(struct kunit *test) +{ + copy_large_test(test, true); +} + +/* + * On the assumption that boundary conditions are going to be the most + * sensitive, instead of taking a full step (inc) each iteration, + * take single index steps for at least the first "inc"-many indexes + * from the "start" and at least the last "inc"-many indexes before + * the "end". When in the middle, take full "inc"-wide steps. For + * example, calling next_step(idx, 1, 15, 3) with idx starting at 0 + * would see the following pattern: 1 2 3 4 7 10 11 12 13 14 15. + */ +static int next_step(int idx, int start, int end, int inc) +{ + start += inc; + end -= inc; + + if (idx < start || idx + inc > end) + inc = 1; + return idx + inc; +} + +static void inner_loop(struct kunit *test, int bytes, int d_off, int s_off) +{ + int left_zero_pos, left_zero_size; + int right_zero_pos, right_zero_size; + int src_pos, src_orig_pos, src_size; + int pos; + + /* Place the source in the destination buffer. */ + memcpy(&large_dst[s_off], large_src, bytes); + + /* Copy to destination offset. */ + memmove(&large_dst[d_off], &large_dst[s_off], bytes); + + /* Make sure destination entirely matches. */ + KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[d_off], large_src, bytes), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + + /* Calculate the expected zero spans. */ + if (s_off < d_off) { + left_zero_pos = 0; + left_zero_size = s_off; + + right_zero_pos = d_off + bytes; + right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos; + + src_pos = s_off; + src_orig_pos = 0; + src_size = d_off - s_off; + } else { + left_zero_pos = 0; + left_zero_size = d_off; + + right_zero_pos = s_off + bytes; + right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos; + + src_pos = d_off + bytes; + src_orig_pos = src_pos - s_off; + src_size = right_zero_pos - src_pos; + } + + /* Check non-overlapping source is unchanged.*/ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[src_pos], &large_src[src_orig_pos], src_size), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + + /* Check leading buffer contents are zero. */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[left_zero_pos], large_zero, left_zero_size), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + /* Check trailing buffer contents are zero. */ + KUNIT_ASSERT_EQ_MSG(test, + memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0, + "with size %d at src offset %d and dest offset %d", + bytes, s_off, d_off); + + /* Zero out everything not already zeroed.*/ + pos = left_zero_pos + left_zero_size; + memset(&large_dst[pos], 0, right_zero_pos - pos); +} + +static void memmove_overlap_test(struct kunit *test) +{ + /* + * Running all possible offset and overlap combinations takes a + * very long time. Instead, only check up to 128 bytes offset + * into the destination buffer (which should result in crossing + * cachelines), with a step size of 1 through 7 to try to skip some + * redundancy. + */ + static const int offset_max = 128; /* less than ARRAY_SIZE(large_src); */ + static const int bytes_step = 7; + static const int window_step = 7; + + static const int bytes_start = 1; + static const int bytes_end = ARRAY_SIZE(large_src) + 1; + + init_large(test); + + /* Copy a growing number of overlapping bytes ... */ + for (int bytes = bytes_start; bytes < bytes_end; + bytes = next_step(bytes, bytes_start, bytes_end, bytes_step)) { + + /* Over a shifting destination window ... */ + for (int d_off = 0; d_off < offset_max; d_off++) { + int s_start = max(d_off - bytes, 0); + int s_end = min_t(int, d_off + bytes, ARRAY_SIZE(large_src)); + + /* Over a shifting source window ... */ + for (int s_off = s_start; s_off < s_end; + s_off = next_step(s_off, s_start, s_end, window_step)) + inner_loop(test, bytes, d_off, s_off); + + /* Avoid stall warnings. */ + cond_resched(); + } + } +} + +static void strtomem_test(struct kunit *test) +{ + static const char input[sizeof(unsigned long)] = "hi"; + static const char truncate[] = "this is too long"; + struct { + unsigned long canary1; + unsigned char output[sizeof(unsigned long)] __nonstring; + unsigned long canary2; + } wrap; + + memset(&wrap, 0xFF, sizeof(wrap)); + KUNIT_EXPECT_EQ_MSG(test, wrap.canary1, ULONG_MAX, + "bad initial canary value"); + KUNIT_EXPECT_EQ_MSG(test, wrap.canary2, ULONG_MAX, + "bad initial canary value"); + + /* Check unpadded copy leaves surroundings untouched. */ + strtomem(wrap.output, input); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + KUNIT_EXPECT_EQ(test, wrap.output[0], input[0]); + KUNIT_EXPECT_EQ(test, wrap.output[1], input[1]); + for (size_t i = 2; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], 0xFF); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); + + /* Check truncated copy leaves surroundings untouched. */ + memset(&wrap, 0xFF, sizeof(wrap)); + strtomem(wrap.output, truncate); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + for (size_t i = 0; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], truncate[i]); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); + + /* Check padded copy leaves only string padded. */ + memset(&wrap, 0xFF, sizeof(wrap)); + strtomem_pad(wrap.output, input, 0xAA); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + KUNIT_EXPECT_EQ(test, wrap.output[0], input[0]); + KUNIT_EXPECT_EQ(test, wrap.output[1], input[1]); + for (size_t i = 2; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], 0xAA); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); + + /* Check truncated padded copy has no padding. */ + memset(&wrap, 0xFF, sizeof(wrap)); + strtomem(wrap.output, truncate); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + for (size_t i = 0; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], truncate[i]); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); +} + static struct kunit_case memcpy_test_cases[] = { KUNIT_CASE(memset_test), KUNIT_CASE(memcpy_test), + KUNIT_CASE(memcpy_large_test), KUNIT_CASE(memmove_test), + KUNIT_CASE(memmove_large_test), + KUNIT_CASE(memmove_overlap_test), + KUNIT_CASE(strtomem_test), {} }; diff --git a/lib/net_utils.c b/lib/net_utils.c index af525353395d..c17201df3d08 100644 --- a/lib/net_utils.c +++ b/lib/net_utils.c @@ -6,10 +6,11 @@ bool mac_pton(const char *s, u8 *mac) { + size_t maxlen = 3 * ETH_ALEN - 1; int i; /* XX:XX:XX:XX:XX:XX */ - if (strlen(s) < 3 * ETH_ALEN - 1) + if (strnlen(s, maxlen) < maxlen) return false; /* Don't dirty result unless string is valid MAC. */ diff --git a/lib/nlattr.c b/lib/nlattr.c index 86029ad5ead4..9055e8b4d144 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -124,10 +124,12 @@ void nla_get_range_unsigned(const struct nla_policy *pt, range->max = U8_MAX; break; case NLA_U16: + case NLA_BE16: case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: + case NLA_BE32: range->max = U32_MAX; break; case NLA_U64: @@ -178,12 +180,20 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, value = nla_get_u32(nla); break; case NLA_U64: + value = nla_get_u64(nla); + break; case NLA_MSECS: value = nla_get_u64(nla); break; case NLA_BINARY: value = nla_len(nla); break; + case NLA_BE16: + value = ntohs(nla_get_be16(nla)); + break; + case NLA_BE32: + value = ntohl(nla_get_be32(nla)); + break; default: return -EINVAL; } @@ -311,6 +321,8 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_U64: case NLA_MSECS: case NLA_BINARY: + case NLA_BE16: + case NLA_BE32: return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: @@ -634,7 +646,7 @@ EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determine the max. length of a policy - * @policy: policy to use + * @p: policy to use * @n: number of policies * * Determines the max. length of the policy. It is currently used diff --git a/lib/notifier-error-inject.c b/lib/notifier-error-inject.c index 21016b32d313..2b24ea6c9497 100644 --- a/lib/notifier-error-inject.c +++ b/lib/notifier-error-inject.c @@ -15,7 +15,7 @@ static int debugfs_errno_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_errno, debugfs_errno_get, debugfs_errno_set, +DEFINE_SIMPLE_ATTRIBUTE_SIGNED(fops_errno, debugfs_errno_get, debugfs_errno_set, "%lld\n"); static struct dentry *debugfs_create_errno(const char *name, umode_t mode, diff --git a/lib/oid_registry.c b/lib/oid_registry.c index e592d48b1974..fe6705cfd780 100644 --- a/lib/oid_registry.c +++ b/lib/oid_registry.c @@ -146,7 +146,6 @@ int sprint_oid(const void *data, size_t datasize, char *buffer, size_t bufsize) bufsize -= count; while (v < end) { - num = 0; n = *v++; if (!(n & 0x80)) { num = n; diff --git a/lib/once.c b/lib/once.c index 59149bf3bfb4..2c306f0e891e 100644 --- a/lib/once.c +++ b/lib/once.c @@ -66,3 +66,33 @@ void __do_once_done(bool *done, struct static_key_true *once_key, once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); + +static DEFINE_MUTEX(once_mutex); + +bool __do_once_sleepable_start(bool *done) + __acquires(once_mutex) +{ + mutex_lock(&once_mutex); + if (*done) { + mutex_unlock(&once_mutex); + /* Keep sparse happy by restoring an even lock count on + * this mutex. In case we return here, we don't call into + * __do_once_done but return early in the DO_ONCE_SLEEPABLE() macro. + */ + __acquire(once_mutex); + return false; + } + + return true; +} +EXPORT_SYMBOL(__do_once_sleepable_start); + +void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, + struct module *mod) + __releases(once_mutex) +{ + *done = true; + mutex_unlock(&once_mutex); + once_disable_jump(once_key, mod); +} +EXPORT_SYMBOL(__do_once_sleepable_done); diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 7e3e43679b73..dcd3ba102db6 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /* * Test cases for arithmetic overflow checks. See: - * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests + * "Running tests with kunit_tool" at Documentation/dev-tools/kunit/start.rst * ./tools/testing/kunit/kunit.py run overflow [--raw_output] */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -16,12 +16,43 @@ #include <linux/types.h> #include <linux/vmalloc.h> -#define DEFINE_TEST_ARRAY(t) \ - static const struct test_ ## t { \ - t a, b; \ - t sum, diff, prod; \ - bool s_of, d_of, p_of; \ - } t ## _tests[] +#define SKIP(cond, reason) do { \ + if (cond) { \ + kunit_skip(test, reason); \ + return; \ + } \ +} while (0) + +/* + * Clang 11 and earlier generate unwanted libcalls for signed output + * on unsigned input. + */ +#if defined(CONFIG_CC_IS_CLANG) && __clang_major__ <= 11 +# define SKIP_SIGN_MISMATCH(t) SKIP(t, "Clang 11 unwanted libcalls") +#else +# define SKIP_SIGN_MISMATCH(t) do { } while (0) +#endif + +/* + * Clang 13 and earlier generate unwanted libcalls for 64-bit tests on + * 32-bit hosts. + */ +#if defined(CONFIG_CC_IS_CLANG) && __clang_major__ <= 13 && \ + BITS_PER_LONG != 64 +# define SKIP_64_ON_32(t) SKIP(t, "Clang 13 unwanted libcalls") +#else +# define SKIP_64_ON_32(t) do { } while (0) +#endif + +#define DEFINE_TEST_ARRAY_TYPED(t1, t2, t) \ + static const struct test_ ## t1 ## _ ## t2 ## __ ## t { \ + t1 a; \ + t2 b; \ + t sum, diff, prod; \ + bool s_of, d_of, p_of; \ + } t1 ## _ ## t2 ## __ ## t ## _tests[] + +#define DEFINE_TEST_ARRAY(t) DEFINE_TEST_ARRAY_TYPED(t, t, t) DEFINE_TEST_ARRAY(u8) = { {0, 0, 0, 0, 0, false, false, false}, @@ -91,7 +122,6 @@ DEFINE_TEST_ARRAY(u32) = { {-4U, 5U, 1U, -9U, -20U, true, false, true}, }; -#if BITS_PER_LONG == 64 DEFINE_TEST_ARRAY(u64) = { {0, 0, 0, 0, 0, false, false, false}, {1, 1, 2, 0, 1, false, false, false}, @@ -115,7 +145,6 @@ DEFINE_TEST_ARRAY(u64) = { false, true, false}, {-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true}, }; -#endif DEFINE_TEST_ARRAY(s8) = { {0, 0, 0, 0, 0, false, false, false}, @@ -191,7 +220,6 @@ DEFINE_TEST_ARRAY(s32) = { {S32_MAX, S32_MAX, -2, 0, 1, true, false, true}, }; -#if BITS_PER_LONG == 64 DEFINE_TEST_ARRAY(s64) = { {0, 0, 0, 0, 0, false, false, false}, @@ -220,24 +248,29 @@ DEFINE_TEST_ARRAY(s64) = { {-128, -1, -129, -127, 128, false, false, false}, {0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false}, }; -#endif -#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ - t _r; \ - bool _of; \ - \ - _of = check_ ## op ## _overflow(a, b, &_r); \ - KUNIT_EXPECT_EQ_MSG(test, _of, of, \ +#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ + int _a_orig = a, _a_bump = a + 1; \ + int _b_orig = b, _b_bump = b + 1; \ + bool _of; \ + t _r; \ + \ + _of = check_ ## op ## _overflow(a, b, &_r); \ + KUNIT_EXPECT_EQ_MSG(test, _of, of, \ "expected "fmt" "sym" "fmt" to%s overflow (type %s)\n", \ - a, b, of ? "" : " not", #t); \ - KUNIT_EXPECT_EQ_MSG(test, _r, r, \ + a, b, of ? "" : " not", #t); \ + KUNIT_EXPECT_EQ_MSG(test, _r, r, \ "expected "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \ - a, b, r, _r, #t); \ + a, b, r, _r, #t); \ + /* Check for internal macro side-effects. */ \ + _of = check_ ## op ## _overflow(_a_orig++, _b_orig++, &_r); \ + KUNIT_EXPECT_EQ_MSG(test, _a_orig, _a_bump, "Unexpected " #op " macro side-effect!\n"); \ + KUNIT_EXPECT_EQ_MSG(test, _b_orig, _b_bump, "Unexpected " #op " macro side-effect!\n"); \ } while (0) -#define DEFINE_TEST_FUNC(t, fmt) \ -static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \ -{ \ +#define DEFINE_TEST_FUNC_TYPED(n, t, fmt) \ +static void do_test_ ## n(struct kunit *test, const struct test_ ## n *p) \ +{ \ check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \ check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \ check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \ @@ -245,29 +278,59 @@ static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \ check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \ } \ \ -static void t ## _overflow_test(struct kunit *test) { \ +static void n ## _overflow_test(struct kunit *test) { \ unsigned i; \ \ - for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ - do_test_ ## t(test, &t ## _tests[i]); \ + SKIP_64_ON_32(__same_type(t, u64)); \ + SKIP_64_ON_32(__same_type(t, s64)); \ + SKIP_SIGN_MISMATCH(__same_type(n ## _tests[0].a, u32) && \ + __same_type(n ## _tests[0].b, u32) && \ + __same_type(n ## _tests[0].sum, int)); \ + \ + for (i = 0; i < ARRAY_SIZE(n ## _tests); ++i) \ + do_test_ ## n(test, &n ## _tests[i]); \ kunit_info(test, "%zu %s arithmetic tests finished\n", \ - ARRAY_SIZE(t ## _tests), #t); \ + ARRAY_SIZE(n ## _tests), #n); \ } +#define DEFINE_TEST_FUNC(t, fmt) \ + DEFINE_TEST_FUNC_TYPED(t ## _ ## t ## __ ## t, t, fmt) + DEFINE_TEST_FUNC(u8, "%d"); DEFINE_TEST_FUNC(s8, "%d"); DEFINE_TEST_FUNC(u16, "%d"); DEFINE_TEST_FUNC(s16, "%d"); DEFINE_TEST_FUNC(u32, "%u"); DEFINE_TEST_FUNC(s32, "%d"); -#if BITS_PER_LONG == 64 DEFINE_TEST_FUNC(u64, "%llu"); DEFINE_TEST_FUNC(s64, "%lld"); -#endif -static void overflow_shift_test(struct kunit *test) -{ - int count = 0; +DEFINE_TEST_ARRAY_TYPED(u32, u32, u8) = { + {0, 0, 0, 0, 0, false, false, false}, + {U8_MAX, 2, 1, U8_MAX - 2, U8_MAX - 1, true, false, true}, + {U8_MAX + 1, 0, 0, 0, 0, true, true, false}, +}; +DEFINE_TEST_FUNC_TYPED(u32_u32__u8, u8, "%d"); + +DEFINE_TEST_ARRAY_TYPED(u32, u32, int) = { + {0, 0, 0, 0, 0, false, false, false}, + {U32_MAX, 0, -1, -1, 0, true, true, false}, +}; +DEFINE_TEST_FUNC_TYPED(u32_u32__int, int, "%d"); + +DEFINE_TEST_ARRAY_TYPED(u8, u8, int) = { + {0, 0, 0, 0, 0, false, false, false}, + {U8_MAX, U8_MAX, 2 * U8_MAX, 0, U8_MAX * U8_MAX, false, false, false}, + {1, 2, 3, -1, 2, false, false, false}, +}; +DEFINE_TEST_FUNC_TYPED(u8_u8__int, int, "%d"); + +DEFINE_TEST_ARRAY_TYPED(int, int, u8) = { + {0, 0, 0, 0, 0, false, false, false}, + {1, 2, 3, U8_MAX, 2, false, true, false}, + {-1, 0, U8_MAX, U8_MAX, 0, true, true, false}, +}; +DEFINE_TEST_FUNC_TYPED(int_int__u8, u8, "%d"); /* Args are: value, shift, type, expected result, overflow expected */ #define TEST_ONE_SHIFT(a, s, t, expect, of) do { \ @@ -292,6 +355,10 @@ static void overflow_shift_test(struct kunit *test) count++; \ } while (0) +static void shift_sane_test(struct kunit *test) +{ + int count = 0; + /* Sane shifts. */ TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); @@ -334,6 +401,13 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0, 30, s32, 0, false); TEST_ONE_SHIFT(0, 62, s64, 0, false); + kunit_info(test, "%d sane shift tests finished\n", count); +} + +static void shift_overflow_test(struct kunit *test) +{ + int count = 0; + /* Overflow: shifted the bit off the end. */ TEST_ONE_SHIFT(1, 8, u8, 0, true); TEST_ONE_SHIFT(1, 16, u16, 0, true); @@ -381,6 +455,13 @@ static void overflow_shift_test(struct kunit *test) /* 0100000100001000001000000010000001000010000001000100010001001011 */ TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); + kunit_info(test, "%d overflow shift tests finished\n", count); +} + +static void shift_truncate_test(struct kunit *test) +{ + int count = 0; + /* Overflow: values larger than destination type. */ TEST_ONE_SHIFT(0x100, 0, u8, 0, true); TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); @@ -392,6 +473,33 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); + /* Overflow: shifted at or beyond entire type's bit width. */ + TEST_ONE_SHIFT(0, 8, u8, 0, true); + TEST_ONE_SHIFT(0, 9, u8, 0, true); + TEST_ONE_SHIFT(0, 8, s8, 0, true); + TEST_ONE_SHIFT(0, 9, s8, 0, true); + TEST_ONE_SHIFT(0, 16, u16, 0, true); + TEST_ONE_SHIFT(0, 17, u16, 0, true); + TEST_ONE_SHIFT(0, 16, s16, 0, true); + TEST_ONE_SHIFT(0, 17, s16, 0, true); + TEST_ONE_SHIFT(0, 32, u32, 0, true); + TEST_ONE_SHIFT(0, 33, u32, 0, true); + TEST_ONE_SHIFT(0, 32, int, 0, true); + TEST_ONE_SHIFT(0, 33, int, 0, true); + TEST_ONE_SHIFT(0, 32, s32, 0, true); + TEST_ONE_SHIFT(0, 33, s32, 0, true); + TEST_ONE_SHIFT(0, 64, u64, 0, true); + TEST_ONE_SHIFT(0, 65, u64, 0, true); + TEST_ONE_SHIFT(0, 64, s64, 0, true); + TEST_ONE_SHIFT(0, 65, s64, 0, true); + + kunit_info(test, "%d truncate shift tests finished\n", count); +} + +static void shift_nonsense_test(struct kunit *test) +{ + int count = 0; + /* Nonsense: negative initial value. */ TEST_ONE_SHIFT(-1, 0, s8, 0, true); TEST_ONE_SHIFT(-1, 0, u8, 0, true); @@ -416,26 +524,6 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0, -30, s64, 0, true); TEST_ONE_SHIFT(0, -30, u64, 0, true); - /* Overflow: shifted at or beyond entire type's bit width. */ - TEST_ONE_SHIFT(0, 8, u8, 0, true); - TEST_ONE_SHIFT(0, 9, u8, 0, true); - TEST_ONE_SHIFT(0, 8, s8, 0, true); - TEST_ONE_SHIFT(0, 9, s8, 0, true); - TEST_ONE_SHIFT(0, 16, u16, 0, true); - TEST_ONE_SHIFT(0, 17, u16, 0, true); - TEST_ONE_SHIFT(0, 16, s16, 0, true); - TEST_ONE_SHIFT(0, 17, s16, 0, true); - TEST_ONE_SHIFT(0, 32, u32, 0, true); - TEST_ONE_SHIFT(0, 33, u32, 0, true); - TEST_ONE_SHIFT(0, 32, int, 0, true); - TEST_ONE_SHIFT(0, 33, int, 0, true); - TEST_ONE_SHIFT(0, 32, s32, 0, true); - TEST_ONE_SHIFT(0, 33, s32, 0, true); - TEST_ONE_SHIFT(0, 64, u64, 0, true); - TEST_ONE_SHIFT(0, 65, u64, 0, true); - TEST_ONE_SHIFT(0, 64, s64, 0, true); - TEST_ONE_SHIFT(0, 65, s64, 0, true); - /* * Corner case: for unsigned types, we fail when we've shifted * through the entire width of bits. For signed types, we might @@ -451,9 +539,9 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0, 31, s32, 0, false); TEST_ONE_SHIFT(0, 63, s64, 0, false); - kunit_info(test, "%d shift tests finished\n", count); -#undef TEST_ONE_SHIFT + kunit_info(test, "%d nonsense shift tests finished\n", count); } +#undef TEST_ONE_SHIFT /* * Deal with the various forms of allocator arguments. See comments above @@ -648,21 +736,406 @@ static void overflow_size_helpers_test(struct kunit *test) #undef check_one_size_helper } -static struct kunit_case overflow_test_cases[] = { - KUNIT_CASE(u8_overflow_test), - KUNIT_CASE(s8_overflow_test), - KUNIT_CASE(u16_overflow_test), - KUNIT_CASE(s16_overflow_test), - KUNIT_CASE(u32_overflow_test), - KUNIT_CASE(s32_overflow_test), -/* Clang 13 and earlier generate unwanted libcalls on 32-bit. */ +static void overflows_type_test(struct kunit *test) +{ + int count = 0; + unsigned int var; + +#define __TEST_OVERFLOWS_TYPE(func, arg1, arg2, of) do { \ + bool __of = func(arg1, arg2); \ + KUNIT_EXPECT_EQ_MSG(test, __of, of, \ + "expected " #func "(" #arg1 ", " #arg2 " to%s overflow\n",\ + of ? "" : " not"); \ + count++; \ +} while (0) + +/* Args are: first type, second type, value, overflow expected */ +#define TEST_OVERFLOWS_TYPE(__t1, __t2, v, of) do { \ + __t1 t1 = (v); \ + __t2 t2; \ + __TEST_OVERFLOWS_TYPE(__overflows_type, t1, t2, of); \ + __TEST_OVERFLOWS_TYPE(__overflows_type, t1, __t2, of); \ + __TEST_OVERFLOWS_TYPE(__overflows_type_constexpr, t1, t2, of); \ + __TEST_OVERFLOWS_TYPE(__overflows_type_constexpr, t1, __t2, of);\ +} while (0) + + TEST_OVERFLOWS_TYPE(u8, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u8, u16, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u8, s8, U8_MAX, true); + TEST_OVERFLOWS_TYPE(u8, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u8, s8, (u8)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u8, s16, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u8, -1, true); + TEST_OVERFLOWS_TYPE(s8, u8, S8_MIN, true); + TEST_OVERFLOWS_TYPE(s8, u16, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u16, -1, true); + TEST_OVERFLOWS_TYPE(s8, u16, S8_MIN, true); + TEST_OVERFLOWS_TYPE(s8, u32, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u32, -1, true); + TEST_OVERFLOWS_TYPE(s8, u32, S8_MIN, true); #if BITS_PER_LONG == 64 - KUNIT_CASE(u64_overflow_test), - KUNIT_CASE(s64_overflow_test), + TEST_OVERFLOWS_TYPE(s8, u64, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, u64, -1, true); + TEST_OVERFLOWS_TYPE(s8, u64, S8_MIN, true); #endif - KUNIT_CASE(overflow_shift_test), + TEST_OVERFLOWS_TYPE(s8, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s8, s16, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s8, s16, S8_MIN, false); + TEST_OVERFLOWS_TYPE(u16, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u16, u8, (u16)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u16, u8, U16_MAX, true); + TEST_OVERFLOWS_TYPE(u16, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u16, s8, (u16)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u16, s8, U16_MAX, true); + TEST_OVERFLOWS_TYPE(u16, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(u16, s16, (u16)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u16, s16, U16_MAX, true); + TEST_OVERFLOWS_TYPE(u16, u32, U16_MAX, false); + TEST_OVERFLOWS_TYPE(u16, s32, U16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u8, (s16)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s16, u8, -1, true); + TEST_OVERFLOWS_TYPE(s16, u8, S16_MIN, true); + TEST_OVERFLOWS_TYPE(s16, u16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u16, -1, true); + TEST_OVERFLOWS_TYPE(s16, u16, S16_MIN, true); + TEST_OVERFLOWS_TYPE(s16, u32, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u32, -1, true); + TEST_OVERFLOWS_TYPE(s16, u32, S16_MIN, true); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s16, u64, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, u64, -1, true); + TEST_OVERFLOWS_TYPE(s16, u64, S16_MIN, true); +#endif + TEST_OVERFLOWS_TYPE(s16, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s16, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s16, s8, (s16)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s16, s8, (s16)S8_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s16, s8, S16_MAX, true); + TEST_OVERFLOWS_TYPE(s16, s8, S16_MIN, true); + TEST_OVERFLOWS_TYPE(s16, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, s16, S16_MIN, false); + TEST_OVERFLOWS_TYPE(s16, s32, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s16, s32, S16_MIN, false); + TEST_OVERFLOWS_TYPE(u32, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u32, u8, (u32)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, u8, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s8, (u32)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, s8, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(u32, u16, U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, u16, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s16, (u32)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u32, s16, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, u32, U32_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s32, U32_MAX, true); + TEST_OVERFLOWS_TYPE(u32, s32, (u32)S32_MAX + 1, true); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(u32, u64, U32_MAX, false); + TEST_OVERFLOWS_TYPE(u32, s64, U32_MAX, false); +#endif + TEST_OVERFLOWS_TYPE(s32, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u8, (s32)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, u16, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, u8, -1, true); + TEST_OVERFLOWS_TYPE(s32, u8, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u16, (s32)U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, u16, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, u16, -1, true); + TEST_OVERFLOWS_TYPE(s32, u16, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, u32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u32, -1, true); + TEST_OVERFLOWS_TYPE(s32, u32, S32_MIN, true); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s32, u64, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, u64, -1, true); + TEST_OVERFLOWS_TYPE(s32, u64, S32_MIN, true); +#endif + TEST_OVERFLOWS_TYPE(s32, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s32, s8, (s32)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, s8, (s32)S8_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s32, s8, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, s8, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s16, S16_MIN, false); + TEST_OVERFLOWS_TYPE(s32, s16, (s32)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s32, s16, (s32)S16_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s32, s16, S32_MAX, true); + TEST_OVERFLOWS_TYPE(s32, s16, S32_MIN, true); + TEST_OVERFLOWS_TYPE(s32, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s32, S32_MIN, false); +#if BITS_PER_LONG == 64 + TEST_OVERFLOWS_TYPE(s32, s64, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s32, s64, S32_MIN, false); + TEST_OVERFLOWS_TYPE(u64, u8, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(u64, u8, (u64)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, u16, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(u64, u16, (u64)U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, u32, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, u32, U32_MAX, false); + TEST_OVERFLOWS_TYPE(u64, u32, (u64)U32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, u64, U64_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s8, (u64)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, s8, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s16, (u64)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, s16, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s32, (u64)S32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(u64, s32, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s64, S64_MAX, false); + TEST_OVERFLOWS_TYPE(u64, s64, U64_MAX, true); + TEST_OVERFLOWS_TYPE(u64, s64, (u64)S64_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u8, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, u8, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u8, -1, true); + TEST_OVERFLOWS_TYPE(s64, u8, U8_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u8, (s64)U8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u16, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, u16, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u16, -1, true); + TEST_OVERFLOWS_TYPE(s64, u16, U16_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u16, (s64)U16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u32, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, u32, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u32, -1, true); + TEST_OVERFLOWS_TYPE(s64, u32, U32_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u32, (s64)U32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, u64, S64_MAX, false); + TEST_OVERFLOWS_TYPE(s64, u64, S64_MIN, true); + TEST_OVERFLOWS_TYPE(s64, u64, -1, true); + TEST_OVERFLOWS_TYPE(s64, s8, S8_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s8, S8_MIN, false); + TEST_OVERFLOWS_TYPE(s64, s8, (s64)S8_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, s8, (s64)S8_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s64, s8, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, s16, S16_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s16, S16_MIN, false); + TEST_OVERFLOWS_TYPE(s64, s16, (s64)S16_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, s16, (s64)S16_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s64, s16, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, s32, S32_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s32, S32_MIN, false); + TEST_OVERFLOWS_TYPE(s64, s32, (s64)S32_MAX + 1, true); + TEST_OVERFLOWS_TYPE(s64, s32, (s64)S32_MIN - 1, true); + TEST_OVERFLOWS_TYPE(s64, s32, S64_MAX, true); + TEST_OVERFLOWS_TYPE(s64, s64, S64_MAX, false); + TEST_OVERFLOWS_TYPE(s64, s64, S64_MIN, false); +#endif + + /* Check for macro side-effects. */ + var = INT_MAX - 1; + __TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, true); + var = INT_MAX - 1; + __TEST_OVERFLOWS_TYPE(overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(overflows_type, var++, int, false); + __TEST_OVERFLOWS_TYPE(overflows_type, var++, int, true); + + kunit_info(test, "%d overflows_type() tests finished\n", count); +#undef TEST_OVERFLOWS_TYPE +#undef __TEST_OVERFLOWS_TYPE +} + +static void same_type_test(struct kunit *test) +{ + int count = 0; + int var; + +#define TEST_SAME_TYPE(t1, t2, same) do { \ + typeof(t1) __t1h = type_max(t1); \ + typeof(t1) __t1l = type_min(t1); \ + typeof(t2) __t2h = type_max(t2); \ + typeof(t2) __t2l = type_min(t2); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t1, __t1h)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t1, __t1l)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t1h, t1)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t1l, t1)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t2, __t2h)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(t2, __t2l)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t2h, t2)); \ + KUNIT_EXPECT_EQ(test, true, __same_type(__t2l, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t1, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t2, __t1h)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t2, __t1l)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t1h, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t1l, t2)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t1, __t2h)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(t1, __t2l)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t2h, t1)); \ + KUNIT_EXPECT_EQ(test, same, __same_type(__t2l, t1)); \ +} while (0) + +#if BITS_PER_LONG == 64 +# define TEST_SAME_TYPE64(base, t, m) TEST_SAME_TYPE(base, t, m) +#else +# define TEST_SAME_TYPE64(base, t, m) do { } while (0) +#endif + +#define TEST_TYPE_SETS(base, mu8, mu16, mu32, ms8, ms16, ms32, mu64, ms64) \ +do { \ + TEST_SAME_TYPE(base, u8, mu8); \ + TEST_SAME_TYPE(base, u16, mu16); \ + TEST_SAME_TYPE(base, u32, mu32); \ + TEST_SAME_TYPE(base, s8, ms8); \ + TEST_SAME_TYPE(base, s16, ms16); \ + TEST_SAME_TYPE(base, s32, ms32); \ + TEST_SAME_TYPE64(base, u64, mu64); \ + TEST_SAME_TYPE64(base, s64, ms64); \ +} while (0) + + TEST_TYPE_SETS(u8, true, false, false, false, false, false, false, false); + TEST_TYPE_SETS(u16, false, true, false, false, false, false, false, false); + TEST_TYPE_SETS(u32, false, false, true, false, false, false, false, false); + TEST_TYPE_SETS(s8, false, false, false, true, false, false, false, false); + TEST_TYPE_SETS(s16, false, false, false, false, true, false, false, false); + TEST_TYPE_SETS(s32, false, false, false, false, false, true, false, false); +#if BITS_PER_LONG == 64 + TEST_TYPE_SETS(u64, false, false, false, false, false, false, true, false); + TEST_TYPE_SETS(s64, false, false, false, false, false, false, false, true); +#endif + + /* Check for macro side-effects. */ + var = 4; + KUNIT_EXPECT_EQ(test, var, 4); + KUNIT_EXPECT_TRUE(test, __same_type(var++, int)); + KUNIT_EXPECT_EQ(test, var, 4); + KUNIT_EXPECT_TRUE(test, __same_type(int, var++)); + KUNIT_EXPECT_EQ(test, var, 4); + KUNIT_EXPECT_TRUE(test, __same_type(var++, var++)); + KUNIT_EXPECT_EQ(test, var, 4); + + kunit_info(test, "%d __same_type() tests finished\n", count); + +#undef TEST_TYPE_SETS +#undef TEST_SAME_TYPE64 +#undef TEST_SAME_TYPE +} + +static void castable_to_type_test(struct kunit *test) +{ + int count = 0; + +#define TEST_CASTABLE_TO_TYPE(arg1, arg2, pass) do { \ + bool __pass = castable_to_type(arg1, arg2); \ + KUNIT_EXPECT_EQ_MSG(test, __pass, pass, \ + "expected castable_to_type(" #arg1 ", " #arg2 ") to%s pass\n",\ + pass ? "" : " not"); \ + count++; \ +} while (0) + + TEST_CASTABLE_TO_TYPE(16, u8, true); + TEST_CASTABLE_TO_TYPE(16, u16, true); + TEST_CASTABLE_TO_TYPE(16, u32, true); + TEST_CASTABLE_TO_TYPE(16, s8, true); + TEST_CASTABLE_TO_TYPE(16, s16, true); + TEST_CASTABLE_TO_TYPE(16, s32, true); + TEST_CASTABLE_TO_TYPE(-16, s8, true); + TEST_CASTABLE_TO_TYPE(-16, s16, true); + TEST_CASTABLE_TO_TYPE(-16, s32, true); +#if BITS_PER_LONG == 64 + TEST_CASTABLE_TO_TYPE(16, u64, true); + TEST_CASTABLE_TO_TYPE(-16, s64, true); +#endif + +#define TEST_CASTABLE_TO_TYPE_VAR(width) do { \ + u ## width u ## width ## var = 0; \ + s ## width s ## width ## var = 0; \ + \ + /* Constant expressions that fit types. */ \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), u ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_min(u ## width), u ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), u ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(type_min(u ## width), u ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(type_max(s ## width), s ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_min(s ## width), s ## width, true); \ + TEST_CASTABLE_TO_TYPE(type_max(s ## width), s ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(type_min(u ## width), s ## width ## var, true); \ + /* Constant expressions that do not fit types. */ \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), s ## width, false); \ + TEST_CASTABLE_TO_TYPE(type_max(u ## width), s ## width ## var, false); \ + TEST_CASTABLE_TO_TYPE(type_min(s ## width), u ## width, false); \ + TEST_CASTABLE_TO_TYPE(type_min(s ## width), u ## width ## var, false); \ + /* Non-constant expression with mismatched type. */ \ + TEST_CASTABLE_TO_TYPE(s ## width ## var, u ## width, false); \ + TEST_CASTABLE_TO_TYPE(u ## width ## var, s ## width, false); \ +} while (0) + +#define TEST_CASTABLE_TO_TYPE_RANGE(width) do { \ + unsigned long big = U ## width ## _MAX; \ + signed long small = S ## width ## _MIN; \ + u ## width u ## width ## var = 0; \ + s ## width s ## width ## var = 0; \ + \ + /* Constant expression in range. */ \ + TEST_CASTABLE_TO_TYPE(U ## width ## _MAX, u ## width, true); \ + TEST_CASTABLE_TO_TYPE(U ## width ## _MAX, u ## width ## var, true); \ + TEST_CASTABLE_TO_TYPE(S ## width ## _MIN, s ## width, true); \ + TEST_CASTABLE_TO_TYPE(S ## width ## _MIN, s ## width ## var, true); \ + /* Constant expression out of range. */ \ + TEST_CASTABLE_TO_TYPE((unsigned long)U ## width ## _MAX + 1, u ## width, false); \ + TEST_CASTABLE_TO_TYPE((unsigned long)U ## width ## _MAX + 1, u ## width ## var, false); \ + TEST_CASTABLE_TO_TYPE((signed long)S ## width ## _MIN - 1, s ## width, false); \ + TEST_CASTABLE_TO_TYPE((signed long)S ## width ## _MIN - 1, s ## width ## var, false); \ + /* Non-constant expression with mismatched type. */ \ + TEST_CASTABLE_TO_TYPE(big, u ## width, false); \ + TEST_CASTABLE_TO_TYPE(big, u ## width ## var, false); \ + TEST_CASTABLE_TO_TYPE(small, s ## width, false); \ + TEST_CASTABLE_TO_TYPE(small, s ## width ## var, false); \ +} while (0) + + TEST_CASTABLE_TO_TYPE_VAR(8); + TEST_CASTABLE_TO_TYPE_VAR(16); + TEST_CASTABLE_TO_TYPE_VAR(32); +#if BITS_PER_LONG == 64 + TEST_CASTABLE_TO_TYPE_VAR(64); +#endif + + TEST_CASTABLE_TO_TYPE_RANGE(8); + TEST_CASTABLE_TO_TYPE_RANGE(16); +#if BITS_PER_LONG == 64 + TEST_CASTABLE_TO_TYPE_RANGE(32); +#endif + kunit_info(test, "%d castable_to_type() tests finished\n", count); + +#undef TEST_CASTABLE_TO_TYPE_RANGE +#undef TEST_CASTABLE_TO_TYPE_VAR +#undef TEST_CASTABLE_TO_TYPE +} + +static struct kunit_case overflow_test_cases[] = { + KUNIT_CASE(u8_u8__u8_overflow_test), + KUNIT_CASE(s8_s8__s8_overflow_test), + KUNIT_CASE(u16_u16__u16_overflow_test), + KUNIT_CASE(s16_s16__s16_overflow_test), + KUNIT_CASE(u32_u32__u32_overflow_test), + KUNIT_CASE(s32_s32__s32_overflow_test), + KUNIT_CASE(u64_u64__u64_overflow_test), + KUNIT_CASE(s64_s64__s64_overflow_test), + KUNIT_CASE(u32_u32__int_overflow_test), + KUNIT_CASE(u32_u32__u8_overflow_test), + KUNIT_CASE(u8_u8__int_overflow_test), + KUNIT_CASE(int_int__u8_overflow_test), + KUNIT_CASE(shift_sane_test), + KUNIT_CASE(shift_overflow_test), + KUNIT_CASE(shift_truncate_test), + KUNIT_CASE(shift_nonsense_test), KUNIT_CASE(overflow_allocation_test), KUNIT_CASE(overflow_size_helpers_test), + KUNIT_CASE(overflows_type_test), + KUNIT_CASE(same_type_test), + KUNIT_CASE(castable_to_type_test), {} }; diff --git a/lib/packing.c b/lib/packing.c index 9a72f4bbf0e2..a96169237ae6 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -7,6 +7,7 @@ #include <linux/bitops.h> #include <linux/errno.h> #include <linux/types.h> +#include <linux/bitrev.h> static int get_le_offset(int offset) { @@ -29,19 +30,6 @@ static int get_reverse_lsw32_offset(int offset, size_t len) return word_index * 4 + offset; } -static u64 bit_reverse(u64 val, unsigned int width) -{ - u64 new_val = 0; - unsigned int bit; - unsigned int i; - - for (i = 0; i < width; i++) { - bit = (val & (1 << i)) != 0; - new_val |= (bit << (width - i - 1)); - } - return new_val; -} - static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, int *box_end_bit, u8 *box_mask) { @@ -49,7 +37,7 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, int new_box_start_bit, new_box_end_bit; *to_write >>= *box_end_bit; - *to_write = bit_reverse(*to_write, box_bit_width); + *to_write = bitrev8(*to_write) >> (8 - box_bit_width); *to_write <<= *box_end_bit; new_box_end_bit = box_bit_width - *box_start_bit - 1; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index e5c5315da274..668f6aa6a75d 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu_hurry(&ref->data->rcu, + percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index ed610b75dc32..42f729c8e56c 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -117,11 +117,8 @@ void percpu_counter_sync(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_sync); -/* - * Add up all the per-cpu counts, return the result. This is a more accurate - * but much slower version of percpu_counter_read_positive() - */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, + const struct cpumask *cpu_mask) { s64 ret; int cpu; @@ -129,15 +126,35 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; - for_each_online_cpu(cpu) { + for_each_cpu(cpu, cpu_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } + +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +s64 __percpu_counter_sum(struct percpu_counter *fbc) +{ + return __percpu_counter_sum_mask(fbc, cpu_online_mask); +} EXPORT_SYMBOL(__percpu_counter_sum); +/* + * This is slower version of percpu_counter_sum as it traverses all possible + * cpus. Use this only in the cases where accurate data is needed in the + * presense of CPUs getting offlined. + */ +s64 percpu_counter_sum_all(struct percpu_counter *fbc) +{ + return __percpu_counter_sum_mask(fbc, cpu_possible_mask); +} +EXPORT_SYMBOL(percpu_counter_sum_all); + int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3c78e1e8b2ad..049ba132f7ef 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1029,7 +1029,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, { struct radix_tree_node *node, *parent; unsigned long maxindex; - int offset; + int offset = 0; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 39b74221f4a7..a22a05c9af8a 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,12 +18,10 @@ #else #include <linux/module.h> #include <linux/gfp.h> -#if !RAID6_USE_EMPTY_ZERO_PAGE /* In .bss so it's zeroed */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); EXPORT_SYMBOL(raid6_empty_zero_page); #endif -#endif struct raid6_calls raid6_call; EXPORT_SYMBOL_GPL(raid6_call); diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 9e597e1f91a4..b25dfc9c7759 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -13,8 +13,7 @@ #include <linux/raid/pq.h> #include <asm/fpu/api.h> - -asm(".include \"asm/vx-insn.h\"\n"); +#include <asm/vx-insn.h> #define NSIZE 16 diff --git a/lib/random32.c b/lib/random32.c index d5d9029362cb..32060b852668 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -47,7 +47,7 @@ * @state: pointer to state structure holding seeded state. * * This is used for pseudo-randomness with no outside seeding. - * For more random results, use prandom_u32(). + * For more random results, use get_random_u32(). */ u32 prandom_u32_state(struct rnd_state *state) { @@ -69,7 +69,7 @@ EXPORT_SYMBOL(prandom_u32_state); * @bytes: the requested number of bytes * * This is used for pseudo-randomness with no outside seeding. - * For more random results, use prandom_bytes(). + * For more random results, use get_random_bytes(). */ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { diff --git a/lib/ratelimit.c b/lib/ratelimit.c index e01a93f46f83..ce945c17980b 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -26,10 +26,16 @@ */ int ___ratelimit(struct ratelimit_state *rs, const char *func) { + /* Paired with WRITE_ONCE() in .proc_handler(). + * Changing two values seperately could be inconsistent + * and some message could be lost. (See: net_ratelimit_state). + */ + int interval = READ_ONCE(rs->interval); + int burst = READ_ONCE(rs->burst); unsigned long flags; int ret; - if (!rs->interval) + if (!interval) return 1; /* @@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (!rs->begin) rs->begin = jiffies; - if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (time_is_before_jiffies(rs->begin + interval)) { if (rs->missed) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { printk_deferred(KERN_WARNING @@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) rs->begin = jiffies; rs->printed = 0; } - if (rs->burst && rs->burst > rs->printed) { + if (burst && burst > rs->printed) { rs->printed++; ret = 1; } else { diff --git a/lib/reed_solomon/test_rslib.c b/lib/reed_solomon/test_rslib.c index d9d1c33aebda..75cb1adac884 100644 --- a/lib/reed_solomon/test_rslib.c +++ b/lib/reed_solomon/test_rslib.c @@ -164,7 +164,7 @@ static int get_rcw_we(struct rs_control *rs, struct wspace *ws, /* Load c with random data and encode */ for (i = 0; i < dlen; i++) - c[i] = prandom_u32() & nn; + c[i] = get_random_u32() & nn; memset(c + dlen, 0, nroots * sizeof(*c)); encode_rs16(rs, c, dlen, c + dlen, 0); @@ -178,12 +178,12 @@ static int get_rcw_we(struct rs_control *rs, struct wspace *ws, for (i = 0; i < errs; i++) { do { /* Error value must be nonzero */ - errval = prandom_u32() & nn; + errval = get_random_u32() & nn; } while (errval == 0); do { /* Must not choose the same location twice */ - errloc = prandom_u32() % len; + errloc = get_random_u32_below(len); } while (errlocs[errloc] != 0); errlocs[errloc] = 1; @@ -194,19 +194,19 @@ static int get_rcw_we(struct rs_control *rs, struct wspace *ws, for (i = 0; i < eras; i++) { do { /* Must not choose the same location twice */ - errloc = prandom_u32() % len; + errloc = get_random_u32_below(len); } while (errlocs[errloc] != 0); derrlocs[i] = errloc; - if (ewsc && (prandom_u32() & 1)) { + if (ewsc && get_random_u32_below(2)) { /* Erasure with the symbol intact */ errlocs[errloc] = 2; } else { /* Erasure with corrupted symbol */ do { /* Error value must be nonzero */ - errval = prandom_u32() & nn; + errval = get_random_u32() & nn; } while (errval == 0); errlocs[errloc] = 1; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e12bbfb240b8..6ae2ba8e06a2 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -231,6 +231,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, struct rhash_head *head, *next, *entry; struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; + unsigned long flags; if (new_tbl->nest) goto out; @@ -253,13 +254,14 @@ static int rhashtable_rehash_one(struct rhashtable *ht, new_hash = head_hashfn(ht, new_tbl, entry); - rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); + flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], + SINGLE_DEPTH_NESTING); head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); - rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry); + rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags); if (pprev) rcu_assign_pointer(*pprev, next); @@ -276,18 +278,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); + unsigned long flags; int err; if (!bkt) return 0; - rht_lock(old_tbl, bkt); + flags = rht_lock(old_tbl, bkt); while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; - rht_unlock(old_tbl, bkt); + rht_unlock(old_tbl, bkt, flags); return err; } @@ -590,6 +593,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct bucket_table *new_tbl; struct bucket_table *tbl; struct rhash_lock_head __rcu **bkt; + unsigned long flags; unsigned int hash; void *data; @@ -607,7 +611,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, @@ -615,7 +619,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } } while (!IS_ERR_OR_NULL(new_tbl)); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 29eb0484215a..1fcede228fa2 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -21,7 +21,7 @@ static int init_alloc_hint(struct sbitmap *sb, gfp_t flags) int i; for_each_possible_cpu(i) - *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32() % depth; + *per_cpu_ptr(sb->alloc_hint, i) = get_random_u32_below(depth); } return 0; } @@ -33,7 +33,7 @@ static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb, hint = this_cpu_read(*sb->alloc_hint); if (unlikely(hint >= depth)) { - hint = depth ? prandom_u32() % depth : 0; + hint = depth ? get_random_u32_below(depth) : 0; this_cpu_write(*sb->alloc_hint, hint); } @@ -434,6 +434,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); atomic_set(&sbq->ws_active, 0); + atomic_set(&sbq->completion_cnt, 0); + atomic_set(&sbq->wakeup_cnt, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { @@ -441,40 +443,21 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, return -ENOMEM; } - for (i = 0; i < SBQ_WAIT_QUEUES; i++) { + for (i = 0; i < SBQ_WAIT_QUEUES; i++) init_waitqueue_head(&sbq->ws[i].wait); - atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); - } return 0; } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, - unsigned int wake_batch) -{ - int i; - - if (sbq->wake_batch != wake_batch) { - WRITE_ONCE(sbq->wake_batch, wake_batch); - /* - * Pairs with the memory barrier in sbitmap_queue_wake_up() - * to ensure that the batch size is updated before the wait - * counts. - */ - smp_mb(); - for (i = 0; i < SBQ_WAIT_QUEUES; i++) - atomic_set(&sbq->ws[i].wait_cnt, 1); - } -} - static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; wake_batch = sbq_calc_wake_batch(sbq, depth); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); + if (sbq->wake_batch != wake_batch) + WRITE_ONCE(sbq->wake_batch, wake_batch); } void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, @@ -488,7 +471,8 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, min_batch, SBQ_WAKE_BATCH); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); + + WRITE_ONCE(sbq->wake_batch, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); @@ -533,21 +517,20 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, nr = find_first_zero_bit(&map->word, map_depth); if (nr + nr_tags <= map_depth) { atomic_long_t *ptr = (atomic_long_t *) &map->word; - int map_tags = min_t(int, nr_tags, map_depth); - unsigned long val, ret; + unsigned long val; - get_mask = ((1UL << map_tags) - 1) << nr; + get_mask = ((1UL << nr_tags) - 1) << nr; + val = READ_ONCE(map->word); do { - val = READ_ONCE(map->word); if ((val & ~get_mask) != val) goto next; - ret = atomic_long_cmpxchg(ptr, val, get_mask | val); - } while (ret != val); - get_mask = (get_mask & ~ret) >> nr; + } while (!atomic_long_try_cmpxchg(ptr, &val, + get_mask | val)); + get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); update_alloc_hint_after_get(sb, depth, hint, - *offset + map_tags - 1); + *offset + nr_tags - 1); return get_mask; } } @@ -577,74 +560,56 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, } EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); -static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) +static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { int i, wake_index; if (!atomic_read(&sbq->ws_active)) - return NULL; + return; wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; - if (waitqueue_active(&ws->wait)) { - if (wake_index != atomic_read(&sbq->wake_index)) - atomic_set(&sbq->wake_index, wake_index); - return ws; - } - - wake_index = sbq_index_inc(wake_index); - } - - return NULL; -} - -static bool __sbq_wake_up(struct sbitmap_queue *sbq) -{ - struct sbq_wait_state *ws; - unsigned int wake_batch; - int wait_cnt; - - ws = sbq_wake_ptr(sbq); - if (!ws) - return false; - - wait_cnt = atomic_dec_return(&ws->wait_cnt); - if (wait_cnt <= 0) { - int ret; - - wake_batch = READ_ONCE(sbq->wake_batch); - /* - * Pairs with the memory barrier in sbitmap_queue_resize() to - * ensure that we see the batch size update before the wait - * count is reset. + * Advance the index before checking the current queue. + * It improves fairness, by ensuring the queue doesn't + * need to be fully emptied before trying to wake up + * from the next one. */ - smp_mb__before_atomic(); + wake_index = sbq_index_inc(wake_index); /* - * For concurrent callers of this, the one that failed the - * atomic_cmpxhcg() race should call this function again - * to wakeup a new batch on a different 'ws'. + * It is sufficient to wake up at least one waiter to + * guarantee forward progress. */ - ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); - if (ret == wait_cnt) { - sbq_index_atomic_inc(&sbq->wake_index); - wake_up_nr(&ws->wait, wake_batch); - return false; - } - - return true; + if (waitqueue_active(&ws->wait) && + wake_up_nr(&ws->wait, nr)) + break; } - return false; + if (wake_index != atomic_read(&sbq->wake_index)) + atomic_set(&sbq->wake_index, wake_index); } -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { - while (__sbq_wake_up(sbq)) - ; + unsigned int wake_batch = READ_ONCE(sbq->wake_batch); + unsigned int wakeups; + + if (!atomic_read(&sbq->ws_active)) + return; + + atomic_add(nr, &sbq->completion_cnt); + wakeups = atomic_read(&sbq->wakeup_cnt); + + do { + if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch) + return; + } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt, + &wakeups, wakeups + wake_batch)); + + __sbitmap_queue_wake_up(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); @@ -683,7 +648,7 @@ void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, atomic_long_andnot(mask, (atomic_long_t *) addr); smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); + sbitmap_queue_wake_up(sbq, nr_tags); sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), tags[nr_tags - 1] - offset); } @@ -711,7 +676,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, * waiter. See the comment on waitqueue_active(). */ smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); + sbitmap_queue_wake_up(sbq, 1); sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); @@ -761,9 +726,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_puts(m, "ws={\n"); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[i]; - - seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", - atomic_read(&ws->wait_cnt), + seq_printf(m, "\t{.wait=%s},\n", waitqueue_active(&ws->wait) ? "active" : "inactive"); } seq_puts(m, "}\n"); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index c8c3d675845c..a0ad2a7959b5 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -410,6 +410,15 @@ static struct scatterlist *get_next_sg(struct sg_append_table *table, return new_sg; } +static bool pages_are_mergeable(struct page *a, struct page *b) +{ + if (page_to_pfn(a) != page_to_pfn(b) + 1) + return false; + if (!zone_device_pages_have_same_pgmap(a, b)) + return false; + return true; +} + /** * sg_alloc_append_table_from_pages - Allocate and initialize an append sg * table from an array of pages @@ -447,6 +456,7 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, unsigned int chunks, cur_page, seg_len, i, prv_len = 0; unsigned int added_nents = 0; struct scatterlist *s = sgt_append->prv; + struct page *last_pg; /* * The algorithm below requires max_segment to be aligned to PAGE_SIZE @@ -460,21 +470,17 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, return -EOPNOTSUPP; if (sgt_append->prv) { - unsigned long paddr = - (page_to_pfn(sg_page(sgt_append->prv)) * PAGE_SIZE + - sgt_append->prv->offset + sgt_append->prv->length) / - PAGE_SIZE; - if (WARN_ON(offset)) return -EINVAL; /* Merge contiguous pages into the last SG */ prv_len = sgt_append->prv->length; - while (n_pages && page_to_pfn(pages[0]) == paddr) { + last_pg = sg_page(sgt_append->prv); + while (n_pages && pages_are_mergeable(last_pg, pages[0])) { if (sgt_append->prv->length + PAGE_SIZE > max_segment) break; sgt_append->prv->length += PAGE_SIZE; - paddr++; + last_pg = pages[0]; pages++; n_pages--; } @@ -488,7 +494,7 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, for (i = 1; i < n_pages; i++) { seg_len += PAGE_SIZE; if (seg_len >= max_segment || - page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1) { + !pages_are_mergeable(pages[i], pages[i - 1])) { chunks++; seg_len = 0; } @@ -504,8 +510,7 @@ int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, for (j = cur_page + 1; j < n_pages; j++) { seg_len += PAGE_SIZE; if (seg_len >= max_segment || - page_to_pfn(pages[j]) != - page_to_pfn(pages[j - 1]) + 1) + !pages_are_mergeable(pages[j], pages[j - 1])) break; } diff --git a/lib/sg_pool.c b/lib/sg_pool.c index a0b1a52cd6f7..9bfe60ca3f37 100644 --- a/lib/sg_pool.c +++ b/lib/sg_pool.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/module.h> +#include <linux/init.h> #include <linux/scatterlist.h> #include <linux/mempool.h> #include <linux/slab.h> @@ -177,16 +177,4 @@ cleanup_sdb: return -ENOMEM; } -static __exit void sg_pool_exit(void) -{ - int i; - - for (i = 0; i < SG_MEMPOOL_NR; i++) { - struct sg_pool *sgp = sg_pools + i; - mempool_destroy(sgp->pool); - kmem_cache_destroy(sgp->slab); - } -} - -module_init(sg_pool_init); -module_exit(sg_pool_exit); +subsys_initcall(sg_pool_init); diff --git a/lib/show_mem.c b/lib/show_mem.c index 1c26c14ffbb9..0d7585cde2a6 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -8,13 +8,13 @@ #include <linux/mm.h> #include <linux/cma.h> -void show_mem(unsigned int filter, nodemask_t *nodemask) +void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, highmem = 0; printk("Mem-Info:\n"); - show_free_areas(filter, nodemask); + __show_free_areas(filter, nodemask, max_zone_idx); for_each_online_pgdat(pgdat) { int zoneid; diff --git a/lib/test_siphash.c b/lib/siphash_kunit.c index a96788d0141d..a3c697e8be35 100644 --- a/lib/test_siphash.c +++ b/lib/siphash_kunit.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <kunit/test.h> #include <linux/siphash.h> #include <linux/kernel.h> #include <linux/string.h> @@ -109,114 +110,88 @@ static const u32 test_vectors_hsiphash[64] = { }; #endif -static int __init siphash_test_init(void) +#define chk(hash, vector, fmt...) \ + KUNIT_EXPECT_EQ_MSG(test, hash, vector, fmt) + +static void siphash_test(struct kunit *test) { u8 in[64] __aligned(SIPHASH_ALIGNMENT); u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT); u8 i; - int ret = 0; for (i = 0; i < 64; ++i) { in[i] = i; in_unaligned[i + 1] = i; - if (siphash(in, i, &test_key_siphash) != - test_vectors_siphash[i]) { - pr_info("siphash self-test aligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - if (siphash(in_unaligned + 1, i, &test_key_siphash) != - test_vectors_siphash[i]) { - pr_info("siphash self-test unaligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - if (hsiphash(in, i, &test_key_hsiphash) != - test_vectors_hsiphash[i]) { - pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) != - test_vectors_hsiphash[i]) { - pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1); - ret = -EINVAL; - } - } - if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) != - test_vectors_siphash[8]) { - pr_info("siphash self-test 1u64: FAIL\n"); - ret = -EINVAL; - } - if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, - &test_key_siphash) != test_vectors_siphash[16]) { - pr_info("siphash self-test 2u64: FAIL\n"); - ret = -EINVAL; + chk(siphash(in, i, &test_key_siphash), + test_vectors_siphash[i], + "siphash self-test aligned %u: FAIL", i + 1); + chk(siphash(in_unaligned + 1, i, &test_key_siphash), + test_vectors_siphash[i], + "siphash self-test unaligned %u: FAIL", i + 1); + chk(hsiphash(in, i, &test_key_hsiphash), + test_vectors_hsiphash[i], + "hsiphash self-test aligned %u: FAIL", i + 1); + chk(hsiphash(in_unaligned + 1, i, &test_key_hsiphash), + test_vectors_hsiphash[i], + "hsiphash self-test unaligned %u: FAIL", i + 1); } - if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, - 0x1716151413121110ULL, &test_key_siphash) != - test_vectors_siphash[24]) { - pr_info("siphash self-test 3u64: FAIL\n"); - ret = -EINVAL; - } - if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + chk(siphash_1u64(0x0706050403020100ULL, &test_key_siphash), + test_vectors_siphash[8], + "siphash self-test 1u64: FAIL"); + chk(siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + &test_key_siphash), + test_vectors_siphash[16], + "siphash self-test 2u64: FAIL"); + chk(siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + 0x1716151413121110ULL, &test_key_siphash), + test_vectors_siphash[24], + "siphash self-test 3u64: FAIL"); + chk(siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, - &test_key_siphash) != test_vectors_siphash[32]) { - pr_info("siphash self-test 4u64: FAIL\n"); - ret = -EINVAL; - } - if (siphash_1u32(0x03020100U, &test_key_siphash) != - test_vectors_siphash[4]) { - pr_info("siphash self-test 1u32: FAIL\n"); - ret = -EINVAL; - } - if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) != - test_vectors_siphash[8]) { - pr_info("siphash self-test 2u32: FAIL\n"); - ret = -EINVAL; - } - if (siphash_3u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, &test_key_siphash) != - test_vectors_siphash[12]) { - pr_info("siphash self-test 3u32: FAIL\n"); - ret = -EINVAL; - } - if (siphash_4u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) != - test_vectors_siphash[16]) { - pr_info("siphash self-test 4u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) != - test_vectors_hsiphash[4]) { - pr_info("hsiphash self-test 1u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) != - test_vectors_hsiphash[8]) { - pr_info("hsiphash self-test 2u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_3u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, &test_key_hsiphash) != - test_vectors_hsiphash[12]) { - pr_info("hsiphash self-test 3u32: FAIL\n"); - ret = -EINVAL; - } - if (hsiphash_4u32(0x03020100U, 0x07060504U, - 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash) != - test_vectors_hsiphash[16]) { - pr_info("hsiphash self-test 4u32: FAIL\n"); - ret = -EINVAL; - } - if (!ret) - pr_info("self-tests: pass\n"); - return ret; + &test_key_siphash), + test_vectors_siphash[32], + "siphash self-test 4u64: FAIL"); + chk(siphash_1u32(0x03020100U, &test_key_siphash), + test_vectors_siphash[4], + "siphash self-test 1u32: FAIL"); + chk(siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash), + test_vectors_siphash[8], + "siphash self-test 2u32: FAIL"); + chk(siphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_siphash), + test_vectors_siphash[12], + "siphash self-test 3u32: FAIL"); + chk(siphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash), + test_vectors_siphash[16], + "siphash self-test 4u32: FAIL"); + chk(hsiphash_1u32(0x03020100U, &test_key_hsiphash), + test_vectors_hsiphash[4], + "hsiphash self-test 1u32: FAIL"); + chk(hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash), + test_vectors_hsiphash[8], + "hsiphash self-test 2u32: FAIL"); + chk(hsiphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_hsiphash), + test_vectors_hsiphash[12], + "hsiphash self-test 3u32: FAIL"); + chk(hsiphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash), + test_vectors_hsiphash[16], + "hsiphash self-test 4u32: FAIL"); } -static void __exit siphash_test_exit(void) -{ -} +static struct kunit_case siphash_test_cases[] = { + KUNIT_CASE(siphash_test), + {} +}; + +static struct kunit_suite siphash_test_suite = { + .name = "siphash", + .test_cases = siphash_test_cases, +}; -module_init(siphash_test_init); -module_exit(siphash_test_exit); +kunit_test_suite(siphash_test_suite); MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 7a0564d7cb7a..d4a3730b08fa 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <kunit/test.h> +#include <kunit/test-bug.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/module.h> @@ -9,10 +10,25 @@ static struct kunit_resource resource; static int slab_errors; +/* + * Wrapper function for kmem_cache_create(), which reduces 2 parameters: + * 'align' and 'ctor', and sets SLAB_SKIP_KFENCE flag to avoid getting an + * object from kfence pool, where the operation could be caught by both + * our test and kfence sanity check. + */ +static struct kmem_cache *test_kmem_cache_create(const char *name, + unsigned int size, slab_flags_t flags) +{ + struct kmem_cache *s = kmem_cache_create(name, size, 0, + (flags | SLAB_NO_USER_FLAGS), NULL); + s->flags |= SLAB_SKIP_KFENCE; + return s; +} + static void test_clobber_zone(struct kunit *test) { - struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0, - SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL); + struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_alloc", 64, + SLAB_RED_ZONE); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kasan_disable_current(); @@ -29,8 +45,8 @@ static void test_clobber_zone(struct kunit *test) #ifndef CONFIG_KASAN static void test_next_pointer(struct kunit *test) { - struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0, - SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); + struct kmem_cache *s = test_kmem_cache_create("TestSlub_next_ptr_free", + 64, SLAB_POISON); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); unsigned long tmp; unsigned long *ptr_addr; @@ -74,8 +90,8 @@ static void test_next_pointer(struct kunit *test) static void test_first_word(struct kunit *test) { - struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0, - SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); + struct kmem_cache *s = test_kmem_cache_create("TestSlub_1th_word_free", + 64, SLAB_POISON); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kmem_cache_free(s, p); @@ -89,8 +105,8 @@ static void test_first_word(struct kunit *test) static void test_clobber_50th_byte(struct kunit *test) { - struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0, - SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); + struct kmem_cache *s = test_kmem_cache_create("TestSlub_50th_word_free", + 64, SLAB_POISON); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kmem_cache_free(s, p); @@ -105,8 +121,8 @@ static void test_clobber_50th_byte(struct kunit *test) static void test_clobber_redzone_free(struct kunit *test) { - struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0, - SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL); + struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_free", 64, + SLAB_RED_ZONE); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kasan_disable_current(); @@ -120,6 +136,27 @@ static void test_clobber_redzone_free(struct kunit *test) kmem_cache_destroy(s); } +static void test_kmalloc_redzone_access(struct kunit *test) +{ + struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32, + SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); + u8 *p = kmalloc_trace(s, GFP_KERNEL, 18); + + kasan_disable_current(); + + /* Suppress the -Warray-bounds warning */ + OPTIMIZER_HIDE_VAR(p); + p[18] = 0xab; + p[19] = 0xab; + + validate_slab_cache(s); + KUNIT_EXPECT_EQ(test, 2, slab_errors); + + kasan_enable_current(); + kmem_cache_free(s, p); + kmem_cache_destroy(s); +} + static int test_init(struct kunit *test) { slab_errors = 0; @@ -139,6 +176,7 @@ static struct kunit_case test_cases[] = { #endif KUNIT_CASE(test_clobber_redzone_free), + KUNIT_CASE(test_kmalloc_redzone_access), {} }; diff --git a/lib/stackdepot.c b/lib/stackdepot.c index e73fda23388d..79e894cf8406 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -43,7 +43,8 @@ #define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ STACK_ALLOC_ALIGN) #define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ - STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS) + STACK_ALLOC_NULL_PROTECTION_BITS - \ + STACK_ALLOC_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) #define STACK_ALLOC_SLABS_CAP 8192 #define STACK_ALLOC_MAX_SLABS \ (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ @@ -56,6 +57,7 @@ union handle_parts { u32 slabindex : STACK_ALLOC_INDEX_BITS; u32 offset : STACK_ALLOC_OFFSET_BITS; u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; @@ -77,6 +79,14 @@ static int next_slab_inited; static size_t depot_offset; static DEFINE_RAW_SPINLOCK(depot_lock); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) +{ + union handle_parts parts = { .handle = handle }; + + return parts.extra; +} +EXPORT_SYMBOL(stack_depot_get_extra_bits); + static bool init_stack_slab(void **prealloc) { if (!*prealloc) @@ -140,6 +150,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->handle.slabindex = depot_index; stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; stack->handle.valid = 1; + stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); depot_offset += required_size; @@ -382,6 +393,7 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); * * @entries: Pointer to storage array * @nr_entries: Size of the storage array + * @extra_bits: Flags to store in unused bits of depot_stack_handle_t * @alloc_flags: Allocation gfp flags * @can_alloc: Allocate stack slabs (increased chance of failure if false) * @@ -393,6 +405,10 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); * If the stack trace in @entries is from an interrupt, only the portion up to * interrupt entry is saved. * + * Additional opaque flags can be passed in @extra_bits, stored in the unused + * bits of the stack handle, and retrieved using stack_depot_get_extra_bits() + * without calling stack_depot_fetch(). + * * Context: Any context, but setting @can_alloc to %false is required if * alloc_pages() cannot be used from the current context. Currently * this is the case from contexts where neither %GFP_ATOMIC nor @@ -402,10 +418,11 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, + unsigned int extra_bits, gfp_t alloc_flags, bool can_alloc) { struct stack_record *found = NULL, **bucket; - depot_stack_handle_t retval = 0; + union handle_parts retval = { .handle = 0 }; struct page *page = NULL; void *prealloc = NULL; unsigned long flags; @@ -489,9 +506,11 @@ exit: free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); } if (found) - retval = found->handle.handle; + retval.handle = found->handle.handle; fast_exit: - return retval; + retval.extra = extra_bits; + + return retval.handle; } EXPORT_SYMBOL_GPL(__stack_depot_save); @@ -511,6 +530,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags) { - return __stack_depot_save(entries, nr_entries, alloc_flags, true); + return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true); } EXPORT_SYMBOL_GPL(stack_depot_save); diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index 35c69aa425b2..4591d6cf5e01 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -3,7 +3,7 @@ * Test cases for compiler-based stack variable zeroing via * -ftrivial-auto-var-init={zero,pattern} or CONFIG_GCC_PLUGIN_STRUCTLEAK*. * For example, see: - * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests + * "Running tests with kunit_tool" at Documentation/dev-tools/kunit/start.rst * ./tools/testing/kunit/kunit.py run stackinit [--raw_output] \ * --make_option LLVM=1 \ * --kconfig_add CONFIG_INIT_STACK_ALL_ZERO=y diff --git a/lib/string.c b/lib/string.c index 6f334420f687..4fb566ea610f 100644 --- a/lib/string.c +++ b/lib/string.c @@ -76,11 +76,6 @@ EXPORT_SYMBOL(strcasecmp); #endif #ifndef __HAVE_ARCH_STRCPY -/** - * strcpy - Copy a %NUL terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - */ char *strcpy(char *dest, const char *src) { char *tmp = dest; @@ -93,19 +88,6 @@ EXPORT_SYMBOL(strcpy); #endif #ifndef __HAVE_ARCH_STRNCPY -/** - * strncpy - Copy a length-limited, C-string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @count: The maximum number of bytes to copy - * - * The result is not %NUL-terminated if the source exceeds - * @count bytes. - * - * In the case where the length of @src is less than that of - * count, the remainder of @dest will be padded with %NUL. - * - */ char *strncpy(char *dest, const char *src, size_t count) { char *tmp = dest; @@ -122,17 +104,6 @@ EXPORT_SYMBOL(strncpy); #endif #ifndef __HAVE_ARCH_STRLCPY -/** - * strlcpy - Copy a C-string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Compatible with ``*BSD``: the result is always a valid - * NUL-terminated string that fits in the buffer (unless, - * of course, the buffer size is zero). It does not pad - * out the result like strncpy() does. - */ size_t strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -148,30 +119,6 @@ EXPORT_SYMBOL(strlcpy); #endif #ifndef __HAVE_ARCH_STRSCPY -/** - * strscpy - Copy a C-string into a sized buffer - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @count: Size of destination buffer - * - * Copy the string, or as much of it as fits, into the dest buffer. The - * behavior is undefined if the string buffers overlap. The destination - * buffer is always NUL terminated, unless it's zero-sized. - * - * Preferred to strlcpy() since the API doesn't require reading memory - * from the src string beyond the specified "count" bytes, and since - * the return value is easier to error-check than strlcpy()'s. - * In addition, the implementation is robust to the string changing out - * from underneath it, unlike the current strlcpy() implementation. - * - * Preferred to strncpy() since it always returns a valid string, and - * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If zeroing is desired please use strscpy_pad(). - * - * Returns: - * * The number of characters copied (not including the trailing %NUL) - * * -E2BIG if count is 0 or @src was truncated. - */ ssize_t strscpy(char *dest, const char *src, size_t count) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; @@ -197,6 +144,14 @@ ssize_t strscpy(char *dest, const char *src, size_t count) max = 0; #endif + /* + * read_word_at_a_time() below may read uninitialized bytes after the + * trailing zero and use them in comparisons. Disable this optimization + * under KMSAN to prevent false positive reports. + */ + if (IS_ENABLED(CONFIG_KMSAN)) + max = 0; + while (max >= sizeof(unsigned long)) { unsigned long c, data; @@ -258,11 +213,6 @@ char *stpcpy(char *__restrict__ dest, const char *__restrict__ src) EXPORT_SYMBOL(stpcpy); #ifndef __HAVE_ARCH_STRCAT -/** - * strcat - Append one %NUL-terminated string to another - * @dest: The string to be appended to - * @src: The string to append to it - */ char *strcat(char *dest, const char *src) { char *tmp = dest; @@ -277,15 +227,6 @@ EXPORT_SYMBOL(strcat); #endif #ifndef __HAVE_ARCH_STRNCAT -/** - * strncat - Append a length-limited, C-string to another - * @dest: The string to be appended to - * @src: The string to append to it - * @count: The maximum numbers of bytes to copy - * - * Note that in contrast to strncpy(), strncat() ensures the result is - * terminated. - */ char *strncat(char *dest, const char *src, size_t count) { char *tmp = dest; @@ -306,12 +247,6 @@ EXPORT_SYMBOL(strncat); #endif #ifndef __HAVE_ARCH_STRLCAT -/** - * strlcat - Append a length-limited, C-string to another - * @dest: The string to be appended to - * @src: The string to append to it - * @count: The size of the destination buffer. - */ size_t strlcat(char *dest, const char *src, size_t count) { size_t dsize = strlen(dest); @@ -476,10 +411,6 @@ EXPORT_SYMBOL(strnchr); #endif #ifndef __HAVE_ARCH_STRLEN -/** - * strlen - Find the length of a string - * @s: The string to be sized - */ size_t strlen(const char *s) { const char *sc; @@ -492,11 +423,6 @@ EXPORT_SYMBOL(strlen); #endif #ifndef __HAVE_ARCH_STRNLEN -/** - * strnlen - Find the length of a length-limited string - * @s: The string to be sized - * @count: The maximum number of bytes to search - */ size_t strnlen(const char *s, size_t count) { const char *sc; diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 5ed3beb066e6..230020a2e076 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -131,6 +131,50 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, } EXPORT_SYMBOL(string_get_size); +/** + * parse_int_array_user - Split string into a sequence of integers + * @from: The user space buffer to read from + * @count: The maximum number of bytes to read + * @array: Returned pointer to sequence of integers + * + * On success @array is allocated and initialized with a sequence of + * integers extracted from the @from plus an additional element that + * begins the sequence and specifies the integers count. + * + * Caller takes responsibility for freeing @array when it is no longer + * needed. + */ +int parse_int_array_user(const char __user *from, size_t count, int **array) +{ + int *ints, nints; + char *buf; + int ret = 0; + + buf = memdup_user_nul(from, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + get_options(buf, 0, &nints); + if (!nints) { + ret = -ENOENT; + goto free_buf; + } + + ints = kcalloc(nints + 1, sizeof(*ints), GFP_KERNEL); + if (!ints) { + ret = -ENOMEM; + goto free_buf; + } + + get_options(buf, nints + 1, ints); + *array = ints; + +free_buf: + kfree(buf); + return ret; +} +EXPORT_SYMBOL(parse_int_array_user); + static bool unescape_space(char **src, char **dst) { char *p = *dst, *q = *src; diff --git a/lib/strscpy_kunit.c b/lib/strscpy_kunit.c new file mode 100644 index 000000000000..a6b6344354ed --- /dev/null +++ b/lib/strscpy_kunit.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Kernel module for testing 'strscpy' family of functions. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/string.h> + +/* + * tc() - Run a specific test case. + * @src: Source string, argument to strscpy_pad() + * @count: Size of destination buffer, argument to strscpy_pad() + * @expected: Expected return value from call to strscpy_pad() + * @terminator: 1 if there should be a terminating null byte 0 otherwise. + * @chars: Number of characters from the src string expected to be + * written to the dst buffer. + * @pad: Number of pad characters expected (in the tail of dst buffer). + * (@pad does not include the null terminator byte.) + * + * Calls strscpy_pad() and verifies the return value and state of the + * destination buffer after the call returns. + */ +static void tc(struct kunit *test, char *src, int count, int expected, + int chars, int terminator, int pad) +{ + int nr_bytes_poison; + int max_expected; + int max_count; + int written; + char buf[6]; + int index, i; + const char POISON = 'z'; + + KUNIT_ASSERT_TRUE_MSG(test, src != NULL, + "null source string not supported"); + + memset(buf, POISON, sizeof(buf)); + /* Future proofing test suite, validate args */ + max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ + max_expected = count - 1; /* Space for the null */ + + KUNIT_ASSERT_LE_MSG(test, count, max_count, + "count (%d) is too big (%d) ... aborting", count, max_count); + KUNIT_EXPECT_LE_MSG(test, expected, max_expected, + "expected (%d) is bigger than can possibly be returned (%d)", + expected, max_expected); + + written = strscpy_pad(buf, src, count); + KUNIT_ASSERT_EQ(test, written, expected); + + if (count && written == -E2BIG) { + KUNIT_ASSERT_EQ_MSG(test, 0, strncmp(buf, src, count - 1), + "buffer state invalid for -E2BIG"); + KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', + "too big string is not null terminated correctly"); + } + + for (i = 0; i < chars; i++) + KUNIT_ASSERT_EQ_MSG(test, buf[i], src[i], + "buf[i]==%c != src[i]==%c", buf[i], src[i]); + + if (terminator) + KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0', + "string is not null terminated correctly"); + + for (i = 0; i < pad; i++) { + index = chars + terminator + i; + KUNIT_ASSERT_EQ_MSG(test, buf[index], '\0', + "padding missing at index: %d", i); + } + + nr_bytes_poison = sizeof(buf) - chars - terminator - pad; + for (i = 0; i < nr_bytes_poison; i++) { + index = sizeof(buf) - 1 - i; /* Check from the end back */ + KUNIT_ASSERT_EQ_MSG(test, buf[index], POISON, + "poison value missing at index: %d", i); + } +} + +static void strscpy_test(struct kunit *test) +{ + char dest[8]; + + /* + * tc() uses a destination buffer of size 6 and needs at + * least 2 characters spare (one for null and one to check for + * overflow). This means we should only call tc() with + * strings up to a maximum of 4 characters long and 'count' + * should not exceed 4. To test with longer strings increase + * the buffer size in tc(). + */ + + /* tc(test, src, count, expected, chars, terminator, pad) */ + tc(test, "a", 0, -E2BIG, 0, 0, 0); + tc(test, "", 0, -E2BIG, 0, 0, 0); + + tc(test, "a", 1, -E2BIG, 0, 1, 0); + tc(test, "", 1, 0, 0, 1, 0); + + tc(test, "ab", 2, -E2BIG, 1, 1, 0); + tc(test, "a", 2, 1, 1, 1, 0); + tc(test, "", 2, 0, 0, 1, 1); + + tc(test, "abc", 3, -E2BIG, 2, 1, 0); + tc(test, "ab", 3, 2, 2, 1, 0); + tc(test, "a", 3, 1, 1, 1, 1); + tc(test, "", 3, 0, 0, 1, 2); + + tc(test, "abcd", 4, -E2BIG, 3, 1, 0); + tc(test, "abc", 4, 3, 3, 1, 0); + tc(test, "ab", 4, 2, 2, 1, 1); + tc(test, "a", 4, 1, 1, 1, 2); + tc(test, "", 4, 0, 0, 1, 3); + + /* Compile-time-known source strings. */ + KUNIT_EXPECT_EQ(test, strscpy(dest, "", ARRAY_SIZE(dest)), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 3), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 1), 0); + KUNIT_EXPECT_EQ(test, strscpy(dest, "", 0), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", ARRAY_SIZE(dest)), 5); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 3), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 1), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 0), -E2BIG); + KUNIT_EXPECT_EQ(test, strscpy(dest, "This is too long", ARRAY_SIZE(dest)), -E2BIG); +} + +static struct kunit_case strscpy_test_cases[] = { + KUNIT_CASE(strscpy_test), + {} +}; + +static struct kunit_suite strscpy_test_suite = { + .name = "strscpy", + .test_cases = strscpy_test_cases, +}; + +kunit_test_suite(strscpy_test_suite); + +MODULE_AUTHOR("Tobin C. Harding <tobin@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 437d8e6b7cb1..41d3447bc3b4 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -587,7 +587,7 @@ static int __init test_string_helpers_init(void) for (i = 0; i < UNESCAPE_ALL_MASK + 1; i++) test_string_unescape("unescape", i, false); test_string_unescape("unescape inplace", - get_random_int() % (UNESCAPE_ANY + 1), true); + get_random_u32_below(UNESCAPE_ANY + 1), true); /* Without dictionary */ for (i = 0; i < ESCAPE_ALL_MASK + 1; i++) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 98754ff9fe68..a8005ad3bd58 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -16,6 +16,8 @@ #include "../tools/testing/selftests/kselftest_module.h" +#define EXP1_IN_BITS (sizeof(exp1) * 8) + KSTM_MODULE_GLOBALS(); static char pbl_buffer[PAGE_SIZE] __initdata; @@ -219,6 +221,47 @@ static void __init test_zero_clear(void) expect_eq_pbl("", bmap, 1024); } +static void __init test_find_nth_bit(void) +{ + unsigned long b, bit, cnt = 0; + DECLARE_BITMAP(bmap, 64 * 3); + + bitmap_zero(bmap, 64 * 3); + __set_bit(10, bmap); + __set_bit(20, bmap); + __set_bit(30, bmap); + __set_bit(40, bmap); + __set_bit(50, bmap); + __set_bit(60, bmap); + __set_bit(80, bmap); + __set_bit(123, bmap); + + expect_eq_uint(10, find_nth_bit(bmap, 64 * 3, 0)); + expect_eq_uint(20, find_nth_bit(bmap, 64 * 3, 1)); + expect_eq_uint(30, find_nth_bit(bmap, 64 * 3, 2)); + expect_eq_uint(40, find_nth_bit(bmap, 64 * 3, 3)); + expect_eq_uint(50, find_nth_bit(bmap, 64 * 3, 4)); + expect_eq_uint(60, find_nth_bit(bmap, 64 * 3, 5)); + expect_eq_uint(80, find_nth_bit(bmap, 64 * 3, 6)); + expect_eq_uint(123, find_nth_bit(bmap, 64 * 3, 7)); + expect_eq_uint(64 * 3, find_nth_bit(bmap, 64 * 3, 8)); + + expect_eq_uint(10, find_nth_bit(bmap, 64 * 3 - 1, 0)); + expect_eq_uint(20, find_nth_bit(bmap, 64 * 3 - 1, 1)); + expect_eq_uint(30, find_nth_bit(bmap, 64 * 3 - 1, 2)); + expect_eq_uint(40, find_nth_bit(bmap, 64 * 3 - 1, 3)); + expect_eq_uint(50, find_nth_bit(bmap, 64 * 3 - 1, 4)); + expect_eq_uint(60, find_nth_bit(bmap, 64 * 3 - 1, 5)); + expect_eq_uint(80, find_nth_bit(bmap, 64 * 3 - 1, 6)); + expect_eq_uint(123, find_nth_bit(bmap, 64 * 3 - 1, 7)); + expect_eq_uint(64 * 3 - 1, find_nth_bit(bmap, 64 * 3 - 1, 8)); + + for_each_set_bit(bit, exp1, EXP1_IN_BITS) { + b = find_nth_bit(exp1, EXP1_IN_BITS, cnt++); + expect_eq_uint(b, bit); + } +} + static void __init test_fill_set(void) { DECLARE_BITMAP(bmap, 1024); @@ -557,8 +600,6 @@ static void __init test_bitmap_parse(void) } } -#define EXP1_IN_BITS (sizeof(exp1) * 8) - static void __init test_bitmap_arr32(void) { unsigned int nbits, next_bit; @@ -685,6 +726,239 @@ static void __init test_for_each_set_clump8(void) expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump); } +static void __init test_for_each_set_bit_wrap(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int wr, bit; + + bitmap_zero(orig, 500); + + /* Set individual bits */ + for (bit = 0; bit < 500; bit += 10) + bitmap_set(orig, bit, 1); + + /* Set range of bits */ + bitmap_set(orig, 100, 50); + + for (wr = 0; wr < 500; wr++) { + bitmap_zero(copy, 500); + + for_each_set_bit_wrap(bit, orig, 500, wr) + bitmap_set(copy, bit, 1); + + expect_eq_bitmap(orig, copy, 500); + } +} + +static void __init test_for_each_set_bit(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int bit; + + bitmap_zero(orig, 500); + bitmap_zero(copy, 500); + + /* Set individual bits */ + for (bit = 0; bit < 500; bit += 10) + bitmap_set(orig, bit, 1); + + /* Set range of bits */ + bitmap_set(orig, 100, 50); + + for_each_set_bit(bit, orig, 500) + bitmap_set(copy, bit, 1); + + expect_eq_bitmap(orig, copy, 500); +} + +static void __init test_for_each_set_bit_from(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int wr, bit; + + bitmap_zero(orig, 500); + + /* Set individual bits */ + for (bit = 0; bit < 500; bit += 10) + bitmap_set(orig, bit, 1); + + /* Set range of bits */ + bitmap_set(orig, 100, 50); + + for (wr = 0; wr < 500; wr++) { + DECLARE_BITMAP(tmp, 500); + + bitmap_zero(copy, 500); + bit = wr; + + for_each_set_bit_from(bit, orig, 500) + bitmap_set(copy, bit, 1); + + bitmap_copy(tmp, orig, 500); + bitmap_clear(tmp, 0, wr); + expect_eq_bitmap(tmp, copy, 500); + } +} + +static void __init test_for_each_clear_bit(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int bit; + + bitmap_fill(orig, 500); + bitmap_fill(copy, 500); + + /* Set individual bits */ + for (bit = 0; bit < 500; bit += 10) + bitmap_clear(orig, bit, 1); + + /* Set range of bits */ + bitmap_clear(orig, 100, 50); + + for_each_clear_bit(bit, orig, 500) + bitmap_clear(copy, bit, 1); + + expect_eq_bitmap(orig, copy, 500); +} + +static void __init test_for_each_clear_bit_from(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int wr, bit; + + bitmap_fill(orig, 500); + + /* Set individual bits */ + for (bit = 0; bit < 500; bit += 10) + bitmap_clear(orig, bit, 1); + + /* Set range of bits */ + bitmap_clear(orig, 100, 50); + + for (wr = 0; wr < 500; wr++) { + DECLARE_BITMAP(tmp, 500); + + bitmap_fill(copy, 500); + bit = wr; + + for_each_clear_bit_from(bit, orig, 500) + bitmap_clear(copy, bit, 1); + + bitmap_copy(tmp, orig, 500); + bitmap_set(tmp, 0, wr); + expect_eq_bitmap(tmp, copy, 500); + } +} + +static void __init test_for_each_set_bitrange(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int s, e; + + bitmap_zero(orig, 500); + bitmap_zero(copy, 500); + + /* Set individual bits */ + for (s = 0; s < 500; s += 10) + bitmap_set(orig, s, 1); + + /* Set range of bits */ + bitmap_set(orig, 100, 50); + + for_each_set_bitrange(s, e, orig, 500) + bitmap_set(copy, s, e-s); + + expect_eq_bitmap(orig, copy, 500); +} + +static void __init test_for_each_clear_bitrange(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int s, e; + + bitmap_fill(orig, 500); + bitmap_fill(copy, 500); + + /* Set individual bits */ + for (s = 0; s < 500; s += 10) + bitmap_clear(orig, s, 1); + + /* Set range of bits */ + bitmap_clear(orig, 100, 50); + + for_each_clear_bitrange(s, e, orig, 500) + bitmap_clear(copy, s, e-s); + + expect_eq_bitmap(orig, copy, 500); +} + +static void __init test_for_each_set_bitrange_from(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int wr, s, e; + + bitmap_zero(orig, 500); + + /* Set individual bits */ + for (s = 0; s < 500; s += 10) + bitmap_set(orig, s, 1); + + /* Set range of bits */ + bitmap_set(orig, 100, 50); + + for (wr = 0; wr < 500; wr++) { + DECLARE_BITMAP(tmp, 500); + + bitmap_zero(copy, 500); + s = wr; + + for_each_set_bitrange_from(s, e, orig, 500) + bitmap_set(copy, s, e - s); + + bitmap_copy(tmp, orig, 500); + bitmap_clear(tmp, 0, wr); + expect_eq_bitmap(tmp, copy, 500); + } +} + +static void __init test_for_each_clear_bitrange_from(void) +{ + DECLARE_BITMAP(orig, 500); + DECLARE_BITMAP(copy, 500); + unsigned int wr, s, e; + + bitmap_fill(orig, 500); + + /* Set individual bits */ + for (s = 0; s < 500; s += 10) + bitmap_clear(orig, s, 1); + + /* Set range of bits */ + bitmap_set(orig, 100, 50); + + for (wr = 0; wr < 500; wr++) { + DECLARE_BITMAP(tmp, 500); + + bitmap_fill(copy, 500); + s = wr; + + for_each_clear_bitrange_from(s, e, orig, 500) + bitmap_clear(copy, s, e - s); + + bitmap_copy(tmp, orig, 500); + bitmap_set(tmp, 0, wr); + expect_eq_bitmap(tmp, copy, 500); + } +} + struct test_bitmap_cut { unsigned int first; unsigned int cut; @@ -948,10 +1222,21 @@ static void __init selftest(void) test_bitmap_parselist(); test_bitmap_printlist(); test_mem_optimisations(); - test_for_each_set_clump8(); test_bitmap_cut(); test_bitmap_print_buf(); test_bitmap_const_eval(); + + test_find_nth_bit(); + test_for_each_set_bit(); + test_for_each_set_bit_from(); + test_for_each_clear_bit(); + test_for_each_clear_bit_from(); + test_for_each_set_bitrange(); + test_for_each_clear_bitrange(); + test_for_each_set_bitrange_from(); + test_for_each_clear_bitrange_from(); + test_for_each_set_clump8(); + test_for_each_set_bit_wrap(); } KSTM_MODULE_LOADERS(test_bitmap); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5820704165a6..ade9ac672adb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14346,7 +14346,6 @@ static struct sk_buff *populate_skb(char *buf, int size) skb->hash = SKB_HASH; skb->queue_mapping = SKB_QUEUE_MAP; skb->vlan_tci = SKB_VLAN_TCI; - skb->vlan_present = SKB_VLAN_PRESENT; skb->vlan_proto = htons(ETH_P_IP); dev_net_set(&dev, &init_net); skb->dev = &dev; diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c deleted file mode 100644 index a31a1622f1f6..000000000000 --- a/lib/test_cpumask.c +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * KUnit tests for cpumask. - * - * Author: Sander Vanheule <sander@svanheule.net> - */ - -#include <kunit/test.h> -#include <linux/cpu.h> -#include <linux/cpumask.h> - -#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu(cpu, m) \ - iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ - } while (0) - -#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu_not(cpu, m) \ - iter++; \ - KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \ - } while (0) - -#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ - iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ - } while (0) - -#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ - do { \ - int mask_weight = num_##name##_cpus(); \ - int cpu, iter = 0; \ - for_each_##name##_cpu(cpu) \ - iter++; \ - KUNIT_EXPECT_EQ((test), mask_weight, iter); \ - } while (0) - -static cpumask_t mask_empty; -static cpumask_t mask_all; - -static void test_cpumask_weight(struct kunit *test) -{ - KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); - KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask)); - KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); - - KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); -} - -static void test_cpumask_first(struct kunit *test) -{ - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); - - KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); -} - -static void test_cpumask_last(struct kunit *test) -{ - KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); - KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask)); -} - -static void test_cpumask_next(struct kunit *test) -{ - KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); - - KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); - KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); -} - -static void test_cpumask_iterators(struct kunit *test) -{ - EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); - EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); - EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); - - EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); - EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); - EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); -} - -static void test_cpumask_iterators_builtin(struct kunit *test) -{ - EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible); - - /* Ensure the dynamic masks are stable while running the tests */ - cpu_hotplug_disable(); - - EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online); - EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present); - - cpu_hotplug_enable(); -} - -static int test_cpumask_init(struct kunit *test) -{ - cpumask_clear(&mask_empty); - cpumask_setall(&mask_all); - - return 0; -} - -static struct kunit_case test_cpumask_cases[] = { - KUNIT_CASE(test_cpumask_weight), - KUNIT_CASE(test_cpumask_first), - KUNIT_CASE(test_cpumask_last), - KUNIT_CASE(test_cpumask_next), - KUNIT_CASE(test_cpumask_iterators), - KUNIT_CASE(test_cpumask_iterators_builtin), - {} -}; - -static struct kunit_suite test_cpumask_suite = { - .name = "cpumask", - .init = test_cpumask_init, - .test_cases = test_cpumask_cases, -}; -kunit_test_suite(test_cpumask_suite); - -MODULE_LICENSE("GPL"); diff --git a/lib/test_dynamic_debug.c b/lib/test_dynamic_debug.c new file mode 100644 index 000000000000..8dd250ad022b --- /dev/null +++ b/lib/test_dynamic_debug.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Kernel module for testing dynamic_debug + * + * Authors: + * Jim Cromie <jim.cromie@gmail.com> + */ + +#define pr_fmt(fmt) "test_dd: " fmt + +#include <linux/module.h> + +/* run tests by reading or writing sysfs node: do_prints */ + +static void do_prints(void); /* device under test */ +static int param_set_do_prints(const char *instr, const struct kernel_param *kp) +{ + do_prints(); + return 0; +} +static int param_get_do_prints(char *buffer, const struct kernel_param *kp) +{ + do_prints(); + return scnprintf(buffer, PAGE_SIZE, "did do_prints\n"); +} +static const struct kernel_param_ops param_ops_do_prints = { + .set = param_set_do_prints, + .get = param_get_do_prints, +}; +module_param_cb(do_prints, ¶m_ops_do_prints, NULL, 0600); + +/* + * Using the CLASSMAP api: + * - classmaps must have corresponding enum + * - enum symbols must match/correlate with class-name strings in the map. + * - base must equal enum's 1st value + * - multiple maps must set their base to share the 0-30 class_id space !! + * (build-bug-on tips welcome) + * Additionally, here: + * - tie together sysname, mapname, bitsname, flagsname + */ +#define DD_SYS_WRAP(_model, _flags) \ + static unsigned long bits_##_model; \ + static struct ddebug_class_param _flags##_model = { \ + .bits = &bits_##_model, \ + .flags = #_flags, \ + .map = &map_##_model, \ + }; \ + module_param_cb(_flags##_##_model, ¶m_ops_dyndbg_classes, &_flags##_model, 0600) + +/* numeric input, independent bits */ +enum cat_disjoint_bits { + D2_CORE = 0, + D2_DRIVER, + D2_KMS, + D2_PRIME, + D2_ATOMIC, + D2_VBL, + D2_STATE, + D2_LEASE, + D2_DP, + D2_DRMRES }; +DECLARE_DYNDBG_CLASSMAP(map_disjoint_bits, DD_CLASS_TYPE_DISJOINT_BITS, 0, + "D2_CORE", + "D2_DRIVER", + "D2_KMS", + "D2_PRIME", + "D2_ATOMIC", + "D2_VBL", + "D2_STATE", + "D2_LEASE", + "D2_DP", + "D2_DRMRES"); +DD_SYS_WRAP(disjoint_bits, p); +DD_SYS_WRAP(disjoint_bits, T); + +/* symbolic input, independent bits */ +enum cat_disjoint_names { LOW = 11, MID, HI }; +DECLARE_DYNDBG_CLASSMAP(map_disjoint_names, DD_CLASS_TYPE_DISJOINT_NAMES, 10, + "LOW", "MID", "HI"); +DD_SYS_WRAP(disjoint_names, p); +DD_SYS_WRAP(disjoint_names, T); + +/* numeric verbosity, V2 > V1 related */ +enum cat_level_num { V0 = 14, V1, V2, V3, V4, V5, V6, V7 }; +DECLARE_DYNDBG_CLASSMAP(map_level_num, DD_CLASS_TYPE_LEVEL_NUM, 14, + "V0", "V1", "V2", "V3", "V4", "V5", "V6", "V7"); +DD_SYS_WRAP(level_num, p); +DD_SYS_WRAP(level_num, T); + +/* symbolic verbosity */ +enum cat_level_names { L0 = 22, L1, L2, L3, L4, L5, L6, L7 }; +DECLARE_DYNDBG_CLASSMAP(map_level_names, DD_CLASS_TYPE_LEVEL_NAMES, 22, + "L0", "L1", "L2", "L3", "L4", "L5", "L6", "L7"); +DD_SYS_WRAP(level_names, p); +DD_SYS_WRAP(level_names, T); + +/* stand-in for all pr_debug etc */ +#define prdbg(SYM) __pr_debug_cls(SYM, #SYM " msg\n") + +static void do_cats(void) +{ + pr_debug("doing categories\n"); + + prdbg(LOW); + prdbg(MID); + prdbg(HI); + + prdbg(D2_CORE); + prdbg(D2_DRIVER); + prdbg(D2_KMS); + prdbg(D2_PRIME); + prdbg(D2_ATOMIC); + prdbg(D2_VBL); + prdbg(D2_STATE); + prdbg(D2_LEASE); + prdbg(D2_DP); + prdbg(D2_DRMRES); +} + +static void do_levels(void) +{ + pr_debug("doing levels\n"); + + prdbg(V1); + prdbg(V2); + prdbg(V3); + prdbg(V4); + prdbg(V5); + prdbg(V6); + prdbg(V7); + + prdbg(L1); + prdbg(L2); + prdbg(L3); + prdbg(L4); + prdbg(L5); + prdbg(L6); + prdbg(L7); +} + +static void do_prints(void) +{ + do_cats(); + do_levels(); +} + +static int __init test_dynamic_debug_init(void) +{ + pr_debug("init start\n"); + do_prints(); + pr_debug("init done\n"); + return 0; +} + +static void __exit test_dynamic_debug_exit(void) +{ + pr_debug("exited\n"); +} + +module_init(test_dynamic_debug_init); +module_exit(test_dynamic_debug_exit); + +MODULE_AUTHOR("Jim Cromie <jim.cromie@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_fprobe.c b/lib/test_fprobe.c index ed70637a2ffa..1fb56cf5e5ce 100644 --- a/lib/test_fprobe.c +++ b/lib/test_fprobe.c @@ -144,10 +144,7 @@ static unsigned long get_ftrace_location(void *func) static int fprobe_test_init(struct kunit *test) { - do { - rand1 = prandom_u32(); - } while (rand1 <= div_factor); - + rand1 = get_random_u32_above(div_factor); target = fprobe_selftest_target; target2 = fprobe_selftest_target2; target_ip = get_ftrace_location(target); diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 5144899d3c6b..b916801f23a8 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -149,7 +149,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, static void __init test_hexdump_set(int rowsize, bool ascii) { size_t d = min_t(size_t, sizeof(data_b), rowsize); - size_t len = get_random_int() % d + 1; + size_t len = get_random_u32_inclusive(1, d); test_hexdump(len, rowsize, 4, ascii); test_hexdump(len, rowsize, 2, ascii); @@ -208,11 +208,11 @@ static void __init test_hexdump_overflow(size_t buflen, size_t len, static void __init test_hexdump_overflow_set(size_t buflen, bool ascii) { unsigned int i = 0; - int rs = (get_random_int() % 2 + 1) * 16; + int rs = get_random_u32_inclusive(1, 2) * 16; do { int gs = 1 << i; - size_t len = get_random_int() % rs + gs; + size_t len = get_random_u32_below(rs) + gs; test_hexdump_overflow(buflen, rounddown(len, gs), rs, gs, ascii); } while (i++ < 3); @@ -223,11 +223,11 @@ static int __init test_hexdump_init(void) unsigned int i; int rowsize; - rowsize = (get_random_int() % 2 + 1) * 16; + rowsize = get_random_u32_inclusive(1, 2) * 16; for (i = 0; i < 16; i++) test_hexdump_set(rowsize, false); - rowsize = (get_random_int() % 2 + 1) * 16; + rowsize = get_random_u32_inclusive(1, 2) * 16; for (i = 0; i < 16; i++) test_hexdump_set(rowsize, true); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index e3965cafd27c..67e6f83fe0f8 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -100,6 +100,7 @@ struct dmirror { struct dmirror_chunk { struct dev_pagemap pagemap; struct dmirror_device *mdevice; + bool remove; }; /* @@ -107,8 +108,8 @@ struct dmirror_chunk { */ struct dmirror_device { struct cdev cdevice; - struct hmm_devmem *devmem; unsigned int zone_device_type; + struct device device; unsigned int devmem_capacity; unsigned int devmem_count; @@ -192,11 +193,15 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp) return 0; } +static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page) +{ + return container_of(page->pgmap, struct dmirror_chunk, pagemap); +} + static struct dmirror_device *dmirror_page_to_device(struct page *page) { - return container_of(page->pgmap, struct dmirror_chunk, - pagemap)->mdevice; + return dmirror_page_to_chunk(page)->mdevice; } static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range) @@ -627,8 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) goto error; } + zone_device_page_init(dpage); dpage->zone_device_data = rpage; - lock_page(dpage); return dpage; error: @@ -907,7 +912,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror, struct vm_area_struct *vma; unsigned long src_pfns[64] = { 0 }; unsigned long dst_pfns[64] = { 0 }; - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long next; int ret; @@ -968,7 +973,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror, unsigned long src_pfns[64] = { 0 }; unsigned long dst_pfns[64] = { 0 }; struct dmirror_bounce bounce; - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long next; int ret; @@ -1218,6 +1223,85 @@ static int dmirror_snapshot(struct dmirror *dmirror, return ret; } +static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk) +{ + unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT; + unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT; + unsigned long npages = end_pfn - start_pfn + 1; + unsigned long i; + unsigned long *src_pfns; + unsigned long *dst_pfns; + + src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); + dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); + + migrate_device_range(src_pfns, start_pfn, npages); + for (i = 0; i < npages; i++) { + struct page *dpage, *spage; + + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) + continue; + + if (WARN_ON(!is_device_private_page(spage) && + !is_device_coherent_page(spage))) + continue; + spage = BACKING_PAGE(spage); + dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL); + lock_page(dpage); + copy_highpage(dpage, spage); + dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); + if (src_pfns[i] & MIGRATE_PFN_WRITE) + dst_pfns[i] |= MIGRATE_PFN_WRITE; + } + migrate_device_pages(src_pfns, dst_pfns, npages); + migrate_device_finalize(src_pfns, dst_pfns, npages); + kfree(src_pfns); + kfree(dst_pfns); +} + +/* Removes free pages from the free list so they can't be re-allocated */ +static void dmirror_remove_free_pages(struct dmirror_chunk *devmem) +{ + struct dmirror_device *mdevice = devmem->mdevice; + struct page *page; + + for (page = mdevice->free_pages; page; page = page->zone_device_data) + if (dmirror_page_to_chunk(page) == devmem) + mdevice->free_pages = page->zone_device_data; +} + +static void dmirror_device_remove_chunks(struct dmirror_device *mdevice) +{ + unsigned int i; + + mutex_lock(&mdevice->devmem_lock); + if (mdevice->devmem_chunks) { + for (i = 0; i < mdevice->devmem_count; i++) { + struct dmirror_chunk *devmem = + mdevice->devmem_chunks[i]; + + spin_lock(&mdevice->lock); + devmem->remove = true; + dmirror_remove_free_pages(devmem); + spin_unlock(&mdevice->lock); + + dmirror_device_evict_chunk(devmem); + memunmap_pages(&devmem->pagemap); + if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) + release_mem_region(devmem->pagemap.range.start, + range_len(&devmem->pagemap.range)); + kfree(devmem); + } + mdevice->devmem_count = 0; + mdevice->devmem_capacity = 0; + mdevice->free_pages = NULL; + kfree(mdevice->devmem_chunks); + mdevice->devmem_chunks = NULL; + } + mutex_unlock(&mdevice->devmem_lock); +} + static long dmirror_fops_unlocked_ioctl(struct file *filp, unsigned int command, unsigned long arg) @@ -1272,6 +1356,11 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp, ret = dmirror_snapshot(dmirror, &cmd); break; + case HMM_DMIRROR_RELEASE: + dmirror_device_remove_chunks(dmirror->mdevice); + ret = 0; + break; + default: return -EINVAL; } @@ -1326,15 +1415,19 @@ static void dmirror_devmem_free(struct page *page) mdevice = dmirror_page_to_device(page); spin_lock(&mdevice->lock); - mdevice->cfree++; - page->zone_device_data = mdevice->free_pages; - mdevice->free_pages = page; + + /* Return page to our allocator if not freeing the chunk */ + if (!dmirror_page_to_chunk(page)->remove) { + mdevice->cfree++; + page->zone_device_data = mdevice->free_pages; + mdevice->free_pages = page; + } spin_unlock(&mdevice->lock); } static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long src_pfns = 0; unsigned long dst_pfns = 0; struct page *rpage; @@ -1357,6 +1450,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) args.dst = &dst_pfns; args.pgmap_owner = dmirror->mdevice; args.flags = dmirror_select_device(dmirror); + args.fault_page = vmf->page; if (migrate_vma_setup(&args)) return VM_FAULT_SIGBUS; @@ -1390,7 +1484,14 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id) cdev_init(&mdevice->cdevice, &dmirror_fops); mdevice->cdevice.owner = THIS_MODULE; - ret = cdev_add(&mdevice->cdevice, dev, 1); + device_initialize(&mdevice->device); + mdevice->device.devt = dev; + + ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id); + if (ret) + return ret; + + ret = cdev_device_add(&mdevice->cdevice, &mdevice->device); if (ret) return ret; @@ -1400,23 +1501,8 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id) static void dmirror_device_remove(struct dmirror_device *mdevice) { - unsigned int i; - - if (mdevice->devmem_chunks) { - for (i = 0; i < mdevice->devmem_count; i++) { - struct dmirror_chunk *devmem = - mdevice->devmem_chunks[i]; - - memunmap_pages(&devmem->pagemap); - if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) - release_mem_region(devmem->pagemap.range.start, - range_len(&devmem->pagemap.range)); - kfree(devmem); - } - kfree(mdevice->devmem_chunks); - } - - cdev_del(&mdevice->cdevice); + dmirror_device_remove_chunks(mdevice); + cdev_device_del(&mdevice->cdevice, &mdevice->device); } static int __init hmm_dmirror_init(void) diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h index e31d58c9034a..8c818a2cf4f6 100644 --- a/lib/test_hmm_uapi.h +++ b/lib/test_hmm_uapi.h @@ -36,6 +36,7 @@ struct hmm_dmirror_cmd { #define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x04, struct hmm_dmirror_cmd) #define HMM_DMIRROR_EXCLUSIVE _IOWR('H', 0x05, struct hmm_dmirror_cmd) #define HMM_DMIRROR_CHECK_EXCLUSIVE _IOWR('H', 0x06, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_RELEASE _IOWR('H', 0x07, struct hmm_dmirror_cmd) /* * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT. diff --git a/lib/test_kasan.c b/lib/test_kasan.c deleted file mode 100644 index 58c1b01ccfe2..000000000000 --- a/lib/test_kasan.c +++ /dev/null @@ -1,1426 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Copyright (c) 2014 Samsung Electronics Co., Ltd. - * Author: Andrey Ryabinin <a.ryabinin@samsung.com> - */ - -#include <linux/bitops.h> -#include <linux/delay.h> -#include <linux/kasan.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/module.h> -#include <linux/printk.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/uaccess.h> -#include <linux/io.h> -#include <linux/vmalloc.h> -#include <linux/set_memory.h> - -#include <asm/page.h> - -#include <kunit/test.h> - -#include "../mm/kasan/kasan.h" - -#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) - -/* - * Some tests use these global variables to store return values from function - * calls that could otherwise be eliminated by the compiler as dead code. - */ -void *kasan_ptr_result; -int kasan_int_result; - -static struct kunit_resource resource; -static struct kunit_kasan_status test_status; -static bool multishot; - -/* - * Temporarily enable multi-shot mode. Otherwise, KASAN would only report the - * first detected bug and panic the kernel if panic_on_warn is enabled. For - * hardware tag-based KASAN also allow tag checking to be reenabled for each - * test, see the comment for KUNIT_EXPECT_KASAN_FAIL(). - */ -static int kasan_test_init(struct kunit *test) -{ - if (!kasan_enabled()) { - kunit_err(test, "can't run KASAN tests with KASAN disabled"); - return -1; - } - - multishot = kasan_save_enable_multi_shot(); - test_status.report_found = false; - test_status.sync_fault = false; - kunit_add_named_resource(test, NULL, NULL, &resource, - "kasan_status", &test_status); - return 0; -} - -static void kasan_test_exit(struct kunit *test) -{ - kasan_restore_multi_shot(multishot); - KUNIT_EXPECT_FALSE(test, test_status.report_found); -} - -/** - * KUNIT_EXPECT_KASAN_FAIL() - check that the executed expression produces a - * KASAN report; causes a test failure otherwise. This relies on a KUnit - * resource named "kasan_status". Do not use this name for KUnit resources - * outside of KASAN tests. - * - * For hardware tag-based KASAN, when a synchronous tag fault happens, tag - * checking is auto-disabled. When this happens, this test handler reenables - * tag checking. As tag checking can be only disabled or enabled per CPU, - * this handler disables migration (preemption). - * - * Since the compiler doesn't see that the expression can change the test_status - * fields, it can reorder or optimize away the accesses to those fields. - * Use READ/WRITE_ONCE() for the accesses and compiler barriers around the - * expression to prevent that. - * - * In between KUNIT_EXPECT_KASAN_FAIL checks, test_status.report_found is kept - * as false. This allows detecting KASAN reports that happen outside of the - * checks by asserting !test_status.report_found at the start of - * KUNIT_EXPECT_KASAN_FAIL and in kasan_test_exit. - */ -#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ - kasan_sync_fault_possible()) \ - migrate_disable(); \ - KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found)); \ - barrier(); \ - expression; \ - barrier(); \ - if (kasan_async_fault_possible()) \ - kasan_force_async_fault(); \ - if (!READ_ONCE(test_status.report_found)) { \ - KUNIT_FAIL(test, KUNIT_SUBTEST_INDENT "KASAN failure " \ - "expected in \"" #expression \ - "\", but none occurred"); \ - } \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ - kasan_sync_fault_possible()) { \ - if (READ_ONCE(test_status.report_found) && \ - READ_ONCE(test_status.sync_fault)) \ - kasan_enable_tagging(); \ - migrate_enable(); \ - } \ - WRITE_ONCE(test_status.report_found, false); \ -} while (0) - -#define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do { \ - if (!IS_ENABLED(config)) \ - kunit_skip((test), "Test requires " #config "=y"); \ -} while (0) - -#define KASAN_TEST_NEEDS_CONFIG_OFF(test, config) do { \ - if (IS_ENABLED(config)) \ - kunit_skip((test), "Test requires " #config "=n"); \ -} while (0) - -static void kmalloc_oob_right(struct kunit *test) -{ - char *ptr; - size_t size = 128 - KASAN_GRANULE_SIZE - 5; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - /* - * An unaligned access past the requested kmalloc size. - * Only generic KASAN can precisely detect these. - */ - if (IS_ENABLED(CONFIG_KASAN_GENERIC)) - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 'x'); - - /* - * An aligned access into the first out-of-bounds granule that falls - * within the aligned kmalloc object. - */ - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + 5] = 'y'); - - /* Out-of-bounds access past the aligned kmalloc object. */ - KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = - ptr[size + KASAN_GRANULE_SIZE + 5]); - - kfree(ptr); -} - -static void kmalloc_oob_left(struct kunit *test) -{ - char *ptr; - size_t size = 15; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1)); - kfree(ptr); -} - -static void kmalloc_node_oob_right(struct kunit *test) -{ - char *ptr; - size_t size = 4096; - - ptr = kmalloc_node(size, GFP_KERNEL, 0); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]); - kfree(ptr); -} - -/* - * These kmalloc_pagealloc_* tests try allocating a memory chunk that doesn't - * fit into a slab cache and therefore is allocated via the page allocator - * fallback. Since this kind of fallback is only implemented for SLUB, these - * tests are limited to that allocator. - */ -static void kmalloc_pagealloc_oob_right(struct kunit *test) -{ - char *ptr; - size_t size = KMALLOC_MAX_CACHE_SIZE + 10; - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB); - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0); - - kfree(ptr); -} - -static void kmalloc_pagealloc_uaf(struct kunit *test) -{ - char *ptr; - size_t size = KMALLOC_MAX_CACHE_SIZE + 10; - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB); - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - kfree(ptr); - - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); -} - -static void kmalloc_pagealloc_invalid_free(struct kunit *test) -{ - char *ptr; - size_t size = KMALLOC_MAX_CACHE_SIZE + 10; - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB); - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - KUNIT_EXPECT_KASAN_FAIL(test, kfree(ptr + 1)); -} - -static void pagealloc_oob_right(struct kunit *test) -{ - char *ptr; - struct page *pages; - size_t order = 4; - size_t size = (1UL << (PAGE_SHIFT + order)); - - /* - * With generic KASAN page allocations have no redzones, thus - * out-of-bounds detection is not guaranteed. - * See https://bugzilla.kernel.org/show_bug.cgi?id=210503. - */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); - - pages = alloc_pages(GFP_KERNEL, order); - ptr = page_address(pages); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]); - free_pages((unsigned long)ptr, order); -} - -static void pagealloc_uaf(struct kunit *test) -{ - char *ptr; - struct page *pages; - size_t order = 4; - - pages = alloc_pages(GFP_KERNEL, order); - ptr = page_address(pages); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - free_pages((unsigned long)ptr, order); - - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); -} - -static void kmalloc_large_oob_right(struct kunit *test) -{ - char *ptr; - size_t size = KMALLOC_MAX_CACHE_SIZE - 256; - - /* - * Allocate a chunk that is large enough, but still fits into a slab - * and does not trigger the page allocator fallback in SLUB. - */ - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0); - kfree(ptr); -} - -static void krealloc_more_oob_helper(struct kunit *test, - size_t size1, size_t size2) -{ - char *ptr1, *ptr2; - size_t middle; - - KUNIT_ASSERT_LT(test, size1, size2); - middle = size1 + (size2 - size1) / 2; - - ptr1 = kmalloc(size1, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); - - ptr2 = krealloc(ptr1, size2, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); - - /* All offsets up to size2 must be accessible. */ - ptr2[size1 - 1] = 'x'; - ptr2[size1] = 'x'; - ptr2[middle] = 'x'; - ptr2[size2 - 1] = 'x'; - - /* Generic mode is precise, so unaligned size2 must be inaccessible. */ - if (IS_ENABLED(CONFIG_KASAN_GENERIC)) - KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x'); - - /* For all modes first aligned offset after size2 must be inaccessible. */ - KUNIT_EXPECT_KASAN_FAIL(test, - ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x'); - - kfree(ptr2); -} - -static void krealloc_less_oob_helper(struct kunit *test, - size_t size1, size_t size2) -{ - char *ptr1, *ptr2; - size_t middle; - - KUNIT_ASSERT_LT(test, size2, size1); - middle = size2 + (size1 - size2) / 2; - - ptr1 = kmalloc(size1, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); - - ptr2 = krealloc(ptr1, size2, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); - - /* Must be accessible for all modes. */ - ptr2[size2 - 1] = 'x'; - - /* Generic mode is precise, so unaligned size2 must be inaccessible. */ - if (IS_ENABLED(CONFIG_KASAN_GENERIC)) - KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x'); - - /* For all modes first aligned offset after size2 must be inaccessible. */ - KUNIT_EXPECT_KASAN_FAIL(test, - ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x'); - - /* - * For all modes all size2, middle, and size1 should land in separate - * granules and thus the latter two offsets should be inaccessible. - */ - KUNIT_EXPECT_LE(test, round_up(size2, KASAN_GRANULE_SIZE), - round_down(middle, KASAN_GRANULE_SIZE)); - KUNIT_EXPECT_LE(test, round_up(middle, KASAN_GRANULE_SIZE), - round_down(size1, KASAN_GRANULE_SIZE)); - KUNIT_EXPECT_KASAN_FAIL(test, ptr2[middle] = 'x'); - KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1 - 1] = 'x'); - KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1] = 'x'); - - kfree(ptr2); -} - -static void krealloc_more_oob(struct kunit *test) -{ - krealloc_more_oob_helper(test, 201, 235); -} - -static void krealloc_less_oob(struct kunit *test) -{ - krealloc_less_oob_helper(test, 235, 201); -} - -static void krealloc_pagealloc_more_oob(struct kunit *test) -{ - /* page_alloc fallback in only implemented for SLUB. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB); - - krealloc_more_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 201, - KMALLOC_MAX_CACHE_SIZE + 235); -} - -static void krealloc_pagealloc_less_oob(struct kunit *test) -{ - /* page_alloc fallback in only implemented for SLUB. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB); - - krealloc_less_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 235, - KMALLOC_MAX_CACHE_SIZE + 201); -} - -/* - * Check that krealloc() detects a use-after-free, returns NULL, - * and doesn't unpoison the freed object. - */ -static void krealloc_uaf(struct kunit *test) -{ - char *ptr1, *ptr2; - int size1 = 201; - int size2 = 235; - - ptr1 = kmalloc(size1, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); - kfree(ptr1); - - KUNIT_EXPECT_KASAN_FAIL(test, ptr2 = krealloc(ptr1, size2, GFP_KERNEL)); - KUNIT_ASSERT_NULL(test, ptr2); - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)ptr1); -} - -static void kmalloc_oob_16(struct kunit *test) -{ - struct { - u64 words[2]; - } *ptr1, *ptr2; - - /* This test is specifically crafted for the generic mode. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); - - ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); - - OPTIMIZER_HIDE_VAR(ptr1); - OPTIMIZER_HIDE_VAR(ptr2); - KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); - kfree(ptr1); - kfree(ptr2); -} - -static void kmalloc_uaf_16(struct kunit *test) -{ - struct { - u64 words[2]; - } *ptr1, *ptr2; - - ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); - - ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); - kfree(ptr2); - - KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); - kfree(ptr1); -} - -/* - * Note: in the memset tests below, the written range touches both valid and - * invalid memory. This makes sure that the instrumentation does not only check - * the starting address but the whole range. - */ - -static void kmalloc_oob_memset_2(struct kunit *test) -{ - char *ptr; - size_t size = 128 - KASAN_GRANULE_SIZE; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2)); - kfree(ptr); -} - -static void kmalloc_oob_memset_4(struct kunit *test) -{ - char *ptr; - size_t size = 128 - KASAN_GRANULE_SIZE; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4)); - kfree(ptr); -} - -static void kmalloc_oob_memset_8(struct kunit *test) -{ - char *ptr; - size_t size = 128 - KASAN_GRANULE_SIZE; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8)); - kfree(ptr); -} - -static void kmalloc_oob_memset_16(struct kunit *test) -{ - char *ptr; - size_t size = 128 - KASAN_GRANULE_SIZE; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16)); - kfree(ptr); -} - -static void kmalloc_oob_in_memset(struct kunit *test) -{ - char *ptr; - size_t size = 128 - KASAN_GRANULE_SIZE; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, - memset(ptr, 0, size + KASAN_GRANULE_SIZE)); - kfree(ptr); -} - -static void kmalloc_memmove_negative_size(struct kunit *test) -{ - char *ptr; - size_t size = 64; - size_t invalid_size = -2; - - /* - * Hardware tag-based mode doesn't check memmove for negative size. - * As a result, this test introduces a side-effect memory corruption, - * which can result in a crash. - */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_HW_TAGS); - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - memset((char *)ptr, 0, 64); - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(invalid_size); - KUNIT_EXPECT_KASAN_FAIL(test, - memmove((char *)ptr, (char *)ptr + 4, invalid_size)); - kfree(ptr); -} - -static void kmalloc_memmove_invalid_size(struct kunit *test) -{ - char *ptr; - size_t size = 64; - volatile size_t invalid_size = size; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - memset((char *)ptr, 0, 64); - OPTIMIZER_HIDE_VAR(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, - memmove((char *)ptr, (char *)ptr + 4, invalid_size)); - kfree(ptr); -} - -static void kmalloc_uaf(struct kunit *test) -{ - char *ptr; - size_t size = 10; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - kfree(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[8]); -} - -static void kmalloc_uaf_memset(struct kunit *test) -{ - char *ptr; - size_t size = 33; - - /* - * Only generic KASAN uses quarantine, which is required to avoid a - * kernel memory corruption this test causes. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - kfree(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size)); -} - -static void kmalloc_uaf2(struct kunit *test) -{ - char *ptr1, *ptr2; - size_t size = 43; - int counter = 0; - -again: - ptr1 = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); - - kfree(ptr1); - - ptr2 = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); - - /* - * For tag-based KASAN ptr1 and ptr2 tags might happen to be the same. - * Allow up to 16 attempts at generating different tags. - */ - if (!IS_ENABLED(CONFIG_KASAN_GENERIC) && ptr1 == ptr2 && counter++ < 16) { - kfree(ptr2); - goto again; - } - - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr1)[40]); - KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2); - - kfree(ptr2); -} - -static void kfree_via_page(struct kunit *test) -{ - char *ptr; - size_t size = 8; - struct page *page; - unsigned long offset; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - page = virt_to_page(ptr); - offset = offset_in_page(ptr); - kfree(page_address(page) + offset); -} - -static void kfree_via_phys(struct kunit *test) -{ - char *ptr; - size_t size = 8; - phys_addr_t phys; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - phys = virt_to_phys(ptr); - kfree(phys_to_virt(phys)); -} - -static void kmem_cache_oob(struct kunit *test) -{ - char *p; - size_t size = 200; - struct kmem_cache *cache; - - cache = kmem_cache_create("test_cache", size, 0, 0, NULL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - - p = kmem_cache_alloc(cache, GFP_KERNEL); - if (!p) { - kunit_err(test, "Allocation failed: %s\n", __func__); - kmem_cache_destroy(cache); - return; - } - - KUNIT_EXPECT_KASAN_FAIL(test, *p = p[size + OOB_TAG_OFF]); - - kmem_cache_free(cache, p); - kmem_cache_destroy(cache); -} - -static void kmem_cache_accounted(struct kunit *test) -{ - int i; - char *p; - size_t size = 200; - struct kmem_cache *cache; - - cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - - /* - * Several allocations with a delay to allow for lazy per memcg kmem - * cache creation. - */ - for (i = 0; i < 5; i++) { - p = kmem_cache_alloc(cache, GFP_KERNEL); - if (!p) - goto free_cache; - - kmem_cache_free(cache, p); - msleep(100); - } - -free_cache: - kmem_cache_destroy(cache); -} - -static void kmem_cache_bulk(struct kunit *test) -{ - struct kmem_cache *cache; - size_t size = 200; - char *p[10]; - bool ret; - int i; - - cache = kmem_cache_create("test_cache", size, 0, 0, NULL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - - ret = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p), (void **)&p); - if (!ret) { - kunit_err(test, "Allocation failed: %s\n", __func__); - kmem_cache_destroy(cache); - return; - } - - for (i = 0; i < ARRAY_SIZE(p); i++) - p[i][0] = p[i][size - 1] = 42; - - kmem_cache_free_bulk(cache, ARRAY_SIZE(p), (void **)&p); - kmem_cache_destroy(cache); -} - -static char global_array[10]; - -static void kasan_global_oob_right(struct kunit *test) -{ - /* - * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS - * from failing here and panicking the kernel, access the array via a - * volatile pointer, which will prevent the compiler from being able to - * determine the array bounds. - * - * This access uses a volatile pointer to char (char *volatile) rather - * than the more conventional pointer to volatile char (volatile char *) - * because we want to prevent the compiler from making inferences about - * the pointer itself (i.e. its array bounds), not the data that it - * refers to. - */ - char *volatile array = global_array; - char *p = &array[ARRAY_SIZE(global_array) + 3]; - - /* Only generic mode instruments globals. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); -} - -static void kasan_global_oob_left(struct kunit *test) -{ - char *volatile array = global_array; - char *p = array - 3; - - /* - * GCC is known to fail this test, skip it. - * See https://bugzilla.kernel.org/show_bug.cgi?id=215051. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_CC_IS_CLANG); - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); -} - -/* Check that ksize() makes the whole object accessible. */ -static void ksize_unpoisons_memory(struct kunit *test) -{ - char *ptr; - size_t size = 123, real_size; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - real_size = ksize(ptr); - - OPTIMIZER_HIDE_VAR(ptr); - - /* This access shouldn't trigger a KASAN report. */ - ptr[size] = 'x'; - - /* This one must. */ - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]); - - kfree(ptr); -} - -/* - * Check that a use-after-free is detected by ksize() and via normal accesses - * after it. - */ -static void ksize_uaf(struct kunit *test) -{ - char *ptr; - int size = 128 - KASAN_GRANULE_SIZE; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - kfree(ptr); - - OPTIMIZER_HIDE_VAR(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr)); - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]); -} - -static void kasan_stack_oob(struct kunit *test) -{ - char stack_array[10]; - /* See comment in kasan_global_oob_right. */ - char *volatile array = stack_array; - char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF]; - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK); - - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); -} - -static void kasan_alloca_oob_left(struct kunit *test) -{ - volatile int i = 10; - char alloca_array[i]; - /* See comment in kasan_global_oob_right. */ - char *volatile array = alloca_array; - char *p = array - 1; - - /* Only generic mode instruments dynamic allocas. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK); - - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); -} - -static void kasan_alloca_oob_right(struct kunit *test) -{ - volatile int i = 10; - char alloca_array[i]; - /* See comment in kasan_global_oob_right. */ - char *volatile array = alloca_array; - char *p = array + i; - - /* Only generic mode instruments dynamic allocas. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK); - - KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); -} - -static void kmem_cache_double_free(struct kunit *test) -{ - char *p; - size_t size = 200; - struct kmem_cache *cache; - - cache = kmem_cache_create("test_cache", size, 0, 0, NULL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - - p = kmem_cache_alloc(cache, GFP_KERNEL); - if (!p) { - kunit_err(test, "Allocation failed: %s\n", __func__); - kmem_cache_destroy(cache); - return; - } - - kmem_cache_free(cache, p); - KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p)); - kmem_cache_destroy(cache); -} - -static void kmem_cache_invalid_free(struct kunit *test) -{ - char *p; - size_t size = 200; - struct kmem_cache *cache; - - cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, - NULL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - - p = kmem_cache_alloc(cache, GFP_KERNEL); - if (!p) { - kunit_err(test, "Allocation failed: %s\n", __func__); - kmem_cache_destroy(cache); - return; - } - - /* Trigger invalid free, the object doesn't get freed. */ - KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p + 1)); - - /* - * Properly free the object to prevent the "Objects remaining in - * test_cache on __kmem_cache_shutdown" BUG failure. - */ - kmem_cache_free(cache, p); - - kmem_cache_destroy(cache); -} - -static void empty_cache_ctor(void *object) { } - -static void kmem_cache_double_destroy(struct kunit *test) -{ - struct kmem_cache *cache; - - /* Provide a constructor to prevent cache merging. */ - cache = kmem_cache_create("test_cache", 200, 0, 0, empty_cache_ctor); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - kmem_cache_destroy(cache); - KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache)); -} - -static void kasan_memchr(struct kunit *test) -{ - char *ptr; - size_t size = 24; - - /* - * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT. - * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details. - */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT); - - if (OOB_TAG_OFF) - size = round_up(size, OOB_TAG_OFF); - - ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, - kasan_ptr_result = memchr(ptr, '1', size + 1)); - - kfree(ptr); -} - -static void kasan_memcmp(struct kunit *test) -{ - char *ptr; - size_t size = 24; - int arr[9]; - - /* - * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT. - * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details. - */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT); - - if (OOB_TAG_OFF) - size = round_up(size, OOB_TAG_OFF); - - ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - memset(arr, 0, sizeof(arr)); - - OPTIMIZER_HIDE_VAR(ptr); - OPTIMIZER_HIDE_VAR(size); - KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = memcmp(ptr, arr, size+1)); - kfree(ptr); -} - -static void kasan_strings(struct kunit *test) -{ - char *ptr; - size_t size = 24; - - /* - * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT. - * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details. - */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT); - - ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - kfree(ptr); - - /* - * Try to cause only 1 invalid access (less spam in dmesg). - * For that we need ptr to point to zeroed byte. - * Skip metadata that could be stored in freed object so ptr - * will likely point to zeroed byte. - */ - ptr += 16; - KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strchr(ptr, '1')); - - KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strrchr(ptr, '1')); - - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strcmp(ptr, "2")); - - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strncmp(ptr, "2", 1)); - - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strlen(ptr)); - - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1)); -} - -static void kasan_bitops_modify(struct kunit *test, int nr, void *addr) -{ - KUNIT_EXPECT_KASAN_FAIL(test, set_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, change_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(nr, addr)); -} - -static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr) -{ - KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __test_and_set_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit_lock(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, test_and_clear_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __test_and_clear_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr)); - -#if defined(clear_bit_unlock_is_negative_byte) - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = - clear_bit_unlock_is_negative_byte(nr, addr)); -#endif -} - -static void kasan_bitops_generic(struct kunit *test) -{ - long *bits; - - /* This test is specifically crafted for the generic mode. */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - - /* - * Allocate 1 more byte, which causes kzalloc to round up to 16 bytes; - * this way we do not actually corrupt other memory. - */ - bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); - - /* - * Below calls try to access bit within allocated memory; however, the - * below accesses are still out-of-bounds, since bitops are defined to - * operate on the whole long the bit is in. - */ - kasan_bitops_modify(test, BITS_PER_LONG, bits); - - /* - * Below calls try to access bit beyond allocated memory. - */ - kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, bits); - - kfree(bits); -} - -static void kasan_bitops_tags(struct kunit *test) -{ - long *bits; - - /* This test is specifically crafted for tag-based modes. */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); - - /* kmalloc-64 cache will be used and the last 16 bytes will be the redzone. */ - bits = kzalloc(48, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); - - /* Do the accesses past the 48 allocated bytes, but within the redone. */ - kasan_bitops_modify(test, BITS_PER_LONG, (void *)bits + 48); - kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, (void *)bits + 48); - - kfree(bits); -} - -static void kmalloc_double_kzfree(struct kunit *test) -{ - char *ptr; - size_t size = 16; - - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - kfree_sensitive(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr)); -} - -static void vmalloc_helpers_tags(struct kunit *test) -{ - void *ptr; - - /* This test is intended for tag-based modes. */ - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC); - - ptr = vmalloc(PAGE_SIZE); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - /* Check that the returned pointer is tagged. */ - KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure exported vmalloc helpers handle tagged pointers. */ - KUNIT_ASSERT_TRUE(test, is_vmalloc_addr(ptr)); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vmalloc_to_page(ptr)); - -#if !IS_MODULE(CONFIG_KASAN_KUNIT_TEST) - { - int rv; - - /* Make sure vmalloc'ed memory permissions can be changed. */ - rv = set_memory_ro((unsigned long)ptr, 1); - KUNIT_ASSERT_GE(test, rv, 0); - rv = set_memory_rw((unsigned long)ptr, 1); - KUNIT_ASSERT_GE(test, rv, 0); - } -#endif - - vfree(ptr); -} - -static void vmalloc_oob(struct kunit *test) -{ - char *v_ptr, *p_ptr; - struct page *page; - size_t size = PAGE_SIZE / 2 - KASAN_GRANULE_SIZE - 5; - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC); - - v_ptr = vmalloc(size); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr); - - OPTIMIZER_HIDE_VAR(v_ptr); - - /* - * We have to be careful not to hit the guard page in vmalloc tests. - * The MMU will catch that and crash us. - */ - - /* Make sure in-bounds accesses are valid. */ - v_ptr[0] = 0; - v_ptr[size - 1] = 0; - - /* - * An unaligned access past the requested vmalloc size. - * Only generic KASAN can precisely detect these. - */ - if (IS_ENABLED(CONFIG_KASAN_GENERIC)) - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size]); - - /* An aligned access into the first out-of-bounds granule. */ - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size + 5]); - - /* Check that in-bounds accesses to the physical page are valid. */ - page = vmalloc_to_page(v_ptr); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page); - p_ptr = page_address(page); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr); - p_ptr[0] = 0; - - vfree(v_ptr); - - /* - * We can't check for use-after-unmap bugs in this nor in the following - * vmalloc tests, as the page might be fully unmapped and accessing it - * will crash the kernel. - */ -} - -static void vmap_tags(struct kunit *test) -{ - char *p_ptr, *v_ptr; - struct page *p_page, *v_page; - - /* - * This test is specifically crafted for the software tag-based mode, - * the only tag-based mode that poisons vmap mappings. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); - - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC); - - p_page = alloc_pages(GFP_KERNEL, 1); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_page); - p_ptr = page_address(p_page); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr); - - v_ptr = vmap(&p_page, 1, VM_MAP, PAGE_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr); - - /* - * We can't check for out-of-bounds bugs in this nor in the following - * vmalloc tests, as allocations have page granularity and accessing - * the guard page will crash the kernel. - */ - - KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure that in-bounds accesses through both pointers work. */ - *p_ptr = 0; - *v_ptr = 0; - - /* Make sure vmalloc_to_page() correctly recovers the page pointer. */ - v_page = vmalloc_to_page(v_ptr); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_page); - KUNIT_EXPECT_PTR_EQ(test, p_page, v_page); - - vunmap(v_ptr); - free_pages((unsigned long)p_ptr, 1); -} - -static void vm_map_ram_tags(struct kunit *test) -{ - char *p_ptr, *v_ptr; - struct page *page; - - /* - * This test is specifically crafted for the software tag-based mode, - * the only tag-based mode that poisons vm_map_ram mappings. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); - - page = alloc_pages(GFP_KERNEL, 1); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page); - p_ptr = page_address(page); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr); - - v_ptr = vm_map_ram(&page, 1, -1); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr); - - KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure that in-bounds accesses through both pointers work. */ - *p_ptr = 0; - *v_ptr = 0; - - vm_unmap_ram(v_ptr, 1); - free_pages((unsigned long)p_ptr, 1); -} - -static void vmalloc_percpu(struct kunit *test) -{ - char __percpu *ptr; - int cpu; - - /* - * This test is specifically crafted for the software tag-based mode, - * the only tag-based mode that poisons percpu mappings. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); - - ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); - - for_each_possible_cpu(cpu) { - char *c_ptr = per_cpu_ptr(ptr, cpu); - - KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure that in-bounds accesses don't crash the kernel. */ - *c_ptr = 0; - } - - free_percpu(ptr); -} - -/* - * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, - * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based - * modes. - */ -static void match_all_not_assigned(struct kunit *test) -{ - char *ptr; - struct page *pages; - int i, size, order; - - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); - - for (i = 0; i < 256; i++) { - size = (get_random_int() % 1024) + 1; - ptr = kmalloc(size, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); - kfree(ptr); - } - - for (i = 0; i < 256; i++) { - order = (get_random_int() % 4) + 1; - pages = alloc_pages(GFP_KERNEL, order); - ptr = page_address(pages); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); - free_pages((unsigned long)ptr, order); - } - - if (!IS_ENABLED(CONFIG_KASAN_VMALLOC)) - return; - - for (i = 0; i < 256; i++) { - size = (get_random_int() % 1024) + 1; - ptr = vmalloc(size); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); - vfree(ptr); - } -} - -/* Check that 0xff works as a match-all pointer tag for tag-based modes. */ -static void match_all_ptr_tag(struct kunit *test) -{ - char *ptr; - u8 tag; - - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); - - ptr = kmalloc(128, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - - /* Backup the assigned tag. */ - tag = get_tag(ptr); - KUNIT_EXPECT_NE(test, tag, (u8)KASAN_TAG_KERNEL); - - /* Reset the tag to 0xff.*/ - ptr = set_tag(ptr, KASAN_TAG_KERNEL); - - /* This access shouldn't trigger a KASAN report. */ - *ptr = 0; - - /* Recover the pointer tag and free. */ - ptr = set_tag(ptr, tag); - kfree(ptr); -} - -/* Check that there are no match-all memory tags for tag-based modes. */ -static void match_all_mem_tag(struct kunit *test) -{ - char *ptr; - int tag; - - KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); - - ptr = kmalloc(128, GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_NE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); - - /* For each possible tag value not matching the pointer tag. */ - for (tag = KASAN_TAG_MIN; tag <= KASAN_TAG_KERNEL; tag++) { - if (tag == get_tag(ptr)) - continue; - - /* Mark the first memory granule with the chosen memory tag. */ - kasan_poison(ptr, KASAN_GRANULE_SIZE, (u8)tag, false); - - /* This access must cause a KASAN report. */ - KUNIT_EXPECT_KASAN_FAIL(test, *ptr = 0); - } - - /* Recover the memory tag and free. */ - kasan_poison(ptr, KASAN_GRANULE_SIZE, get_tag(ptr), false); - kfree(ptr); -} - -static struct kunit_case kasan_kunit_test_cases[] = { - KUNIT_CASE(kmalloc_oob_right), - KUNIT_CASE(kmalloc_oob_left), - KUNIT_CASE(kmalloc_node_oob_right), - KUNIT_CASE(kmalloc_pagealloc_oob_right), - KUNIT_CASE(kmalloc_pagealloc_uaf), - KUNIT_CASE(kmalloc_pagealloc_invalid_free), - KUNIT_CASE(pagealloc_oob_right), - KUNIT_CASE(pagealloc_uaf), - KUNIT_CASE(kmalloc_large_oob_right), - KUNIT_CASE(krealloc_more_oob), - KUNIT_CASE(krealloc_less_oob), - KUNIT_CASE(krealloc_pagealloc_more_oob), - KUNIT_CASE(krealloc_pagealloc_less_oob), - KUNIT_CASE(krealloc_uaf), - KUNIT_CASE(kmalloc_oob_16), - KUNIT_CASE(kmalloc_uaf_16), - KUNIT_CASE(kmalloc_oob_in_memset), - KUNIT_CASE(kmalloc_oob_memset_2), - KUNIT_CASE(kmalloc_oob_memset_4), - KUNIT_CASE(kmalloc_oob_memset_8), - KUNIT_CASE(kmalloc_oob_memset_16), - KUNIT_CASE(kmalloc_memmove_negative_size), - KUNIT_CASE(kmalloc_memmove_invalid_size), - KUNIT_CASE(kmalloc_uaf), - KUNIT_CASE(kmalloc_uaf_memset), - KUNIT_CASE(kmalloc_uaf2), - KUNIT_CASE(kfree_via_page), - KUNIT_CASE(kfree_via_phys), - KUNIT_CASE(kmem_cache_oob), - KUNIT_CASE(kmem_cache_accounted), - KUNIT_CASE(kmem_cache_bulk), - KUNIT_CASE(kasan_global_oob_right), - KUNIT_CASE(kasan_global_oob_left), - KUNIT_CASE(kasan_stack_oob), - KUNIT_CASE(kasan_alloca_oob_left), - KUNIT_CASE(kasan_alloca_oob_right), - KUNIT_CASE(ksize_unpoisons_memory), - KUNIT_CASE(ksize_uaf), - KUNIT_CASE(kmem_cache_double_free), - KUNIT_CASE(kmem_cache_invalid_free), - KUNIT_CASE(kmem_cache_double_destroy), - KUNIT_CASE(kasan_memchr), - KUNIT_CASE(kasan_memcmp), - KUNIT_CASE(kasan_strings), - KUNIT_CASE(kasan_bitops_generic), - KUNIT_CASE(kasan_bitops_tags), - KUNIT_CASE(kmalloc_double_kzfree), - KUNIT_CASE(vmalloc_helpers_tags), - KUNIT_CASE(vmalloc_oob), - KUNIT_CASE(vmap_tags), - KUNIT_CASE(vm_map_ram_tags), - KUNIT_CASE(vmalloc_percpu), - KUNIT_CASE(match_all_not_assigned), - KUNIT_CASE(match_all_ptr_tag), - KUNIT_CASE(match_all_mem_tag), - {} -}; - -static struct kunit_suite kasan_kunit_test_suite = { - .name = "kasan", - .init = kasan_test_init, - .test_cases = kasan_kunit_test_cases, - .exit = kasan_test_exit, -}; - -kunit_test_suite(kasan_kunit_test_suite); - -MODULE_LICENSE("GPL"); diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c deleted file mode 100644 index b112cbc835e9..000000000000 --- a/lib/test_kasan_module.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Copyright (c) 2014 Samsung Electronics Co., Ltd. - * Author: Andrey Ryabinin <a.ryabinin@samsung.com> - */ - -#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ - -#include <linux/mman.h> -#include <linux/module.h> -#include <linux/printk.h> -#include <linux/slab.h> -#include <linux/uaccess.h> - -#include "../mm/kasan/kasan.h" - -static noinline void __init copy_user_test(void) -{ - char *kmem; - char __user *usermem; - size_t size = 128 - KASAN_GRANULE_SIZE; - int __maybe_unused unused; - - kmem = kmalloc(size, GFP_KERNEL); - if (!kmem) - return; - - usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_ANONYMOUS | MAP_PRIVATE, 0); - if (IS_ERR(usermem)) { - pr_err("Failed to allocate user memory\n"); - kfree(kmem); - return; - } - - OPTIMIZER_HIDE_VAR(size); - - pr_info("out-of-bounds in copy_from_user()\n"); - unused = copy_from_user(kmem, usermem, size + 1); - - pr_info("out-of-bounds in copy_to_user()\n"); - unused = copy_to_user(usermem, kmem, size + 1); - - pr_info("out-of-bounds in __copy_from_user()\n"); - unused = __copy_from_user(kmem, usermem, size + 1); - - pr_info("out-of-bounds in __copy_to_user()\n"); - unused = __copy_to_user(usermem, kmem, size + 1); - - pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); - unused = __copy_from_user_inatomic(kmem, usermem, size + 1); - - pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); - unused = __copy_to_user_inatomic(usermem, kmem, size + 1); - - pr_info("out-of-bounds in strncpy_from_user()\n"); - unused = strncpy_from_user(kmem, usermem, size + 1); - - vm_munmap((unsigned long)usermem, PAGE_SIZE); - kfree(kmem); -} - -static struct kasan_rcu_info { - int i; - struct rcu_head rcu; -} *global_rcu_ptr; - -static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp) -{ - struct kasan_rcu_info *fp = container_of(rp, - struct kasan_rcu_info, rcu); - - kfree(fp); - ((volatile struct kasan_rcu_info *)fp)->i; -} - -static noinline void __init kasan_rcu_uaf(void) -{ - struct kasan_rcu_info *ptr; - - pr_info("use-after-free in kasan_rcu_reclaim\n"); - ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL); - if (!ptr) { - pr_err("Allocation failed\n"); - return; - } - - global_rcu_ptr = rcu_dereference_protected(ptr, NULL); - call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim); -} - -static noinline void __init kasan_workqueue_work(struct work_struct *work) -{ - kfree(work); -} - -static noinline void __init kasan_workqueue_uaf(void) -{ - struct workqueue_struct *workqueue; - struct work_struct *work; - - workqueue = create_workqueue("kasan_wq_test"); - if (!workqueue) { - pr_err("Allocation failed\n"); - return; - } - work = kmalloc(sizeof(struct work_struct), GFP_KERNEL); - if (!work) { - pr_err("Allocation failed\n"); - return; - } - - INIT_WORK(work, kasan_workqueue_work); - queue_work(workqueue, work); - destroy_workqueue(workqueue); - - pr_info("use-after-free on workqueue\n"); - ((volatile struct work_struct *)work)->data; -} - -static int __init test_kasan_module_init(void) -{ - /* - * Temporarily enable multi-shot mode. Otherwise, KASAN would only - * report the first detected bug and panic the kernel if panic_on_warn - * is enabled. - */ - bool multishot = kasan_save_enable_multi_shot(); - - copy_user_test(); - kasan_rcu_uaf(); - kasan_workqueue_uaf(); - - kasan_restore_multi_shot(multishot); - return -EAGAIN; -} - -module_init(test_kasan_module_init); -MODULE_LICENSE("GPL"); diff --git a/lib/test_kprobes.c b/lib/test_kprobes.c index a5edc2ebc947..1c95e5719802 100644 --- a/lib/test_kprobes.c +++ b/lib/test_kprobes.c @@ -339,10 +339,7 @@ static int kprobes_test_init(struct kunit *test) stacktrace_target = kprobe_stacktrace_target; internal_target = kprobe_stacktrace_internal_target; stacktrace_driver = kprobe_stacktrace_driver; - - do { - rand1 = prandom_u32(); - } while (rand1 <= div_factor); + rand1 = get_random_u32_above(div_factor); return 0; } diff --git a/lib/test_linear_ranges.c b/lib/test_linear_ranges.c index 676e0b8abcdd..c18f9c0f1f25 100644 --- a/lib/test_linear_ranges.c +++ b/lib/test_linear_ranges.c @@ -107,17 +107,8 @@ static const unsigned int range2_vals[] = { RANGE2_MIN, RANGE2_MIN + #define SMALLEST_VAL RANGE1_MIN static struct linear_range testr[] = { - { - .min = RANGE1_MIN, - .min_sel = RANGE1_MIN_SEL, - .max_sel = RANGE1_MAX_SEL, - .step = RANGE1_STEP, - }, { - .min = RANGE2_MIN, - .min_sel = RANGE2_MIN_SEL, - .max_sel = RANGE2_MAX_SEL, - .step = RANGE2_STEP - }, + LINEAR_RANGE(RANGE1_MIN, RANGE1_MIN_SEL, RANGE1_MAX_SEL, RANGE1_STEP), + LINEAR_RANGE(RANGE2_MIN, RANGE2_MIN_SEL, RANGE2_MAX_SEL, RANGE2_STEP), }; static void range_test_get_value(struct kunit *test) diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c index ade7a1ea0c8e..cc5f335f29b5 100644 --- a/lib/test_list_sort.c +++ b/lib/test_list_sort.c @@ -71,7 +71,7 @@ static void list_sort_test(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, el); /* force some equivalencies */ - el->value = prandom_u32() % (TEST_LIST_LEN / 3); + el->value = get_random_u32_below(TEST_LIST_LEN / 3); el->serial = i; el->poison1 = TEST_POISON1; el->poison2 = TEST_POISON2; diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c new file mode 100644 index 000000000000..f425f169ef08 --- /dev/null +++ b/lib/test_maple_tree.c @@ -0,0 +1,2767 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * test_maple_tree.c: Test the maple tree API + * Copyright (c) 2018-2022 Oracle Corporation + * Author: Liam R. Howlett <Liam.Howlett@Oracle.com> + * + * Any tests that only require the interface of the tree. + */ + +#include <linux/maple_tree.h> +#include <linux/module.h> + +#define MTREE_ALLOC_MAX 0x2000000000000Ul +#ifndef CONFIG_DEBUG_MAPLE_TREE +#define CONFIG_DEBUG_MAPLE_TREE +#endif +#define CONFIG_MAPLE_SEARCH +#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31) + +/* #define BENCH_SLOT_STORE */ +/* #define BENCH_NODE_STORE */ +/* #define BENCH_AWALK */ +/* #define BENCH_WALK */ +/* #define BENCH_MT_FOR_EACH */ +/* #define BENCH_FORK */ + +#ifdef __KERNEL__ +#define mt_set_non_kernel(x) do {} while (0) +#define mt_zero_nr_tallocated(x) do {} while (0) +#else +#define cond_resched() do {} while (0) +#endif +static +int mtree_insert_index(struct maple_tree *mt, unsigned long index, gfp_t gfp) +{ + return mtree_insert(mt, index, xa_mk_value(index & LONG_MAX), gfp); +} + +static void mtree_erase_index(struct maple_tree *mt, unsigned long index) +{ + MT_BUG_ON(mt, mtree_erase(mt, index) != xa_mk_value(index & LONG_MAX)); + MT_BUG_ON(mt, mtree_load(mt, index) != NULL); +} + +static int mtree_test_insert(struct maple_tree *mt, unsigned long index, + void *ptr) +{ + return mtree_insert(mt, index, ptr, GFP_KERNEL); +} + +static int mtree_test_store_range(struct maple_tree *mt, unsigned long start, + unsigned long end, void *ptr) +{ + return mtree_store_range(mt, start, end, ptr, GFP_KERNEL); +} + +static int mtree_test_store(struct maple_tree *mt, unsigned long start, + void *ptr) +{ + return mtree_test_store_range(mt, start, start, ptr); +} + +static int mtree_test_insert_range(struct maple_tree *mt, unsigned long start, + unsigned long end, void *ptr) +{ + return mtree_insert_range(mt, start, end, ptr, GFP_KERNEL); +} + +static void *mtree_test_load(struct maple_tree *mt, unsigned long index) +{ + return mtree_load(mt, index); +} + +static void *mtree_test_erase(struct maple_tree *mt, unsigned long index) +{ + return mtree_erase(mt, index); +} + +#if defined(CONFIG_64BIT) +static noinline void check_mtree_alloc_range(struct maple_tree *mt, + unsigned long start, unsigned long end, unsigned long size, + unsigned long expected, int eret, void *ptr) +{ + + unsigned long result = expected + 1; + int ret; + + ret = mtree_alloc_range(mt, &result, ptr, size, start, end, + GFP_KERNEL); + MT_BUG_ON(mt, ret != eret); + if (ret) + return; + + MT_BUG_ON(mt, result != expected); +} + +static noinline void check_mtree_alloc_rrange(struct maple_tree *mt, + unsigned long start, unsigned long end, unsigned long size, + unsigned long expected, int eret, void *ptr) +{ + + unsigned long result = expected + 1; + int ret; + + ret = mtree_alloc_rrange(mt, &result, ptr, size, start, end - 1, + GFP_KERNEL); + MT_BUG_ON(mt, ret != eret); + if (ret) + return; + + MT_BUG_ON(mt, result != expected); +} +#endif + +static noinline void check_load(struct maple_tree *mt, unsigned long index, + void *ptr) +{ + void *ret = mtree_test_load(mt, index); + + if (ret != ptr) + pr_err("Load %lu returned %p expect %p\n", index, ret, ptr); + MT_BUG_ON(mt, ret != ptr); +} + +static noinline void check_store_range(struct maple_tree *mt, + unsigned long start, unsigned long end, void *ptr, int expected) +{ + int ret = -EINVAL; + unsigned long i; + + ret = mtree_test_store_range(mt, start, end, ptr); + MT_BUG_ON(mt, ret != expected); + + if (ret) + return; + + for (i = start; i <= end; i++) + check_load(mt, i, ptr); +} + +static noinline void check_insert_range(struct maple_tree *mt, + unsigned long start, unsigned long end, void *ptr, int expected) +{ + int ret = -EINVAL; + unsigned long i; + + ret = mtree_test_insert_range(mt, start, end, ptr); + MT_BUG_ON(mt, ret != expected); + + if (ret) + return; + + for (i = start; i <= end; i++) + check_load(mt, i, ptr); +} + +static noinline void check_insert(struct maple_tree *mt, unsigned long index, + void *ptr) +{ + int ret = -EINVAL; + + ret = mtree_test_insert(mt, index, ptr); + MT_BUG_ON(mt, ret != 0); +} + +static noinline void check_dup_insert(struct maple_tree *mt, + unsigned long index, void *ptr) +{ + int ret = -EINVAL; + + ret = mtree_test_insert(mt, index, ptr); + MT_BUG_ON(mt, ret != -EEXIST); +} + + +static noinline +void check_index_load(struct maple_tree *mt, unsigned long index) +{ + return check_load(mt, index, xa_mk_value(index & LONG_MAX)); +} + +static inline int not_empty(struct maple_node *node) +{ + int i; + + if (node->parent) + return 1; + + for (i = 0; i < ARRAY_SIZE(node->slot); i++) + if (node->slot[i]) + return 1; + + return 0; +} + + +static noinline void check_rev_seq(struct maple_tree *mt, unsigned long max, + bool verbose) +{ + unsigned long i = max, j; + + MT_BUG_ON(mt, !mtree_empty(mt)); + + mt_zero_nr_tallocated(); + while (i) { + MT_BUG_ON(mt, mtree_insert_index(mt, i, GFP_KERNEL)); + for (j = i; j <= max; j++) + check_index_load(mt, j); + + check_load(mt, i - 1, NULL); + mt_set_in_rcu(mt); + MT_BUG_ON(mt, !mt_height(mt)); + mt_clear_in_rcu(mt); + MT_BUG_ON(mt, !mt_height(mt)); + i--; + } + check_load(mt, max + 1, NULL); + +#ifndef __KERNEL__ + if (verbose) { + rcu_barrier(); + mt_dump(mt); + pr_info(" %s test of 0-%lu %luK in %d active (%d total)\n", + __func__, max, mt_get_alloc_size()/1024, mt_nr_allocated(), + mt_nr_tallocated()); + } +#endif +} + +static noinline void check_seq(struct maple_tree *mt, unsigned long max, + bool verbose) +{ + unsigned long i, j; + + MT_BUG_ON(mt, !mtree_empty(mt)); + + mt_zero_nr_tallocated(); + for (i = 0; i <= max; i++) { + MT_BUG_ON(mt, mtree_insert_index(mt, i, GFP_KERNEL)); + for (j = 0; j <= i; j++) + check_index_load(mt, j); + + if (i) + MT_BUG_ON(mt, !mt_height(mt)); + check_load(mt, i + 1, NULL); + } + +#ifndef __KERNEL__ + if (verbose) { + rcu_barrier(); + mt_dump(mt); + pr_info(" seq test of 0-%lu %luK in %d active (%d total)\n", + max, mt_get_alloc_size()/1024, mt_nr_allocated(), + mt_nr_tallocated()); + } +#endif +} + +static noinline void check_lb_not_empty(struct maple_tree *mt) +{ + unsigned long i, j; + unsigned long huge = 4000UL * 1000 * 1000; + + + i = huge; + while (i > 4096) { + check_insert(mt, i, (void *) i); + for (j = huge; j >= i; j /= 2) { + check_load(mt, j-1, NULL); + check_load(mt, j, (void *) j); + check_load(mt, j+1, NULL); + } + i /= 2; + } + mtree_destroy(mt); +} + +static noinline void check_lower_bound_split(struct maple_tree *mt) +{ + MT_BUG_ON(mt, !mtree_empty(mt)); + check_lb_not_empty(mt); +} + +static noinline void check_upper_bound_split(struct maple_tree *mt) +{ + unsigned long i, j; + unsigned long huge; + + MT_BUG_ON(mt, !mtree_empty(mt)); + + if (MAPLE_32BIT) + huge = 2147483647UL; + else + huge = 4000UL * 1000 * 1000; + + i = 4096; + while (i < huge) { + check_insert(mt, i, (void *) i); + for (j = i; j >= huge; j *= 2) { + check_load(mt, j-1, NULL); + check_load(mt, j, (void *) j); + check_load(mt, j+1, NULL); + } + i *= 2; + } + mtree_destroy(mt); +} + +static noinline void check_mid_split(struct maple_tree *mt) +{ + unsigned long huge = 8000UL * 1000 * 1000; + + check_insert(mt, huge, (void *) huge); + check_insert(mt, 0, xa_mk_value(0)); + check_lb_not_empty(mt); +} + +static noinline void check_rev_find(struct maple_tree *mt) +{ + int i, nr_entries = 200; + void *val; + MA_STATE(mas, mt, 0, 0); + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i*10, i*10 + 5, + xa_mk_value(i), GFP_KERNEL); + + rcu_read_lock(); + mas_set(&mas, 1000); + val = mas_find_rev(&mas, 1000); + MT_BUG_ON(mt, val != xa_mk_value(100)); + val = mas_find_rev(&mas, 1000); + MT_BUG_ON(mt, val != NULL); + + mas_set(&mas, 999); + val = mas_find_rev(&mas, 997); + MT_BUG_ON(mt, val != NULL); + + mas_set(&mas, 1000); + val = mas_find_rev(&mas, 900); + MT_BUG_ON(mt, val != xa_mk_value(100)); + val = mas_find_rev(&mas, 900); + MT_BUG_ON(mt, val != xa_mk_value(99)); + + mas_set(&mas, 20); + val = mas_find_rev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(2)); + val = mas_find_rev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(1)); + val = mas_find_rev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(0)); + val = mas_find_rev(&mas, 0); + MT_BUG_ON(mt, val != NULL); + rcu_read_unlock(); +} + +static noinline void check_find(struct maple_tree *mt) +{ + unsigned long val = 0; + unsigned long count; + unsigned long max; + unsigned long top; + unsigned long last = 0, index = 0; + void *entry, *entry2; + + MA_STATE(mas, mt, 0, 0); + + /* Insert 0. */ + MT_BUG_ON(mt, mtree_insert_index(mt, val++, GFP_KERNEL)); + +#if defined(CONFIG_64BIT) + top = 4398046511104UL; +#else + top = ULONG_MAX; +#endif + + if (MAPLE_32BIT) { + count = 15; + } else { + count = 20; + } + + for (int i = 0; i <= count; i++) { + if (val != 64) + MT_BUG_ON(mt, mtree_insert_index(mt, val, GFP_KERNEL)); + else + MT_BUG_ON(mt, mtree_insert(mt, val, + XA_ZERO_ENTRY, GFP_KERNEL)); + + val <<= 2; + } + + val = 0; + mas_set(&mas, val); + mas_lock(&mas); + while ((entry = mas_find(&mas, 268435456)) != NULL) { + if (val != 64) + MT_BUG_ON(mt, xa_mk_value(val) != entry); + else + MT_BUG_ON(mt, entry != XA_ZERO_ENTRY); + + val <<= 2; + /* For zero check. */ + if (!val) + val = 1; + } + mas_unlock(&mas); + + val = 0; + mas_set(&mas, val); + mas_lock(&mas); + mas_for_each(&mas, entry, ULONG_MAX) { + if (val != 64) + MT_BUG_ON(mt, xa_mk_value(val) != entry); + else + MT_BUG_ON(mt, entry != XA_ZERO_ENTRY); + val <<= 2; + /* For zero check. */ + if (!val) + val = 1; + } + mas_unlock(&mas); + + /* Test mas_pause */ + val = 0; + mas_set(&mas, val); + mas_lock(&mas); + mas_for_each(&mas, entry, ULONG_MAX) { + if (val != 64) + MT_BUG_ON(mt, xa_mk_value(val) != entry); + else + MT_BUG_ON(mt, entry != XA_ZERO_ENTRY); + val <<= 2; + /* For zero check. */ + if (!val) + val = 1; + + mas_pause(&mas); + mas_unlock(&mas); + mas_lock(&mas); + } + mas_unlock(&mas); + + val = 0; + max = 300; /* A value big enough to include XA_ZERO_ENTRY at 64. */ + mt_for_each(mt, entry, index, max) { + MT_BUG_ON(mt, xa_mk_value(val) != entry); + val <<= 2; + if (val == 64) /* Skip zero entry. */ + val <<= 2; + /* For zero check. */ + if (!val) + val = 1; + } + + val = 0; + max = 0; + index = 0; + MT_BUG_ON(mt, mtree_insert_index(mt, ULONG_MAX, GFP_KERNEL)); + mt_for_each(mt, entry, index, ULONG_MAX) { + if (val == top) + MT_BUG_ON(mt, entry != xa_mk_value(LONG_MAX)); + else + MT_BUG_ON(mt, xa_mk_value(val) != entry); + + /* Workaround for 32bit */ + if ((val << 2) < val) + val = ULONG_MAX; + else + val <<= 2; + + if (val == 64) /* Skip zero entry. */ + val <<= 2; + /* For zero check. */ + if (!val) + val = 1; + max++; + MT_BUG_ON(mt, max > 25); + } + mtree_erase_index(mt, ULONG_MAX); + + mas_reset(&mas); + index = 17; + entry = mt_find(mt, &index, 512); + MT_BUG_ON(mt, xa_mk_value(256) != entry); + + mas_reset(&mas); + index = 17; + entry = mt_find(mt, &index, 20); + MT_BUG_ON(mt, entry != NULL); + + + /* Range check.. */ + /* Insert ULONG_MAX */ + MT_BUG_ON(mt, mtree_insert_index(mt, ULONG_MAX, GFP_KERNEL)); + + val = 0; + mas_set(&mas, 0); + mas_lock(&mas); + mas_for_each(&mas, entry, ULONG_MAX) { + if (val == 64) + MT_BUG_ON(mt, entry != XA_ZERO_ENTRY); + else if (val == top) + MT_BUG_ON(mt, entry != xa_mk_value(LONG_MAX)); + else + MT_BUG_ON(mt, xa_mk_value(val) != entry); + + /* Workaround for 32bit */ + if ((val << 2) < val) + val = ULONG_MAX; + else + val <<= 2; + + /* For zero check. */ + if (!val) + val = 1; + mas_pause(&mas); + mas_unlock(&mas); + mas_lock(&mas); + } + mas_unlock(&mas); + + mas_set(&mas, 1048576); + mas_lock(&mas); + entry = mas_find(&mas, 1048576); + mas_unlock(&mas); + MT_BUG_ON(mas.tree, entry == NULL); + + /* + * Find last value. + * 1. get the expected value, leveraging the existence of an end entry + * 2. delete end entry + * 3. find the last value but searching for ULONG_MAX and then using + * prev + */ + /* First, get the expected result. */ + mas_lock(&mas); + mas_reset(&mas); + mas.index = ULONG_MAX; /* start at max.. */ + entry = mas_find(&mas, ULONG_MAX); + entry = mas_prev(&mas, 0); + index = mas.index; + last = mas.last; + + /* Erase the last entry. */ + mas_reset(&mas); + mas.index = ULONG_MAX; + mas.last = ULONG_MAX; + mas_erase(&mas); + + /* Get the previous value from MAS_START */ + mas_reset(&mas); + entry2 = mas_prev(&mas, 0); + + /* Check results. */ + MT_BUG_ON(mt, entry != entry2); + MT_BUG_ON(mt, index != mas.index); + MT_BUG_ON(mt, last != mas.last); + + + mas.node = MAS_NONE; + mas.index = ULONG_MAX; + mas.last = ULONG_MAX; + entry2 = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != entry2); + + mas_set(&mas, 0); + MT_BUG_ON(mt, mas_prev(&mas, 0) != NULL); + + mas_unlock(&mas); + mtree_destroy(mt); +} + +static noinline void check_find_2(struct maple_tree *mt) +{ + unsigned long i, j; + void *entry; + + MA_STATE(mas, mt, 0, 0); + rcu_read_lock(); + mas_for_each(&mas, entry, ULONG_MAX) + MT_BUG_ON(mt, true); + rcu_read_unlock(); + + for (i = 0; i < 256; i++) { + mtree_insert_index(mt, i, GFP_KERNEL); + j = 0; + mas_set(&mas, 0); + rcu_read_lock(); + mas_for_each(&mas, entry, ULONG_MAX) { + MT_BUG_ON(mt, entry != xa_mk_value(j)); + j++; + } + rcu_read_unlock(); + MT_BUG_ON(mt, j != i + 1); + } + + for (i = 0; i < 256; i++) { + mtree_erase_index(mt, i); + j = i + 1; + mas_set(&mas, 0); + rcu_read_lock(); + mas_for_each(&mas, entry, ULONG_MAX) { + if (xa_is_zero(entry)) + continue; + + MT_BUG_ON(mt, entry != xa_mk_value(j)); + j++; + } + rcu_read_unlock(); + MT_BUG_ON(mt, j != 256); + } + + /*MT_BUG_ON(mt, !mtree_empty(mt)); */ +} + + +#if defined(CONFIG_64BIT) +static noinline void check_alloc_rev_range(struct maple_tree *mt) +{ + /* + * Generated by: + * cat /proc/self/maps | awk '{print $1}'| + * awk -F "-" '{printf "0x%s, 0x%s, ", $1, $2}' + */ + + unsigned long range[] = { + /* Inclusive , Exclusive. */ + 0x565234af2000, 0x565234af4000, + 0x565234af4000, 0x565234af9000, + 0x565234af9000, 0x565234afb000, + 0x565234afc000, 0x565234afd000, + 0x565234afd000, 0x565234afe000, + 0x565235def000, 0x565235e10000, + 0x7f36d4bfd000, 0x7f36d4ee2000, + 0x7f36d4ee2000, 0x7f36d4f04000, + 0x7f36d4f04000, 0x7f36d504c000, + 0x7f36d504c000, 0x7f36d5098000, + 0x7f36d5098000, 0x7f36d5099000, + 0x7f36d5099000, 0x7f36d509d000, + 0x7f36d509d000, 0x7f36d509f000, + 0x7f36d509f000, 0x7f36d50a5000, + 0x7f36d50b9000, 0x7f36d50db000, + 0x7f36d50db000, 0x7f36d50dc000, + 0x7f36d50dc000, 0x7f36d50fa000, + 0x7f36d50fa000, 0x7f36d5102000, + 0x7f36d5102000, 0x7f36d5103000, + 0x7f36d5103000, 0x7f36d5104000, + 0x7f36d5104000, 0x7f36d5105000, + 0x7fff5876b000, 0x7fff5878d000, + 0x7fff5878e000, 0x7fff58791000, + 0x7fff58791000, 0x7fff58793000, + }; + + unsigned long holes[] = { + /* + * Note: start of hole is INCLUSIVE + * end of hole is EXCLUSIVE + * (opposite of the above table.) + * Start of hole, end of hole, size of hole (+1) + */ + 0x565234afb000, 0x565234afc000, 0x1000, + 0x565234afe000, 0x565235def000, 0x12F1000, + 0x565235e10000, 0x7f36d4bfd000, 0x28E49EDED000, + }; + + /* + * req_range consists of 4 values. + * 1. min index + * 2. max index + * 3. size + * 4. number that should be returned. + * 5. return value + */ + unsigned long req_range[] = { + 0x565234af9000, /* Min */ + 0x7fff58791000, /* Max */ + 0x1000, /* Size */ + 0x7fff5878d << 12, /* First rev hole of size 0x1000 */ + 0, /* Return value success. */ + + 0x0, /* Min */ + 0x565234AF1 << 12, /* Max */ + 0x3000, /* Size */ + 0x565234AEE << 12, /* max - 3. */ + 0, /* Return value success. */ + + 0x0, /* Min */ + -1, /* Max */ + 0x1000, /* Size */ + 562949953421311 << 12,/* First rev hole of size 0x1000 */ + 0, /* Return value success. */ + + 0x0, /* Min */ + 0x7F36D510A << 12, /* Max */ + 0x4000, /* Size */ + 0x7F36D5106 << 12, /* First rev hole of size 0x4000 */ + 0, /* Return value success. */ + + /* Ascend test. */ + 0x0, + 34148798629 << 12, + 19 << 12, + 34148797418 << 12, + 0x0, + + /* Too big test. */ + 0x0, + 18446744073709551615UL, + 562915594369134UL << 12, + 0x0, + -EBUSY, + + }; + + int i, range_count = ARRAY_SIZE(range); + int req_range_count = ARRAY_SIZE(req_range); + unsigned long min = 0; + + MA_STATE(mas, mt, 0, 0); + + mtree_store_range(mt, MTREE_ALLOC_MAX, ULONG_MAX, XA_ZERO_ENTRY, + GFP_KERNEL); +#define DEBUG_REV_RANGE 0 + for (i = 0; i < range_count; i += 2) { + /* Inclusive, Inclusive (with the -1) */ + +#if DEBUG_REV_RANGE + pr_debug("\t%s: Insert %lu-%lu\n", __func__, range[i] >> 12, + (range[i + 1] >> 12) - 1); +#endif + check_insert_range(mt, range[i] >> 12, (range[i + 1] >> 12) - 1, + xa_mk_value(range[i] >> 12), 0); + mt_validate(mt); + } + + + mas_lock(&mas); + for (i = 0; i < ARRAY_SIZE(holes); i += 3) { +#if DEBUG_REV_RANGE + pr_debug("Search from %lu-%lu for gap %lu should be at %lu\n", + min, holes[i+1]>>12, holes[i+2]>>12, + holes[i] >> 12); +#endif + MT_BUG_ON(mt, mas_empty_area_rev(&mas, min, + holes[i+1] >> 12, + holes[i+2] >> 12)); +#if DEBUG_REV_RANGE + pr_debug("Found %lu %lu\n", mas.index, mas.last); + pr_debug("gap %lu %lu\n", (holes[i] >> 12), + (holes[i+1] >> 12)); +#endif + MT_BUG_ON(mt, mas.last + 1 != (holes[i+1] >> 12)); + MT_BUG_ON(mt, mas.index != (holes[i+1] >> 12) - (holes[i+2] >> 12)); + min = holes[i+1] >> 12; + mas_reset(&mas); + } + + mas_unlock(&mas); + for (i = 0; i < req_range_count; i += 5) { +#if DEBUG_REV_RANGE + pr_debug("\tReverse request between %lu-%lu size %lu, should get %lu\n", + req_range[i] >> 12, + (req_range[i + 1] >> 12) - 1, + req_range[i+2] >> 12, + req_range[i+3] >> 12); +#endif + check_mtree_alloc_rrange(mt, + req_range[i] >> 12, /* start */ + req_range[i+1] >> 12, /* end */ + req_range[i+2] >> 12, /* size */ + req_range[i+3] >> 12, /* expected address */ + req_range[i+4], /* expected return */ + xa_mk_value(req_range[i] >> 12)); /* pointer */ + mt_validate(mt); + } + + mt_set_non_kernel(1); + mtree_erase(mt, 34148798727); /* create a deleted range. */ + check_mtree_alloc_rrange(mt, 0, 34359052173, 210253414, + 34148798725, 0, mt); + + mtree_destroy(mt); +} + +static noinline void check_alloc_range(struct maple_tree *mt) +{ + /* + * Generated by: + * cat /proc/self/maps|awk '{print $1}'| + * awk -F "-" '{printf "0x%s, 0x%s, ", $1, $2}' + */ + + unsigned long range[] = { + /* Inclusive , Exclusive. */ + 0x565234af2000, 0x565234af4000, + 0x565234af4000, 0x565234af9000, + 0x565234af9000, 0x565234afb000, + 0x565234afc000, 0x565234afd000, + 0x565234afd000, 0x565234afe000, + 0x565235def000, 0x565235e10000, + 0x7f36d4bfd000, 0x7f36d4ee2000, + 0x7f36d4ee2000, 0x7f36d4f04000, + 0x7f36d4f04000, 0x7f36d504c000, + 0x7f36d504c000, 0x7f36d5098000, + 0x7f36d5098000, 0x7f36d5099000, + 0x7f36d5099000, 0x7f36d509d000, + 0x7f36d509d000, 0x7f36d509f000, + 0x7f36d509f000, 0x7f36d50a5000, + 0x7f36d50b9000, 0x7f36d50db000, + 0x7f36d50db000, 0x7f36d50dc000, + 0x7f36d50dc000, 0x7f36d50fa000, + 0x7f36d50fa000, 0x7f36d5102000, + 0x7f36d5102000, 0x7f36d5103000, + 0x7f36d5103000, 0x7f36d5104000, + 0x7f36d5104000, 0x7f36d5105000, + 0x7fff5876b000, 0x7fff5878d000, + 0x7fff5878e000, 0x7fff58791000, + 0x7fff58791000, 0x7fff58793000, + }; + unsigned long holes[] = { + /* Start of hole, end of hole, size of hole (+1) */ + 0x565234afb000, 0x565234afc000, 0x1000, + 0x565234afe000, 0x565235def000, 0x12F1000, + 0x565235e10000, 0x7f36d4bfd000, 0x28E49EDED000, + }; + + /* + * req_range consists of 4 values. + * 1. min index + * 2. max index + * 3. size + * 4. number that should be returned. + * 5. return value + */ + unsigned long req_range[] = { + 0x565234af9000, /* Min */ + 0x7fff58791000, /* Max */ + 0x1000, /* Size */ + 0x565234afb000, /* First hole in our data of size 1000. */ + 0, /* Return value success. */ + + 0x0, /* Min */ + 0x7fff58791000, /* Max */ + 0x1F00, /* Size */ + 0x0, /* First hole in our data of size 2000. */ + 0, /* Return value success. */ + + /* Test ascend. */ + 34148797436 << 12, /* Min */ + 0x7fff587AF000, /* Max */ + 0x3000, /* Size */ + 34148798629 << 12, /* Expected location */ + 0, /* Return value success. */ + + /* Test failing. */ + 34148798623 << 12, /* Min */ + 34148798683 << 12, /* Max */ + 0x15000, /* Size */ + 0, /* Expected location */ + -EBUSY, /* Return value failed. */ + + /* Test filling entire gap. */ + 34148798623 << 12, /* Min */ + 0x7fff587AF000, /* Max */ + 0x10000, /* Size */ + 34148798632 << 12, /* Expected location */ + 0, /* Return value success. */ + + /* Test walking off the end of root. */ + 0, /* Min */ + -1, /* Max */ + -1, /* Size */ + 0, /* Expected location */ + -EBUSY, /* Return value failure. */ + + /* Test looking for too large a hole across entire range. */ + 0, /* Min */ + -1, /* Max */ + 4503599618982063UL << 12, /* Size */ + 34359052178 << 12, /* Expected location */ + -EBUSY, /* Return failure. */ + }; + int i, range_count = ARRAY_SIZE(range); + int req_range_count = ARRAY_SIZE(req_range); + unsigned long min = 0x565234af2000; + MA_STATE(mas, mt, 0, 0); + + mtree_store_range(mt, MTREE_ALLOC_MAX, ULONG_MAX, XA_ZERO_ENTRY, + GFP_KERNEL); + for (i = 0; i < range_count; i += 2) { +#define DEBUG_ALLOC_RANGE 0 +#if DEBUG_ALLOC_RANGE + pr_debug("\tInsert %lu-%lu\n", range[i] >> 12, + (range[i + 1] >> 12) - 1); + mt_dump(mt); +#endif + check_insert_range(mt, range[i] >> 12, (range[i + 1] >> 12) - 1, + xa_mk_value(range[i] >> 12), 0); + mt_validate(mt); + } + + + + mas_lock(&mas); + for (i = 0; i < ARRAY_SIZE(holes); i += 3) { + +#if DEBUG_ALLOC_RANGE + pr_debug("\tGet empty %lu-%lu size %lu (%lx-%lx)\n", min >> 12, + holes[i+1] >> 12, holes[i+2] >> 12, + min, holes[i+1]); +#endif + MT_BUG_ON(mt, mas_empty_area(&mas, min >> 12, + holes[i+1] >> 12, + holes[i+2] >> 12)); + MT_BUG_ON(mt, mas.index != holes[i] >> 12); + min = holes[i+1]; + mas_reset(&mas); + } + mas_unlock(&mas); + for (i = 0; i < req_range_count; i += 5) { +#if DEBUG_ALLOC_RANGE + pr_debug("\tTest %d: %lu-%lu size %lu expected %lu (%lu-%lu)\n", + i/5, req_range[i] >> 12, req_range[i + 1] >> 12, + req_range[i + 2] >> 12, req_range[i + 3] >> 12, + req_range[i], req_range[i+1]); +#endif + check_mtree_alloc_range(mt, + req_range[i] >> 12, /* start */ + req_range[i+1] >> 12, /* end */ + req_range[i+2] >> 12, /* size */ + req_range[i+3] >> 12, /* expected address */ + req_range[i+4], /* expected return */ + xa_mk_value(req_range[i] >> 12)); /* pointer */ + mt_validate(mt); +#if DEBUG_ALLOC_RANGE + mt_dump(mt); +#endif + } + + mtree_destroy(mt); +} +#endif + +static noinline void check_ranges(struct maple_tree *mt) +{ + int i, val, val2; + unsigned long r[] = { + 10, 15, + 20, 25, + 17, 22, /* Overlaps previous range. */ + 9, 1000, /* Huge. */ + 100, 200, + 45, 168, + 118, 128, + }; + + MT_BUG_ON(mt, !mtree_empty(mt)); + check_insert_range(mt, r[0], r[1], xa_mk_value(r[0]), 0); + check_insert_range(mt, r[2], r[3], xa_mk_value(r[2]), 0); + check_insert_range(mt, r[4], r[5], xa_mk_value(r[4]), -EEXIST); + MT_BUG_ON(mt, !mt_height(mt)); + /* Store */ + check_store_range(mt, r[4], r[5], xa_mk_value(r[4]), 0); + check_store_range(mt, r[6], r[7], xa_mk_value(r[6]), 0); + check_store_range(mt, r[8], r[9], xa_mk_value(r[8]), 0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + MT_BUG_ON(mt, mt_height(mt)); + + check_seq(mt, 50, false); + mt_set_non_kernel(4); + check_store_range(mt, 5, 47, xa_mk_value(47), 0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + /* Create tree of 1-100 */ + check_seq(mt, 100, false); + /* Store 45-168 */ + mt_set_non_kernel(10); + check_store_range(mt, r[10], r[11], xa_mk_value(r[10]), 0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + /* Create tree of 1-200 */ + check_seq(mt, 200, false); + /* Store 45-168 */ + check_store_range(mt, r[10], r[11], xa_mk_value(r[10]), 0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + check_seq(mt, 30, false); + check_store_range(mt, 6, 18, xa_mk_value(6), 0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + /* Overwrite across multiple levels. */ + /* Create tree of 1-400 */ + check_seq(mt, 400, false); + mt_set_non_kernel(50); + /* Store 118-128 */ + check_store_range(mt, r[12], r[13], xa_mk_value(r[12]), 0); + mt_set_non_kernel(50); + mtree_test_erase(mt, 140); + mtree_test_erase(mt, 141); + mtree_test_erase(mt, 142); + mtree_test_erase(mt, 143); + mtree_test_erase(mt, 130); + mtree_test_erase(mt, 131); + mtree_test_erase(mt, 132); + mtree_test_erase(mt, 133); + mtree_test_erase(mt, 134); + mtree_test_erase(mt, 135); + check_load(mt, r[12], xa_mk_value(r[12])); + check_load(mt, r[13], xa_mk_value(r[12])); + check_load(mt, r[13] - 1, xa_mk_value(r[12])); + check_load(mt, r[13] + 1, xa_mk_value(r[13] + 1)); + check_load(mt, 135, NULL); + check_load(mt, 140, NULL); + mt_set_non_kernel(0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + + + /* Overwrite multiple levels at the end of the tree (slot 7) */ + mt_set_non_kernel(50); + check_seq(mt, 400, false); + check_store_range(mt, 353, 361, xa_mk_value(353), 0); + check_store_range(mt, 347, 352, xa_mk_value(347), 0); + + check_load(mt, 346, xa_mk_value(346)); + for (i = 347; i <= 352; i++) + check_load(mt, i, xa_mk_value(347)); + for (i = 353; i <= 361; i++) + check_load(mt, i, xa_mk_value(353)); + check_load(mt, 362, xa_mk_value(362)); + mt_set_non_kernel(0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + mt_set_non_kernel(50); + check_seq(mt, 400, false); + check_store_range(mt, 352, 364, NULL, 0); + check_store_range(mt, 351, 363, xa_mk_value(352), 0); + check_load(mt, 350, xa_mk_value(350)); + check_load(mt, 351, xa_mk_value(352)); + for (i = 352; i <= 363; i++) + check_load(mt, i, xa_mk_value(352)); + check_load(mt, 364, NULL); + check_load(mt, 365, xa_mk_value(365)); + mt_set_non_kernel(0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + mt_set_non_kernel(5); + check_seq(mt, 400, false); + check_store_range(mt, 352, 364, NULL, 0); + check_store_range(mt, 351, 364, xa_mk_value(352), 0); + check_load(mt, 350, xa_mk_value(350)); + check_load(mt, 351, xa_mk_value(352)); + for (i = 352; i <= 364; i++) + check_load(mt, i, xa_mk_value(352)); + check_load(mt, 365, xa_mk_value(365)); + mt_set_non_kernel(0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + + mt_set_non_kernel(50); + check_seq(mt, 400, false); + check_store_range(mt, 362, 367, xa_mk_value(362), 0); + check_store_range(mt, 353, 361, xa_mk_value(353), 0); + mt_set_non_kernel(0); + mt_validate(mt); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + /* + * Interesting cases: + * 1. Overwrite the end of a node and end in the first entry of the next + * node. + * 2. Split a single range + * 3. Overwrite the start of a range + * 4. Overwrite the end of a range + * 5. Overwrite the entire range + * 6. Overwrite a range that causes multiple parent nodes to be + * combined + * 7. Overwrite a range that causes multiple parent nodes and part of + * root to be combined + * 8. Overwrite the whole tree + * 9. Try to overwrite the zero entry of an alloc tree. + * 10. Write a range larger than a nodes current pivot + */ + + mt_set_non_kernel(50); + for (i = 0; i <= 500; i++) { + val = i*5; + val2 = (i+1)*5; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + } + check_store_range(mt, 2400, 2400, xa_mk_value(2400), 0); + check_store_range(mt, 2411, 2411, xa_mk_value(2411), 0); + check_store_range(mt, 2412, 2412, xa_mk_value(2412), 0); + check_store_range(mt, 2396, 2400, xa_mk_value(4052020), 0); + check_store_range(mt, 2402, 2402, xa_mk_value(2402), 0); + mtree_destroy(mt); + mt_set_non_kernel(0); + + mt_set_non_kernel(50); + for (i = 0; i <= 500; i++) { + val = i*5; + val2 = (i+1)*5; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + } + check_store_range(mt, 2422, 2422, xa_mk_value(2422), 0); + check_store_range(mt, 2424, 2424, xa_mk_value(2424), 0); + check_store_range(mt, 2425, 2425, xa_mk_value(2), 0); + check_store_range(mt, 2460, 2470, NULL, 0); + check_store_range(mt, 2435, 2460, xa_mk_value(2435), 0); + check_store_range(mt, 2461, 2470, xa_mk_value(2461), 0); + mt_set_non_kernel(0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + /* Test rebalance gaps */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_set_non_kernel(50); + for (i = 0; i <= 50; i++) { + val = i*10; + val2 = (i+1)*10; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + } + check_store_range(mt, 161, 161, xa_mk_value(161), 0); + check_store_range(mt, 162, 162, xa_mk_value(162), 0); + check_store_range(mt, 163, 163, xa_mk_value(163), 0); + check_store_range(mt, 240, 249, NULL, 0); + mtree_erase(mt, 200); + mtree_erase(mt, 210); + mtree_erase(mt, 220); + mtree_erase(mt, 230); + mt_set_non_kernel(0); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i <= 500; i++) { + val = i*10; + val2 = (i+1)*10; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + } + check_store_range(mt, 4600, 4959, xa_mk_value(1), 0); + mt_validate(mt); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i <= 500; i++) { + val = i*10; + val2 = (i+1)*10; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + } + check_store_range(mt, 4811, 4811, xa_mk_value(4811), 0); + check_store_range(mt, 4812, 4812, xa_mk_value(4812), 0); + check_store_range(mt, 4861, 4861, xa_mk_value(4861), 0); + check_store_range(mt, 4862, 4862, xa_mk_value(4862), 0); + check_store_range(mt, 4842, 4849, NULL, 0); + mt_validate(mt); + MT_BUG_ON(mt, !mt_height(mt)); + mtree_destroy(mt); + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i <= 1300; i++) { + val = i*10; + val2 = (i+1)*10; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + MT_BUG_ON(mt, mt_height(mt) >= 4); + } + /* Cause a 3 child split all the way up the tree. */ + for (i = 5; i < 215; i += 10) + check_store_range(mt, 11450 + i, 11450 + i + 1, NULL, 0); + for (i = 5; i < 65; i += 10) + check_store_range(mt, 11770 + i, 11770 + i + 1, NULL, 0); + + MT_BUG_ON(mt, mt_height(mt) >= 4); + for (i = 5; i < 45; i += 10) + check_store_range(mt, 11700 + i, 11700 + i + 1, NULL, 0); + if (!MAPLE_32BIT) + MT_BUG_ON(mt, mt_height(mt) < 4); + mtree_destroy(mt); + + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i <= 1200; i++) { + val = i*10; + val2 = (i+1)*10; + check_store_range(mt, val, val2, xa_mk_value(val), 0); + MT_BUG_ON(mt, mt_height(mt) >= 4); + } + /* Fill parents and leaves before split. */ + for (i = 5; i < 455; i += 10) + check_store_range(mt, 7800 + i, 7800 + i + 1, NULL, 0); + + for (i = 1; i < 16; i++) + check_store_range(mt, 8185 + i, 8185 + i + 1, + xa_mk_value(8185+i), 0); + MT_BUG_ON(mt, mt_height(mt) >= 4); + /* triple split across multiple levels. */ + check_store_range(mt, 8184, 8184, xa_mk_value(8184), 0); + if (!MAPLE_32BIT) + MT_BUG_ON(mt, mt_height(mt) != 4); +} + +static noinline void check_next_entry(struct maple_tree *mt) +{ + void *entry = NULL; + unsigned long limit = 30, i = 0; + MA_STATE(mas, mt, i, i); + + MT_BUG_ON(mt, !mtree_empty(mt)); + + check_seq(mt, limit, false); + rcu_read_lock(); + + /* Check the first one and get ma_state in the correct state. */ + MT_BUG_ON(mt, mas_walk(&mas) != xa_mk_value(i++)); + for ( ; i <= limit + 1; i++) { + entry = mas_next(&mas, limit); + if (i > limit) + MT_BUG_ON(mt, entry != NULL); + else + MT_BUG_ON(mt, xa_mk_value(i) != entry); + } + rcu_read_unlock(); + mtree_destroy(mt); +} + +static noinline void check_prev_entry(struct maple_tree *mt) +{ + unsigned long index = 16; + void *value; + int i; + + MA_STATE(mas, mt, index, index); + + MT_BUG_ON(mt, !mtree_empty(mt)); + check_seq(mt, 30, false); + + rcu_read_lock(); + value = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, value != xa_mk_value(index)); + value = mas_prev(&mas, 0); + MT_BUG_ON(mt, value != xa_mk_value(index - 1)); + rcu_read_unlock(); + mtree_destroy(mt); + + /* Check limits on prev */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + for (i = 0; i <= index; i++) { + mas_set_range(&mas, i*10, i*10+5); + mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL); + } + + mas_set(&mas, 20); + value = mas_walk(&mas); + MT_BUG_ON(mt, value != xa_mk_value(2)); + + value = mas_prev(&mas, 19); + MT_BUG_ON(mt, value != NULL); + + mas_set(&mas, 80); + value = mas_walk(&mas); + MT_BUG_ON(mt, value != xa_mk_value(8)); + + value = mas_prev(&mas, 76); + MT_BUG_ON(mt, value != NULL); + + mas_unlock(&mas); +} + +static noinline void check_root_expand(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, 0); + void *ptr; + + + mas_lock(&mas); + mas_set(&mas, 3); + ptr = mas_walk(&mas); + MT_BUG_ON(mt, ptr != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + + ptr = &check_prev_entry; + mas_set(&mas, 1); + mas_store_gfp(&mas, ptr, GFP_KERNEL); + + mas_set(&mas, 0); + ptr = mas_walk(&mas); + MT_BUG_ON(mt, ptr != NULL); + + mas_set(&mas, 1); + ptr = mas_walk(&mas); + MT_BUG_ON(mt, ptr != &check_prev_entry); + + mas_set(&mas, 2); + ptr = mas_walk(&mas); + MT_BUG_ON(mt, ptr != NULL); + mas_unlock(&mas); + mtree_destroy(mt); + + + mt_init_flags(mt, 0); + mas_lock(&mas); + + mas_set(&mas, 0); + ptr = &check_prev_entry; + mas_store_gfp(&mas, ptr, GFP_KERNEL); + + mas_set(&mas, 5); + ptr = mas_walk(&mas); + MT_BUG_ON(mt, ptr != NULL); + MT_BUG_ON(mt, mas.index != 1); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + + mas_set_range(&mas, 0, 100); + ptr = mas_walk(&mas); + MT_BUG_ON(mt, ptr != &check_prev_entry); + MT_BUG_ON(mt, mas.last != 0); + mas_unlock(&mas); + mtree_destroy(mt); + + mt_init_flags(mt, 0); + mas_lock(&mas); + + mas_set(&mas, 0); + ptr = (void *)((unsigned long) check_prev_entry | 1UL); + mas_store_gfp(&mas, ptr, GFP_KERNEL); + ptr = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, ptr != NULL); + MT_BUG_ON(mt, (mas.index != 1) && (mas.last != ULONG_MAX)); + + mas_set(&mas, 1); + ptr = mas_prev(&mas, 0); + MT_BUG_ON(mt, (mas.index != 0) && (mas.last != 0)); + MT_BUG_ON(mt, ptr != (void *)((unsigned long) check_prev_entry | 1UL)); + + mas_unlock(&mas); + + mtree_destroy(mt); + + mt_init_flags(mt, 0); + mas_lock(&mas); + mas_set(&mas, 0); + ptr = (void *)((unsigned long) check_prev_entry | 2UL); + mas_store_gfp(&mas, ptr, GFP_KERNEL); + ptr = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, ptr != NULL); + MT_BUG_ON(mt, (mas.index != 1) && (mas.last != ULONG_MAX)); + + mas_set(&mas, 1); + ptr = mas_prev(&mas, 0); + MT_BUG_ON(mt, (mas.index != 0) && (mas.last != 0)); + MT_BUG_ON(mt, ptr != (void *)((unsigned long) check_prev_entry | 2UL)); + + + mas_unlock(&mas); +} + +static noinline void check_gap_combining(struct maple_tree *mt) +{ + struct maple_enode *mn1, *mn2; + void *entry; + unsigned long singletons = 100; + unsigned long *seq100; + unsigned long seq100_64[] = { + /* 0-5 */ + 74, 75, 76, + 50, 100, 2, + + /* 6-12 */ + 44, 45, 46, 43, + 20, 50, 3, + + /* 13-20*/ + 80, 81, 82, + 76, 2, 79, 85, 4, + }; + + unsigned long seq100_32[] = { + /* 0-5 */ + 61, 62, 63, + 50, 100, 2, + + /* 6-12 */ + 31, 32, 33, 30, + 20, 50, 3, + + /* 13-20*/ + 80, 81, 82, + 76, 2, 79, 85, 4, + }; + + unsigned long seq2000[] = { + 1152, 1151, + 1100, 1200, 2, + }; + unsigned long seq400[] = { + 286, 318, + 256, 260, 266, 270, 275, 280, 290, 398, + 286, 310, + }; + + unsigned long index; + + MA_STATE(mas, mt, 0, 0); + + if (MAPLE_32BIT) + seq100 = seq100_32; + else + seq100 = seq100_64; + + index = seq100[0]; + mas_set(&mas, index); + MT_BUG_ON(mt, !mtree_empty(mt)); + check_seq(mt, singletons, false); /* create 100 singletons. */ + + mt_set_non_kernel(1); + mtree_test_erase(mt, seq100[2]); + check_load(mt, seq100[2], NULL); + mtree_test_erase(mt, seq100[1]); + check_load(mt, seq100[1], NULL); + + rcu_read_lock(); + entry = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != xa_mk_value(index)); + mn1 = mas.node; + mas_next(&mas, ULONG_MAX); + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != xa_mk_value(index + 4)); + mn2 = mas.node; + MT_BUG_ON(mt, mn1 == mn2); /* test the test. */ + + /* + * At this point, there is a gap of 2 at index + 1 between seq100[3] and + * seq100[4]. Search for the gap. + */ + mt_set_non_kernel(1); + mas_reset(&mas); + MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[3], seq100[4], + seq100[5])); + MT_BUG_ON(mt, mas.index != index + 1); + rcu_read_unlock(); + + mtree_test_erase(mt, seq100[6]); + check_load(mt, seq100[6], NULL); + mtree_test_erase(mt, seq100[7]); + check_load(mt, seq100[7], NULL); + mtree_test_erase(mt, seq100[8]); + index = seq100[9]; + + rcu_read_lock(); + mas.index = index; + mas.last = index; + mas_reset(&mas); + entry = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != xa_mk_value(index)); + mn1 = mas.node; + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != xa_mk_value(index + 4)); + mas_next(&mas, ULONG_MAX); /* go to the next entry. */ + mn2 = mas.node; + MT_BUG_ON(mt, mn1 == mn2); /* test the next entry is in the next node. */ + + /* + * At this point, there is a gap of 3 at seq100[6]. Find it by + * searching 20 - 50 for size 3. + */ + mas_reset(&mas); + MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[10], seq100[11], + seq100[12])); + MT_BUG_ON(mt, mas.index != seq100[6]); + rcu_read_unlock(); + + mt_set_non_kernel(1); + mtree_store(mt, seq100[13], NULL, GFP_KERNEL); + check_load(mt, seq100[13], NULL); + check_load(mt, seq100[14], xa_mk_value(seq100[14])); + mtree_store(mt, seq100[14], NULL, GFP_KERNEL); + check_load(mt, seq100[13], NULL); + check_load(mt, seq100[14], NULL); + + mas_reset(&mas); + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[16], seq100[15], + seq100[17])); + MT_BUG_ON(mt, mas.index != seq100[13]); + mt_validate(mt); + rcu_read_unlock(); + + /* + * *DEPRECATED: no retries anymore* Test retry entry in the start of a + * gap. + */ + mt_set_non_kernel(2); + mtree_test_store_range(mt, seq100[18], seq100[14], NULL); + mtree_test_erase(mt, seq100[15]); + mas_reset(&mas); + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq100[16], seq100[19], + seq100[20])); + rcu_read_unlock(); + MT_BUG_ON(mt, mas.index != seq100[18]); + mt_validate(mt); + mtree_destroy(mt); + + /* seq 2000 tests are for multi-level tree gaps */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_seq(mt, 2000, false); + mt_set_non_kernel(1); + mtree_test_erase(mt, seq2000[0]); + mtree_test_erase(mt, seq2000[1]); + + mt_set_non_kernel(2); + mas_reset(&mas); + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area_rev(&mas, seq2000[2], seq2000[3], + seq2000[4])); + MT_BUG_ON(mt, mas.index != seq2000[1]); + rcu_read_unlock(); + mt_validate(mt); + mtree_destroy(mt); + + /* seq 400 tests rebalancing over two levels. */ + mt_set_non_kernel(99); + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_seq(mt, 400, false); + mtree_test_store_range(mt, seq400[0], seq400[1], NULL); + mt_set_non_kernel(0); + mtree_destroy(mt); + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_seq(mt, 400, false); + mt_set_non_kernel(50); + mtree_test_store_range(mt, seq400[2], seq400[9], + xa_mk_value(seq400[2])); + mtree_test_store_range(mt, seq400[3], seq400[9], + xa_mk_value(seq400[3])); + mtree_test_store_range(mt, seq400[4], seq400[9], + xa_mk_value(seq400[4])); + mtree_test_store_range(mt, seq400[5], seq400[9], + xa_mk_value(seq400[5])); + mtree_test_store_range(mt, seq400[0], seq400[9], + xa_mk_value(seq400[0])); + mtree_test_store_range(mt, seq400[6], seq400[9], + xa_mk_value(seq400[6])); + mtree_test_store_range(mt, seq400[7], seq400[9], + xa_mk_value(seq400[7])); + mtree_test_store_range(mt, seq400[8], seq400[9], + xa_mk_value(seq400[8])); + mtree_test_store_range(mt, seq400[10], seq400[11], + xa_mk_value(seq400[10])); + mt_validate(mt); + mt_set_non_kernel(0); + mtree_destroy(mt); +} +static noinline void check_node_overwrite(struct maple_tree *mt) +{ + int i, max = 4000; + + for (i = 0; i < max; i++) + mtree_test_store_range(mt, i*100, i*100 + 50, xa_mk_value(i*100)); + + mtree_test_store_range(mt, 319951, 367950, NULL); + /*mt_dump(mt); */ + mt_validate(mt); +} + +#if defined(BENCH_SLOT_STORE) +static noinline void bench_slot_store(struct maple_tree *mt) +{ + int i, brk = 105, max = 1040, brk_start = 100, count = 20000000; + + for (i = 0; i < max; i += 10) + mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL); + + for (i = 0; i < count; i++) { + mtree_store_range(mt, brk, brk, NULL, GFP_KERNEL); + mtree_store_range(mt, brk_start, brk, xa_mk_value(brk), + GFP_KERNEL); + } +} +#endif + +#if defined(BENCH_NODE_STORE) +static noinline void bench_node_store(struct maple_tree *mt) +{ + int i, overwrite = 76, max = 240, count = 20000000; + + for (i = 0; i < max; i += 10) + mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL); + + for (i = 0; i < count; i++) { + mtree_store_range(mt, overwrite, overwrite + 15, + xa_mk_value(overwrite), GFP_KERNEL); + + overwrite += 5; + if (overwrite >= 135) + overwrite = 76; + } +} +#endif + +#if defined(BENCH_AWALK) +static noinline void bench_awalk(struct maple_tree *mt) +{ + int i, max = 2500, count = 50000000; + MA_STATE(mas, mt, 1470, 1470); + + for (i = 0; i < max; i += 10) + mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL); + + mtree_store_range(mt, 1470, 1475, NULL, GFP_KERNEL); + + for (i = 0; i < count; i++) { + mas_empty_area_rev(&mas, 0, 2000, 10); + mas_reset(&mas); + } +} +#endif +#if defined(BENCH_WALK) +static noinline void bench_walk(struct maple_tree *mt) +{ + int i, max = 2500, count = 550000000; + MA_STATE(mas, mt, 1470, 1470); + + for (i = 0; i < max; i += 10) + mtree_store_range(mt, i, i + 5, xa_mk_value(i), GFP_KERNEL); + + for (i = 0; i < count; i++) { + mas_walk(&mas); + mas_reset(&mas); + } + +} +#endif + +#if defined(BENCH_MT_FOR_EACH) +static noinline void bench_mt_for_each(struct maple_tree *mt) +{ + int i, count = 1000000; + unsigned long max = 2500, index = 0; + void *entry; + + for (i = 0; i < max; i += 5) + mtree_store_range(mt, i, i + 4, xa_mk_value(i), GFP_KERNEL); + + for (i = 0; i < count; i++) { + unsigned long j = 0; + + mt_for_each(mt, entry, index, max) { + MT_BUG_ON(mt, entry != xa_mk_value(j)); + j += 5; + } + + index = 0; + } + +} +#endif + +/* check_forking - simulate the kernel forking sequence with the tree. */ +static noinline void check_forking(struct maple_tree *mt) +{ + + struct maple_tree newmt; + int i, nr_entries = 134; + void *val; + MA_STATE(mas, mt, 0, 0); + MA_STATE(newmas, mt, 0, 0); + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i*10, i*10 + 5, + xa_mk_value(i), GFP_KERNEL); + + mt_set_non_kernel(99999); + mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); + newmas.tree = &newmt; + mas_reset(&newmas); + mas_reset(&mas); + mas_lock(&newmas); + mas.index = 0; + mas.last = 0; + if (mas_expected_entries(&newmas, nr_entries)) { + pr_err("OOM!"); + BUG_ON(1); + } + rcu_read_lock(); + mas_for_each(&mas, val, ULONG_MAX) { + newmas.index = mas.index; + newmas.last = mas.last; + mas_store(&newmas, val); + } + rcu_read_unlock(); + mas_destroy(&newmas); + mas_unlock(&newmas); + mt_validate(&newmt); + mt_set_non_kernel(0); + mtree_destroy(&newmt); +} + +static noinline void check_mas_store_gfp(struct maple_tree *mt) +{ + + struct maple_tree newmt; + int i, nr_entries = 135; + void *val; + MA_STATE(mas, mt, 0, 0); + MA_STATE(newmas, mt, 0, 0); + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i*10, i*10 + 5, + xa_mk_value(i), GFP_KERNEL); + + mt_set_non_kernel(99999); + mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); + newmas.tree = &newmt; + rcu_read_lock(); + mas_lock(&newmas); + mas_reset(&newmas); + mas_set(&mas, 0); + mas_for_each(&mas, val, ULONG_MAX) { + newmas.index = mas.index; + newmas.last = mas.last; + mas_store_gfp(&newmas, val, GFP_KERNEL); + } + mas_unlock(&newmas); + rcu_read_unlock(); + mt_validate(&newmt); + mt_set_non_kernel(0); + mtree_destroy(&newmt); +} + +#if defined(BENCH_FORK) +static noinline void bench_forking(struct maple_tree *mt) +{ + + struct maple_tree newmt; + int i, nr_entries = 134, nr_fork = 80000; + void *val; + MA_STATE(mas, mt, 0, 0); + MA_STATE(newmas, mt, 0, 0); + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i*10, i*10 + 5, + xa_mk_value(i), GFP_KERNEL); + + for (i = 0; i < nr_fork; i++) { + mt_set_non_kernel(99999); + mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); + newmas.tree = &newmt; + mas_reset(&newmas); + mas_reset(&mas); + mas.index = 0; + mas.last = 0; + rcu_read_lock(); + mas_lock(&newmas); + if (mas_expected_entries(&newmas, nr_entries)) { + printk("OOM!"); + BUG_ON(1); + } + mas_for_each(&mas, val, ULONG_MAX) { + newmas.index = mas.index; + newmas.last = mas.last; + mas_store(&newmas, val); + } + mas_destroy(&newmas); + mas_unlock(&newmas); + rcu_read_unlock(); + mt_validate(&newmt); + mt_set_non_kernel(0); + mtree_destroy(&newmt); + } +} +#endif + +static noinline void next_prev_test(struct maple_tree *mt) +{ + int i, nr_entries; + void *val; + MA_STATE(mas, mt, 0, 0); + struct maple_enode *mn; + unsigned long *level2; + unsigned long level2_64[] = {707, 1000, 710, 715, 720, 725}; + unsigned long level2_32[] = {1747, 2000, 1750, 1755, 1760, 1765}; + + if (MAPLE_32BIT) { + nr_entries = 500; + level2 = level2_32; + } else { + nr_entries = 200; + level2 = level2_64; + } + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i*10, i*10 + 5, + xa_mk_value(i), GFP_KERNEL); + + mas_lock(&mas); + for (i = 0; i <= nr_entries / 2; i++) { + mas_next(&mas, 1000); + if (mas_is_none(&mas)) + break; + + } + mas_reset(&mas); + mas_set(&mas, 0); + i = 0; + mas_for_each(&mas, val, 1000) { + i++; + } + + mas_reset(&mas); + mas_set(&mas, 0); + i = 0; + mas_for_each(&mas, val, 1000) { + mas_pause(&mas); + i++; + } + + /* + * 680 - 685 = 0x61a00001930c + * 686 - 689 = NULL; + * 690 - 695 = 0x61a00001930c + * Check simple next/prev + */ + mas_set(&mas, 686); + val = mas_walk(&mas); + MT_BUG_ON(mt, val != NULL); + + val = mas_next(&mas, 1000); + MT_BUG_ON(mt, val != xa_mk_value(690 / 10)); + MT_BUG_ON(mt, mas.index != 690); + MT_BUG_ON(mt, mas.last != 695); + + val = mas_prev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(680 / 10)); + MT_BUG_ON(mt, mas.index != 680); + MT_BUG_ON(mt, mas.last != 685); + + val = mas_next(&mas, 1000); + MT_BUG_ON(mt, val != xa_mk_value(690 / 10)); + MT_BUG_ON(mt, mas.index != 690); + MT_BUG_ON(mt, mas.last != 695); + + val = mas_next(&mas, 1000); + MT_BUG_ON(mt, val != xa_mk_value(700 / 10)); + MT_BUG_ON(mt, mas.index != 700); + MT_BUG_ON(mt, mas.last != 705); + + /* Check across node boundaries of the tree */ + mas_set(&mas, 70); + val = mas_walk(&mas); + MT_BUG_ON(mt, val != xa_mk_value(70 / 10)); + MT_BUG_ON(mt, mas.index != 70); + MT_BUG_ON(mt, mas.last != 75); + + val = mas_next(&mas, 1000); + MT_BUG_ON(mt, val != xa_mk_value(80 / 10)); + MT_BUG_ON(mt, mas.index != 80); + MT_BUG_ON(mt, mas.last != 85); + + val = mas_prev(&mas, 70); + MT_BUG_ON(mt, val != xa_mk_value(70 / 10)); + MT_BUG_ON(mt, mas.index != 70); + MT_BUG_ON(mt, mas.last != 75); + + /* Check across two levels of the tree */ + mas_reset(&mas); + mas_set(&mas, level2[0]); + val = mas_walk(&mas); + MT_BUG_ON(mt, val != NULL); + val = mas_next(&mas, level2[1]); + MT_BUG_ON(mt, val != xa_mk_value(level2[2] / 10)); + MT_BUG_ON(mt, mas.index != level2[2]); + MT_BUG_ON(mt, mas.last != level2[3]); + mn = mas.node; + + val = mas_next(&mas, level2[1]); + MT_BUG_ON(mt, val != xa_mk_value(level2[4] / 10)); + MT_BUG_ON(mt, mas.index != level2[4]); + MT_BUG_ON(mt, mas.last != level2[5]); + MT_BUG_ON(mt, mn == mas.node); + + val = mas_prev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(level2[2] / 10)); + MT_BUG_ON(mt, mas.index != level2[2]); + MT_BUG_ON(mt, mas.last != level2[3]); + + /* Check running off the end and back on */ + mas_set(&mas, nr_entries * 10); + val = mas_walk(&mas); + MT_BUG_ON(mt, val != xa_mk_value(nr_entries)); + MT_BUG_ON(mt, mas.index != (nr_entries * 10)); + MT_BUG_ON(mt, mas.last != (nr_entries * 10 + 5)); + + val = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, val != NULL); + MT_BUG_ON(mt, mas.index != ULONG_MAX); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + + val = mas_prev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(nr_entries)); + MT_BUG_ON(mt, mas.index != (nr_entries * 10)); + MT_BUG_ON(mt, mas.last != (nr_entries * 10 + 5)); + + /* Check running off the start and back on */ + mas_reset(&mas); + mas_set(&mas, 10); + val = mas_walk(&mas); + MT_BUG_ON(mt, val != xa_mk_value(1)); + MT_BUG_ON(mt, mas.index != 10); + MT_BUG_ON(mt, mas.last != 15); + + val = mas_prev(&mas, 0); + MT_BUG_ON(mt, val != xa_mk_value(0)); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 5); + + val = mas_prev(&mas, 0); + MT_BUG_ON(mt, val != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0); + + mas.index = 0; + mas.last = 5; + mas_store(&mas, NULL); + mas_reset(&mas); + mas_set(&mas, 10); + mas_walk(&mas); + + val = mas_prev(&mas, 0); + MT_BUG_ON(mt, val != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0); + mas_unlock(&mas); + + mtree_destroy(mt); + + mt_init(mt); + mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL); + mtree_store_range(mt, 5, 5, xa_mk_value(5), GFP_KERNEL); + rcu_read_lock(); + mas_set(&mas, 5); + val = mas_prev(&mas, 4); + MT_BUG_ON(mt, val != NULL); + rcu_read_unlock(); +} + + + +/* Test spanning writes that require balancing right sibling or right cousin */ +static noinline void check_spanning_relatives(struct maple_tree *mt) +{ + + unsigned long i, nr_entries = 1000; + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i*10, i*10 + 5, + xa_mk_value(i), GFP_KERNEL); + + + mtree_store_range(mt, 9365, 9955, NULL, GFP_KERNEL); +} + +static noinline void check_fuzzer(struct maple_tree *mt) +{ + /* + * 1. Causes a spanning rebalance of a single root node. + * Fixed by setting the correct limit in mast_cp_to_nodes() when the + * entire right side is consumed. + */ + mtree_test_insert(mt, 88, (void *)0xb1); + mtree_test_insert(mt, 84, (void *)0xa9); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 4, (void *)0x9); + mtree_test_insert(mt, 14, (void *)0x1d); + mtree_test_insert(mt, 7, (void *)0xf); + mtree_test_insert(mt, 12, (void *)0x19); + mtree_test_insert(mt, 18, (void *)0x25); + mtree_test_store_range(mt, 8, 18, (void *)0x11); + mtree_destroy(mt); + + + /* + * 2. Cause a spanning rebalance of two nodes in root. + * Fixed by setting mast->r->max correctly. + */ + mt_init_flags(mt, 0); + mtree_test_store(mt, 87, (void *)0xaf); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_load(mt, 4); + mtree_test_insert(mt, 4, (void *)0x9); + mtree_test_store(mt, 8, (void *)0x11); + mtree_test_store(mt, 44, (void *)0x59); + mtree_test_store(mt, 68, (void *)0x89); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_insert(mt, 43, (void *)0x57); + mtree_test_insert(mt, 24, (void *)0x31); + mtree_test_insert(mt, 844, (void *)0x699); + mtree_test_store(mt, 84, (void *)0xa9); + mtree_test_store(mt, 4, (void *)0x9); + mtree_test_erase(mt, 4); + mtree_test_load(mt, 5); + mtree_test_erase(mt, 0); + mtree_destroy(mt); + + /* + * 3. Cause a node overflow on copy + * Fixed by using the correct check for node size in mas_wr_modify() + * Also discovered issue with metadata setting. + */ + mt_init_flags(mt, 0); + mtree_test_store_range(mt, 0, ULONG_MAX, (void *)0x1); + mtree_test_store(mt, 4, (void *)0x9); + mtree_test_erase(mt, 5); + mtree_test_erase(mt, 0); + mtree_test_erase(mt, 4); + mtree_test_store(mt, 5, (void *)0xb); + mtree_test_erase(mt, 5); + mtree_test_store(mt, 5, (void *)0xb); + mtree_test_erase(mt, 5); + mtree_test_erase(mt, 4); + mtree_test_store(mt, 4, (void *)0x9); + mtree_test_store(mt, 444, (void *)0x379); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_load(mt, 0); + mtree_test_store(mt, 5, (void *)0xb); + mtree_test_erase(mt, 0); + mtree_destroy(mt); + + /* + * 4. spanning store failure due to writing incorrect pivot value at + * last slot. + * Fixed by setting mast->r->max correctly in mast_cp_to_nodes() + * + */ + mt_init_flags(mt, 0); + mtree_test_insert(mt, 261, (void *)0x20b); + mtree_test_store(mt, 516, (void *)0x409); + mtree_test_store(mt, 6, (void *)0xd); + mtree_test_insert(mt, 5, (void *)0xb); + mtree_test_insert(mt, 1256, (void *)0x9d1); + mtree_test_store(mt, 4, (void *)0x9); + mtree_test_erase(mt, 1); + mtree_test_store(mt, 56, (void *)0x71); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_store(mt, 24, (void *)0x31); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 2263, (void *)0x11af); + mtree_test_insert(mt, 446, (void *)0x37d); + mtree_test_store_range(mt, 6, 45, (void *)0xd); + mtree_test_store_range(mt, 3, 446, (void *)0x7); + mtree_destroy(mt); + + /* + * 5. mas_wr_extend_null() may overflow slots. + * Fix by checking against wr_mas->node_end. + */ + mt_init_flags(mt, 0); + mtree_test_store(mt, 48, (void *)0x61); + mtree_test_store(mt, 3, (void *)0x7); + mtree_test_load(mt, 0); + mtree_test_store(mt, 88, (void *)0xb1); + mtree_test_store(mt, 81, (void *)0xa3); + mtree_test_insert(mt, 0, (void *)0x1); + mtree_test_insert(mt, 8, (void *)0x11); + mtree_test_insert(mt, 4, (void *)0x9); + mtree_test_insert(mt, 2480, (void *)0x1361); + mtree_test_insert(mt, ULONG_MAX, + (void *)0xffffffffffffffff); + mtree_test_erase(mt, ULONG_MAX); + mtree_destroy(mt); + + /* + * 6. When reusing a node with an implied pivot and the node is + * shrinking, old data would be left in the implied slot + * Fixed by checking the last pivot for the mas->max and clear + * accordingly. This only affected the left-most node as that node is + * the only one allowed to end in NULL. + */ + mt_init_flags(mt, 0); + mtree_test_erase(mt, 3); + mtree_test_insert(mt, 22, (void *)0x2d); + mtree_test_insert(mt, 15, (void *)0x1f); + mtree_test_load(mt, 2); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 5, (void *)0xb); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 4, (void *)0x9); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 3); + mtree_test_insert(mt, 22, (void *)0x2d); + mtree_test_insert(mt, 15, (void *)0x1f); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 8, (void *)0x11); + mtree_test_load(mt, 2); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_insert(mt, 5, (void *)0xb); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 4, (void *)0x9); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 3); + mtree_test_insert(mt, 22, (void *)0x2d); + mtree_test_insert(mt, 15, (void *)0x1f); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert(mt, 8, (void *)0x11); + mtree_test_insert(mt, 12, (void *)0x19); + mtree_test_erase(mt, 1); + mtree_test_store_range(mt, 4, 62, (void *)0x9); + mtree_test_erase(mt, 62); + mtree_test_store_range(mt, 1, 0, (void *)0x3); + mtree_test_insert(mt, 11, (void *)0x17); + mtree_test_insert(mt, 3, (void *)0x7); + mtree_test_insert(mt, 3, (void *)0x7); + mtree_test_store(mt, 62, (void *)0x7d); + mtree_test_erase(mt, 62); + mtree_test_store_range(mt, 1, 15, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 22, (void *)0x2d); + mtree_test_insert(mt, 12, (void *)0x19); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 3, (void *)0x7); + mtree_test_store(mt, 62, (void *)0x7d); + mtree_test_erase(mt, 62); + mtree_test_insert(mt, 122, (void *)0xf5); + mtree_test_store(mt, 3, (void *)0x7); + mtree_test_insert(mt, 0, (void *)0x1); + mtree_test_store_range(mt, 0, 1, (void *)0x1); + mtree_test_insert(mt, 85, (void *)0xab); + mtree_test_insert(mt, 72, (void *)0x91); + mtree_test_insert(mt, 81, (void *)0xa3); + mtree_test_insert(mt, 726, (void *)0x5ad); + mtree_test_insert(mt, 0, (void *)0x1); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_store(mt, 51, (void *)0x67); + mtree_test_insert(mt, 611, (void *)0x4c7); + mtree_test_insert(mt, 485, (void *)0x3cb); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 0, (void *)0x1); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert_range(mt, 26, 1, (void *)0x35); + mtree_test_load(mt, 1); + mtree_test_store_range(mt, 1, 22, (void *)0x3); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_load(mt, 53); + mtree_test_load(mt, 1); + mtree_test_store_range(mt, 1, 1, (void *)0x3); + mtree_test_insert(mt, 222, (void *)0x1bd); + mtree_test_insert(mt, 485, (void *)0x3cb); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_load(mt, 0); + mtree_test_insert(mt, 21, (void *)0x2b); + mtree_test_insert(mt, 3, (void *)0x7); + mtree_test_store(mt, 621, (void *)0x4db); + mtree_test_insert(mt, 0, (void *)0x1); + mtree_test_erase(mt, 5); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_store(mt, 62, (void *)0x7d); + mtree_test_erase(mt, 62); + mtree_test_store_range(mt, 1, 0, (void *)0x3); + mtree_test_insert(mt, 22, (void *)0x2d); + mtree_test_insert(mt, 12, (void *)0x19); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_store_range(mt, 4, 62, (void *)0x9); + mtree_test_erase(mt, 62); + mtree_test_erase(mt, 1); + mtree_test_load(mt, 1); + mtree_test_store_range(mt, 1, 22, (void *)0x3); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_load(mt, 53); + mtree_test_load(mt, 1); + mtree_test_store_range(mt, 1, 1, (void *)0x3); + mtree_test_insert(mt, 222, (void *)0x1bd); + mtree_test_insert(mt, 485, (void *)0x3cb); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_load(mt, 0); + mtree_test_load(mt, 0); + mtree_destroy(mt); + + /* + * 7. Previous fix was incomplete, fix mas_resuse_node() clearing of old + * data by overwriting it first - that way metadata is of no concern. + */ + mt_init_flags(mt, 0); + mtree_test_load(mt, 1); + mtree_test_insert(mt, 102, (void *)0xcd); + mtree_test_erase(mt, 2); + mtree_test_erase(mt, 0); + mtree_test_load(mt, 0); + mtree_test_insert(mt, 4, (void *)0x9); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 110, (void *)0xdd); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_insert_range(mt, 5, 0, (void *)0xb); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_store(mt, 112, (void *)0xe1); + mtree_test_insert(mt, 21, (void *)0x2b); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_insert_range(mt, 110, 2, (void *)0xdd); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_load(mt, 22); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 210, (void *)0x1a5); + mtree_test_store_range(mt, 0, 2, (void *)0x1); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_erase(mt, 2); + mtree_test_erase(mt, 22); + mtree_test_erase(mt, 1); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_load(mt, 112); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_insert_range(mt, 1, 2, (void *)0x3); + mtree_test_erase(mt, 0); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_erase(mt, 0); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_erase(mt, 2); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert_range(mt, 1, 2, (void *)0x3); + mtree_test_erase(mt, 0); + mtree_test_erase(mt, 2); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_load(mt, 112); + mtree_test_store_range(mt, 110, 12, (void *)0xdd); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_load(mt, 110); + mtree_test_insert_range(mt, 4, 71, (void *)0x9); + mtree_test_load(mt, 2); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_insert_range(mt, 11, 22, (void *)0x17); + mtree_test_erase(mt, 12); + mtree_test_store(mt, 2, (void *)0x5); + mtree_test_load(mt, 22); + mtree_destroy(mt); + + + /* + * 8. When rebalancing or spanning_rebalance(), the max of the new node + * may be set incorrectly to the final pivot and not the right max. + * Fix by setting the left max to orig right max if the entire node is + * consumed. + */ + mt_init_flags(mt, 0); + mtree_test_store(mt, 6, (void *)0xd); + mtree_test_store(mt, 67, (void *)0x87); + mtree_test_insert(mt, 15, (void *)0x1f); + mtree_test_insert(mt, 6716, (void *)0x3479); + mtree_test_store(mt, 61, (void *)0x7b); + mtree_test_insert(mt, 13, (void *)0x1b); + mtree_test_store(mt, 8, (void *)0x11); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_load(mt, 0); + mtree_test_erase(mt, 67167); + mtree_test_insert_range(mt, 6, 7167, (void *)0xd); + mtree_test_insert(mt, 6, (void *)0xd); + mtree_test_erase(mt, 67); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 667167); + mtree_test_insert(mt, 6, (void *)0xd); + mtree_test_store(mt, 67, (void *)0x87); + mtree_test_insert(mt, 5, (void *)0xb); + mtree_test_erase(mt, 1); + mtree_test_insert(mt, 6, (void *)0xd); + mtree_test_erase(mt, 67); + mtree_test_insert(mt, 15, (void *)0x1f); + mtree_test_insert(mt, 67167, (void *)0x20cbf); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_load(mt, 7); + mtree_test_insert(mt, 16, (void *)0x21); + mtree_test_insert(mt, 36, (void *)0x49); + mtree_test_store(mt, 67, (void *)0x87); + mtree_test_store(mt, 6, (void *)0xd); + mtree_test_insert(mt, 367, (void *)0x2df); + mtree_test_insert(mt, 115, (void *)0xe7); + mtree_test_store(mt, 0, (void *)0x1); + mtree_test_store_range(mt, 1, 3, (void *)0x3); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_erase(mt, 67167); + mtree_test_insert_range(mt, 6, 47, (void *)0xd); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_insert_range(mt, 1, 67, (void *)0x3); + mtree_test_load(mt, 67); + mtree_test_insert(mt, 1, (void *)0x3); + mtree_test_erase(mt, 67167); + mtree_destroy(mt); + + /* + * 9. spanning store to the end of data caused an invalid metadata + * length which resulted in a crash eventually. + * Fix by checking if there is a value in pivot before incrementing the + * metadata end in mab_mas_cp(). To ensure this doesn't happen again, + * abstract the two locations this happens into a function called + * mas_leaf_set_meta(). + */ + mt_init_flags(mt, 0); + mtree_test_insert(mt, 21, (void *)0x2b); + mtree_test_insert(mt, 12, (void *)0x19); + mtree_test_insert(mt, 6, (void *)0xd); + mtree_test_insert(mt, 8, (void *)0x11); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, 91, (void *)0xb7); + mtree_test_insert(mt, 18, (void *)0x25); + mtree_test_insert(mt, 81, (void *)0xa3); + mtree_test_store_range(mt, 0, 128, (void *)0x1); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_erase(mt, 8); + mtree_test_insert(mt, 11, (void *)0x17); + mtree_test_insert(mt, 8, (void *)0x11); + mtree_test_insert(mt, 21, (void *)0x2b); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, ULONG_MAX - 10, (void *)0xffffffffffffffeb); + mtree_test_erase(mt, ULONG_MAX - 10); + mtree_test_store_range(mt, 0, 281, (void *)0x1); + mtree_test_erase(mt, 2); + mtree_test_insert(mt, 1211, (void *)0x977); + mtree_test_insert(mt, 111, (void *)0xdf); + mtree_test_insert(mt, 13, (void *)0x1b); + mtree_test_insert(mt, 211, (void *)0x1a7); + mtree_test_insert(mt, 11, (void *)0x17); + mtree_test_insert(mt, 5, (void *)0xb); + mtree_test_insert(mt, 1218, (void *)0x985); + mtree_test_insert(mt, 61, (void *)0x7b); + mtree_test_store(mt, 1, (void *)0x3); + mtree_test_insert(mt, 121, (void *)0xf3); + mtree_test_insert(mt, 8, (void *)0x11); + mtree_test_insert(mt, 21, (void *)0x2b); + mtree_test_insert(mt, 2, (void *)0x5); + mtree_test_insert(mt, ULONG_MAX - 10, (void *)0xffffffffffffffeb); + mtree_test_erase(mt, ULONG_MAX - 10); +} + +/* duplicate the tree with a specific gap */ +static noinline void check_dup_gaps(struct maple_tree *mt, + unsigned long nr_entries, bool zero_start, + unsigned long gap) +{ + unsigned long i = 0; + struct maple_tree newmt; + int ret; + void *tmp; + MA_STATE(mas, mt, 0, 0); + MA_STATE(newmas, &newmt, 0, 0); + + if (!zero_start) + i = 1; + + mt_zero_nr_tallocated(); + for (; i <= nr_entries; i++) + mtree_store_range(mt, i*10, (i+1)*10 - gap, + xa_mk_value(i), GFP_KERNEL); + + mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); + mt_set_non_kernel(99999); + mas_lock(&newmas); + ret = mas_expected_entries(&newmas, nr_entries); + mt_set_non_kernel(0); + MT_BUG_ON(mt, ret != 0); + + rcu_read_lock(); + mas_for_each(&mas, tmp, ULONG_MAX) { + newmas.index = mas.index; + newmas.last = mas.last; + mas_store(&newmas, tmp); + } + rcu_read_unlock(); + mas_destroy(&newmas); + mas_unlock(&newmas); + + mtree_destroy(&newmt); +} + +/* Duplicate many sizes of trees. Mainly to test expected entry values */ +static noinline void check_dup(struct maple_tree *mt) +{ + int i; + int big_start = 100010; + + /* Check with a value at zero */ + for (i = 10; i < 1000; i++) { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_dup_gaps(mt, i, true, 5); + mtree_destroy(mt); + rcu_barrier(); + } + + cond_resched(); + mt_cache_shrink(); + /* Check with a value at zero, no gap */ + for (i = 1000; i < 2000; i++) { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_dup_gaps(mt, i, true, 0); + mtree_destroy(mt); + rcu_barrier(); + } + + cond_resched(); + mt_cache_shrink(); + /* Check with a value at zero and unreasonably large */ + for (i = big_start; i < big_start + 10; i++) { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_dup_gaps(mt, i, true, 5); + mtree_destroy(mt); + rcu_barrier(); + } + + cond_resched(); + mt_cache_shrink(); + /* Small to medium size not starting at zero*/ + for (i = 200; i < 1000; i++) { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_dup_gaps(mt, i, false, 5); + mtree_destroy(mt); + rcu_barrier(); + } + + cond_resched(); + mt_cache_shrink(); + /* Unreasonably large not starting at zero*/ + for (i = big_start; i < big_start + 10; i++) { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + check_dup_gaps(mt, i, false, 5); + mtree_destroy(mt); + rcu_barrier(); + cond_resched(); + mt_cache_shrink(); + } + + /* Check non-allocation tree not starting at zero */ + for (i = 1500; i < 3000; i++) { + mt_init_flags(mt, 0); + check_dup_gaps(mt, i, false, 5); + mtree_destroy(mt); + rcu_barrier(); + cond_resched(); + if (i % 2 == 0) + mt_cache_shrink(); + } + + mt_cache_shrink(); + /* Check non-allocation tree starting at zero */ + for (i = 200; i < 1000; i++) { + mt_init_flags(mt, 0); + check_dup_gaps(mt, i, true, 5); + mtree_destroy(mt); + rcu_barrier(); + cond_resched(); + } + + mt_cache_shrink(); + /* Unreasonably large */ + for (i = big_start + 5; i < big_start + 10; i++) { + mt_init_flags(mt, 0); + check_dup_gaps(mt, i, true, 5); + mtree_destroy(mt); + rcu_barrier(); + mt_cache_shrink(); + cond_resched(); + } +} + +static DEFINE_MTREE(tree); +static int maple_tree_seed(void) +{ + unsigned long set[] = {5015, 5014, 5017, 25, 1000, + 1001, 1002, 1003, 1005, 0, + 5003, 5002}; + void *ptr = &set; + + pr_info("\nTEST STARTING\n\n"); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_root_expand(&tree); + mtree_destroy(&tree); + +#if defined(BENCH_SLOT_STORE) +#define BENCH + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + bench_slot_store(&tree); + mtree_destroy(&tree); + goto skip; +#endif +#if defined(BENCH_NODE_STORE) +#define BENCH + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + bench_node_store(&tree); + mtree_destroy(&tree); + goto skip; +#endif +#if defined(BENCH_AWALK) +#define BENCH + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + bench_awalk(&tree); + mtree_destroy(&tree); + goto skip; +#endif +#if defined(BENCH_WALK) +#define BENCH + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + bench_walk(&tree); + mtree_destroy(&tree); + goto skip; +#endif +#if defined(BENCH_FORK) +#define BENCH + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + bench_forking(&tree); + mtree_destroy(&tree); + goto skip; +#endif +#if defined(BENCH_MT_FOR_EACH) +#define BENCH + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + bench_mt_for_each(&tree); + mtree_destroy(&tree); + goto skip; +#endif + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_forking(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_mas_store_gfp(&tree); + mtree_destroy(&tree); + + /* Test ranges (store and insert) */ + mt_init_flags(&tree, 0); + check_ranges(&tree); + mtree_destroy(&tree); + +#if defined(CONFIG_64BIT) + /* These tests have ranges outside of 4GB */ + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_alloc_range(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_alloc_rev_range(&tree); + mtree_destroy(&tree); +#endif + + mt_init_flags(&tree, 0); + + check_load(&tree, set[0], NULL); /* See if 5015 -> NULL */ + + check_insert(&tree, set[9], &tree); /* Insert 0 */ + check_load(&tree, set[9], &tree); /* See if 0 -> &tree */ + check_load(&tree, set[0], NULL); /* See if 5015 -> NULL */ + + check_insert(&tree, set[10], ptr); /* Insert 5003 */ + check_load(&tree, set[9], &tree); /* See if 0 -> &tree */ + check_load(&tree, set[11], NULL); /* See if 5002 -> NULL */ + check_load(&tree, set[10], ptr); /* See if 5003 -> ptr */ + + /* Clear out the tree */ + mtree_destroy(&tree); + + /* Try to insert, insert a dup, and load back what was inserted. */ + mt_init_flags(&tree, 0); + check_insert(&tree, set[0], &tree); /* Insert 5015 */ + check_dup_insert(&tree, set[0], &tree); /* Insert 5015 again */ + check_load(&tree, set[0], &tree); /* See if 5015 -> &tree */ + + /* + * Second set of tests try to load a value that doesn't exist, inserts + * a second value, then loads the value again + */ + check_load(&tree, set[1], NULL); /* See if 5014 -> NULL */ + check_insert(&tree, set[1], ptr); /* insert 5014 -> ptr */ + check_load(&tree, set[1], ptr); /* See if 5014 -> ptr */ + check_load(&tree, set[0], &tree); /* See if 5015 -> &tree */ + /* + * Tree currently contains: + * p[0]: 14 -> (nil) p[1]: 15 -> ptr p[2]: 16 -> &tree p[3]: 0 -> (nil) + */ + check_insert(&tree, set[6], ptr); /* insert 1002 -> ptr */ + check_insert(&tree, set[7], &tree); /* insert 1003 -> &tree */ + + check_load(&tree, set[0], &tree); /* See if 5015 -> &tree */ + check_load(&tree, set[1], ptr); /* See if 5014 -> ptr */ + check_load(&tree, set[6], ptr); /* See if 1002 -> ptr */ + check_load(&tree, set[7], &tree); /* 1003 = &tree ? */ + + /* Clear out tree */ + mtree_destroy(&tree); + + mt_init_flags(&tree, 0); + /* Test inserting into a NULL hole. */ + check_insert(&tree, set[5], ptr); /* insert 1001 -> ptr */ + check_insert(&tree, set[7], &tree); /* insert 1003 -> &tree */ + check_insert(&tree, set[6], ptr); /* insert 1002 -> ptr */ + check_load(&tree, set[5], ptr); /* See if 1001 -> ptr */ + check_load(&tree, set[6], ptr); /* See if 1002 -> ptr */ + check_load(&tree, set[7], &tree); /* See if 1003 -> &tree */ + + /* Clear out the tree */ + mtree_destroy(&tree); + + mt_init_flags(&tree, 0); + /* + * set[] = {5015, 5014, 5017, 25, 1000, + * 1001, 1002, 1003, 1005, 0, + * 5003, 5002}; + */ + + check_insert(&tree, set[0], ptr); /* 5015 */ + check_insert(&tree, set[1], &tree); /* 5014 */ + check_insert(&tree, set[2], ptr); /* 5017 */ + check_insert(&tree, set[3], &tree); /* 25 */ + check_load(&tree, set[0], ptr); + check_load(&tree, set[1], &tree); + check_load(&tree, set[2], ptr); + check_load(&tree, set[3], &tree); + check_insert(&tree, set[4], ptr); /* 1000 < Should split. */ + check_load(&tree, set[0], ptr); + check_load(&tree, set[1], &tree); + check_load(&tree, set[2], ptr); + check_load(&tree, set[3], &tree); /*25 */ + check_load(&tree, set[4], ptr); + check_insert(&tree, set[5], &tree); /* 1001 */ + check_load(&tree, set[0], ptr); + check_load(&tree, set[1], &tree); + check_load(&tree, set[2], ptr); + check_load(&tree, set[3], &tree); + check_load(&tree, set[4], ptr); + check_load(&tree, set[5], &tree); + check_insert(&tree, set[6], ptr); + check_load(&tree, set[0], ptr); + check_load(&tree, set[1], &tree); + check_load(&tree, set[2], ptr); + check_load(&tree, set[3], &tree); + check_load(&tree, set[4], ptr); + check_load(&tree, set[5], &tree); + check_load(&tree, set[6], ptr); + check_insert(&tree, set[7], &tree); + check_load(&tree, set[0], ptr); + check_insert(&tree, set[8], ptr); + + check_insert(&tree, set[9], &tree); + + check_load(&tree, set[0], ptr); + check_load(&tree, set[1], &tree); + check_load(&tree, set[2], ptr); + check_load(&tree, set[3], &tree); + check_load(&tree, set[4], ptr); + check_load(&tree, set[5], &tree); + check_load(&tree, set[6], ptr); + check_load(&tree, set[9], &tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, 0); + check_seq(&tree, 16, false); + mtree_destroy(&tree); + + mt_init_flags(&tree, 0); + check_seq(&tree, 1000, true); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_rev_seq(&tree, 1000, true); + mtree_destroy(&tree); + + check_lower_bound_split(&tree); + check_upper_bound_split(&tree); + check_mid_split(&tree); + + mt_init_flags(&tree, 0); + check_next_entry(&tree); + check_find(&tree); + check_find_2(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_prev_entry(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_gap_combining(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_node_overwrite(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + next_prev_test(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_spanning_relatives(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_rev_find(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, 0); + check_fuzzer(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_dup(&tree); + mtree_destroy(&tree); + +#if defined(BENCH) +skip: +#endif + rcu_barrier(); + pr_info("maple_tree: %u of %u tests passed\n", + atomic_read(&maple_tree_tests_passed), + atomic_read(&maple_tree_tests_run)); + if (atomic_read(&maple_tree_tests_run) == + atomic_read(&maple_tree_tests_passed)) + return 0; + + return -EINVAL; +} + +static void maple_tree_harvest(void) +{ + +} + +module_init(maple_tree_seed); +module_exit(maple_tree_harvest); +MODULE_AUTHOR("Liam R. Howlett <Liam.Howlett@Oracle.com>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_meminit.c b/lib/test_meminit.c index c95db11a6906..60e1984c060f 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -67,17 +67,24 @@ static int __init do_alloc_pages_order(int order, int *total_failures) size_t size = PAGE_SIZE << order; page = alloc_pages(GFP_KERNEL, order); + if (!page) + goto err; buf = page_address(page); fill_with_garbage(buf, size); __free_pages(page, order); page = alloc_pages(GFP_KERNEL, order); + if (!page) + goto err; buf = page_address(page); if (count_nonzero_bytes(buf, size)) (*total_failures)++; fill_with_garbage(buf, size); __free_pages(page, order); return 1; +err: + (*total_failures)++; + return 1; } /* Test the page allocator by calling alloc_pages with different orders. */ @@ -100,15 +107,22 @@ static int __init do_kmalloc_size(size_t size, int *total_failures) void *buf; buf = kmalloc(size, GFP_KERNEL); + if (!buf) + goto err; fill_with_garbage(buf, size); kfree(buf); buf = kmalloc(size, GFP_KERNEL); + if (!buf) + goto err; if (count_nonzero_bytes(buf, size)) (*total_failures)++; fill_with_garbage(buf, size); kfree(buf); return 1; +err: + (*total_failures)++; + return 1; } /* Test vmalloc() with given parameters. */ @@ -117,15 +131,22 @@ static int __init do_vmalloc_size(size_t size, int *total_failures) void *buf; buf = vmalloc(size); + if (!buf) + goto err; fill_with_garbage(buf, size); vfree(buf); buf = vmalloc(size); + if (!buf) + goto err; if (count_nonzero_bytes(buf, size)) (*total_failures)++; fill_with_garbage(buf, size); vfree(buf); return 1; +err: + (*total_failures)++; + return 1; } /* Test kmalloc()/vmalloc() by allocating objects of different sizes. */ diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index d19c8080fd4d..7b01b4387cfb 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -83,7 +83,7 @@ static __init int test_heapify_all(bool min_heap) /* Test with randomly generated values. */ heap.nr = ARRAY_SIZE(values); for (i = 0; i < heap.nr; i++) - values[i] = get_random_int(); + values[i] = get_random_u32(); min_heapify_all(&heap, &funcs); err += pop_verify_heap(min_heap, &heap, &funcs); @@ -116,7 +116,7 @@ static __init int test_heap_push(bool min_heap) /* Test with randomly generated values. */ while (heap.nr < heap.size) { - temp = get_random_int(); + temp = get_random_u32(); min_heap_push(&heap, &temp, &funcs); } err += pop_verify_heap(min_heap, &heap, &funcs); @@ -158,7 +158,7 @@ static __init int test_heap_pop_push(bool min_heap) /* Test with randomly generated values. */ for (i = 0; i < ARRAY_SIZE(data); i++) { - temp = get_random_int(); + temp = get_random_u32(); min_heap_pop_push(&heap, &temp, &funcs); } err += pop_verify_heap(min_heap, &heap, &funcs); diff --git a/lib/test_objagg.c b/lib/test_objagg.c index da137939a410..c0c957c50635 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -157,7 +157,7 @@ static int test_nodelta_obj_get(struct world *world, struct objagg *objagg, int err; if (should_create_root) - prandom_bytes(world->next_root_buf, + get_random_bytes(world->next_root_buf, sizeof(world->next_root_buf)); objagg_obj = world_obj_get(world, objagg, key_id); diff --git a/lib/test_printf.c b/lib/test_printf.c index 4bd15a593fbd..d34dc636b81c 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -126,7 +126,7 @@ __test(const char *expect, int elen, const char *fmt, ...) * be able to print it as expected. */ failed_tests += do_test(BUF_SIZE, expect, elen, fmt, ap); - rand = 1 + prandom_u32_max(elen+1); + rand = get_random_u32_inclusive(1, elen + 1); /* Since elen < BUF_SIZE, we have 1 <= rand <= BUF_SIZE. */ failed_tests += do_test(rand, expect, elen, fmt, ap); failed_tests += do_test(0, expect, elen, fmt, ap); @@ -179,18 +179,6 @@ test_number(void) * behaviour. */ test("00|0|0|0|0", "%.2d|%.1d|%.0d|%.*d|%1.0d", 0, 0, 0, 0, 0, 0); -#ifndef __CHAR_UNSIGNED__ - { - /* - * Passing a 'char' to a %02x specifier doesn't do - * what was presumably the intention when char is - * signed and the value is negative. One must either & - * with 0xff or cast to u8. - */ - char val = -16; - test("0xfffffff0|0xf0|0xf0", "%#02x|%#02x|%#02x", val, val & 0xff, (u8)val); - } -#endif } static void __init @@ -704,31 +692,29 @@ flags(void) static void __init fwnode_pointer(void) { - const struct software_node softnodes[] = { - { .name = "first", }, - { .name = "second", .parent = &softnodes[0], }, - { .name = "third", .parent = &softnodes[1], }, - { NULL /* Guardian */ } - }; - const char * const full_name = "first/second/third"; + const struct software_node first = { .name = "first" }; + const struct software_node second = { .name = "second", .parent = &first }; + const struct software_node third = { .name = "third", .parent = &second }; + const struct software_node *group[] = { &first, &second, &third, NULL }; const char * const full_name_second = "first/second"; + const char * const full_name_third = "first/second/third"; const char * const second_name = "second"; const char * const third_name = "third"; int rval; - rval = software_node_register_nodes(softnodes); + rval = software_node_register_node_group(group); if (rval) { pr_warn("cannot register softnodes; rval %d\n", rval); return; } - test(full_name_second, "%pfw", software_node_fwnode(&softnodes[1])); - test(full_name, "%pfw", software_node_fwnode(&softnodes[2])); - test(full_name, "%pfwf", software_node_fwnode(&softnodes[2])); - test(second_name, "%pfwP", software_node_fwnode(&softnodes[1])); - test(third_name, "%pfwP", software_node_fwnode(&softnodes[2])); + test(full_name_second, "%pfw", software_node_fwnode(&second)); + test(full_name_third, "%pfw", software_node_fwnode(&third)); + test(full_name_third, "%pfwf", software_node_fwnode(&third)); + test(second_name, "%pfwP", software_node_fwnode(&second)); + test(third_name, "%pfwP", software_node_fwnode(&third)); - software_node_unregister_nodes(softnodes); + software_node_unregister_node_group(group); } static void __init fourcc_pointer(void) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 5a1dd4736b56..c20f6cb4bf55 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -291,7 +291,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN_ON(err)) goto out_free; - k = prandom_u32(); + k = get_random_u32(); ret = 0; for (i = 0; i < entries; i++) { rhl_test_objects[i].value.id = k; @@ -368,19 +368,11 @@ static int __init test_rhltable(unsigned int entries) pr_info("test %d random rhlist add/delete operations\n", entries); for (j = 0; j < entries; j++) { - u32 i = prandom_u32_max(entries); - u32 prand = prandom_u32(); + u32 i = get_random_u32_below(entries); + u32 prand = get_random_u32_below(4); cond_resched(); - if (prand == 0) - prand = prandom_u32(); - - if (prand & 1) { - prand >>= 1; - continue; - } - err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); if (test_bit(i, obj_in_table)) { clear_bit(i, obj_in_table); @@ -393,35 +385,29 @@ static int __init test_rhltable(unsigned int entries) } if (prand & 1) { - prand >>= 1; - continue; - } - - err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); - if (err == 0) { - if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i)) - continue; - } else { - if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i)) - continue; - } - - if (prand & 1) { - prand >>= 1; - continue; + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (err == 0) { + if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i)) + continue; + } else { + if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i)) + continue; + } } - i = prandom_u32_max(entries); - if (test_bit(i, obj_in_table)) { - err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); - WARN(err, "cannot remove element at slot %d", i); - if (err == 0) - clear_bit(i, obj_in_table); - } else { - err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); - WARN(err, "failed to insert object %d", i); - if (err == 0) - set_bit(i, obj_in_table); + if (prand & 2) { + i = get_random_u32_below(entries); + if (test_bit(i, obj_in_table)) { + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "cannot remove element at slot %d", i); + if (err == 0) + clear_bit(i, obj_in_table); + } else { + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "failed to insert object %d", i); + if (err == 0) + set_bit(i, obj_in_table); + } } } @@ -448,7 +434,7 @@ out_free: static int __init test_rhashtable_max(struct test_obj *array, unsigned int entries) { - unsigned int i, insert_retries = 0; + unsigned int i; int err; test_rht_params.max_size = roundup_pow_of_two(entries / 8); @@ -461,9 +447,7 @@ static int __init test_rhashtable_max(struct test_obj *array, obj->value.id = i * 2; err = insert_retry(&ht, obj, test_rht_params); - if (err > 0) - insert_retries += err; - else if (err) + if (err < 0) return err; } diff --git a/lib/test_strscpy.c b/lib/test_strscpy.c deleted file mode 100644 index a827f94601f5..000000000000 --- a/lib/test_strscpy.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/string.h> - -#include "../tools/testing/selftests/kselftest_module.h" - -/* - * Kernel module for testing 'strscpy' family of functions. - */ - -KSTM_MODULE_GLOBALS(); - -/* - * tc() - Run a specific test case. - * @src: Source string, argument to strscpy_pad() - * @count: Size of destination buffer, argument to strscpy_pad() - * @expected: Expected return value from call to strscpy_pad() - * @terminator: 1 if there should be a terminating null byte 0 otherwise. - * @chars: Number of characters from the src string expected to be - * written to the dst buffer. - * @pad: Number of pad characters expected (in the tail of dst buffer). - * (@pad does not include the null terminator byte.) - * - * Calls strscpy_pad() and verifies the return value and state of the - * destination buffer after the call returns. - */ -static int __init tc(char *src, int count, int expected, - int chars, int terminator, int pad) -{ - int nr_bytes_poison; - int max_expected; - int max_count; - int written; - char buf[6]; - int index, i; - const char POISON = 'z'; - - total_tests++; - - if (!src) { - pr_err("null source string not supported\n"); - return -1; - } - - memset(buf, POISON, sizeof(buf)); - /* Future proofing test suite, validate args */ - max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ - max_expected = count - 1; /* Space for the null */ - if (count > max_count) { - pr_err("count (%d) is too big (%d) ... aborting", count, max_count); - return -1; - } - if (expected > max_expected) { - pr_warn("expected (%d) is bigger than can possibly be returned (%d)", - expected, max_expected); - } - - written = strscpy_pad(buf, src, count); - if ((written) != (expected)) { - pr_err("%d != %d (written, expected)\n", written, expected); - goto fail; - } - - if (count && written == -E2BIG) { - if (strncmp(buf, src, count - 1) != 0) { - pr_err("buffer state invalid for -E2BIG\n"); - goto fail; - } - if (buf[count - 1] != '\0') { - pr_err("too big string is not null terminated correctly\n"); - goto fail; - } - } - - for (i = 0; i < chars; i++) { - if (buf[i] != src[i]) { - pr_err("buf[i]==%c != src[i]==%c\n", buf[i], src[i]); - goto fail; - } - } - - if (terminator) { - if (buf[count - 1] != '\0') { - pr_err("string is not null terminated correctly\n"); - goto fail; - } - } - - for (i = 0; i < pad; i++) { - index = chars + terminator + i; - if (buf[index] != '\0') { - pr_err("padding missing at index: %d\n", i); - goto fail; - } - } - - nr_bytes_poison = sizeof(buf) - chars - terminator - pad; - for (i = 0; i < nr_bytes_poison; i++) { - index = sizeof(buf) - 1 - i; /* Check from the end back */ - if (buf[index] != POISON) { - pr_err("poison value missing at index: %d\n", i); - goto fail; - } - } - - return 0; -fail: - failed_tests++; - return -1; -} - -static void __init selftest(void) -{ - /* - * tc() uses a destination buffer of size 6 and needs at - * least 2 characters spare (one for null and one to check for - * overflow). This means we should only call tc() with - * strings up to a maximum of 4 characters long and 'count' - * should not exceed 4. To test with longer strings increase - * the buffer size in tc(). - */ - - /* tc(src, count, expected, chars, terminator, pad) */ - KSTM_CHECK_ZERO(tc("a", 0, -E2BIG, 0, 0, 0)); - KSTM_CHECK_ZERO(tc("", 0, -E2BIG, 0, 0, 0)); - - KSTM_CHECK_ZERO(tc("a", 1, -E2BIG, 0, 1, 0)); - KSTM_CHECK_ZERO(tc("", 1, 0, 0, 1, 0)); - - KSTM_CHECK_ZERO(tc("ab", 2, -E2BIG, 1, 1, 0)); - KSTM_CHECK_ZERO(tc("a", 2, 1, 1, 1, 0)); - KSTM_CHECK_ZERO(tc("", 2, 0, 0, 1, 1)); - - KSTM_CHECK_ZERO(tc("abc", 3, -E2BIG, 2, 1, 0)); - KSTM_CHECK_ZERO(tc("ab", 3, 2, 2, 1, 0)); - KSTM_CHECK_ZERO(tc("a", 3, 1, 1, 1, 1)); - KSTM_CHECK_ZERO(tc("", 3, 0, 0, 1, 2)); - - KSTM_CHECK_ZERO(tc("abcd", 4, -E2BIG, 3, 1, 0)); - KSTM_CHECK_ZERO(tc("abc", 4, 3, 3, 1, 0)); - KSTM_CHECK_ZERO(tc("ab", 4, 2, 2, 1, 1)); - KSTM_CHECK_ZERO(tc("a", 4, 1, 1, 1, 2)); - KSTM_CHECK_ZERO(tc("", 4, 0, 0, 1, 3)); -} - -KSTM_MODULE_LOADERS(test_strscpy); -MODULE_AUTHOR("Tobin C. Harding <tobin@kernel.org>"); -MODULE_LICENSE("GPL"); diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 4f2f2d1bac56..f90d2c27675b 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -80,7 +80,7 @@ static int random_size_align_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - rnd = prandom_u32(); + rnd = get_random_u8(); /* * Maximum 1024 pages, if PAGE_SIZE is 4096. @@ -151,9 +151,7 @@ static int random_size_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - n = prandom_u32(); - n = (n % 100) + 1; - + n = get_random_u32_inclusive(1, 100); p = vmalloc(n * PAGE_SIZE); if (!p) @@ -293,16 +291,12 @@ pcpu_alloc_test(void) return -1; for (i = 0; i < 35000; i++) { - unsigned int r; - - r = prandom_u32(); - size = (r % (PAGE_SIZE / 4)) + 1; + size = get_random_u32_inclusive(1, PAGE_SIZE / 4); /* * Maximum PAGE_SIZE */ - r = prandom_u32(); - align = 1 << ((r % 11) + 1); + align = 1 << get_random_u32_inclusive(1, 11); pcpu[i] = __alloc_percpu(size, align); if (!pcpu[i]) @@ -393,14 +387,11 @@ static struct test_driver { static void shuffle_array(int *arr, int n) { - unsigned int rnd; int i, j; for (i = n - 1; i > 0; i--) { - rnd = prandom_u32(); - /* Cut the range. */ - j = rnd % i; + j = get_random_u32_below(i); /* Swap indexes. */ swap(arr[i], arr[j]); diff --git a/lib/ubsan.c b/lib/ubsan.c index 36bd75e33426..60c7099857a0 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -154,8 +154,7 @@ static void ubsan_epilogue(void) current->in_ubsan--; - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("UBSAN"); } void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) diff --git a/lib/usercopy.c b/lib/usercopy.c index 7413dd300516..1505a52f23a0 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -12,8 +12,9 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { - instrument_copy_from_user(to, from, n); + instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); } if (unlikely(res)) memset(to + (n - res), 0, res); diff --git a/lib/uuid.c b/lib/uuid.c index 562d53977cab..e309b4c5be3d 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(generate_random_guid); static void __uuid_gen_common(__u8 b[16]) { - prandom_bytes(b, 16); + get_random_bytes(b, 16); /* reversion 0b10 */ b[8] = (b[8] & 0x3F) | 0x80; } diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index c415a685d61b..e814061d6aa0 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) endif quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3c1853a9d1c0..be71a03c936a 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -41,6 +41,7 @@ #include <linux/siphash.h> #include <linux/compiler.h> #include <linux/property.h> +#include <linux/notifier.h> #ifdef CONFIG_BLOCK #include <linux/blkdev.h> #endif @@ -750,37 +751,37 @@ static int __init debug_boot_weak_hash_enable(char *str) } early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); -static DEFINE_STATIC_KEY_FALSE(filled_random_ptr_key); +static bool filled_random_ptr_key __read_mostly; +static siphash_key_t ptr_key __read_mostly; -static void enable_ptr_key_workfn(struct work_struct *work) +static int fill_ptr_key(struct notifier_block *nb, unsigned long action, void *data) { - static_branch_enable(&filled_random_ptr_key); + get_random_bytes(&ptr_key, sizeof(ptr_key)); + + /* Pairs with smp_rmb() before reading ptr_key. */ + smp_wmb(); + WRITE_ONCE(filled_random_ptr_key, true); + return NOTIFY_DONE; +} + +static int __init vsprintf_init_hashval(void) +{ + static struct notifier_block fill_ptr_key_nb = { .notifier_call = fill_ptr_key }; + execute_with_initialized_rng(&fill_ptr_key_nb); + return 0; } +subsys_initcall(vsprintf_init_hashval) /* Maps a pointer to a 32 bit unique identifier. */ static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out) { - static siphash_key_t ptr_key __read_mostly; unsigned long hashval; - if (!static_branch_likely(&filled_random_ptr_key)) { - static bool filled = false; - static DEFINE_SPINLOCK(filling); - static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); - unsigned long flags; - - if (!system_unbound_wq || !rng_is_initialized() || - !spin_trylock_irqsave(&filling, flags)) - return -EAGAIN; - - if (!filled) { - get_random_bytes(&ptr_key, sizeof(ptr_key)); - queue_work(system_unbound_wq, &enable_ptr_key_work); - filled = true; - } - spin_unlock_irqrestore(&filling, flags); - } + if (!READ_ONCE(filled_random_ptr_key)) + return -EBUSY; + /* Pairs with smp_wmb() after writing ptr_key. */ + smp_rmb(); #ifdef CONFIG_64BIT hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); @@ -861,7 +862,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, * kptr_restrict==1 cannot be used in IRQ context * because its test for CAP_SYSLOG would be meaningless. */ - if (in_irq() || in_serving_softirq() || in_nmi()) { + if (in_hardirq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); return error_string(buf, end, "pK-error", spec); @@ -1189,7 +1190,7 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, } static noinline_for_stack -char *bitmap_string(char *buf, char *end, unsigned long *bitmap, +char *bitmap_string(char *buf, char *end, const unsigned long *bitmap, struct printf_spec spec, const char *fmt) { const int CHUNKSZ = 32; @@ -1233,7 +1234,7 @@ char *bitmap_string(char *buf, char *end, unsigned long *bitmap, } static noinline_for_stack -char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, +char *bitmap_list_string(char *buf, char *end, const unsigned long *bitmap, struct printf_spec spec, const char *fmt) { int nr_bits = max_t(int, spec.field_width, 0); @@ -2246,6 +2247,9 @@ int __init no_hash_pointers_enable(char *str) } early_param("no_hash_pointers", no_hash_pointers_enable); +/* Used for Rust formatting ('%pA'). */ +char *rust_fmt_argument(char *buf, char *end, void *ptr); + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2372,6 +2376,10 @@ early_param("no_hash_pointers", no_hash_pointers_enable); * * Note: The default behaviour (unadorned %p) is to hash the address, * rendering it useful as a unique identifier. + * + * There is also a '%pA' format specifier, but it is only intended to be used + * from Rust code to format core::fmt::Arguments. Do *not* use it from C. + * See rust/kernel/print.rs for details. */ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, @@ -2444,6 +2452,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); case 'f': return fwnode_string(buf, end, ptr, spec, fmt + 1); + case 'A': + if (!IS_ENABLED(CONFIG_RUST)) { + WARN_ONCE(1, "Please remove %%pA from non-Rust code\n"); + return error_string(buf, end, "(%pA?)", spec); + } + return rust_fmt_argument(buf, end, ptr); case 'x': return pointer_string(buf, end, ptr, spec); case 'e': diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index fc45339fc3a3..440bd0007ae2 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -10,14 +10,10 @@ # ################################################################ obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o +obj-$(CONFIG_ZSTD_COMMON) += zstd_common.o zstd_compress-y := \ zstd_compress_module.o \ - common/debug.o \ - common/entropy_common.o \ - common/error_private.o \ - common/fse_decompress.o \ - common/zstd_common.o \ compress/fse_compress.o \ compress/hist.o \ compress/huf_compress.o \ @@ -33,12 +29,14 @@ zstd_compress-y := \ zstd_decompress-y := \ zstd_decompress_module.o \ + decompress/huf_decompress.o \ + decompress/zstd_ddict.o \ + decompress/zstd_decompress.o \ + decompress/zstd_decompress_block.o \ + +zstd_common-y := \ common/debug.o \ common/entropy_common.o \ common/error_private.o \ common/fse_decompress.o \ common/zstd_common.o \ - decompress/huf_decompress.o \ - decompress/zstd_ddict.o \ - decompress/zstd_decompress.o \ - decompress/zstd_decompress_block.o \ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c index 53b47a2b52ff..a311808c0d56 100644 --- a/lib/zstd/common/entropy_common.c +++ b/lib/zstd/common/entropy_common.c @@ -15,6 +15,7 @@ /* ************************************* * Dependencies ***************************************/ +#include <linux/module.h> #include "mem.h" #include "error_private.h" /* ERR_*, ERROR */ #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ @@ -239,7 +240,7 @@ size_t FSE_readNCount( { return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); } - +EXPORT_SYMBOL_GPL(FSE_readNCount); /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). @@ -255,6 +256,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); } +EXPORT_SYMBOL_GPL(HUF_readStats); FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, @@ -355,3 +357,4 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, (void)bmi2; return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); } +EXPORT_SYMBOL_GPL(HUF_readStats_wksp); diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c index 3d7e35b309b5..0f1f63be25d9 100644 --- a/lib/zstd/common/zstd_common.c +++ b/lib/zstd/common/zstd_common.c @@ -13,6 +13,7 @@ /*-************************************* * Dependencies ***************************************/ +#include <linux/module.h> #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ #include "error_private.h" @@ -35,14 +36,17 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } * tells if a return value is an error code * symbol is required for external callers */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } +EXPORT_SYMBOL_GPL(ZSTD_isError); /*! ZSTD_getErrorName() : * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } +EXPORT_SYMBOL_GPL(ZSTD_getErrorName); /*! ZSTD_getError() : * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } +EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); /*! ZSTD_getErrorString() : * provides error code string from enum */ @@ -59,6 +63,7 @@ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) return customMem.customAlloc(customMem.opaque, size); return ZSTD_malloc(size); } +EXPORT_SYMBOL_GPL(ZSTD_customMalloc); void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) { @@ -71,6 +76,7 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) } return ZSTD_calloc(1, size); } +EXPORT_SYMBOL_GPL(ZSTD_customCalloc); void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) { @@ -81,3 +87,7 @@ void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) ZSTD_free(ptr); } } +EXPORT_SYMBOL_GPL(ZSTD_customFree); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Common"); |