40 files changed, 1544 insertions, 4075 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 5fe577673b98..706836ec314d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -405,7 +405,7 @@ config ASSOCIATIVE_ARRAY
 
 	  See:
 
-		Documentation/assoc_array.txt
+		Documentation/core-api/assoc_array.rst
 
 	  for more information.
 
@@ -420,24 +420,14 @@ config HAS_IOPORT_MAP
 	depends on HAS_IOMEM && !NO_IOPORT_MAP
 	default y
 
-config HAS_DMA
-	bool
-	depends on !NO_DMA
-	default y
+source "kernel/dma/Kconfig"
 
 config SGL_ALLOC
 	bool
 	default n
 
-config DMA_DIRECT_OPS
-	bool
-	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
-	default n
-
-config DMA_VIRT_OPS
+config IOMMU_HELPER
 	bool
-	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
-	default n
 
 config CHECK_SIGNATURE
 	bool
@@ -586,6 +576,9 @@ config ARCH_HAS_PMEM_API
 config ARCH_HAS_UACCESS_FLUSHCACHE
 	bool
 
+config ARCH_HAS_UACCESS_MCSAFE
+	bool
+
 config STACKDEPOT
 	bool
 	select STACKTRACE
@@ -604,20 +597,20 @@ config STRING_SELFTEST
 
 endmenu
 
-config GENERIC_ASHLDI3
+config GENERIC_LIB_ASHLDI3
 	bool
 
-config GENERIC_ASHRDI3
+config GENERIC_LIB_ASHRDI3
 	bool
 
-config GENERIC_LSHRDI3
+config GENERIC_LIB_LSHRDI3
 	bool
 
-config GENERIC_MULDI3
+config GENERIC_LIB_MULDI3
 	bool
 
-config GENERIC_CMPDI2
+config GENERIC_LIB_CMPDI2
 	bool
 
-config GENERIC_UCMPDI2
+config GENERIC_LIB_UCMPDI2
 	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c40c7b734cd1..8838d1158d19 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -736,12 +736,15 @@ config ARCH_HAS_KCOV
 	  only for x86_64. KCOV requires testing on other archs, and most likely
 	  disabling of instrumentation for some early boot code.
 
+config CC_HAS_SANCOV_TRACE_PC
+	def_bool $(cc-option,-fsanitize-coverage=trace-pc)
+
 config KCOV
 	bool "Code coverage for fuzzing"
 	depends on ARCH_HAS_KCOV
+	depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
 	select DEBUG_FS
-	select GCC_PLUGINS if !COMPILE_TEST
-	select GCC_PLUGIN_SANCOV if !COMPILE_TEST
+	select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
 	help
 	  KCOV exposes kernel code coverage information in a form suitable
 	  for coverage-guided fuzzing (randomized testing).
@@ -755,7 +758,7 @@ config KCOV
 config KCOV_ENABLE_COMPARISONS
 	bool "Enable comparison operands collection by KCOV"
 	depends on KCOV
-	default n
+	depends on $(cc-option,-fsanitize-coverage=trace-cmp)
 	help
 	  KCOV also exposes operands of every comparison in the instrumented
 	  code along with operand sizes and PCs of the comparison instructions.
@@ -765,7 +768,7 @@ config KCOV_ENABLE_COMPARISONS
 config KCOV_INSTRUMENT_ALL
 	bool "Instrument all code by default"
 	depends on KCOV
-	default y if KCOV
+	default y
 	help
 	  If you are doing generic system call fuzzing (like e.g. syzkaller),
 	  then you will want to instrument the whole kernel and you should
@@ -1503,6 +1506,10 @@ config NETDEV_NOTIFIER_ERROR_INJECT
 
 	  If unsure, say N.
 
+config FUNCTION_ERROR_INJECTION
+	def_bool y
+	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+
 config FAULT_INJECTION
 	bool "Fault-injection framework"
 	depends on DEBUG_KERNEL
@@ -1510,10 +1517,6 @@ config FAULT_INJECTION
 	  Provide fault-injection framework.
 	  For more details, see Documentation/fault-injection/.
 
-config FUNCTION_ERROR_INJECTION
-	def_bool y
-	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
-
 config FAILSLAB
 	bool "Fault-injection capability for kmalloc"
 	depends on FAULT_INJECTION
@@ -1544,16 +1547,6 @@ config FAIL_IO_TIMEOUT
 	  Only works with drivers that use the generic timeout handling,
 	  for others it wont do anything.
 
-config FAIL_MMC_REQUEST
-	bool "Fault-injection capability for MMC IO"
-	depends on FAULT_INJECTION_DEBUG_FS && MMC
-	help
-	  Provide fault-injection capability for MMC IO.
-	  This will make the mmc core return data errors. This is
-	  useful to test the error handling in the mmc block device
-	  and to test how the mmc host driver handles retries from
-	  the block device.
-
 config FAIL_FUTEX
 	bool "Fault-injection capability for futexes"
 	select DEBUG_FS
@@ -1561,6 +1554,12 @@ config FAIL_FUTEX
 	help
 	  Provide fault-injection capability for futexes.
 
+config FAULT_INJECTION_DEBUG_FS
+	bool "Debugfs entries for fault-injection capabilities"
+	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
+	help
+	  Enable configuration of fault-injection capabilities via debugfs.
+
 config FAIL_FUNCTION
 	bool "Fault-injection capability for functions"
 	depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
@@ -1571,11 +1570,15 @@ config FAIL_FUNCTION
 	  an error value and have to handle it. This is useful to test the
 	  error handling in various subsystems.
 
-config FAULT_INJECTION_DEBUG_FS
-	bool "Debugfs entries for fault-injection capabilities"
-	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
+config FAIL_MMC_REQUEST
+	bool "Fault-injection capability for MMC IO"
+	depends on FAULT_INJECTION_DEBUG_FS && MMC
 	help
-	  Enable configuration of fault-injection capabilities via debugfs.
+	  Provide fault-injection capability for MMC IO.
+	  This will make the mmc core return data errors. This is
+	  useful to test the error handling in the mmc block device
+	  and to test how the mmc host driver handles retries from
+	  the block device.
 
 config FAULT_INJECTION_STACKTRACE_FILTER
 	bool "stacktrace filter for fault-injection capabilities"
@@ -1634,7 +1637,7 @@ config PROVIDE_OHCI1394_DMA_INIT
 
 config DMA_API_DEBUG
 	bool "Enable debugging of DMA-API usage"
-	depends on HAVE_DMA_API_DEBUG
+	select NEED_DMA_MAP_STATE
 	help
 	  Enable this option to debug the use of the DMA API by device drivers.
 	  With this option you will be able to detect common bugs in device
@@ -1651,6 +1654,23 @@ config DMA_API_DEBUG
 
 	  If unsure, say N.
 
+config DMA_API_DEBUG_SG
+	bool "Debug DMA scatter-gather usage"
+	default y
+	depends on DMA_API_DEBUG
+	help
+	  Perform extra checking that callers of dma_map_sg() have respected the
+	  appropriate segment length/boundary limits for the given device when
+	  preparing DMA scatterlists.
+
+	  This is particularly likely to have been overlooked in cases where the
+	  dma_map_sg() API is used for general bulk mapping of pages rather than
+	  preparing literal scatter-gather descriptors, where there is a risk of
+	  unexpected behaviour from DMA API implementations if the scatterlist
+	  is technically out-of-spec.
+
+	  If unsure, say N.
+
 menuconfig RUNTIME_TESTING_MENU
 	bool "Runtime Testing"
 	def_bool y
@@ -1785,6 +1805,9 @@ config TEST_BITMAP
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
+config TEST_OVERFLOW
+	tristate "Test check_*_overflow() functions at runtime"
+
 config TEST_RHASHTABLE
 	tristate "Perform selftest on resizable hash table"
 	default n
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 3d35d062970d..c253c1b46c6b 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN
 config KASAN
 	bool "KASan: runtime memory debugger"
 	depends on SLUB || (SLAB && !DEBUG_SLAB)
+	select SLUB_DEBUG if SLUB
 	select CONSTRUCTORS
 	select STACKDEPOT
 	help
diff --git a/lib/Makefile b/lib/Makefile
index ce20696d5a92..90dc5520b784 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,14 +23,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o siphash.o \
+	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
-lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y	+= kobject.o klist.o
 obj-y	+= lockref.o
@@ -59,6 +57,7 @@ UBSAN_SANITIZE_test_ubsan.o := y
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
+obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_SORT) += test_sort.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
@@ -96,10 +95,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
-  lib-y += dec_and_lock.o
-endif
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
@@ -146,8 +141,7 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
-obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
@@ -167,8 +161,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_LRU_CACHE) += lru_cache.o
 
-obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
-
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
@@ -259,9 +251,9 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o
 obj-$(CONFIG_PARMAN) += parman.o
 
 # GCC library routines
-obj-$(CONFIG_GENERIC_ASHLDI3) += ashldi3.o
-obj-$(CONFIG_GENERIC_ASHRDI3) += ashrdi3.o
-obj-$(CONFIG_GENERIC_LSHRDI3) += lshrdi3.o
-obj-$(CONFIG_GENERIC_MULDI3) += muldi3.o
-obj-$(CONFIG_GENERIC_CMPDI2) += cmpdi2.o
-obj-$(CONFIG_GENERIC_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o
+obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o
+obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o
+obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
+obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 5c35752a9414..1a19a0a93dc1 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -69,7 +69,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp)
 		return NULL;
 
 	argc = count_argc(argv_str);
-	argv = kmalloc(sizeof(*argv) * (argc + 2), gfp);
+	argv = kmalloc_array(argc + 2, sizeof(*argv), gfp);
 	if (!argv) {
 		kfree(argv_str);
 		return NULL;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a42eff7e8c48..58f9750e49c6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -64,12 +64,9 @@ EXPORT_SYMBOL(__bitmap_equal);
 
 void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
-	unsigned int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = BITS_TO_LONGS(bits);
 	for (k = 0; k < lim; ++k)
 		dst[k] = ~src[k];
-
-	if (bits % BITS_PER_LONG)
-		dst[k] = ~src[k];
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
index 266a97c5708b..ade3ce6c4af6 100644
--- a/lib/bucket_locks.c
+++ b/lib/bucket_locks.c
@@ -30,10 +30,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
 	}
 
 	if (sizeof(spinlock_t) != 0) {
-		if (gfpflags_allow_blocking(gfp))
-			tlocks = kvmalloc(size * sizeof(spinlock_t), gfp);
-		else
-			tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
+		tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp);
 		if (!tlocks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 347fa7ac2e8a..9555b68bb774 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
+
+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+				 unsigned long *flags)
+{
+	/* Fast path: subtract 1 from counter unless that drops it to 0 (i.e. it was 1) */
+	if (atomic_add_unless(atomic, -1, 1))
+		return 0;	/* decremented without reaching 0; lock was never taken */
+
+	/* Slow path: counter was 1, so decrement under the lock (spin_lock_irqsave() with *flags) */
+	spin_lock_irqsave(lock, *flags);
+	if (atomic_dec_and_test(atomic))
+		return 1;	/* reached 0: returns with lock still held */
+	spin_unlock_irqrestore(lock, *flags);	/* did not reach 0 after all: drop the lock */
+	return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
deleted file mode 100644
index 7f5cdc1e6b29..000000000000
--- a/lib/dma-debug.c
+++ /dev/null
@@ -1,1752 +0,0 @@
-/*
- * Copyright (C) 2008 Advanced Micro Devices, Inc.
- *
- * Author: Joerg Roedel <joerg.roedel@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/sched/task_stack.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/sched/task.h>
-#include <linux/stacktrace.h>
-#include <linux/dma-debug.h>
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/export.h>
-#include <linux/device.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/ctype.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-
-#include <asm/sections.h>
-
-#define HASH_SIZE       1024ULL
-#define HASH_FN_SHIFT   13
-#define HASH_FN_MASK    (HASH_SIZE - 1)
-
-enum {
-	dma_debug_single,
-	dma_debug_page,
-	dma_debug_sg,
-	dma_debug_coherent,
-	dma_debug_resource,
-};
-
-enum map_err_types {
-	MAP_ERR_CHECK_NOT_APPLICABLE,
-	MAP_ERR_NOT_CHECKED,
-	MAP_ERR_CHECKED,
-};
-
-#define DMA_DEBUG_STACKTRACE_ENTRIES 5
-
-/**
- * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
- * @list: node on pre-allocated free_entries list
- * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
- * @type: single, page, sg, coherent
- * @pfn: page frame of the start address
- * @offset: offset of mapping relative to pfn
- * @size: length of the mapping
- * @direction: enum dma_data_direction
- * @sg_call_ents: 'nents' from dma_map_sg
- * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
- * @map_err_type: track whether dma_mapping_error() was checked
- * @stacktrace: support backtraces when a violation is detected
- */
-struct dma_debug_entry {
-	struct list_head list;
-	struct device    *dev;
-	int              type;
-	unsigned long	 pfn;
-	size_t		 offset;
-	u64              dev_addr;
-	u64              size;
-	int              direction;
-	int		 sg_call_ents;
-	int		 sg_mapped_ents;
-	enum map_err_types  map_err_type;
-#ifdef CONFIG_STACKTRACE
-	struct		 stack_trace stacktrace;
-	unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
-#endif
-};
-
-typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
-
-struct hash_bucket {
-	struct list_head list;
-	spinlock_t lock;
-} ____cacheline_aligned_in_smp;
-
-/* Hash list to save the allocated dma addresses */
-static struct hash_bucket dma_entry_hash[HASH_SIZE];
-/* List of pre-allocated dma_debug_entry's */
-static LIST_HEAD(free_entries);
-/* Lock for the list above */
-static DEFINE_SPINLOCK(free_entries_lock);
-
-/* Global disable flag - will be set in case of an error */
-static bool global_disable __read_mostly;
-
-/* Early initialization disable flag, set at the end of dma_debug_init */
-static bool dma_debug_initialized __read_mostly;
-
-static inline bool dma_debug_disabled(void)
-{
-	return global_disable || !dma_debug_initialized;
-}
-
-/* Global error count */
-static u32 error_count;
-
-/* Global error show enable*/
-static u32 show_all_errors __read_mostly;
-/* Number of errors to show */
-static u32 show_num_errors = 1;
-
-static u32 num_free_entries;
-static u32 min_free_entries;
-static u32 nr_total_entries;
-
-/* number of preallocated entries requested by kernel cmdline */
-static u32 req_entries;
-
-/* debugfs dentry's for the stuff above */
-static struct dentry *dma_debug_dent        __read_mostly;
-static struct dentry *global_disable_dent   __read_mostly;
-static struct dentry *error_count_dent      __read_mostly;
-static struct dentry *show_all_errors_dent  __read_mostly;
-static struct dentry *show_num_errors_dent  __read_mostly;
-static struct dentry *num_free_entries_dent __read_mostly;
-static struct dentry *min_free_entries_dent __read_mostly;
-static struct dentry *filter_dent           __read_mostly;
-
-/* per-driver filter related state */
-
-#define NAME_MAX_LEN	64
-
-static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
-static struct device_driver *current_driver                    __read_mostly;
-
-static DEFINE_RWLOCK(driver_name_lock);
-
-static const char *const maperr2str[] = {
-	[MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
-	[MAP_ERR_NOT_CHECKED] = "dma map error not checked",
-	[MAP_ERR_CHECKED] = "dma map error checked",
-};
-
-static const char *type2name[5] = { "single", "page",
-				    "scather-gather", "coherent",
-				    "resource" };
-
-static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
-				   "DMA_FROM_DEVICE", "DMA_NONE" };
-
-/*
- * The access to some variables in this macro is racy. We can't use atomic_t
- * here because all these variables are exported to debugfs. Some of them even
- * writeable. This is also the reason why a lock won't help much. But anyway,
- * the races are no big deal. Here is why:
- *
- *   error_count: the addition is racy, but the worst thing that can happen is
- *                that we don't count some errors
- *   show_num_errors: the subtraction is racy. Also no big deal because in
- *                    worst case this will result in one warning more in the
- *                    system log than the user configured. This variable is
- *                    writeable via debugfs.
- */
-static inline void dump_entry_trace(struct dma_debug_entry *entry)
-{
-#ifdef CONFIG_STACKTRACE
-	if (entry) {
-		pr_warning("Mapped at:\n");
-		print_stack_trace(&entry->stacktrace, 0);
-	}
-#endif
-}
-
-static bool driver_filter(struct device *dev)
-{
-	struct device_driver *drv;
-	unsigned long flags;
-	bool ret;
-
-	/* driver filter off */
-	if (likely(!current_driver_name[0]))
-		return true;
-
-	/* driver filter on and initialized */
-	if (current_driver && dev && dev->driver == current_driver)
-		return true;
-
-	/* driver filter on, but we can't filter on a NULL device... */
-	if (!dev)
-		return false;
-
-	if (current_driver || !current_driver_name[0])
-		return false;
-
-	/* driver filter on but not yet initialized */
-	drv = dev->driver;
-	if (!drv)
-		return false;
-
-	/* lock to protect against change of current_driver_name */
-	read_lock_irqsave(&driver_name_lock, flags);
-
-	ret = false;
-	if (drv->name &&
-	    strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
-		current_driver = drv;
-		ret = true;
-	}
-
-	read_unlock_irqrestore(&driver_name_lock, flags);
-
-	return ret;
-}
-
-#define err_printk(dev, entry, format, arg...) do {			\
-		error_count += 1;					\
-		if (driver_filter(dev) &&				\
-		    (show_all_errors || show_num_errors > 0)) {		\
-			WARN(1, "%s %s: " format,			\
-			     dev ? dev_driver_string(dev) : "NULL",	\
-			     dev ? dev_name(dev) : "NULL", ## arg);	\
-			dump_entry_trace(entry);			\
-		}							\
-		if (!show_all_errors && show_num_errors > 0)		\
-			show_num_errors -= 1;				\
-	} while (0);
-
-/*
- * Hash related functions
- *
- * Every DMA-API request is saved into a struct dma_debug_entry. To
- * have quick access to these structs they are stored into a hash.
- */
-static int hash_fn(struct dma_debug_entry *entry)
-{
-	/*
-	 * Hash function is based on the dma address.
-	 * We use bits 20-27 here as the index into the hash
-	 */
-	return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
-}
-
-/*
- * Request exclusive access to a hash bucket for a given dma_debug_entry.
- */
-static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
-					   unsigned long *flags)
-	__acquires(&dma_entry_hash[idx].lock)
-{
-	int idx = hash_fn(entry);
-	unsigned long __flags;
-
-	spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
-	*flags = __flags;
-	return &dma_entry_hash[idx];
-}
-
-/*
- * Give up exclusive access to the hash bucket
- */
-static void put_hash_bucket(struct hash_bucket *bucket,
-			    unsigned long *flags)
-	__releases(&bucket->lock)
-{
-	unsigned long __flags = *flags;
-
-	spin_unlock_irqrestore(&bucket->lock, __flags);
-}
-
-static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
-{
-	return ((a->dev_addr == b->dev_addr) &&
-		(a->dev == b->dev)) ? true : false;
-}
-
-static bool containing_match(struct dma_debug_entry *a,
-			     struct dma_debug_entry *b)
-{
-	if (a->dev != b->dev)
-		return false;
-
-	if ((b->dev_addr <= a->dev_addr) &&
-	    ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
-		return true;
-
-	return false;
-}
-
-/*
- * Search a given entry in the hash bucket list
- */
-static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
-						  struct dma_debug_entry *ref,
-						  match_fn match)
-{
-	struct dma_debug_entry *entry, *ret = NULL;
-	int matches = 0, match_lvl, last_lvl = -1;
-
-	list_for_each_entry(entry, &bucket->list, list) {
-		if (!match(ref, entry))
-			continue;
-
-		/*
-		 * Some drivers map the same physical address multiple
-		 * times. Without a hardware IOMMU this results in the
-		 * same device addresses being put into the dma-debug
-		 * hash multiple times too. This can result in false
-		 * positives being reported. Therefore we implement a
-		 * best-fit algorithm here which returns the entry from
-		 * the hash which fits best to the reference value
-		 * instead of the first-fit.
-		 */
-		matches += 1;
-		match_lvl = 0;
-		entry->size         == ref->size         ? ++match_lvl : 0;
-		entry->type         == ref->type         ? ++match_lvl : 0;
-		entry->direction    == ref->direction    ? ++match_lvl : 0;
-		entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
-
-		if (match_lvl == 4) {
-			/* perfect-fit - return the result */
-			return entry;
-		} else if (match_lvl > last_lvl) {
-			/*
-			 * We found an entry that fits better then the
-			 * previous one or it is the 1st match.
-			 */
-			last_lvl = match_lvl;
-			ret      = entry;
-		}
-	}
-
-	/*
-	 * If we have multiple matches but no perfect-fit, just return
-	 * NULL.
-	 */
-	ret = (matches == 1) ? ret : NULL;
-
-	return ret;
-}
-
-static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
-						 struct dma_debug_entry *ref)
-{
-	return __hash_bucket_find(bucket, ref, exact_match);
-}
-
-static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
-						   struct dma_debug_entry *ref,
-						   unsigned long *flags)
-{
-
-	unsigned int max_range = dma_get_max_seg_size(ref->dev);
-	struct dma_debug_entry *entry, index = *ref;
-	unsigned int range = 0;
-
-	while (range <= max_range) {
-		entry = __hash_bucket_find(*bucket, ref, containing_match);
-
-		if (entry)
-			return entry;
-
-		/*
-		 * Nothing found, go back a hash bucket
-		 */
-		put_hash_bucket(*bucket, flags);
-		range          += (1 << HASH_FN_SHIFT);
-		index.dev_addr -= (1 << HASH_FN_SHIFT);
-		*bucket = get_hash_bucket(&index, flags);
-	}
-
-	return NULL;
-}
-
-/*
- * Add an entry to a hash bucket
- */
-static void hash_bucket_add(struct hash_bucket *bucket,
-			    struct dma_debug_entry *entry)
-{
-	list_add_tail(&entry->list, &bucket->list);
-}
-
-/*
- * Remove entry from a hash bucket list
- */
-static void hash_bucket_del(struct dma_debug_entry *entry)
-{
-	list_del(&entry->list);
-}
-
-static unsigned long long phys_addr(struct dma_debug_entry *entry)
-{
-	if (entry->type == dma_debug_resource)
-		return __pfn_to_phys(entry->pfn) + entry->offset;
-
-	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
-}
-
-/*
- * Dump mapping entries for debugging purposes
- */
-void debug_dma_dump_mappings(struct device *dev)
-{
-	int idx;
-
-	for (idx = 0; idx < HASH_SIZE; idx++) {
-		struct hash_bucket *bucket = &dma_entry_hash[idx];
-		struct dma_debug_entry *entry;
-		unsigned long flags;
-
-		spin_lock_irqsave(&bucket->lock, flags);
-
-		list_for_each_entry(entry, &bucket->list, list) {
-			if (!dev || dev == entry->dev) {
-				dev_info(entry->dev,
-					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
-					 type2name[entry->type], idx,
-					 phys_addr(entry), entry->pfn,
-					 entry->dev_addr, entry->size,
-					 dir2name[entry->direction],
-					 maperr2str[entry->map_err_type]);
-			}
-		}
-
-		spin_unlock_irqrestore(&bucket->lock, flags);
-	}
-}
-EXPORT_SYMBOL(debug_dma_dump_mappings);
-
-/*
- * For each mapping (initial cacheline in the case of
- * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
- * scatterlist, or the cacheline specified in dma_map_single) insert
- * into this tree using the cacheline as the key. At
- * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
- * the entry already exists at insertion time add a tag as a reference
- * count for the overlapping mappings.  For now, the overlap tracking
- * just ensures that 'unmaps' balance 'maps' before marking the
- * cacheline idle, but we should also be flagging overlaps as an API
- * violation.
- *
- * Memory usage is mostly constrained by the maximum number of available
- * dma-debug entries in that we need a free dma_debug_entry before
- * inserting into the tree.  In the case of dma_map_page and
- * dma_alloc_coherent there is only one dma_debug_entry and one
- * dma_active_cacheline entry to track per event.  dma_map_sg(), on the
- * other hand, consumes a single dma_debug_entry, but inserts 'nents'
- * entries into the tree.
- *
- * At any time debug_dma_assert_idle() can be called to trigger a
- * warning if any cachelines in the given page are in the active set.
- */
-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
-static DEFINE_SPINLOCK(radix_lock);
-#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
-#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
-#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
-
-static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
-{
-	return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
-		(entry->offset >> L1_CACHE_SHIFT);
-}
-
-static int active_cacheline_read_overlap(phys_addr_t cln)
-{
-	int overlap = 0, i;
-
-	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-		if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
-			overlap |= 1 << i;
-	return overlap;
-}
-
-static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
-{
-	int i;
-
-	if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
-		return overlap;
-
-	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-		if (overlap & 1 << i)
-			radix_tree_tag_set(&dma_active_cacheline, cln, i);
-		else
-			radix_tree_tag_clear(&dma_active_cacheline, cln, i);
-
-	return overlap;
-}
-
-static void active_cacheline_inc_overlap(phys_addr_t cln)
-{
-	int overlap = active_cacheline_read_overlap(cln);
-
-	overlap = active_cacheline_set_overlap(cln, ++overlap);
-
-	/* If we overflowed the overlap counter then we're potentially
-	 * leaking dma-mappings.  Otherwise, if maps and unmaps are
-	 * balanced then this overflow may cause false negatives in
-	 * debug_dma_assert_idle() as the cacheline may be marked idle
-	 * prematurely.
-	 */
-	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
-		  "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
-		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
-}
-
-static int active_cacheline_dec_overlap(phys_addr_t cln)
-{
-	int overlap = active_cacheline_read_overlap(cln);
-
-	return active_cacheline_set_overlap(cln, --overlap);
-}
-
-static int active_cacheline_insert(struct dma_debug_entry *entry)
-{
-	phys_addr_t cln = to_cacheline_number(entry);
-	unsigned long flags;
-	int rc;
-
-	/* If the device is not writing memory then we don't have any
-	 * concerns about the cpu consuming stale data.  This mitigates
-	 * legitimate usages of overlapping mappings.
-	 */
-	if (entry->direction == DMA_TO_DEVICE)
-		return 0;
-
-	spin_lock_irqsave(&radix_lock, flags);
-	rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
-	if (rc == -EEXIST)
-		active_cacheline_inc_overlap(cln);
-	spin_unlock_irqrestore(&radix_lock, flags);
-
-	return rc;
-}
-
-static void active_cacheline_remove(struct dma_debug_entry *entry)
-{
-	phys_addr_t cln = to_cacheline_number(entry);
-	unsigned long flags;
-
-	/* ...mirror the insert case */
-	if (entry->direction == DMA_TO_DEVICE)
-		return;
-
-	spin_lock_irqsave(&radix_lock, flags);
-	/* since we are counting overlaps the final put of the
-	 * cacheline will occur when the overlap count is 0.
-	 * active_cacheline_dec_overlap() returns -1 in that case
-	 */
-	if (active_cacheline_dec_overlap(cln) < 0)
-		radix_tree_delete(&dma_active_cacheline, cln);
-	spin_unlock_irqrestore(&radix_lock, flags);
-}
-
-/**
- * debug_dma_assert_idle() - assert that a page is not undergoing dma
- * @page: page to lookup in the dma_active_cacheline tree
- *
- * Place a call to this routine in cases where the cpu touching the page
- * before the dma completes (page is dma_unmapped) will lead to data
- * corruption.
- */
-void debug_dma_assert_idle(struct page *page)
-{
-	static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
-	struct dma_debug_entry *entry = NULL;
-	void **results = (void **) &ents;
-	unsigned int nents, i;
-	unsigned long flags;
-	phys_addr_t cln;
-
-	if (dma_debug_disabled())
-		return;
-
-	if (!page)
-		return;
-
-	cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
-	spin_lock_irqsave(&radix_lock, flags);
-	nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
-				       CACHELINES_PER_PAGE);
-	for (i = 0; i < nents; i++) {
-		phys_addr_t ent_cln = to_cacheline_number(ents[i]);
-
-		if (ent_cln == cln) {
-			entry = ents[i];
-			break;
-		} else if (ent_cln >= cln + CACHELINES_PER_PAGE)
-			break;
-	}
-	spin_unlock_irqrestore(&radix_lock, flags);
-
-	if (!entry)
-		return;
-
-	cln = to_cacheline_number(entry);
-	err_printk(entry->dev, entry,
-		   "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
-		   &cln);
-}
-
-/*
- * Wrapper function for adding an entry to the hash.
- * This function takes care of locking itself.
- */
-static void add_dma_entry(struct dma_debug_entry *entry)
-{
-	struct hash_bucket *bucket;
-	unsigned long flags;
-	int rc;
-
-	bucket = get_hash_bucket(entry, &flags);
-	hash_bucket_add(bucket, entry);
-	put_hash_bucket(bucket, &flags);
-
-	rc = active_cacheline_insert(entry);
-	if (rc == -ENOMEM) {
-		pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
-		global_disable = true;
-	}
-
-	/* TODO: report -EEXIST errors here as overlapping mappings are
-	 * not supported by the DMA API
-	 */
-}
-
-static struct dma_debug_entry *__dma_entry_alloc(void)
-{
-	struct dma_debug_entry *entry;
-
-	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-	list_del(&entry->list);
-	memset(entry, 0, sizeof(*entry));
-
-	num_free_entries -= 1;
-	if (num_free_entries < min_free_entries)
-		min_free_entries = num_free_entries;
-
-	return entry;
-}
-
-/* struct dma_entry allocator
- *
- * The next two functions implement the allocator for
- * struct dma_debug_entries.
- */
-static struct dma_debug_entry *dma_entry_alloc(void)
-{
-	struct dma_debug_entry *entry;
-	unsigned long flags;
-
-	spin_lock_irqsave(&free_entries_lock, flags);
-
-	if (list_empty(&free_entries)) {
-		global_disable = true;
-		spin_unlock_irqrestore(&free_entries_lock, flags);
-		pr_err("DMA-API: debugging out of memory - disabling\n");
-		return NULL;
-	}
-
-	entry = __dma_entry_alloc();
-
-	spin_unlock_irqrestore(&free_entries_lock, flags);
-
-#ifdef CONFIG_STACKTRACE
-	entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
-	entry->stacktrace.entries = entry->st_entries;
-	entry->stacktrace.skip = 2;
-	save_stack_trace(&entry->stacktrace);
-#endif
-
-	return entry;
-}
-
-static void dma_entry_free(struct dma_debug_entry *entry)
-{
-	unsigned long flags;
-
-	active_cacheline_remove(entry);
-
-	/*
-	 * add to beginning of the list - this way the entries are
-	 * more likely cache hot when they are reallocated.
-	 */
-	spin_lock_irqsave(&free_entries_lock, flags);
-	list_add(&entry->list, &free_entries);
-	num_free_entries += 1;
-	spin_unlock_irqrestore(&free_entries_lock, flags);
-}
-
-int dma_debug_resize_entries(u32 num_entries)
-{
-	int i, delta, ret = 0;
-	unsigned long flags;
-	struct dma_debug_entry *entry;
-	LIST_HEAD(tmp);
-
-	spin_lock_irqsave(&free_entries_lock, flags);
-
-	if (nr_total_entries < num_entries) {
-		delta = num_entries - nr_total_entries;
-
-		spin_unlock_irqrestore(&free_entries_lock, flags);
-
-		for (i = 0; i < delta; i++) {
-			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-			if (!entry)
-				break;
-
-			list_add_tail(&entry->list, &tmp);
-		}
-
-		spin_lock_irqsave(&free_entries_lock, flags);
-
-		list_splice(&tmp, &free_entries);
-		nr_total_entries += i;
-		num_free_entries += i;
-	} else {
-		delta = nr_total_entries - num_entries;
-
-		for (i = 0; i < delta && !list_empty(&free_entries); i++) {
-			entry = __dma_entry_alloc();
-			kfree(entry);
-		}
-
-		nr_total_entries -= i;
-	}
-
-	if (nr_total_entries != num_entries)
-		ret = 1;
-
-	spin_unlock_irqrestore(&free_entries_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(dma_debug_resize_entries);
-
-/*
- * DMA-API debugging init code
- *
- * The init code does two things:
- *   1. Initialize core data structures
- *   2. Preallocate a given number of dma_debug_entry structs
- */
-
-static int prealloc_memory(u32 num_entries)
-{
-	struct dma_debug_entry *entry, *next_entry;
-	int i;
-
-	for (i = 0; i < num_entries; ++i) {
-		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-		if (!entry)
-			goto out_err;
-
-		list_add_tail(&entry->list, &free_entries);
-	}
-
-	num_free_entries = num_entries;
-	min_free_entries = num_entries;
-
-	pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
-
-	return 0;
-
-out_err:
-
-	list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
-		list_del(&entry->list);
-		kfree(entry);
-	}
-
-	return -ENOMEM;
-}
-
-static ssize_t filter_read(struct file *file, char __user *user_buf,
-			   size_t count, loff_t *ppos)
-{
-	char buf[NAME_MAX_LEN + 1];
-	unsigned long flags;
-	int len;
-
-	if (!current_driver_name[0])
-		return 0;
-
-	/*
-	 * We can't copy to userspace directly because current_driver_name can
-	 * only be read under the driver_name_lock with irqs disabled. So
-	 * create a temporary copy first.
-	 */
-	read_lock_irqsave(&driver_name_lock, flags);
-	len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
-	read_unlock_irqrestore(&driver_name_lock, flags);
-
-	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
-}
-
-static ssize_t filter_write(struct file *file, const char __user *userbuf,
-			    size_t count, loff_t *ppos)
-{
-	char buf[NAME_MAX_LEN];
-	unsigned long flags;
-	size_t len;
-	int i;
-
-	/*
-	 * We can't copy from userspace directly. Access to
-	 * current_driver_name is protected with a write_lock with irqs
-	 * disabled. Since copy_from_user can fault and may sleep we
-	 * need to copy to temporary buffer first
-	 */
-	len = min(count, (size_t)(NAME_MAX_LEN - 1));
-	if (copy_from_user(buf, userbuf, len))
-		return -EFAULT;
-
-	buf[len] = 0;
-
-	write_lock_irqsave(&driver_name_lock, flags);
-
-	/*
-	 * Now handle the string we got from userspace very carefully.
-	 * The rules are:
-	 *         - only use the first token we got
-	 *         - token delimiter is everything looking like a space
-	 *           character (' ', '\n', '\t' ...)
-	 *
-	 */
-	if (!isalnum(buf[0])) {
-		/*
-		 * If the first character userspace gave us is not
-		 * alphanumerical then assume the filter should be
-		 * switched off.
-		 */
-		if (current_driver_name[0])
-			pr_info("DMA-API: switching off dma-debug driver filter\n");
-		current_driver_name[0] = 0;
-		current_driver = NULL;
-		goto out_unlock;
-	}
-
-	/*
-	 * Now parse out the first token and use it as the name for the
-	 * driver to filter for.
-	 */
-	for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
-		current_driver_name[i] = buf[i];
-		if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
-			break;
-	}
-	current_driver_name[i] = 0;
-	current_driver = NULL;
-
-	pr_info("DMA-API: enable driver filter for driver [%s]\n",
-		current_driver_name);
-
-out_unlock:
-	write_unlock_irqrestore(&driver_name_lock, flags);
-
-	return count;
-}
-
-static const struct file_operations filter_fops = {
-	.read  = filter_read,
-	.write = filter_write,
-	.llseek = default_llseek,
-};
-
-static int dma_debug_fs_init(void)
-{
-	dma_debug_dent = debugfs_create_dir("dma-api", NULL);
-	if (!dma_debug_dent) {
-		pr_err("DMA-API: can not create debugfs directory\n");
-		return -ENOMEM;
-	}
-
-	global_disable_dent = debugfs_create_bool("disabled", 0444,
-			dma_debug_dent,
-			&global_disable);
-	if (!global_disable_dent)
-		goto out_err;
-
-	error_count_dent = debugfs_create_u32("error_count", 0444,
-			dma_debug_dent, &error_count);
-	if (!error_count_dent)
-		goto out_err;
-
-	show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
-			dma_debug_dent,
-			&show_all_errors);
-	if (!show_all_errors_dent)
-		goto out_err;
-
-	show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
-			dma_debug_dent,
-			&show_num_errors);
-	if (!show_num_errors_dent)
-		goto out_err;
-
-	num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
-			dma_debug_dent,
-			&num_free_entries);
-	if (!num_free_entries_dent)
-		goto out_err;
-
-	min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
-			dma_debug_dent,
-			&min_free_entries);
-	if (!min_free_entries_dent)
-		goto out_err;
-
-	filter_dent = debugfs_create_file("driver_filter", 0644,
-					  dma_debug_dent, NULL, &filter_fops);
-	if (!filter_dent)
-		goto out_err;
-
-	return 0;
-
-out_err:
-	debugfs_remove_recursive(dma_debug_dent);
-
-	return -ENOMEM;
-}
-
-static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
-{
-	struct dma_debug_entry *entry;
-	unsigned long flags;
-	int count = 0, i;
-
-	for (i = 0; i < HASH_SIZE; ++i) {
-		spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
-		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
-			if (entry->dev == dev) {
-				count += 1;
-				*out_entry = entry;
-			}
-		}
-		spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
-	}
-
-	return count;
-}
-
-static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct dma_debug_entry *uninitialized_var(entry);
-	int count;
-
-	if (dma_debug_disabled())
-		return 0;
-
-	switch (action) {
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		count = device_dma_allocations(dev, &entry);
-		if (count == 0)
-			break;
-		err_printk(dev, entry, "DMA-API: device driver has pending "
-				"DMA allocations while released from device "
-				"[count=%d]\n"
-				"One of leaked entries details: "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [mapped as %s]\n",
-			count, entry->dev_addr, entry->size,
-			dir2name[entry->direction], type2name[entry->type]);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-void dma_debug_add_bus(struct bus_type *bus)
-{
-	struct notifier_block *nb;
-
-	if (dma_debug_disabled())
-		return;
-
-	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
-	if (nb == NULL) {
-		pr_err("dma_debug_add_bus: out of memory\n");
-		return;
-	}
-
-	nb->notifier_call = dma_debug_device_change;
-
-	bus_register_notifier(bus, nb);
-}
-
-/*
- * Let the architectures decide how many entries should be preallocated.
- */
-void dma_debug_init(u32 num_entries)
-{
-	int i;
-
-	/* Do not use dma_debug_initialized here, since we really want to be
-	 * called to set dma_debug_initialized
-	 */
-	if (global_disable)
-		return;
-
-	for (i = 0; i < HASH_SIZE; ++i) {
-		INIT_LIST_HEAD(&dma_entry_hash[i].list);
-		spin_lock_init(&dma_entry_hash[i].lock);
-	}
-
-	if (dma_debug_fs_init() != 0) {
-		pr_err("DMA-API: error creating debugfs entries - disabling\n");
-		global_disable = true;
-
-		return;
-	}
-
-	if (req_entries)
-		num_entries = req_entries;
-
-	if (prealloc_memory(num_entries) != 0) {
-		pr_err("DMA-API: debugging out of memory error - disabled\n");
-		global_disable = true;
-
-		return;
-	}
-
-	nr_total_entries = num_free_entries;
-
-	dma_debug_initialized = true;
-
-	pr_info("DMA-API: debugging enabled by kernel config\n");
-}
-
-static __init int dma_debug_cmdline(char *str)
-{
-	if (!str)
-		return -EINVAL;
-
-	if (strncmp(str, "off", 3) == 0) {
-		pr_info("DMA-API: debugging disabled on kernel command line\n");
-		global_disable = true;
-	}
-
-	return 0;
-}
-
-static __init int dma_debug_entries_cmdline(char *str)
-{
-	int res;
-
-	if (!str)
-		return -EINVAL;
-
-	res = get_option(&str, &req_entries);
-
-	if (!res)
-		req_entries = 0;
-
-	return 0;
-}
-
-__setup("dma_debug=", dma_debug_cmdline);
-__setup("dma_debug_entries=", dma_debug_entries_cmdline);
-
-static void check_unmap(struct dma_debug_entry *ref)
-{
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-
-	bucket = get_hash_bucket(ref, &flags);
-	entry = bucket_find_exact(bucket, ref);
-
-	if (!entry) {
-		/* must drop lock before calling dma_mapping_error */
-		put_hash_bucket(bucket, &flags);
-
-		if (dma_mapping_error(ref->dev, ref->dev_addr)) {
-			err_printk(ref->dev, NULL,
-				   "DMA-API: device driver tries to free an "
-				   "invalid DMA memory address\n");
-		} else {
-			err_printk(ref->dev, NULL,
-				   "DMA-API: device driver tries to free DMA "
-				   "memory it has not allocated [device "
-				   "address=0x%016llx] [size=%llu bytes]\n",
-				   ref->dev_addr, ref->size);
-		}
-		return;
-	}
-
-	if (ref->size != entry->size) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with different size "
-			   "[device address=0x%016llx] [map size=%llu bytes] "
-			   "[unmap size=%llu bytes]\n",
-			   ref->dev_addr, entry->size, ref->size);
-	}
-
-	if (ref->type != entry->type) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with wrong function "
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[mapped as %s] [unmapped as %s]\n",
-			   ref->dev_addr, ref->size,
-			   type2name[entry->type], type2name[ref->type]);
-	} else if ((entry->type == dma_debug_coherent) &&
-		   (phys_addr(ref) != phys_addr(entry))) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with different CPU address "
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[cpu alloc address=0x%016llx] "
-			   "[cpu free address=0x%016llx]",
-			   ref->dev_addr, ref->size,
-			   phys_addr(entry),
-			   phys_addr(ref));
-	}
-
-	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
-	    ref->sg_call_ents != entry->sg_call_ents) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA sg list with different entry count "
-			   "[map count=%d] [unmap count=%d]\n",
-			   entry->sg_call_ents, ref->sg_call_ents);
-	}
-
-	/*
-	 * This may be no bug in reality - but most implementations of the
-	 * DMA API don't handle this properly, so check for it here
-	 */
-	if (ref->direction != entry->direction) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with different direction "
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[mapped with %s] [unmapped with %s]\n",
-			   ref->dev_addr, ref->size,
-			   dir2name[entry->direction],
-			   dir2name[ref->direction]);
-	}
-
-	/*
-	 * Drivers should use dma_mapping_error() to check the returned
-	 * addresses of dma_map_single() and dma_map_page().
-	 * If not, print this warning message. See Documentation/DMA-API.txt.
-	 */
-	if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
-		err_printk(ref->dev, entry,
-			   "DMA-API: device driver failed to check map error"
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[mapped as %s]",
-			   ref->dev_addr, ref->size,
-			   type2name[entry->type]);
-	}
-
-	hash_bucket_del(entry);
-	dma_entry_free(entry);
-
-	put_hash_bucket(bucket, &flags);
-}
-
-static void check_for_stack(struct device *dev,
-			    struct page *page, size_t offset)
-{
-	void *addr;
-	struct vm_struct *stack_vm_area = task_stack_vm_area(current);
-
-	if (!stack_vm_area) {
-		/* Stack is direct-mapped. */
-		if (PageHighMem(page))
-			return;
-		addr = page_address(page) + offset;
-		if (object_is_on_stack(addr))
-			err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
-	} else {
-		/* Stack is vmalloced. */
-		int i;
-
-		for (i = 0; i < stack_vm_area->nr_pages; i++) {
-			if (page != stack_vm_area->pages[i])
-				continue;
-
-			addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
-			err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
-			break;
-		}
-	}
-}
-
-static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
-{
-	unsigned long a1 = (unsigned long)addr;
-	unsigned long b1 = a1 + len;
-	unsigned long a2 = (unsigned long)start;
-	unsigned long b2 = (unsigned long)end;
-
-	return !(b1 <= a2 || a1 >= b2);
-}
-
-static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
-{
-	if (overlap(addr, len, _stext, _etext) ||
-	    overlap(addr, len, __start_rodata, __end_rodata))
-		err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
-}
-
-static void check_sync(struct device *dev,
-		       struct dma_debug_entry *ref,
-		       bool to_cpu)
-{
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-
-	bucket = get_hash_bucket(ref, &flags);
-
-	entry = bucket_find_contain(&bucket, ref, &flags);
-
-	if (!entry) {
-		err_printk(dev, NULL, "DMA-API: device driver tries "
-				"to sync DMA memory it has not allocated "
-				"[device address=0x%016llx] [size=%llu bytes]\n",
-				(unsigned long long)ref->dev_addr, ref->size);
-		goto out;
-	}
-
-	if (ref->size > entry->size) {
-		err_printk(dev, entry, "DMA-API: device driver syncs"
-				" DMA memory outside allocated range "
-				"[device address=0x%016llx] "
-				"[allocation size=%llu bytes] "
-				"[sync offset+size=%llu]\n",
-				entry->dev_addr, entry->size,
-				ref->size);
-	}
-
-	if (entry->direction == DMA_BIDIRECTIONAL)
-		goto out;
-
-	if (ref->direction != entry->direction) {
-		err_printk(dev, entry, "DMA-API: device driver syncs "
-				"DMA memory with different direction "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)ref->dev_addr, entry->size,
-				dir2name[entry->direction],
-				dir2name[ref->direction]);
-	}
-
-	if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
-		      !(ref->direction == DMA_TO_DEVICE))
-		err_printk(dev, entry, "DMA-API: device driver syncs "
-				"device read-only DMA memory for cpu "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)ref->dev_addr, entry->size,
-				dir2name[entry->direction],
-				dir2name[ref->direction]);
-
-	if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
-		       !(ref->direction == DMA_FROM_DEVICE))
-		err_printk(dev, entry, "DMA-API: device driver syncs "
-				"device write-only DMA memory to device "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)ref->dev_addr, entry->size,
-				dir2name[entry->direction],
-				dir2name[ref->direction]);
-
-	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
-	    ref->sg_call_ents != entry->sg_call_ents) {
-		err_printk(ref->dev, entry, "DMA-API: device driver syncs "
-			   "DMA sg list with different entry count "
-			   "[map count=%d] [sync count=%d]\n",
-			   entry->sg_call_ents, ref->sg_call_ents);
-	}
-
-out:
-	put_hash_bucket(bucket, &flags);
-}
-
-void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
-			size_t size, int direction, dma_addr_t dma_addr,
-			bool map_single)
-{
-	struct dma_debug_entry *entry;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	if (dma_mapping_error(dev, dma_addr))
-		return;
-
-	entry = dma_entry_alloc();
-	if (!entry)
-		return;
-
-	entry->dev       = dev;
-	entry->type      = dma_debug_page;
-	entry->pfn	 = page_to_pfn(page);
-	entry->offset	 = offset,
-	entry->dev_addr  = dma_addr;
-	entry->size      = size;
-	entry->direction = direction;
-	entry->map_err_type = MAP_ERR_NOT_CHECKED;
-
-	if (map_single)
-		entry->type = dma_debug_single;
-
-	check_for_stack(dev, page, offset);
-
-	if (!PageHighMem(page)) {
-		void *addr = page_address(page) + offset;
-
-		check_for_illegal_area(dev, addr, size);
-	}
-
-	add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_map_page);
-
-void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	struct dma_debug_entry ref;
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.dev = dev;
-	ref.dev_addr = dma_addr;
-	bucket = get_hash_bucket(&ref, &flags);
-
-	list_for_each_entry(entry, &bucket->list, list) {
-		if (!exact_match(&ref, entry))
-			continue;
-
-		/*
-		 * The same physical address can be mapped multiple
-		 * times. Without a hardware IOMMU this results in the
-		 * same device addresses being put into the dma-debug
-		 * hash multiple times too. This can result in false
-		 * positives being reported. Therefore we implement a
-		 * best-fit algorithm here which updates the first entry
-		 * from the hash which fits the reference value and is
-		 * not currently listed as being checked.
-		 */
-		if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
-			entry->map_err_type = MAP_ERR_CHECKED;
-			break;
-		}
-	}
-
-	put_hash_bucket(bucket, &flags);
-}
-EXPORT_SYMBOL(debug_dma_mapping_error);
-
-void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
-			  size_t size, int direction, bool map_single)
-{
-	struct dma_debug_entry ref = {
-		.type           = dma_debug_page,
-		.dev            = dev,
-		.dev_addr       = addr,
-		.size           = size,
-		.direction      = direction,
-	};
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	if (map_single)
-		ref.type = dma_debug_single;
-
-	check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_unmap_page);
-
-void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
-		      int nents, int mapped_ents, int direction)
-{
-	struct dma_debug_entry *entry;
-	struct scatterlist *s;
-	int i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sg, s, mapped_ents, i) {
-		entry = dma_entry_alloc();
-		if (!entry)
-			return;
-
-		entry->type           = dma_debug_sg;
-		entry->dev            = dev;
-		entry->pfn	      = page_to_pfn(sg_page(s));
-		entry->offset	      = s->offset,
-		entry->size           = sg_dma_len(s);
-		entry->dev_addr       = sg_dma_address(s);
-		entry->direction      = direction;
-		entry->sg_call_ents   = nents;
-		entry->sg_mapped_ents = mapped_ents;
-
-		check_for_stack(dev, sg_page(s), s->offset);
-
-		if (!PageHighMem(sg_page(s))) {
-			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
-		}
-
-		add_dma_entry(entry);
-	}
-}
-EXPORT_SYMBOL(debug_dma_map_sg);
-
-static int get_nr_mapped_entries(struct device *dev,
-				 struct dma_debug_entry *ref)
-{
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-	int mapped_ents;
-
-	bucket       = get_hash_bucket(ref, &flags);
-	entry        = bucket_find_exact(bucket, ref);
-	mapped_ents  = 0;
-
-	if (entry)
-		mapped_ents = entry->sg_mapped_ents;
-	put_hash_bucket(bucket, &flags);
-
-	return mapped_ents;
-}
-
-void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
-			int nelems, int dir)
-{
-	struct scatterlist *s;
-	int mapped_ents = 0, i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sglist, s, nelems, i) {
-
-		struct dma_debug_entry ref = {
-			.type           = dma_debug_sg,
-			.dev            = dev,
-			.pfn		= page_to_pfn(sg_page(s)),
-			.offset		= s->offset,
-			.dev_addr       = sg_dma_address(s),
-			.size           = sg_dma_len(s),
-			.direction      = dir,
-			.sg_call_ents   = nelems,
-		};
-
-		if (mapped_ents && i >= mapped_ents)
-			break;
-
-		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-		check_unmap(&ref);
-	}
-}
-EXPORT_SYMBOL(debug_dma_unmap_sg);
-
-void debug_dma_alloc_coherent(struct device *dev, size_t size,
-			      dma_addr_t dma_addr, void *virt)
-{
-	struct dma_debug_entry *entry;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	if (unlikely(virt == NULL))
-		return;
-
-	/* handle vmalloc and linear addresses */
-	if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
-		return;
-
-	entry = dma_entry_alloc();
-	if (!entry)
-		return;
-
-	entry->type      = dma_debug_coherent;
-	entry->dev       = dev;
-	entry->offset	 = offset_in_page(virt);
-	entry->size      = size;
-	entry->dev_addr  = dma_addr;
-	entry->direction = DMA_BIDIRECTIONAL;
-
-	if (is_vmalloc_addr(virt))
-		entry->pfn = vmalloc_to_pfn(virt);
-	else
-		entry->pfn = page_to_pfn(virt_to_page(virt));
-
-	add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_alloc_coherent);
-
-void debug_dma_free_coherent(struct device *dev, size_t size,
-			 void *virt, dma_addr_t addr)
-{
-	struct dma_debug_entry ref = {
-		.type           = dma_debug_coherent,
-		.dev            = dev,
-		.offset		= offset_in_page(virt),
-		.dev_addr       = addr,
-		.size           = size,
-		.direction      = DMA_BIDIRECTIONAL,
-	};
-
-	/* handle vmalloc and linear addresses */
-	if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
-		return;
-
-	if (is_vmalloc_addr(virt))
-		ref.pfn = vmalloc_to_pfn(virt);
-	else
-		ref.pfn = page_to_pfn(virt_to_page(virt));
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_free_coherent);
-
-void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
-			    int direction, dma_addr_t dma_addr)
-{
-	struct dma_debug_entry *entry;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	entry = dma_entry_alloc();
-	if (!entry)
-		return;
-
-	entry->type		= dma_debug_resource;
-	entry->dev		= dev;
-	entry->pfn		= PHYS_PFN(addr);
-	entry->offset		= offset_in_page(addr);
-	entry->size		= size;
-	entry->dev_addr		= dma_addr;
-	entry->direction	= direction;
-	entry->map_err_type	= MAP_ERR_NOT_CHECKED;
-
-	add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_map_resource);
-
-void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
-			      size_t size, int direction)
-{
-	struct dma_debug_entry ref = {
-		.type           = dma_debug_resource,
-		.dev            = dev,
-		.dev_addr       = dma_addr,
-		.size           = size,
-		.direction      = direction,
-	};
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_unmap_resource);
-
-void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-				   size_t size, int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
-
-void debug_dma_sync_single_for_device(struct device *dev,
-				      dma_addr_t dma_handle, size_t size,
-				      int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_for_device);
-
-void debug_dma_sync_single_range_for_cpu(struct device *dev,
-					 dma_addr_t dma_handle,
-					 unsigned long offset, size_t size,
-					 int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = offset + size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
-
-void debug_dma_sync_single_range_for_device(struct device *dev,
-					    dma_addr_t dma_handle,
-					    unsigned long offset,
-					    size_t size, int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = offset + size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
-
-void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-			       int nelems, int direction)
-{
-	struct scatterlist *s;
-	int mapped_ents = 0, i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sg, s, nelems, i) {
-
-		struct dma_debug_entry ref = {
-			.type           = dma_debug_sg,
-			.dev            = dev,
-			.pfn		= page_to_pfn(sg_page(s)),
-			.offset		= s->offset,
-			.dev_addr       = sg_dma_address(s),
-			.size           = sg_dma_len(s),
-			.direction      = direction,
-			.sg_call_ents   = nelems,
-		};
-
-		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-		if (i >= mapped_ents)
-			break;
-
-		check_sync(dev, &ref, true);
-	}
-}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
-
-void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-				  int nelems, int direction)
-{
-	struct scatterlist *s;
-	int mapped_ents = 0, i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sg, s, nelems, i) {
-
-		struct dma_debug_entry ref = {
-			.type           = dma_debug_sg,
-			.dev            = dev,
-			.pfn		= page_to_pfn(sg_page(s)),
-			.offset		= s->offset,
-			.dev_addr       = sg_dma_address(s),
-			.size           = sg_dma_len(s),
-			.direction      = direction,
-			.sg_call_ents   = nelems,
-		};
-		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-		if (i >= mapped_ents)
-			break;
-
-		check_sync(dev, &ref, false);
-	}
-}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
-
-static int __init dma_debug_driver_setup(char *str)
-{
-	int i;
-
-	for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
-		current_driver_name[i] = *str;
-		if (*str == 0)
-			break;
-	}
-
-	if (current_driver_name[0])
-		pr_info("DMA-API: enable driver filter for driver [%s]\n",
-			current_driver_name);
-
-
-	return 1;
-}
-__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
deleted file mode 100644
index bbfb229aa067..000000000000
--- a/lib/dma-direct.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DMA operations that map physical memory directly without using an IOMMU or
- * flushing caches.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-contiguous.h>
-#include <linux/pfn.h>
-#include <linux/set_memory.h>
-
-#define DIRECT_MAPPING_ERROR		0
-
-/*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but
- * some use it for entirely different regions:
- */
-#ifndef ARCH_ZONE_DMA_BITS
-#define ARCH_ZONE_DMA_BITS 24
-#endif
-
-/*
- * For AMD SEV all DMA must be to unencrypted addresses.
- */
-static inline bool force_dma_unencrypted(void)
-{
-	return sev_active();
-}
-
-static bool
-check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
-		const char *caller)
-{
-	if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
-		if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
-			dev_err(dev,
-				"%s: overflow %pad+%zu of device mask %llx\n",
-				caller, &dma_addr, size, *dev->dma_mask);
-		}
-		return false;
-	}
-	return true;
-}
-
-static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
-{
-	dma_addr_t addr = force_dma_unencrypted() ?
-		__phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
-	return addr + size - 1 <= dev->coherent_dma_mask;
-}
-
-void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	int page_order = get_order(size);
-	struct page *page = NULL;
-	void *ret;
-
-	/* we always manually zero the memory once we are done: */
-	gfp &= ~__GFP_ZERO;
-
-	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-		gfp |= GFP_DMA;
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-		gfp |= GFP_DMA32;
-
-again:
-	/* CMA can be used only in the context which permits sleeping */
-	if (gfpflags_allow_blocking(gfp)) {
-		page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
-		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-			dma_release_from_contiguous(dev, page, count);
-			page = NULL;
-		}
-	}
-	if (!page)
-		page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
-
-	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-		__free_pages(page, page_order);
-		page = NULL;
-
-		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
-		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
-		    !(gfp & GFP_DMA)) {
-			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
-			goto again;
-		}
-	}
-
-	if (!page)
-		return NULL;
-	ret = page_address(page);
-	if (force_dma_unencrypted()) {
-		set_memory_decrypted((unsigned long)ret, 1 << page_order);
-		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
-	} else {
-		*dma_handle = phys_to_dma(dev, page_to_phys(page));
-	}
-	memset(ret, 0, size);
-	return ret;
-}
-
-/*
- * NOTE: this function must never look at the dma_addr argument, because we want
- * to be able to use it as a helper for iommu implementations as well.
- */
-void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned int page_order = get_order(size);
-
-	if (force_dma_unencrypted())
-		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
-	if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-		free_pages((unsigned long)cpu_addr, page_order);
-}
-
-static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
-
-	if (!check_addr(dev, dma_addr, size, __func__))
-		return DIRECT_MAPPING_ERROR;
-	return dma_addr;
-}
-
-static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sgl, sg, nents, i) {
-		BUG_ON(!sg_page(sg));
-
-		sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
-		if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
-			return 0;
-		sg_dma_len(sg) = sg->length;
-	}
-
-	return nents;
-}
-
-int dma_direct_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_ZONE_DMA
-	if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-		return 0;
-#else
-	/*
-	 * Because 32-bit DMA masks are so common we expect every architecture
-	 * to be able to satisfy them - either by not supporting more physical
-	 * memory, or by providing a ZONE_DMA32.  If neither is the case, the
-	 * architecture needs to use an IOMMU instead of the direct mapping.
-	 */
-	if (mask < DMA_BIT_MASK(32))
-		return 0;
-#endif
-	return 1;
-}
-
-static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return dma_addr == DIRECT_MAPPING_ERROR;
-}
-
-const struct dma_map_ops dma_direct_ops = {
-	.alloc			= dma_direct_alloc,
-	.free			= dma_direct_free,
-	.map_page		= dma_direct_map_page,
-	.map_sg			= dma_direct_map_sg,
-	.dma_supported		= dma_direct_supported,
-	.mapping_error		= dma_direct_mapping_error,
-	.is_phys		= 1,
-};
-EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/dma-virt.c b/lib/dma-virt.c
deleted file mode 100644
index 8e61a02ef9ca..000000000000
--- a/lib/dma-virt.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *	lib/dma-virt.c
- *
- * DMA operations that map to virtual addresses without flushing memory.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-
-static void *dma_virt_alloc(struct device *dev, size_t size,
-			    dma_addr_t *dma_handle, gfp_t gfp,
-			    unsigned long attrs)
-{
-	void *ret;
-
-	ret = (void *)__get_free_pages(gfp, get_order(size));
-	if (ret)
-		*dma_handle = (uintptr_t)ret;
-	return ret;
-}
-
-static void dma_virt_free(struct device *dev, size_t size,
-			  void *cpu_addr, dma_addr_t dma_addr,
-			  unsigned long attrs)
-{
-	free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
-				    unsigned long offset, size_t size,
-				    enum dma_data_direction dir,
-				    unsigned long attrs)
-{
-	return (uintptr_t)(page_address(page) + offset);
-}
-
-static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
-			   int nents, enum dma_data_direction dir,
-			   unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sgl, sg, nents, i) {
-		BUG_ON(!sg_page(sg));
-		sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
-		sg_dma_len(sg) = sg->length;
-	}
-
-	return nents;
-}
-
-const struct dma_map_ops dma_virt_ops = {
-	.alloc			= dma_virt_alloc,
-	.free			= dma_virt_free,
-	.map_page		= dma_virt_map_page,
-	.map_sg			= dma_virt_map_sg,
-};
-EXPORT_SYMBOL(dma_virt_ops);
diff --git a/lib/idr.c b/lib/idr.c
index 823b813f08f8..ed9c169c12bd 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -4,9 +4,9 @@
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/xarray.h>
 
 DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-static DEFINE_SPINLOCK(simple_ida_lock);
 
 /**
  * idr_alloc_u32() - Allocate an ID.
@@ -581,7 +581,7 @@ again:
 	if (!ida_pre_get(ida, gfp_mask))
 		return -ENOMEM;
 
-	spin_lock_irqsave(&simple_ida_lock, flags);
+	xa_lock_irqsave(&ida->ida_rt, flags);
 	ret = ida_get_new_above(ida, start, &id);
 	if (!ret) {
 		if (id > max) {
@@ -591,7 +591,7 @@ again:
 			ret = id;
 		}
 	}
-	spin_unlock_irqrestore(&simple_ida_lock, flags);
+	xa_unlock_irqrestore(&ida->ida_rt, flags);
 
 	if (unlikely(ret == -EAGAIN))
 		goto again;
@@ -615,8 +615,8 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
 	unsigned long flags;
 
 	BUG_ON((int)id < 0);
-	spin_lock_irqsave(&simple_ida_lock, flags);
+	xa_lock_irqsave(&ida->ida_rt, flags);
 	ida_remove(ida, id);
-	spin_unlock_irqrestore(&simple_ida_lock, flags);
+	xa_unlock_irqrestore(&ida->ida_rt, flags);
 }
 EXPORT_SYMBOL(ida_simple_remove);
diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index 835242e74aaa..75509a1511a3 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -64,11 +64,12 @@ static int interval_tree_test_init(void)
 	unsigned long results;
 	cycles_t time1, time2, time;
 
-	nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL);
+	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+			      GFP_KERNEL);
 	if (!nodes)
 		return -ENOMEM;
 
-	queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL);
+	queries = kmalloc_array(nsearches, sizeof(int), GFP_KERNEL);
 	if (!queries) {
 		kfree(nodes);
 		return -ENOMEM;
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
deleted file mode 100644
index 55b00de106b5..000000000000
--- a/lib/iommu-common.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * IOMMU mmap management and range allocation functions.
- * Based almost entirely upon the powerpc iommu allocator.
- */
-
-#include <linux/export.h>
-#include <linux/bitmap.h>
-#include <linux/bug.h>
-#include <linux/iommu-helper.h>
-#include <linux/iommu-common.h>
-#include <linux/dma-mapping.h>
-#include <linux/hash.h>
-
-static unsigned long iommu_large_alloc = 15;
-
-static	DEFINE_PER_CPU(unsigned int, iommu_hash_common);
-
-static inline bool need_flush(struct iommu_map_table *iommu)
-{
-	return ((iommu->flags & IOMMU_NEED_FLUSH) != 0);
-}
-
-static inline void set_flush(struct iommu_map_table *iommu)
-{
-	iommu->flags |= IOMMU_NEED_FLUSH;
-}
-
-static inline void clear_flush(struct iommu_map_table *iommu)
-{
-	iommu->flags &= ~IOMMU_NEED_FLUSH;
-}
-
-static void setup_iommu_pool_hash(void)
-{
-	unsigned int i;
-	static bool do_once;
-
-	if (do_once)
-		return;
-	do_once = true;
-	for_each_possible_cpu(i)
-		per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
-}
-
-/*
- * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
- * is the number of table entries. If `large_pool' is set to true,
- * the top 1/4 of the table will be set aside for pool allocations
- * of more than iommu_large_alloc pages.
- */
-void iommu_tbl_pool_init(struct iommu_map_table *iommu,
-			 unsigned long num_entries,
-			 u32 table_shift,
-			 void (*lazy_flush)(struct iommu_map_table *),
-			 bool large_pool, u32 npools,
-			 bool skip_span_boundary_check)
-{
-	unsigned int start, i;
-	struct iommu_pool *p = &(iommu->large_pool);
-
-	setup_iommu_pool_hash();
-	if (npools == 0)
-		iommu->nr_pools = IOMMU_NR_POOLS;
-	else
-		iommu->nr_pools = npools;
-	BUG_ON(npools > IOMMU_NR_POOLS);
-
-	iommu->table_shift = table_shift;
-	iommu->lazy_flush = lazy_flush;
-	start = 0;
-	if (skip_span_boundary_check)
-		iommu->flags |= IOMMU_NO_SPAN_BOUND;
-	if (large_pool)
-		iommu->flags |= IOMMU_HAS_LARGE_POOL;
-
-	if (!large_pool)
-		iommu->poolsize = num_entries/iommu->nr_pools;
-	else
-		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
-	for (i = 0; i < iommu->nr_pools; i++) {
-		spin_lock_init(&(iommu->pools[i].lock));
-		iommu->pools[i].start = start;
-		iommu->pools[i].hint = start;
-		start += iommu->poolsize; /* start for next pool */
-		iommu->pools[i].end = start - 1;
-	}
-	if (!large_pool)
-		return;
-	/* initialize large_pool */
-	spin_lock_init(&(p->lock));
-	p->start = start;
-	p->hint = p->start;
-	p->end = num_entries;
-}
-EXPORT_SYMBOL(iommu_tbl_pool_init);
-
-unsigned long iommu_tbl_range_alloc(struct device *dev,
-				struct iommu_map_table *iommu,
-				unsigned long npages,
-				unsigned long *handle,
-				unsigned long mask,
-				unsigned int align_order)
-{
-	unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
-	unsigned long n, end, start, limit, boundary_size;
-	struct iommu_pool *pool;
-	int pass = 0;
-	unsigned int pool_nr;
-	unsigned int npools = iommu->nr_pools;
-	unsigned long flags;
-	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
-	bool largealloc = (large_pool && npages > iommu_large_alloc);
-	unsigned long shift;
-	unsigned long align_mask = 0;
-
-	if (align_order > 0)
-		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
-
-	/* Sanity check */
-	if (unlikely(npages == 0)) {
-		WARN_ON_ONCE(1);
-		return IOMMU_ERROR_CODE;
-	}
-
-	if (largealloc) {
-		pool = &(iommu->large_pool);
-		pool_nr = 0; /* to keep compiler happy */
-	} else {
-		/* pick out pool_nr */
-		pool_nr =  pool_hash & (npools - 1);
-		pool = &(iommu->pools[pool_nr]);
-	}
-	spin_lock_irqsave(&pool->lock, flags);
-
- again:
-	if (pass == 0 && handle && *handle &&
-	    (*handle >= pool->start) && (*handle < pool->end))
-		start = *handle;
-	else
-		start = pool->hint;
-
-	limit = pool->end;
-
-	/* The case below can happen if we have a small segment appended
-	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the beginning. If a
-	 * flush is needed, it will get done based on the return value
-	 * from iommu_area_alloc() below.
-	 */
-	if (start >= limit)
-		start = pool->start;
-	shift = iommu->table_map_base >> iommu->table_shift;
-	if (limit + shift > mask) {
-		limit = mask - shift + 1;
-		/* If we're constrained on address range, first try
-		 * at the masked hint to avoid O(n) search complexity,
-		 * but on second pass, start at 0 in pool 0.
-		 */
-		if ((start & mask) >= limit || pass > 0) {
-			spin_unlock(&(pool->lock));
-			pool = &(iommu->pools[0]);
-			spin_lock(&(pool->lock));
-			start = pool->start;
-		} else {
-			start &= mask;
-		}
-	}
-
-	if (dev)
-		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << iommu->table_shift);
-	else
-		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
-
-	boundary_size = boundary_size >> iommu->table_shift;
-	/*
-	 * if the skip_span_boundary_check had been set during init, we set
-	 * things up so that iommu_is_span_boundary() merely checks if the
-	 * (index + npages) < num_tsb_entries
-	 */
-	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
-		shift = 0;
-		boundary_size = iommu->poolsize * iommu->nr_pools;
-	}
-	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
-			     boundary_size, align_mask);
-	if (n == -1) {
-		if (likely(pass == 0)) {
-			/* First failure, rescan from the beginning.  */
-			pool->hint = pool->start;
-			set_flush(iommu);
-			pass++;
-			goto again;
-		} else if (!largealloc && pass <= iommu->nr_pools) {
-			spin_unlock(&(pool->lock));
-			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
-			pool = &(iommu->pools[pool_nr]);
-			spin_lock(&(pool->lock));
-			pool->hint = pool->start;
-			set_flush(iommu);
-			pass++;
-			goto again;
-		} else {
-			/* give up */
-			n = IOMMU_ERROR_CODE;
-			goto bail;
-		}
-	}
-	if (iommu->lazy_flush &&
-	    (n < pool->hint || need_flush(iommu))) {
-		clear_flush(iommu);
-		iommu->lazy_flush(iommu);
-	}
-
-	end = n + npages;
-	pool->hint = end;
-
-	/* Update handle for SG allocations */
-	if (handle)
-		*handle = end;
-bail:
-	spin_unlock_irqrestore(&(pool->lock), flags);
-
-	return n;
-}
-EXPORT_SYMBOL(iommu_tbl_range_alloc);
-
-static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
-				   unsigned long entry)
-{
-	struct iommu_pool *p;
-	unsigned long largepool_start = tbl->large_pool.start;
-	bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
-
-	/* The large pool is the last pool at the top of the table */
-	if (large_pool && entry >= largepool_start) {
-		p = &tbl->large_pool;
-	} else {
-		unsigned int pool_nr = entry / tbl->poolsize;
-
-		BUG_ON(pool_nr >= tbl->nr_pools);
-		p = &tbl->pools[pool_nr];
-	}
-	return p;
-}
-
-/* Caller supplies the index of the entry into the iommu map table
- * itself when the mapping from dma_addr to the entry is not the
- * default addr->entry mapping below.
- */
-void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
-			  unsigned long npages, unsigned long entry)
-{
-	struct iommu_pool *pool;
-	unsigned long flags;
-	unsigned long shift = iommu->table_shift;
-
-	if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */
-		entry = (dma_addr - iommu->table_map_base) >> shift;
-	pool = get_pool(iommu, entry);
-
-	spin_lock_irqsave(&(pool->lock), flags);
-	bitmap_clear(iommu->map, entry, npages);
-	spin_unlock_irqrestore(&(pool->lock), flags);
-}
-EXPORT_SYMBOL(iommu_tbl_range_free);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 23633c0fda4a..92a9f243c0e2 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,19 +3,8 @@
  * IOMMU helper functions for the free area management
  */
 
-#include <linux/export.h>
 #include <linux/bitmap.h>
-#include <linux/bug.h>
-
-int iommu_is_span_boundary(unsigned int index, unsigned int nr,
-			   unsigned long shift,
-			   unsigned long boundary_size)
-{
-	BUG_ON(!is_power_of_2(boundary_size));
-
-	shift = (shift + index) & (boundary_size - 1);
-	return shift + nr > boundary_size;
-}
+#include <linux/iommu-helper.h>
 
 unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 			       unsigned long start, unsigned int nr,
@@ -38,4 +27,3 @@ again:
 	}
 	return -1;
 }
-EXPORT_SYMBOL(iommu_area_alloc);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index fdae394172fa..7e43cd54c84c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(_copy_to_iter);
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+/* access_ok()-guarded copy_to_user_mcsafe(); returns @n if the check fails. */
+static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+{
+	if (!access_ok(VERIFY_WRITE, to, n))
+		return n;
+	kasan_check_read(from, n);
+	return copy_to_user_mcsafe((__force void *) to, from, n);
+}
+
+/* Map @page, memcpy_mcsafe() @len bytes to @offset in it, then unmap. */
+static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+		const char *from, size_t len)
+{
+	char *kaddr = kmap_atomic(page);
+	unsigned long left;
+
+	/* hand memcpy_mcsafe()'s result straight back to the caller */
+	left = memcpy_mcsafe(kaddr + offset, from, len);
+	kunmap_atomic(kaddr);
+	return left;
+}
+
+/*
+ * Copy from the kernel buffer @addr into iterator @i; the normal return is
+ * whatever iterate_and_advance() leaves in @bytes.  Pipe iterators are
+ * refused with a warning and a return of 0.  If memcpy_mcsafe() leaves a
+ * remainder on a bvec or kvec segment, return at once with the count of
+ * bytes copied up to that point. */
+size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+{
+	const char *from = addr;
+	unsigned long rem, s_addr = (unsigned long) addr;
+
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
+	if (iter_is_iovec(i))
+		might_fault();
+	iterate_and_advance(i, bytes, v,
+		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+		({
+		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
+				(from += v.bv_len) - v.bv_len, v.bv_len);
+		if (rem)	/* @from is already past this segment */
+			return (unsigned long) from - s_addr - rem;
+		}),
+		({
+		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
+				v.iov_len);
+		if (rem)	/* @from is already past this segment */
+			return (unsigned long) from - s_addr - rem;
+		})
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
+#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
diff --git a/lib/kfifo.c b/lib/kfifo.c
index b0f757bf7213..015656aa8182 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -54,7 +54,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
 		return -EINVAL;
 	}
 
-	fifo->data = kmalloc(size * esize, gfp_mask);
+	fifo->data = kmalloc_array(esize, size, gfp_mask);
 
 	if (!fifo->data) {
 		fifo->mask = 0;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 15ea216a67ce..63d0816ab23b 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -22,6 +22,7 @@
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/uidgid.h>
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
@@ -231,30 +232,6 @@ out:
 	return r;
 }
 
-#ifdef CONFIG_NET
-static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
-{
-	struct kobject *kobj = data, *ksobj;
-	const struct kobj_ns_type_operations *ops;
-
-	ops = kobj_ns_ops(kobj);
-	if (!ops && kobj->kset) {
-		ksobj = &kobj->kset->kobj;
-		if (ksobj->parent != NULL)
-			ops = kobj_ns_ops(ksobj->parent);
-	}
-
-	if (ops && ops->netlink_ns && kobj->ktype->namespace) {
-		const void *sock_ns, *ns;
-		ns = kobj->ktype->namespace(kobj);
-		sock_ns = ops->netlink_ns(dsk);
-		return sock_ns != ns;
-	}
-
-	return 0;
-}
-#endif
-
 #ifdef CONFIG_UEVENT_HELPER
 static int kobj_usermode_filter(struct kobject *kobj)
 {
@@ -296,15 +273,44 @@ static void cleanup_uevent_env(struct subprocess_info *info)
 }
 #endif
 
-static int kobject_uevent_net_broadcast(struct kobject *kobj,
-					struct kobj_uevent_env *env,
+#ifdef CONFIG_NET
+static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env,
 					const char *action_string,
 					const char *devpath)
 {
-	int retval = 0;
-#if defined(CONFIG_NET)
+	/* room for "action@devpath" and its terminating NUL */
+	size_t hdr_len = strlen(action_string) + strlen(devpath) + 2;
+	struct netlink_skb_parms *cb;
+	struct sk_buff *msg;
+	char *hdr;
+
+	/* allocate message with maximum possible size */
+	msg = alloc_skb(hdr_len + env->buflen, GFP_KERNEL);
+	if (!msg)
+		return NULL;
+
+	/* header first, then the environment buffer right behind it */
+	hdr = skb_put(msg, hdr_len);
+	sprintf(hdr, "%s@%s", action_string, devpath);
+	skb_put_data(msg, env->buf, env->buflen);
+
+	/* default control block: global root creds, dst_group 1, portid 0 */
+	cb = &NETLINK_CB(msg);
+	cb->creds.uid = GLOBAL_ROOT_UID;
+	cb->creds.gid = GLOBAL_ROOT_GID;
+	cb->dst_group = 1;
+	cb->portid = 0;
+
+	return msg;
+}
+
+static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env,
+					 const char *action_string,
+					 const char *devpath)
+{
 	struct sk_buff *skb = NULL;
 	struct uevent_sock *ue_sk;
+	int retval = 0;
 
 	/* send netlink message */
 	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
@@ -314,37 +320,99 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
 			continue;
 
 		if (!skb) {
-			/* allocate message with the maximum possible size */
-			size_t len = strlen(action_string) + strlen(devpath) + 2;
-			char *scratch;
-
 			retval = -ENOMEM;
-			skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+			skb = alloc_uevent_skb(env, action_string, devpath);
 			if (!skb)
 				continue;
-
-			/* add header */
-			scratch = skb_put(skb, len);
-			sprintf(scratch, "%s@%s", action_string, devpath);
-
-			skb_put_data(skb, env->buf, env->buflen);
-
-			NETLINK_CB(skb).dst_group = 1;
 		}
 
-		retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb),
-						    0, 1, GFP_KERNEL,
-						    kobj_bcast_filter,
-						    kobj);
+		retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1,
+					   GFP_KERNEL);
 		/* ENOBUFS should be handled in userspace */
 		if (retval == -ENOBUFS || retval == -ESRCH)
 			retval = 0;
 	}
 	consume_skb(skb);
-#endif
+
 	return retval;
 }
 
+/*
+ * Broadcast one uevent on @usk.  Returns 0 or a negative errno; -ENOBUFS
+ * and -ESRCH from netlink_broadcast() are reported as success.
+ */
+static int uevent_net_broadcast_tagged(struct sock *usk,
+				       struct kobj_uevent_env *env,
+				       const char *action_string,
+				       const char *devpath)
+{
+	struct user_namespace *userns = sock_net(usk)->user_ns;
+	struct sk_buff *msg = alloc_uevent_skb(env, action_string, devpath);
+	int err;
+
+	if (!msg)
+		return -ENOMEM;
+
+	/*
+	 * Outside init_user_ns, stamp the message with the ids that 0 maps
+	 * to in the owning user namespace, each only if the mapping is valid.
+	 */
+	if (userns != &init_user_ns) {
+		struct netlink_skb_parms *cb = &NETLINK_CB(msg);
+		kuid_t uid0 = make_kuid(userns, 0);
+		kgid_t gid0 = make_kgid(userns, 0);
+
+		if (uid_valid(uid0))
+			cb->creds.uid = uid0;
+		if (gid_valid(gid0))
+			cb->creds.gid = gid0;
+	}
+
+	/* ENOBUFS should be handled in userspace */
+	err = netlink_broadcast(usk, msg, 0, 1, GFP_KERNEL);
+	if (err == -ENOBUFS || err == -ESRCH)
+		return 0;
+	return err;
+}
+#endif
+
+/*
+ * Send the uevent over netlink.  A kobject whose namespace ops are of type
+ * KOBJ_NS_TYPE_NET and which yields a non-NULL namespace tag is broadcast
+ * only on that namespace's uevent socket; anything else goes through the
+ * untagged broadcast.  Without CONFIG_NET this is a no-op returning 0.
+ */
+static int kobject_uevent_net_broadcast(struct kobject *kobj,
+					struct kobj_uevent_env *env,
+					const char *action_string,
+					const char *devpath)
+{
+#ifdef CONFIG_NET
+	const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
+	const struct net *tag = NULL;
+
+	/* no ops on the kobject itself: try the parent of its kset */
+	if (!ns_ops && kobj->kset && kobj->kset->kobj.parent != NULL)
+		ns_ops = kobj_ns_ops(kobj->kset->kobj.parent);
+
+	/* kobjects currently only carry network namespace tags and they
+	 * are the only tag relevant here since we want to decide which
+	 * network namespaces to broadcast the uevent into.
+	 */
+	if (ns_ops && ns_ops->netlink_ns && kobj->ktype->namespace &&
+	    ns_ops->type == KOBJ_NS_TYPE_NET)
+		tag = kobj->ktype->namespace(kobj);
+
+	if (tag)
+		return uevent_net_broadcast_tagged(tag->uevent_sock->sk, env,
+						   action_string, devpath);
+
+	return uevent_net_broadcast_untagged(env, action_string, devpath);
+#else
+	return 0;
+#endif
+}
+
 static void zap_modalias_env(struct kobj_uevent_env *env)
 {
 	static const char modalias_prefix[] = "MODALIAS=";
@@ -703,9 +771,13 @@ static int uevent_net_init(struct net *net)
 
 	net->uevent_sock = ue_sk;
 
-	mutex_lock(&uevent_sock_mutex);
-	list_add_tail(&ue_sk->list, &uevent_sock_list);
-	mutex_unlock(&uevent_sock_mutex);
+	/* Restrict uevents to initial user namespace. */
+	if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+		mutex_lock(&uevent_sock_mutex);
+		list_add_tail(&ue_sk->list, &uevent_sock_list);
+		mutex_unlock(&uevent_sock_mutex);
+	}
+
 	return 0;
 }
 
@@ -713,9 +785,11 @@ static void uevent_net_exit(struct net *net)
 {
 	struct uevent_sock *ue_sk = net->uevent_sock;
 
-	mutex_lock(&uevent_sock_mutex);
-	list_del(&ue_sk->list);
-	mutex_unlock(&uevent_sock_mutex);
+	if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+		mutex_lock(&uevent_sock_mutex);
+		list_del(&ue_sk->list);
+		mutex_unlock(&uevent_sock_mutex);
+	}
 
 	netlink_kernel_release(ue_sk->sk);
 	kfree(ue_sk);
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 28ba40b99337..2b10a4024c35 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -119,7 +119,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 	slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
 	if (!slot)
 		goto out_fail;
-	element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
+	element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL);
 	if (!element)
 		goto out_fail;
 
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
index 7eceeddb3fb8..c2d6f4efcfbc 100644
--- a/lib/mpi/mpi-internal.h
+++ b/lib/mpi/mpi-internal.h
@@ -65,13 +65,6 @@
 typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
 typedef int mpi_size_t;		/* (must be a signed type) */
 
-static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
-{
-	if (a->alloced < b)
-		return mpi_resize(a, b);
-	return 0;
-}
-
 /* Copy N limbs from S to D.  */
 #define MPN_COPY(d, s, n) \
 	do {					\
@@ -80,13 +73,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
 			(d)[_i] = (s)[_i];	\
 	} while (0)
 
-#define MPN_COPY_INCR(d, s, n) \
-	do {					\
-		mpi_size_t _i;			\
-		for (_i = 0; _i < (n); _i++)	\
-			(d)[_i] = (s)[_i];	\
-	} while (0)
-
 #define MPN_COPY_DECR(d, s, n) \
 	do {					\
 		mpi_size_t _i;			\
@@ -111,15 +97,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
 		}				\
 	} while (0)
 
-#define MPN_NORMALIZE_NOT_ZERO(d, n) \
-	do {				\
-		for (;;) {		\
-			if ((d)[(n)-1])	\
-				break;	\
-			(n)--;		\
-		}			\
-	} while (0)
-
 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
 	do {							\
 		if ((size) < KARATSUBA_THRESHOLD)		\
@@ -128,46 +105,11 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
 			mul_n(prodp, up, vp, size, tspace);	\
 	} while (0);
 
-/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
- * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
- * If this would yield overflow, DI should be the largest possible number
- * (i.e., only ones).  For correct operation, the most significant bit of D
- * has to be set.  Put the quotient in Q and the remainder in R.
- */
-#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
-	do {								\
-		mpi_limb_t _q, _ql, _r;					\
-		mpi_limb_t _xh, _xl;					\
-		umul_ppmm(_q, _ql, (nh), (di));				\
-		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
-		umul_ppmm(_xh, _xl, _q, (d));				\
-		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
-		if (_xh) {						\
-			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
-			_q++;						\
-			if (_xh) {					\
-				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
-				_q++;					\
-			}						\
-		}							\
-		if (_r >= (d)) {					\
-			_r -= (d);					\
-			_q++;						\
-		}							\
-		(r) = _r;						\
-		(q) = _q;						\
-	} while (0)
-
 /*-- mpiutil.c --*/
 mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
 void mpi_free_limb_space(mpi_ptr_t a);
 void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
 
-/*-- mpi-bit.c --*/
-void mpi_rshift_limbs(MPI a, unsigned int count);
-int mpi_lshift_limbs(MPI a, unsigned int count);
-
-/*-- mpihelp-add.c --*/
 static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@@ -175,7 +117,6 @@ mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
 
-/*-- mpihelp-sub.c --*/
 static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@@ -183,10 +124,10 @@ mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
 
-/*-- mpihelp-cmp.c --*/
+/*-- mpih-cmp.c --*/
 int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
 
-/*-- mpihelp-mul.c --*/
+/*-- mpih-mul.c --*/
 
 struct karatsuba_ctx {
 	struct karatsuba_ctx *next;
@@ -202,7 +143,6 @@ mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			    mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			    mpi_size_t s1_size, mpi_limb_t s2_limb);
-int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
 int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
 		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
 void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
@@ -214,21 +154,16 @@ int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
 			       mpi_ptr_t vp, mpi_size_t vsize,
 			       struct karatsuba_ctx *ctx);
 
-/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+/*-- generic_mpih-mul1.c --*/
 mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 
-/*-- mpihelp-div.c --*/
-mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
-			 mpi_limb_t divisor_limb);
+/*-- mpih-div.c --*/
 mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
 			  mpi_ptr_t np, mpi_size_t nsize,
 			  mpi_ptr_t dp, mpi_size_t dsize);
-mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
-			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
-			    mpi_limb_t divisor_limb);
 
-/*-- mpihelp-shift.c --*/
+/*-- generic_mpih-[lr]shift.c --*/
 mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
 			  unsigned cnt);
 mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 314f4dfa603e..20ed0f766787 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -91,14 +91,14 @@ int mpi_resize(MPI a, unsigned nlimbs)
 		return 0;	/* no need to do it */
 
 	if (a->d) {
-		p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+		p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
 		if (!p)
 			return -ENOMEM;
 		memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
 		kzfree(a->d);
 		a->d = p;
 	} else {
-		a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+		a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
 		if (!a->d)
 			return -ENOMEM;
 	}
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 6016f1deb1f5..beb14839b41a 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -112,18 +112,6 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
 		  min(pool->nr_free, pool->percpu_batch_size));
 }
 
-static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
-{
-	int tag = -ENOSPC;
-
-	spin_lock(&tags->lock);
-	if (tags->nr_free)
-		tag = tags->freelist[--tags->nr_free];
-	spin_unlock(&tags->lock);
-
-	return tag;
-}
-
 /**
  * percpu_ida_alloc - allocate a tag
  * @pool: pool to allocate from
@@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 	DEFINE_WAIT(wait);
 	struct percpu_ida_cpu *tags;
 	unsigned long flags;
-	int tag;
+	int tag = -ENOSPC;
 
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
+	tags = raw_cpu_ptr(pool->tag_cpu);
+	spin_lock_irqsave(&tags->lock, flags);
 
 	/* Fastpath */
-	tag = alloc_local_tag(tags);
-	if (likely(tag >= 0)) {
-		local_irq_restore(flags);
+	if (likely(tags->nr_free)) {
+		tag = tags->freelist[--tags->nr_free];
+		spin_unlock_irqrestore(&tags->lock, flags);
 		return tag;
 	}
+	spin_unlock_irqrestore(&tags->lock, flags);
 
 	while (1) {
-		spin_lock(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
+		tags = this_cpu_ptr(pool->tag_cpu);
 
 		/*
 		 * prepare_to_wait() must come before steal_tags(), in case
@@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 						&pool->cpus_have_tags);
 		}
 
-		spin_unlock(&pool->lock);
-		local_irq_restore(flags);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		if (tag >= 0 || state == TASK_RUNNING)
 			break;
@@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 		}
 
 		schedule();
-
-		local_irq_save(flags);
-		tags = this_cpu_ptr(pool->tag_cpu);
 	}
 	if (state != TASK_RUNNING)
 		finish_wait(&pool->wait, &wait);
@@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 
 	BUG_ON(tag >= pool->nr_tags);
 
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
+	tags = raw_cpu_ptr(pool->tag_cpu);
 
-	spin_lock(&tags->lock);
+	spin_lock_irqsave(&tags->lock, flags);
 	tags->freelist[tags->nr_free++] = tag;
 
 	nr_free = tags->nr_free;
-	spin_unlock(&tags->lock);
 
 	if (nr_free == 1) {
 		cpumask_set_cpu(smp_processor_id(),
 				&pool->cpus_have_tags);
 		wake_up(&pool->wait);
 	}
+	spin_unlock_irqrestore(&tags->lock, flags);
 
 	if (nr_free == pool->percpu_max_size) {
-		spin_lock(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
+		spin_lock(&tags->lock);
 
-		/*
-		 * Global lock held and irqs disabled, don't need percpu
-		 * lock
-		 */
 		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
 				  tags->freelist, &tags->nr_free,
@@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 
 			wake_up(&pool->wait);
 		}
-		spin_unlock(&pool->lock);
+		spin_unlock(&tags->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 	}
-
-	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ida_free);
 
@@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
 	struct percpu_ida_cpu *remote;
 	unsigned cpu, i, err = 0;
 
-	local_irq_save(flags);
 	for_each_possible_cpu(cpu) {
 		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-		spin_lock(&remote->lock);
+		spin_lock_irqsave(&remote->lock, flags);
 		for (i = 0; i < remote->nr_free; i++) {
 			err = fn(remote->freelist[i], data);
 			if (err)
 				break;
 		}
-		spin_unlock(&remote->lock);
+		spin_unlock_irqrestore(&remote->lock, flags);
 		if (err)
 			goto out;
 	}
 
-	spin_lock(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	for (i = 0; i < pool->nr_free; i++) {
 		err = fn(pool->freelist[i], data);
 		if (err)
 			break;
 	}
-	spin_unlock(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 out:
-	local_irq_restore(flags);
 	return err;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 7d36c1e27ff6..b7055b2a07d3 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -247,7 +247,7 @@ static int __init rbtree_test_init(void)
 	cycles_t time1, time2, time;
 	struct rb_node *node;
 
-	nodes = kmalloc(nnodes * sizeof(*nodes), GFP_KERNEL);
+	nodes = kmalloc_array(nnodes, sizeof(*nodes), GFP_KERNEL);
 	if (!nodes)
 		return -ENOMEM;
 
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
index 0ec3f257ffdf..1db74eb098d0 100644
--- a/lib/reed_solomon/decode_rs.c
+++ b/lib/reed_solomon/decode_rs.c
@@ -1,22 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * lib/reed_solomon/decode_rs.c
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright 2002, Phil Karn, KA9Q
  * May be used under the terms of the GNU General Public License (GPL)
  *
  * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
- *
- */
-
-/* Generic data width independent code which is included by the
- * wrappers.
+ * Generic data width independent code which is included by the wrappers.
  */
 {
+	struct rs_codec *rs = rsc->codec;
 	int deg_lambda, el, deg_omega;
 	int i, j, r, k, pad;
 	int nn = rs->nn;
@@ -27,16 +21,22 @@
 	uint16_t *alpha_to = rs->alpha_to;
 	uint16_t *index_of = rs->index_of;
 	uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
-	/* Err+Eras Locator poly and syndrome poly The maximum value
-	 * of nroots is 8. So the necessary stack size will be about
-	 * 220 bytes max.
-	 */
-	uint16_t lambda[nroots + 1], syn[nroots];
-	uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1];
-	uint16_t root[nroots], reg[nroots + 1], loc[nroots];
 	int count = 0;
 	uint16_t msk = (uint16_t) rs->nn;
 
+	/*
+	 * The decoder buffers are in the rs control struct. They are
+	 * arrays sized [nroots + 1]
+	 */
+	uint16_t *lambda = rsc->buffers + RS_DECODE_LAMBDA * (nroots + 1);
+	uint16_t *syn = rsc->buffers + RS_DECODE_SYN * (nroots + 1);
+	uint16_t *b = rsc->buffers + RS_DECODE_B * (nroots + 1);
+	uint16_t *t = rsc->buffers + RS_DECODE_T * (nroots + 1);
+	uint16_t *omega = rsc->buffers + RS_DECODE_OMEGA * (nroots + 1);
+	uint16_t *root = rsc->buffers + RS_DECODE_ROOT * (nroots + 1);
+	uint16_t *reg = rsc->buffers + RS_DECODE_REG * (nroots + 1);
+	uint16_t *loc = rsc->buffers + RS_DECODE_LOC * (nroots + 1);
+
 	/* Check length parameter for validity */
 	pad = nn - nroots - len;
 	BUG_ON(pad < 0 || pad >= nn);
diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c
index 0b5b1a6728ec..9112d46e869e 100644
--- a/lib/reed_solomon/encode_rs.c
+++ b/lib/reed_solomon/encode_rs.c
@@ -1,23 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * lib/reed_solomon/encode_rs.c
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright 2002, Phil Karn, KA9Q
  * May be used under the terms of the GNU General Public License (GPL)
  *
  * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $
- *
- */
-
-/* Generic data width independent code which is included by the
- * wrappers.
- * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par)
+ * Generic data width independent code which is included by the wrappers.
  */
 {
+	struct rs_codec *rs = rsc->codec;
 	int i, j, pad;
 	int nn = rs->nn;
 	int nroots = rs->nroots;
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index 06d04cfa9339..d8bb1a1eba72 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -1,43 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * lib/reed_solomon/reed_solomon.c
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
  *
  * Reed Solomon code lifted from reed solomon library written by Phil Karn
  * Copyright 2002 Phil Karn, KA9Q
  *
- * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Description:
  *
  * The generic Reed Solomon library provides runtime configurable
  * encoding / decoding of RS codes.
- * Each user must call init_rs to get a pointer to a rs_control
- * structure for the given rs parameters. This structure is either
- * generated or a already available matching control structure is used.
- * If a structure is generated then the polynomial arrays for
- * fast encoding / decoding are built. This can take some time so
- * make sure not to call this function from a time critical path.
- * Usually a module / driver should initialize the necessary
- * rs_control structure on module / driver init and release it
- * on exit.
- * The encoding puts the calculated syndrome into a given syndrome
- * buffer.
- * The decoding is a two step process. The first step calculates
- * the syndrome over the received (data + syndrome) and calls the
- * second stage, which does the decoding / error correction itself.
- * Many hw encoders provide a syndrome calculation over the received
- * data + syndrome and can call the second stage directly.
  *
+ * Each user must call init_rs to get a pointer to a rs_control structure
+ * for the given rs parameters. The control struct is unique per instance.
+ * It points to a codec which can be shared by multiple control structures.
+ * If a codec is newly allocated then the polynomial arrays for fast
+ * encoding / decoding are built. This can take some time so make sure not
+ * to call this function from a time critical path.  Usually a module /
+ * driver should initialize the necessary rs_control structure on module /
+ * driver init and release it on exit.
+ *
+ * The encoding puts the calculated syndrome into a given syndrome buffer.
+ *
+ * The decoding is a two step process. The first step calculates the
+ * syndrome over the received (data + syndrome) and calls the second stage,
+ * which does the decoding / error correction itself.  Many hw encoders
+ * provide a syndrome calculation over the received data + syndrome and can
+ * call the second stage directly.
  */
-
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -46,32 +37,44 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 
-/* This list holds all currently allocated rs control structures */
-static LIST_HEAD (rslist);
+enum {
+	RS_DECODE_LAMBDA,
+	RS_DECODE_SYN,
+	RS_DECODE_B,
+	RS_DECODE_T,
+	RS_DECODE_OMEGA,
+	RS_DECODE_ROOT,
+	RS_DECODE_REG,
+	RS_DECODE_LOC,
+	RS_DECODE_NUM_BUFFERS
+};
+
+/* This list holds all currently allocated rs codec structures */
+static LIST_HEAD(codec_list);
 /* Protection for the list */
 static DEFINE_MUTEX(rslistlock);
 
 /**
- * rs_init - Initialize a Reed-Solomon codec
+ * codec_init - Initialize a Reed-Solomon codec
  * @symsize:	symbol size, bits (1-8)
  * @gfpoly:	Field generator polynomial coefficients
  * @gffunc:	Field generator function
  * @fcr:	first root of RS code generator polynomial, index form
  * @prim:	primitive element to generate polynomial roots
  * @nroots:	RS code generator polynomial degree (number of roots)
+ * @gfp:	GFP_ flags for allocations
  *
- * Allocate a control structure and the polynom arrays for faster
+ * Allocate a codec structure and the polynom arrays for faster
  * en/decoding. Fill the arrays according to the given parameters.
  */
-static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
-                                  int fcr, int prim, int nroots)
+static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int),
+				   int fcr, int prim, int nroots, gfp_t gfp)
 {
-	struct rs_control *rs;
 	int i, j, sr, root, iprim;
+	struct rs_codec *rs;
 
-	/* Allocate the control structure */
-	rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL);
-	if (rs == NULL)
+	rs = kzalloc(sizeof(*rs), gfp);
+	if (!rs)
 		return NULL;
 
 	INIT_LIST_HEAD(&rs->list);
@@ -85,17 +88,17 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
 	rs->gffunc = gffunc;
 
 	/* Allocate the arrays */
-	rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+	rs->alpha_to = kmalloc_array(rs->nn + 1, sizeof(uint16_t), gfp);
 	if (rs->alpha_to == NULL)
-		goto errrs;
+		goto err;
 
-	rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+	rs->index_of = kmalloc_array(rs->nn + 1, sizeof(uint16_t), gfp);
 	if (rs->index_of == NULL)
-		goto erralp;
+		goto err;
 
-	rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL);
+	rs->genpoly = kmalloc_array(rs->nroots + 1, sizeof(uint16_t), gfp);
 	if(rs->genpoly == NULL)
-		goto erridx;
+		goto err;
 
 	/* Generate Galois field lookup tables */
 	rs->index_of[0] = rs->nn;	/* log(zero) = -inf */
@@ -120,7 +123,7 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
 	}
 	/* If it's not primitive, exit */
 	if(sr != rs->alpha_to[0])
-		goto errpol;
+		goto err;
 
 	/* Find prim-th root of 1, used in decoding */
 	for(iprim = 1; (iprim % prim) != 0; iprim += rs->nn);
@@ -148,42 +151,52 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
 	/* convert rs->genpoly[] to index form for quicker encoding */
 	for (i = 0; i <= nroots; i++)
 		rs->genpoly[i] = rs->index_of[rs->genpoly[i]];
+
+	rs->users = 1;
+	list_add(&rs->list, &codec_list);
 	return rs;
 
-	/* Error exit */
-errpol:
+err:
 	kfree(rs->genpoly);
-erridx:
 	kfree(rs->index_of);
-erralp:
 	kfree(rs->alpha_to);
-errrs:
 	kfree(rs);
 	return NULL;
 }
 
 
 /**
- *  free_rs - Free the rs control structure, if it is no longer used
- *  @rs:	the control structure which is not longer used by the
+ *  free_rs - Free the rs control structure
+ *  @rs:	The control structure which is no longer used by the
  *		caller
+ *
+ * Free the control structure. If @rs is the last user of the associated
+ * codec, free the codec as well.
  */
 void free_rs(struct rs_control *rs)
 {
+	struct rs_codec *cd;
+
+	if (!rs)
+		return;
+
+	cd = rs->codec;
 	mutex_lock(&rslistlock);
-	rs->users--;
-	if(!rs->users) {
-		list_del(&rs->list);
-		kfree(rs->alpha_to);
-		kfree(rs->index_of);
-		kfree(rs->genpoly);
-		kfree(rs);
+	cd->users--;
+	if(!cd->users) {
+		list_del(&cd->list);
+		kfree(cd->alpha_to);
+		kfree(cd->index_of);
+		kfree(cd->genpoly);
+		kfree(cd);
 	}
 	mutex_unlock(&rslistlock);
+	kfree(rs);
 }
+EXPORT_SYMBOL_GPL(free_rs);
 
 /**
- * init_rs_internal - Find a matching or allocate a new rs control structure
+ * init_rs_internal - Allocate rs control, find a matching codec or allocate a new one
  *  @symsize:	the symbol size (number of bits)
  *  @gfpoly:	the extended Galois field generator polynomial coefficients,
  *		with the 0th coefficient in the low order bit. The polynomial
@@ -191,55 +204,69 @@ void free_rs(struct rs_control *rs)
  *  @gffunc:	pointer to function to generate the next field element,
  *		or the multiplicative identity element if given 0.  Used
  *		instead of gfpoly if gfpoly is 0
- *  @fcr:  	the first consecutive root of the rs code generator polynomial
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
+ *  @gfp:	GFP_ flags for allocations
  */
 static struct rs_control *init_rs_internal(int symsize, int gfpoly,
-                                           int (*gffunc)(int), int fcr,
-                                           int prim, int nroots)
+					   int (*gffunc)(int), int fcr,
+					   int prim, int nroots, gfp_t gfp)
 {
-	struct list_head	*tmp;
-	struct rs_control	*rs;
+	struct list_head *tmp;
+	struct rs_control *rs;
+	unsigned int bsize;
 
 	/* Sanity checks */
 	if (symsize < 1)
 		return NULL;
 	if (fcr < 0 || fcr >= (1<<symsize))
-    		return NULL;
+		return NULL;
 	if (prim <= 0 || prim >= (1<<symsize))
-    		return NULL;
+		return NULL;
 	if (nroots < 0 || nroots >= (1<<symsize))
 		return NULL;
 
+	/*
+	 * The decoder needs buffers in each control struct instance to
+	 * avoid variable size or large fixed size allocations on
+	 * stack. Size the buffers to arrays of [nroots + 1].
+	 */
+	bsize = sizeof(uint16_t) * RS_DECODE_NUM_BUFFERS * (nroots + 1);
+	rs = kzalloc(sizeof(*rs) + bsize, gfp);
+	if (!rs)
+		return NULL;
+
 	mutex_lock(&rslistlock);
 
 	/* Walk through the list and look for a matching entry */
-	list_for_each(tmp, &rslist) {
-		rs = list_entry(tmp, struct rs_control, list);
-		if (symsize != rs->mm)
+	list_for_each(tmp, &codec_list) {
+		struct rs_codec *cd = list_entry(tmp, struct rs_codec, list);
+
+		if (symsize != cd->mm)
 			continue;
-		if (gfpoly != rs->gfpoly)
+		if (gfpoly != cd->gfpoly)
 			continue;
-		if (gffunc != rs->gffunc)
+		if (gffunc != cd->gffunc)
 			continue;
-		if (fcr != rs->fcr)
+		if (fcr != cd->fcr)
 			continue;
-		if (prim != rs->prim)
+		if (prim != cd->prim)
 			continue;
-		if (nroots != rs->nroots)
+		if (nroots != cd->nroots)
 			continue;
 		/* We have a matching one already */
-		rs->users++;
+		cd->users++;
+		rs->codec = cd;
 		goto out;
 	}
 
 	/* Create a new one */
-	rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots);
-	if (rs) {
-		rs->users = 1;
-		list_add(&rs->list, &rslist);
+	rs->codec = codec_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp);
+	if (!rs->codec) {
+		kfree(rs);
+		rs = NULL;
 	}
 out:
 	mutex_unlock(&rslistlock);
@@ -247,45 +274,48 @@ out:
 }
 
 /**
- * init_rs - Find a matching or allocate a new rs control structure
+ * init_rs_gfp - Create a RS control struct and initialize it
  *  @symsize:	the symbol size (number of bits)
  *  @gfpoly:	the extended Galois field generator polynomial coefficients,
  *		with the 0th coefficient in the low order bit. The polynomial
  *		must be primitive;
- *  @fcr:  	the first consecutive root of the rs code generator polynomial
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
+ *  @gfp:	GFP_ flags for allocations
  */
-struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
-                           int nroots)
+struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim,
+			       int nroots, gfp_t gfp)
 {
-	return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots);
+	return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots, gfp);
 }
+EXPORT_SYMBOL_GPL(init_rs_gfp);
 
 /**
- * init_rs_non_canonical - Find a matching or allocate a new rs control
- *                         structure, for fields with non-canonical
- *                         representation
+ * init_rs_non_canonical - Allocate rs control struct for fields with
+ *                         non-canonical representation
  *  @symsize:	the symbol size (number of bits)
  *  @gffunc:	pointer to function to generate the next field element,
  *		or the multiplicative identity element if given 0.  Used
  *		instead of gfpoly if gfpoly is 0
- *  @fcr:  	the first consecutive root of the rs code generator polynomial
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
  */
 struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int),
-                                         int fcr, int prim, int nroots)
+					 int fcr, int prim, int nroots)
 {
-	return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots);
+	return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots,
+				GFP_KERNEL);
 }
+EXPORT_SYMBOL_GPL(init_rs_non_canonical);
 
 #ifdef CONFIG_REED_SOLOMON_ENC8
 /**
  *  encode_rs8 - Calculate the parity for data values (8bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @len:	data length
  *  @par:	parity data, must be initialized by caller (usually all 0)
@@ -295,7 +325,7 @@ struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int),
  *  symbol size > 8. The calling code must take care of encoding of the
  *  syndrome result for storage itself.
  */
-int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par,
+int encode_rs8(struct rs_control *rsc, uint8_t *data, int len, uint16_t *par,
 	       uint16_t invmsk)
 {
 #include "encode_rs.c"
@@ -306,7 +336,7 @@ EXPORT_SYMBOL_GPL(encode_rs8);
 #ifdef CONFIG_REED_SOLOMON_DEC8
 /**
  *  decode_rs8 - Decode codeword (8bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @par:	received parity data field
  *  @len:	data length
@@ -319,9 +349,14 @@ EXPORT_SYMBOL_GPL(encode_rs8);
  *  The syndrome and parity uses a uint16_t data type to enable
  *  symbol size > 8. The calling code must take care of decoding of the
  *  syndrome result and the received parity before calling this code.
+ *
+ *  Note: The rs_control struct @rsc contains buffers which are used for
+ *  decoding, so the caller has to ensure that decoder invocations are
+ *  serialized.
+ *
  *  Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
  */
-int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len,
+int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len,
 	       uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
 	       uint16_t *corr)
 {
@@ -333,7 +368,7 @@ EXPORT_SYMBOL_GPL(decode_rs8);
 #ifdef CONFIG_REED_SOLOMON_ENC16
 /**
  *  encode_rs16 - Calculate the parity for data values (16bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @len:	data length
  *  @par:	parity data, must be initialized by caller (usually all 0)
@@ -341,7 +376,7 @@ EXPORT_SYMBOL_GPL(decode_rs8);
  *
  *  Each field in the data array contains up to symbol size bits of valid data.
  */
-int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par,
+int encode_rs16(struct rs_control *rsc, uint16_t *data, int len, uint16_t *par,
 	uint16_t invmsk)
 {
 #include "encode_rs.c"
@@ -352,7 +387,7 @@ EXPORT_SYMBOL_GPL(encode_rs16);
 #ifdef CONFIG_REED_SOLOMON_DEC16
 /**
  *  decode_rs16 - Decode codeword (16bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @par:	received parity data field
  *  @len:	data length
@@ -363,9 +398,14 @@ EXPORT_SYMBOL_GPL(encode_rs16);
  *  @corr:	buffer to store correction bitmask on eras_pos
  *
  *  Each field in the data array contains up to symbol size bits of valid data.
+ *
+ *  Note: The rs_control struct @rsc contains buffers which are used for
+ *  decoding, so the caller has to ensure that decoder invocations are
+ *  serialized.
+ *
  *  Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
  */
-int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
+int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len,
 		uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
 		uint16_t *corr)
 {
@@ -374,10 +414,6 @@ int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
 EXPORT_SYMBOL_GPL(decode_rs16);
 #endif
 
-EXPORT_SYMBOL_GPL(init_rs);
-EXPORT_SYMBOL_GPL(init_rs_non_canonical);
-EXPORT_SYMBOL_GPL(free_rs);
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Reed Solomon encoder/decoder");
 MODULE_AUTHOR("Phil Karn, Thomas Gleixner");
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..d3b81cefce91 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
 }
 EXPORT_SYMBOL(refcount_dec_and_lock);
 
+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ *                                 interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the lock is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interrupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ *         otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+				   unsigned long *flags)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	spin_lock_irqsave(lock, *flags);
+	if (!refcount_dec_and_test(r)) {
+		spin_unlock_irqrestore(lock, *flags);
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 2b2b79974b61..9427b5766134 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
  * For a completely stable walk you should construct your own data
  * structure outside the hash table.
  *
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
  *
  * You must call rhashtable_walk_exit after this function returns.
  */
@@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 	__acquires(RCU)
 {
 	struct rhashtable *ht = iter->ht;
+	bool rhlist = ht->rhlist;
 
 	rcu_read_lock();
 
@@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 		list_del(&iter->walker.list);
 	spin_unlock(&ht->lock);
 
-	if (!iter->walker.tbl && !iter->end_of_table) {
+	if (iter->end_of_table)
+		return 0;
+	if (!iter->walker.tbl) {
 		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+		iter->slot = 0;
+		iter->skip = 0;
 		return -EAGAIN;
 	}
 
+	if (iter->p && !rhlist) {
+		/*
+		 * We need to validate that 'p' is still in the table, and
+		 * if so, update 'skip'
+		 */
+		struct rhash_head *p;
+		int skip = 0;
+		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+			skip++;
+			if (p == iter->p) {
+				iter->skip = skip;
+				goto found;
+			}
+		}
+		iter->p = NULL;
+	} else if (iter->p && rhlist) {
+		/* Need to validate that 'list' is still in the table, and
+		 * if so, update 'skip' and 'p'.
+		 */
+		struct rhash_head *p;
+		struct rhlist_head *list;
+		int skip = 0;
+		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+			for (list = container_of(p, struct rhlist_head, rhead);
+			     list;
+			     list = rcu_dereference(list->next)) {
+				skip++;
+				if (list == iter->list) {
+					iter->p = p;
+					iter->skip = skip;
+					goto found;
+				}
+			}
+		}
+		iter->p = NULL;
+	}
+found:
 	return 0;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
@@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 		iter->walker.tbl = NULL;
 	spin_unlock(&ht->lock);
 
-	iter->p = NULL;
-
 out:
 	rcu_read_unlock();
 }
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06ec70c..fdd1b8aa8ac6 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -52,7 +52,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		return 0;
 	}
 
-	sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
+	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
 	if (!sb->map)
 		return -ENOMEM;
 
@@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
 }
 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
 
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+					unsigned int depth)
 {
 	unsigned int wake_batch;
+	unsigned int shallow_depth;
 
 	/*
 	 * For each batch, we wake up one queue. We need to make sure that our
-	 * batch size is small enough that the full depth of the bitmap is
-	 * enough to wake up all of the queues.
+	 * batch size is small enough that the full depth of the bitmap,
+	 * potentially limited by a shallow depth, is enough to wake up all of
+	 * the queues.
+	 *
+	 * Each full word of the bitmap has bits_per_word bits, and there might
+	 * be a partial word. There are depth / bits_per_word full words and
+	 * depth % bits_per_word bits left over. In bitwise arithmetic:
+	 *
+	 * bits_per_word = 1 << shift
+	 * depth / bits_per_word = depth >> shift
+	 * depth % bits_per_word = depth & ((1 << shift) - 1)
+	 *
+	 * Each word can be limited to sbq->min_shallow_depth bits.
 	 */
-	wake_batch = SBQ_WAKE_BATCH;
-	if (wake_batch > depth / SBQ_WAIT_QUEUES)
-		wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+	depth = ((depth >> sbq->sb.shift) * shallow_depth +
+		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+			     SBQ_WAKE_BATCH);
 
 	return wake_batch;
 }
@@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
 	}
 
-	sbq->wake_batch = sbq_calc_wake_batch(depth);
+	sbq->min_shallow_depth = UINT_MAX;
+	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
@@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+					    unsigned int depth)
 {
-	unsigned int wake_batch = sbq_calc_wake_batch(depth);
+	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
 	int i;
 
 	if (sbq->wake_batch != wake_batch) {
 		WRITE_ONCE(sbq->wake_batch, wake_batch);
 		/*
-		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
-		 * the batch size is updated before the wait counts.
+		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
+		 * to ensure that the batch size is updated before the wait
+		 * counts.
 		 */
 		smp_mb__before_atomic();
 		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 			atomic_set(&sbq->ws[i].wait_cnt, 1);
 	}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+	sbitmap_queue_update_wake_batch(sbq, depth);
 	sbitmap_resize(&sbq->sb, depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 	unsigned int hint, depth;
 	int nr;
 
+	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
 	hint = this_cpu_read(*sbq->alloc_hint);
 	depth = READ_ONCE(sbq->sb.depth);
 	if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
 
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+				     unsigned int min_shallow_depth)
+{
+	sbq->min_shallow_depth = min_shallow_depth;
+	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
 	int i, wake_index;
@@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 	return NULL;
 }
 
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
 	unsigned int wake_batch;
 	int wait_cnt;
 
-	/*
-	 * Pairs with the memory barrier in set_current_state() to ensure the
-	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
-	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
-	 * because we just did clear_bit_unlock() in the caller.
-	 */
-	smp_mb__after_atomic();
-
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
-		return;
+		return false;
 
 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
 	if (wait_cnt <= 0) {
+		int ret;
+
 		wake_batch = READ_ONCE(sbq->wake_batch);
+
 		/*
 		 * Pairs with the memory barrier in sbitmap_queue_resize() to
 		 * ensure that we see the batch size update before the wait
 		 * count is reset.
 		 */
 		smp_mb__before_atomic();
+
 		/*
-		 * If there are concurrent callers to sbq_wake_up(), the last
-		 * one to decrement the wait count below zero will bump it back
-		 * up. If there is a concurrent resize, the count reset will
-		 * either cause the cmpxchg to fail or overwrite after the
-		 * cmpxchg.
+		 * For concurrent callers of this, the one that failed the
+		 * atomic_cmpxchg() race should call this function again
+		 * to wakeup a new batch on a different 'ws'.
 		 */
-		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
-		sbq_index_atomic_inc(&sbq->wake_index);
-		wake_up_nr(&ws->wait, wake_batch);
+		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+		if (ret == wait_cnt) {
+			sbq_index_atomic_inc(&sbq->wake_index);
+			wake_up_nr(&ws->wait, wake_batch);
+			return false;
+		}
+
+		return true;
 	}
+
+	return false;
+}
+
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+	while (__sbq_wake_up(sbq))
+		;
 }
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
 	sbitmap_clear_bit_unlock(&sbq->sb, nr);
-	sbq_wake_up(sbq);
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active().
+	 */
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
 }
@@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 
 	/*
 	 * Pairs with the memory barrier in set_current_state() like in
-	 * sbq_wake_up().
+	 * sbitmap_queue_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 	seq_puts(m, "}\n");
 
 	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 06dad7a072fd..7c6096a71704 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -24,9 +24,6 @@
  **/
 struct scatterlist *sg_next(struct scatterlist *sg)
 {
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-#endif
 	if (sg_is_last(sg))
 		return NULL;
 
@@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
 	for_each_sg(sgl, sg, nents, i)
 		ret = sg;
 
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sgl[0].sg_magic != SG_MAGIC);
 	BUG_ON(!sg_is_last(ret));
-#endif
 	return ret;
 }
 EXPORT_SYMBOL(sg_last);
@@ -170,7 +164,8 @@ static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
 		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
 		return ptr;
 	} else
-		return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
+		return kmalloc_array(nents, sizeof(struct scatterlist),
+				     gfp_mask);
 }
 
 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
deleted file mode 100644
index cc640588f145..000000000000
--- a/lib/swiotlb.c
+++ /dev/null
@@ -1,1092 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is a fallback for platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- * Copyright (C) 2000, 2003 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
- * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
- *			unnecessary i-cache flushing.
- * 04/07/.. ak		Better overflow handling. Assorted fixes.
- * 05/09/10 linville	Add support for syncing ranges, support syncing for
- *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
- * 08/12/11 beckyb	Add highmem support
- */
-
-#include <linux/cache.h>
-#include <linux/dma-direct.h>
-#include <linux/mm.h>
-#include <linux/export.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/swiotlb.h>
-#include <linux/pfn.h>
-#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/scatterlist.h>
-#include <linux/mem_encrypt.h>
-#include <linux/set_memory.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/iommu-helper.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/swiotlb.h>
-
-#define OFFSET(val,align) ((unsigned long)	\
-	                   ( (val) & ( (align) - 1)))
-
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-
-/*
- * Minimum IO TLB size to bother booting with.  Systems with mainly
- * 64bit capable cards will only lightly use the swiotlb.  If we can't
- * allocate a contiguous 1MB, we're probably in trouble anyway.
- */
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-
-enum swiotlb_force swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * Max segment that we can provide which (if pages are contingous) will
- * not be bounced (unless SWIOTLB_FORCE is set).
- */
-unsigned int max_segment;
-
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int late_alloc;
-
-static int __init
-setup_io_tlb_npages(char *str)
-{
-	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
-		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-	if (*str == ',')
-		++str;
-	if (!strcmp(str, "force")) {
-		swiotlb_force = SWIOTLB_FORCE;
-	} else if (!strcmp(str, "noforce")) {
-		swiotlb_force = SWIOTLB_NO_FORCE;
-		io_tlb_nslabs = 1;
-	}
-
-	return 0;
-}
-early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
-
-unsigned long swiotlb_nr_tbl(void)
-{
-	return io_tlb_nslabs;
-}
-EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
-
-unsigned int swiotlb_max_segment(void)
-{
-	return max_segment;
-}
-EXPORT_SYMBOL_GPL(swiotlb_max_segment);
-
-void swiotlb_set_max_segment(unsigned int val)
-{
-	if (swiotlb_force == SWIOTLB_FORCE)
-		max_segment = 1;
-	else
-		max_segment = rounddown(val, PAGE_SIZE);
-}
-
-/* default to 64MB */
-#define IO_TLB_DEFAULT_SIZE (64UL<<20)
-unsigned long swiotlb_size_or_default(void)
-{
-	unsigned long size;
-
-	size = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	return size ? size : (IO_TLB_DEFAULT_SIZE);
-}
-
-static bool no_iotlb_memory;
-
-void swiotlb_print_info(void)
-{
-	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-	unsigned char *vstart, *vend;
-
-	if (no_iotlb_memory) {
-		pr_warn("software IO TLB: No low mem\n");
-		return;
-	}
-
-	vstart = phys_to_virt(io_tlb_start);
-	vend = phys_to_virt(io_tlb_end);
-
-	printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
-	       (unsigned long long)io_tlb_start,
-	       (unsigned long long)io_tlb_end,
-	       bytes >> 20, vstart, vend - 1);
-}
-
-/*
- * Early SWIOTLB allocation may be too early to allow an architecture to
- * perform the desired operations.  This function allows the architecture to
- * call SWIOTLB when the operations are possible.  It needs to be called
- * before the SWIOTLB memory is used.
- */
-void __init swiotlb_update_mem_attributes(void)
-{
-	void *vaddr;
-	unsigned long bytes;
-
-	if (no_iotlb_memory || late_alloc)
-		return;
-
-	vaddr = phys_to_virt(io_tlb_start);
-	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
-
-	vaddr = phys_to_virt(io_tlb_overflow_buffer);
-	bytes = PAGE_ALIGN(io_tlb_overflow);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
-}
-
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
-{
-	void *v_overflow_buffer;
-	unsigned long i, bytes;
-
-	bytes = nslabs << IO_TLB_SHIFT;
-
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = __pa(tlb);
-	io_tlb_end = io_tlb_start + bytes;
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-						PAGE_ALIGN(io_tlb_overflow),
-						PAGE_SIZE);
-	if (!v_overflow_buffer)
-		return -ENOMEM;
-
-	io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = memblock_virt_alloc(
-				PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
-				PAGE_SIZE);
-	io_tlb_orig_addr = memblock_virt_alloc(
-				PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
-				PAGE_SIZE);
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-	}
-	io_tlb_index = 0;
-
-	if (verbose)
-		swiotlb_print_info();
-
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
-	return 0;
-}
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void  __init
-swiotlb_init(int verbose)
-{
-	size_t default_size = IO_TLB_DEFAULT_SIZE;
-	unsigned char *vstart;
-	unsigned long bytes;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	/* Get IO TLB memory from the low pages */
-	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
-		return;
-
-	if (io_tlb_start)
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-	pr_warn("Cannot allocate SWIOTLB buffer");
-	no_iotlb_memory = true;
-}
-
-/*
- * Systems with larger DMA zones (those that don't support ISA) can
- * initialize the swiotlb later using the slab allocator if needed.
- * This should be just like above, but with some error catching.
- */
-int
-swiotlb_late_init_with_default_size(size_t default_size)
-{
-	unsigned long bytes, req_nslabs = io_tlb_nslabs;
-	unsigned char *vstart = NULL;
-	unsigned int order;
-	int rc = 0;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-						  order);
-		if (vstart)
-			break;
-		order--;
-	}
-
-	if (!vstart) {
-		io_tlb_nslabs = req_nslabs;
-		return -ENOMEM;
-	}
-	if (order != get_order(bytes)) {
-		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
-	}
-	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
-	if (rc)
-		free_pages((unsigned long)vstart, order);
-
-	return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
-	unsigned long i, bytes;
-	unsigned char *v_overflow_buffer;
-
-	bytes = nslabs << IO_TLB_SHIFT;
-
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = virt_to_phys(tlb);
-	io_tlb_end = io_tlb_start + bytes;
-
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	memset(tlb, 0, bytes);
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-						     get_order(io_tlb_overflow));
-	if (!v_overflow_buffer)
-		goto cleanup2;
-
-	set_memory_decrypted((unsigned long)v_overflow_buffer,
-			io_tlb_overflow >> PAGE_SHIFT);
-	memset(v_overflow_buffer, 0, io_tlb_overflow);
-	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
-		goto cleanup3;
-
-	io_tlb_orig_addr = (phys_addr_t *)
-		__get_free_pages(GFP_KERNEL,
-				 get_order(io_tlb_nslabs *
-					   sizeof(phys_addr_t)));
-	if (!io_tlb_orig_addr)
-		goto cleanup4;
-
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-	}
-	io_tlb_index = 0;
-
-	swiotlb_print_info();
-
-	late_alloc = 1;
-
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
-
-	return 0;
-
-cleanup4:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-	                                                 sizeof(int)));
-	io_tlb_list = NULL;
-cleanup3:
-	free_pages((unsigned long)v_overflow_buffer,
-		   get_order(io_tlb_overflow));
-	io_tlb_overflow_buffer = 0;
-cleanup2:
-	io_tlb_end = 0;
-	io_tlb_start = 0;
-	io_tlb_nslabs = 0;
-	max_segment = 0;
-	return -ENOMEM;
-}
-
-void __init swiotlb_exit(void)
-{
-	if (!io_tlb_orig_addr)
-		return;
-
-	if (late_alloc) {
-		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-			   get_order(io_tlb_overflow));
-		free_pages((unsigned long)io_tlb_orig_addr,
-			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-								 sizeof(int)));
-		free_pages((unsigned long)phys_to_virt(io_tlb_start),
-			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
-	} else {
-		memblock_free_late(io_tlb_overflow_buffer,
-				   PAGE_ALIGN(io_tlb_overflow));
-		memblock_free_late(__pa(io_tlb_orig_addr),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		memblock_free_late(__pa(io_tlb_list),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		memblock_free_late(io_tlb_start,
-				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-	}
-	io_tlb_nslabs = 0;
-	max_segment = 0;
-}
-
-int is_swiotlb_buffer(phys_addr_t paddr)
-{
-	return paddr >= io_tlb_start && paddr < io_tlb_end;
-}
-
-/*
- * Bounce: copy the swiotlb buffer back to the original dma location
- */
-static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
-			   size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(orig_addr);
-	unsigned char *vaddr = phys_to_virt(tlb_addr);
-
-	if (PageHighMem(pfn_to_page(pfn))) {
-		/* The buffer does not have a mapping.  Map it in and copy */
-		unsigned int offset = orig_addr & ~PAGE_MASK;
-		char *buffer;
-		unsigned int sz = 0;
-		unsigned long flags;
-
-		while (size) {
-			sz = min_t(size_t, PAGE_SIZE - offset, size);
-
-			local_irq_save(flags);
-			buffer = kmap_atomic(pfn_to_page(pfn));
-			if (dir == DMA_TO_DEVICE)
-				memcpy(vaddr, buffer + offset, sz);
-			else
-				memcpy(buffer + offset, vaddr, sz);
-			kunmap_atomic(buffer);
-			local_irq_restore(flags);
-
-			size -= sz;
-			pfn++;
-			vaddr += sz;
-			offset = 0;
-		}
-	} else if (dir == DMA_TO_DEVICE) {
-		memcpy(vaddr, phys_to_virt(orig_addr), size);
-	} else {
-		memcpy(phys_to_virt(orig_addr), vaddr, size);
-	}
-}
-
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
-				   dma_addr_t tbl_dma_addr,
-				   phys_addr_t orig_addr, size_t size,
-				   enum dma_data_direction dir,
-				   unsigned long attrs)
-{
-	unsigned long flags;
-	phys_addr_t tlb_addr;
-	unsigned int nslots, stride, index, wrap;
-	int i;
-	unsigned long mask;
-	unsigned long offset_slots;
-	unsigned long max_slots;
-
-	if (no_iotlb_memory)
-		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
-	if (mem_encrypt_active())
-		pr_warn_once("%s is active and system is using DMA bounce buffers\n",
-			     sme_active() ? "SME" : "SEV");
-
-	mask = dma_get_seg_boundary(hwdev);
-
-	tbl_dma_addr &= mask;
-
-	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-
-	/*
- 	 * Carefully handle integer overflow which can occur when mask == ~0UL.
- 	 */
-	max_slots = mask + 1
-		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
-
-	/*
-	 * For mappings greater than or equal to a page, we limit the stride
-	 * (and hence alignment) to a page size.
-	 */
-	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size >= PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
-	BUG_ON(!nslots);
-
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
-		index = 0;
-	wrap = index;
-
-	do {
-		while (iommu_is_span_boundary(index, nslots, offset_slots,
-					      max_slots)) {
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-			if (index == wrap)
-				goto not_found;
-		}
-
-		/*
-		 * If we find a slot that indicates we have 'nslots' number of
-		 * contiguous buffers, we allocate the buffers from that slot
-		 * and mark the entries as '0' indicating unavailable.
-		 */
-		if (io_tlb_list[index] >= nslots) {
-			int count = 0;
-
-			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-			/*
-			 * Update the indices to avoid searching in the next
-			 * round.
-			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
-					? (index + nslots) : 0);
-
-			goto found;
-		}
-		index += stride;
-		if (index >= io_tlb_nslabs)
-			index = 0;
-	} while (index != wrap);
-
-not_found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
-	return SWIOTLB_MAP_ERROR;
-found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-
-	/*
-	 * Save away the mapping from the original address to the DMA address.
-	 * This is needed when we sync the memory.  Then we sync the buffer if
-	 * needed.
-	 */
-	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
-
-	return tlb_addr;
-}
-
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir, unsigned long attrs)
-{
-	dma_addr_t start_dma_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE) {
-		dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-				     &phys);
-		return SWIOTLB_MAP_ERROR;
-	}
-
-	start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
-				      dir, attrs);
-}
-
-/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t size, enum dma_data_direction dir,
-			      unsigned long attrs)
-{
-	unsigned long flags;
-	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (orig_addr != INVALID_PHYS_ADDR &&
-	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
-
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contiguous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the pool being returned.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
-		/*
-		 * Step 1: return the slots to the free list, merging the
-		 * slots with superceeding slots
-		 */
-		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[i] = ++count;
-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-		}
-		/*
-		 * Step 2: merge the returned slots with the preceding slots,
-		 * if available (non zero)
-		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
-	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
-			     size_t size, enum dma_data_direction dir,
-			     enum dma_sync_target target)
-{
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
-	if (orig_addr == INVALID_PHYS_ADDR)
-		return;
-	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
-
-	switch (target) {
-	case SYNC_FOR_CPU:
-		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, tlb_addr,
-				       size, DMA_FROM_DEVICE);
-		else
-			BUG_ON(dir != DMA_TO_DEVICE);
-		break;
-	case SYNC_FOR_DEVICE:
-		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, tlb_addr,
-				       size, DMA_TO_DEVICE);
-		else
-			BUG_ON(dir != DMA_FROM_DEVICE);
-		break;
-	default:
-		BUG();
-	}
-}
-
-#ifdef CONFIG_DMA_DIRECT_OPS
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-		size_t size)
-{
-	u64 mask = DMA_BIT_MASK(32);
-
-	if (dev && dev->coherent_dma_mask)
-		mask = dev->coherent_dma_mask;
-	return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		unsigned long attrs)
-{
-	phys_addr_t phys_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		goto out_warn;
-
-	phys_addr = swiotlb_tbl_map_single(dev,
-			__phys_to_dma(dev, io_tlb_start),
-			0, size, DMA_FROM_DEVICE, attrs);
-	if (phys_addr == SWIOTLB_MAP_ERROR)
-		goto out_warn;
-
-	*dma_handle = __phys_to_dma(dev, phys_addr);
-	if (!dma_coherent_ok(dev, *dma_handle, size))
-		goto out_unmap;
-
-	memset(phys_to_virt(phys_addr), 0, size);
-	return phys_to_virt(phys_addr);
-
-out_unmap:
-	dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		(unsigned long long)(dev ? dev->coherent_dma_mask : 0),
-		(unsigned long long)*dma_handle);
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-			DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-		dev_warn(dev,
-			"swiotlb: coherent allocation failed, size=%zu\n",
-			size);
-		dump_stack();
-	}
-	return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-		dma_addr_t dma_addr)
-{
-	phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
-
-	WARN_ON_ONCE(irqs_disabled());
-
-	if (!is_swiotlb_buffer(phys_addr))
-		return false;
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-				 DMA_ATTR_SKIP_CPU_SYNC);
-	return true;
-}
-#endif
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-	     int do_panic)
-{
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		return;
-
-	/*
-	 * Ran out of IOMMU space for this operation. This is very bad.
-	 * Unfortunately the drivers cannot handle this operation properly.
-	 * unless they check for dma_mapping_error (most don't)
-	 * When the mapping is small enough return a static buffer to limit
-	 * the damage, or panic when the transfer is too big.
-	 */
-	dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-			    size);
-
-	if (size <= io_tlb_overflow || !do_panic)
-		return;
-
-	if (dir == DMA_BIDIRECTIONAL)
-		panic("DMA: Random memory could be DMA accessed\n");
-	if (dir == DMA_FROM_DEVICE)
-		panic("DMA: Random memory could be DMA written\n");
-	if (dir == DMA_TO_DEVICE)
-		panic("DMA: Random memory could be DMA read\n");
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.  The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
-			    unsigned long offset, size_t size,
-			    enum dma_data_direction dir,
-			    unsigned long attrs)
-{
-	phys_addr_t map, phys = page_to_phys(page) + offset;
-	dma_addr_t dev_addr = phys_to_dma(dev, phys);
-
-	BUG_ON(dir == DMA_NONE);
-	/*
-	 * If the address happens to be in the device's DMA window,
-	 * we can safely return the device addr and not worry about bounce
-	 * buffering it.
-	 */
-	if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-		return dev_addr;
-
-	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-	/* Oh well, have to allocate and map a bounce buffer. */
-	map = map_single(dev, phys, size, dir, attrs);
-	if (map == SWIOTLB_MAP_ERROR) {
-		swiotlb_full(dev, size, dir, 1);
-		return __phys_to_dma(dev, io_tlb_overflow_buffer);
-	}
-
-	dev_addr = __phys_to_dma(dev, map);
-
-	/* Ensure that the address returned is DMA'ble */
-	if (dma_capable(dev, dev_addr, size))
-		return dev_addr;
-
-	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-	return __phys_to_dma(dev, io_tlb_overflow_buffer);
-}
-
-/*
- * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_page call.  All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-			 size_t size, enum dma_data_direction dir,
-			 unsigned long attrs)
-{
-	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-
-	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
-		return;
-	}
-
-	if (dir != DMA_FROM_DEVICE)
-		return;
-
-	/*
-	 * phys_to_virt doesn't work with hihgmem page but we could
-	 * call dma_mark_clean() with hihgmem page here. However, we
-	 * are fine since dma_mark_clean() is null on POWERPC. We can
-	 * make dma_mark_clean() take a physical address if necessary.
-	 */
-	dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-			size_t size, enum dma_data_direction dir,
-			unsigned long attrs)
-{
-	unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_page() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so.  At the next point you give the dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-static void
-swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-		    size_t size, enum dma_data_direction dir,
-		    enum dma_sync_target target)
-{
-	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-
-	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-		return;
-	}
-
-	if (dir != DMA_FROM_DEVICE)
-		return;
-
-	dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-			    size_t size, enum dma_data_direction dir)
-{
-	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-			       size_t size, enum dma_data_direction dir)
-{
-	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_page
- * interface.  Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length.  They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for swiotlb_map_page are the
- * same here.
- */
-int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		     enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-		if (swiotlb_force == SWIOTLB_FORCE ||
-		    !dma_capable(hwdev, dev_addr, sg->length)) {
-			phys_addr_t map = map_single(hwdev, sg_phys(sg),
-						     sg->length, dir, attrs);
-			if (map == SWIOTLB_MAP_ERROR) {
-				/* Don't panic here, we expect map_sg users
-				   to do proper error handling. */
-				swiotlb_full(hwdev, sg->length, dir, 0);
-				attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-						       attrs);
-				sg_dma_len(sgl) = 0;
-				return 0;
-			}
-			sg->dma_address = __phys_to_dma(hwdev, map);
-		} else
-			sg->dma_address = dev_addr;
-		sg_dma_len(sg) = sg->length;
-	}
-	return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_page() above.
- */
-void
-swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-		       int nelems, enum dma_data_direction dir,
-		       unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
-			     attrs);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-static void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-		int nelems, enum dma_data_direction dir,
-		enum dma_sync_target target)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nelems, i)
-		swiotlb_sync_single(hwdev, sg->dma_address,
-				    sg_dma_len(sg), dir, target);
-}
-
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			int nelems, enum dma_data_direction dir)
-{
-	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-			   int nelems, enum dma_data_direction dir)
-{
-	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
-}
-
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
-/*
- * Return whether the given device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
-}
-
-#ifdef CONFIG_DMA_DIRECT_OPS
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	void *vaddr;
-
-	/* temporary workaround: */
-	if (gfp & __GFP_NOWARN)
-		attrs |= DMA_ATTR_NO_WARN;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA memory
-	 * allocation ultimately failed.
-	 */
-	gfp |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-	if (!vaddr)
-		vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-	return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	if (!swiotlb_free_buffer(dev, size, dma_addr))
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-const struct dma_map_ops swiotlb_dma_ops = {
-	.mapping_error		= swiotlb_dma_mapping_error,
-	.alloc			= swiotlb_alloc,
-	.free			= swiotlb_free,
-	.sync_single_for_cpu	= swiotlb_sync_single_for_cpu,
-	.sync_single_for_device	= swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu	= swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device	= swiotlb_sync_sg_for_device,
-	.map_sg			= swiotlb_map_sg_attrs,
-	.unmap_sg		= swiotlb_unmap_sg_attrs,
-	.map_page		= swiotlb_map_page,
-	.unmap_page		= swiotlb_unmap_page,
-	.dma_supported		= dma_direct_supported,
-};
-#endif /* CONFIG_DMA_DIRECT_OPS */
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 8e157806df7a..08d3d59dca17 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -356,29 +356,22 @@ static int bpf_fill_maxinsns11(struct bpf_test *self)
 	return __bpf_fill_ja(self, BPF_MAXINSNS, 68);
 }
 
-static int bpf_fill_ja(struct bpf_test *self)
-{
-	/* Hits exactly 11 passes on x86_64 JIT. */
-	return __bpf_fill_ja(self, 12, 9);
-}
-
-static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
+static int bpf_fill_maxinsns12(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
 	struct sock_filter *insn;
-	int i;
+	int i = 0;
 
 	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
 	if (!insn)
 		return -ENOMEM;
 
-	for (i = 0; i < len - 1; i += 2) {
-		insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0);
-		insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
-					 SKF_AD_OFF + SKF_AD_CPU);
-	}
+	insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0);
 
-	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee);
+	for (i = 1; i < len - 1; i++)
+		insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0);
+
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab);
 
 	self->u.ptr.insns = insn;
 	self->u.ptr.len = len;
@@ -386,50 +379,22 @@ static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
 	return 0;
 }
 
-#define PUSH_CNT 68
-/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
-static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+static int bpf_fill_maxinsns13(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
-	struct bpf_insn *insn;
-	int i = 0, j, k = 0;
+	struct sock_filter *insn;
+	int i = 0;
 
 	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
 	if (!insn)
 		return -ENOMEM;
 
-	insn[i++] = BPF_MOV64_REG(R6, R1);
-loop:
-	for (j = 0; j < PUSH_CNT; j++) {
-		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
-		i++;
-		insn[i++] = BPF_MOV64_REG(R1, R6);
-		insn[i++] = BPF_MOV64_IMM(R2, 1);
-		insn[i++] = BPF_MOV64_IMM(R3, 2);
-		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-					 bpf_skb_vlan_push_proto.func - __bpf_call_base);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
-		i++;
-	}
+	for (i = 0; i < len - 3; i++)
+		insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0);
 
-	for (j = 0; j < PUSH_CNT; j++) {
-		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
-		i++;
-		insn[i++] = BPF_MOV64_REG(R1, R6);
-		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-					 bpf_skb_vlan_pop_proto.func - __bpf_call_base);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
-		i++;
-	}
-	if (++k < 5)
-		goto loop;
-
-	for (; i < len - 1; i++)
-		insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef);
-
-	insn[len - 1] = BPF_EXIT_INSN();
+	insn[len - 3] = __BPF_STMT(BPF_LD | BPF_IMM, 0xabababab);
+	insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0);
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0);
 
 	self->u.ptr.insns = insn;
 	self->u.ptr.len = len;
@@ -437,58 +402,29 @@ loop:
 	return 0;
 }
 
-static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+static int bpf_fill_ja(struct bpf_test *self)
 {
-	struct bpf_insn *insn;
-
-	insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
-	if (!insn)
-		return -ENOMEM;
-
-	/* Due to func address being non-const, we need to
-	 * assemble this here.
-	 */
-	insn[0] = BPF_MOV64_REG(R6, R1);
-	insn[1] = BPF_LD_ABS(BPF_B, 0);
-	insn[2] = BPF_LD_ABS(BPF_H, 0);
-	insn[3] = BPF_LD_ABS(BPF_W, 0);
-	insn[4] = BPF_MOV64_REG(R7, R6);
-	insn[5] = BPF_MOV64_IMM(R6, 0);
-	insn[6] = BPF_MOV64_REG(R1, R7);
-	insn[7] = BPF_MOV64_IMM(R2, 1);
-	insn[8] = BPF_MOV64_IMM(R3, 2);
-	insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-			       bpf_skb_vlan_push_proto.func - __bpf_call_base);
-	insn[10] = BPF_MOV64_REG(R6, R7);
-	insn[11] = BPF_LD_ABS(BPF_B, 0);
-	insn[12] = BPF_LD_ABS(BPF_H, 0);
-	insn[13] = BPF_LD_ABS(BPF_W, 0);
-	insn[14] = BPF_MOV64_IMM(R0, 42);
-	insn[15] = BPF_EXIT_INSN();
-
-	self->u.ptr.insns = insn;
-	self->u.ptr.len = 16;
-
-	return 0;
+	/* Hits exactly 11 passes on x86_64 JIT. */
+	return __bpf_fill_ja(self, 12, 9);
 }
 
-static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
-	struct bpf_insn *insn;
-	int i = 0;
+	struct sock_filter *insn;
+	int i;
 
 	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
 	if (!insn)
 		return -ENOMEM;
 
-	insn[i++] = BPF_MOV64_REG(R6, R1);
-	insn[i++] = BPF_LD_ABS(BPF_B, 0);
-	insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2);
-	i++;
-	while (i < len - 1)
-		insn[i++] = BPF_LD_ABS(BPF_B, 1);
-	insn[i] = BPF_EXIT_INSN();
+	for (i = 0; i < len - 1; i += 2) {
+		insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0);
+		insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+					 SKF_AD_OFF + SKF_AD_CPU);
+	}
+
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee);
 
 	self->u.ptr.insns = insn;
 	self->u.ptr.len = len;
@@ -1988,40 +1924,6 @@ static struct bpf_test tests[] = {
 		{ { 0, -1 } }
 	},
 	{
-		"INT: DIV + ABS",
-		.u.insns_int = {
-			BPF_ALU64_REG(BPF_MOV, R6, R1),
-			BPF_LD_ABS(BPF_B, 3),
-			BPF_ALU64_IMM(BPF_MOV, R2, 2),
-			BPF_ALU32_REG(BPF_DIV, R0, R2),
-			BPF_ALU64_REG(BPF_MOV, R8, R0),
-			BPF_LD_ABS(BPF_B, 4),
-			BPF_ALU64_REG(BPF_ADD, R8, R0),
-			BPF_LD_IND(BPF_B, R8, -70),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ 10, 20, 30, 40, 50 },
-		{ { 4, 0 }, { 5, 10 } }
-	},
-	{
-		/* This one doesn't go through verifier, but is just raw insn
-		 * as opposed to cBPF tests from here. Thus div by 0 tests are
-		 * done in test_verifier in BPF kselftests.
-		 */
-		"INT: DIV by -1",
-		.u.insns_int = {
-			BPF_ALU64_REG(BPF_MOV, R6, R1),
-			BPF_ALU64_IMM(BPF_MOV, R7, -1),
-			BPF_LD_ABS(BPF_B, 3),
-			BPF_ALU32_REG(BPF_DIV, R0, R7),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ 10, 20, 30, 40, 50 },
-		{ { 3, 0 }, { 4, 0 } }
-	},
-	{
 		"check: missing ret",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_IMM, 1),
@@ -2383,50 +2285,6 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } }
 	},
-	{
-		"nmap reduced",
-		.u.insns_int = {
-			BPF_MOV64_REG(R6, R1),
-			BPF_LD_ABS(BPF_H, 12),
-			BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28),
-			BPF_LD_ABS(BPF_H, 12),
-			BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26),
-			BPF_MOV32_IMM(R0, 18),
-			BPF_STX_MEM(BPF_W, R10, R0, -64),
-			BPF_LDX_MEM(BPF_W, R7, R10, -64),
-			BPF_LD_IND(BPF_W, R7, 14),
-			BPF_STX_MEM(BPF_W, R10, R0, -60),
-			BPF_MOV32_IMM(R0, 280971478),
-			BPF_STX_MEM(BPF_W, R10, R0, -56),
-			BPF_LDX_MEM(BPF_W, R7, R10, -56),
-			BPF_LDX_MEM(BPF_W, R0, R10, -60),
-			BPF_ALU32_REG(BPF_SUB, R0, R7),
-			BPF_JMP_IMM(BPF_JNE, R0, 0, 15),
-			BPF_LD_ABS(BPF_H, 12),
-			BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13),
-			BPF_MOV32_IMM(R0, 22),
-			BPF_STX_MEM(BPF_W, R10, R0, -56),
-			BPF_LDX_MEM(BPF_W, R7, R10, -56),
-			BPF_LD_IND(BPF_H, R7, 14),
-			BPF_STX_MEM(BPF_W, R10, R0, -52),
-			BPF_MOV32_IMM(R0, 17366),
-			BPF_STX_MEM(BPF_W, R10, R0, -48),
-			BPF_LDX_MEM(BPF_W, R7, R10, -48),
-			BPF_LDX_MEM(BPF_W, R0, R10, -52),
-			BPF_ALU32_REG(BPF_SUB, R0, R7),
-			BPF_JMP_IMM(BPF_JNE, R0, 0, 2),
-			BPF_MOV32_IMM(R0, 256),
-			BPF_EXIT_INSN(),
-			BPF_MOV32_IMM(R0, 0),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0,
-		  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		  0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6},
-		{ { 38, 256 } },
-		.stack_depth = 64,
-	},
 	/* BPF_ALU | BPF_MOV | BPF_X */
 	{
 		"ALU_MOV_X: dst = 2",
@@ -5424,21 +5282,31 @@ static struct bpf_test tests[] = {
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Ctx heavy transformations",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC,
+#endif
 		{ },
 		{
 			{  1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
 			{ 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
 		},
 		.fill_helper = bpf_fill_maxinsns6,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Call heavy transformations",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC | FLAG_NO_DATA,
+#endif
 		{ },
 		{ { 1, 0 }, { 10, 0 } },
 		.fill_helper = bpf_fill_maxinsns7,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Jump heavy test",
@@ -5478,28 +5346,39 @@ static struct bpf_test tests[] = {
 		.expected_errcode = -ENOTSUPP,
 	},
 	{
-		"BPF_MAXINSNS: ld_abs+get_processor_id",
-		{ },
-		CLASSIC,
+		"BPF_MAXINSNS: jump over MSH",
 		{ },
-		{ { 1, 0xbee } },
-		.fill_helper = bpf_fill_ld_abs_get_processor_id,
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		{ 0xfa, 0xfb, 0xfc, 0xfd, },
+		{ { 4, 0xabababab } },
+		.fill_helper = bpf_fill_maxinsns12,
+		.expected_errcode = -EINVAL,
 	},
 	{
-		"BPF_MAXINSNS: ld_abs+vlan_push/pop",
+		"BPF_MAXINSNS: exec all MSH",
 		{ },
-		INTERNAL,
-		{ 0x34 },
-		{ { ETH_HLEN, 0xbef } },
-		.fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
+		CLASSIC,
+#endif
+		{ 0xfa, 0xfb, 0xfc, 0xfd, },
+		{ { 4, 0xababab83 } },
+		.fill_helper = bpf_fill_maxinsns13,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{
-		"BPF_MAXINSNS: jump around ld_abs",
+		"BPF_MAXINSNS: ld_abs+get_processor_id",
 		{ },
-		INTERNAL,
-		{ 10, 11 },
-		{ { 2, 10 } },
-		.fill_helper = bpf_fill_jump_around_ld_abs,
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
+		CLASSIC,
+#endif
+		{ },
+		{ { 1, 0xbee } },
+		.fill_helper = bpf_fill_ld_abs_get_processor_id,
+		.expected_errcode = -ENOTSUPP,
 	},
 	/*
 	 * LD_IND / LD_ABS on fragmented SKBs
@@ -5683,6 +5562,53 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x05 } },
 	},
 	{
+		"LD_IND byte positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xff } },
+	},
+	{
+		"LD_IND byte positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_IND byte negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 } },
+	},
+	{
+		"LD_IND byte negative offset, multiple calls",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3b),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 1),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 2),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 3),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 4),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 }, },
+	},
+	{
 		"LD_IND halfword positive offset",
 		.u.insns = {
 			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
@@ -5731,6 +5657,39 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x66cc } },
 	},
 	{
+		"LD_IND halfword positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3d),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffff } },
+	},
+	{
+		"LD_IND halfword positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_IND halfword negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 } },
+	},
+	{
 		"LD_IND word positive offset",
 		.u.insns = {
 			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
@@ -5821,6 +5780,39 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x66cc77dd } },
 	},
 	{
+		"LD_IND word positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3b),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffffffff } },
+	},
+	{
+		"LD_IND word positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_IND word negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 } },
+	},
+	{
 		"LD_ABS byte",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20),
@@ -5838,6 +5830,68 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0xcc } },
 	},
 	{
+		"LD_ABS byte positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xff } },
+	},
+	{
+		"LD_ABS byte positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS byte negative offset, out of bounds load",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, -1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		.expected_errcode = -EINVAL,
+	},
+	{
+		"LD_ABS byte negative offset, in bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 }, },
+	},
+	{
+		"LD_ABS byte negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS byte negative offset, multiple calls",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3c),
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3d),
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 }, },
+	},
+	{
 		"LD_ABS halfword",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22),
@@ -5872,6 +5926,55 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x99ff } },
 	},
 	{
+		"LD_ABS halfword positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffff } },
+	},
+	{
+		"LD_ABS halfword positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS halfword negative offset, out of bounds load",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, -1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		.expected_errcode = -EINVAL,
+	},
+	{
+		"LD_ABS halfword negative offset, in bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x1982 }, },
+	},
+	{
+		"LD_ABS halfword negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
 		"LD_ABS word",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c),
@@ -5939,6 +6042,140 @@ static struct bpf_test tests[] = {
 		},
 		{ {0x40, 0x88ee99ff } },
 	},
+	{
+		"LD_ABS word positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffffffff } },
+	},
+	{
+		"LD_ABS word positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS word negative offset, out of bounds load",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, -1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		.expected_errcode = -EINVAL,
+	},
+	{
+		"LD_ABS word negative offset, in bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x25051982 }, },
+	},
+	{
+		"LD_ABS word negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LDX_MSH standalone, preserved A",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0xffeebbaa }, },
+	},
+	{
+		"LDX_MSH standalone, preserved A 2",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0x175e9d63),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3d),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x175e9d63 }, },
+	},
+	{
+		"LDX_MSH standalone, test result 1",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x14 }, },
+	},
+	{
+		"LDX_MSH standalone, test result 2",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x24 }, },
+	},
+	{
+		"LDX_MSH standalone, negative offset",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, -1),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0 }, },
+	},
+	{
+		"LDX_MSH standalone, negative offset 2",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x24 }, },
+	},
+	{
+		"LDX_MSH standalone, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x40),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0 }, },
+	},
 	/*
 	 * verify that the interpreter or JIT correctly sets A and X
 	 * to 0.
@@ -6127,14 +6364,6 @@ static struct bpf_test tests[] = {
 		{},
 		{ {0x1, 0x42 } },
 	},
-	{
-		"LD_ABS with helper changing skb data",
-		{ },
-		INTERNAL,
-		{ 0x34 },
-		{ { ETH_HLEN, 42 } },
-		.fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
-	},
 	/* Checking interpreter vs JIT wrt signed extended imms. */
 	{
 		"JNE signed compare, test 1",
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index cee000ac54d8..b984806d7d7b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -618,8 +618,9 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
 
 	mutex_lock(&test_fw_mutex);
 
-	test_fw_config->reqs = vzalloc(sizeof(struct test_batched_req) *
-				       test_fw_config->num_requests * 2);
+	test_fw_config->reqs =
+		vzalloc(array3_size(sizeof(struct test_batched_req),
+				    test_fw_config->num_requests, 2));
 	if (!test_fw_config->reqs) {
 		rc = -ENOMEM;
 		goto out_unlock;
@@ -720,8 +721,9 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
 
 	mutex_lock(&test_fw_mutex);
 
-	test_fw_config->reqs = vzalloc(sizeof(struct test_batched_req) *
-				       test_fw_config->num_requests * 2);
+	test_fw_config->reqs =
+		vzalloc(array3_size(sizeof(struct test_batched_req),
+				    test_fw_config->num_requests, 2));
 	if (!test_fw_config->reqs) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 0e5b7a61460b..e3ddd836491f 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -779,8 +779,9 @@ static int kmod_config_sync_info(struct kmod_test_device *test_dev)
 	struct test_config *config = &test_dev->config;
 
 	free_test_dev_info(test_dev);
-	test_dev->info = vzalloc(config->num_threads *
-				 sizeof(struct kmod_test_device_info));
+	test_dev->info =
+		vzalloc(array_size(sizeof(struct kmod_test_device_info),
+				   config->num_threads));
 	if (!test_dev->info)
 		return -ENOMEM;
 
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
new file mode 100644
index 000000000000..2278fe05a1b0
--- /dev/null
+++ b/lib/test_overflow.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Test cases for arithmetic overflow checks.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#define DEFINE_TEST_ARRAY(t)			/* declares struct test_<t> and the table <t>_tests[] */ \
+	static const struct test_ ## t {	\
+		t a, b;				/* operands */ \
+		t sum, diff, prod;		/* expected a + b, a - b, a * b, held in type t */ \
+		bool s_of, d_of, p_of;		/* expected overflow flag for sum, diff, prod */ \
+	} t ## _tests[] __initconst
+
+DEFINE_TEST_ARRAY(u8) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U8_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U8_MAX, U8_MAX, 1, 0, false, true, false},
+	{U8_MAX, 0, U8_MAX, U8_MAX, 0, false, false, false},
+	{1, U8_MAX, 0, 2, U8_MAX, true, true, false},
+	{U8_MAX, 1, 0, U8_MAX-1, U8_MAX, true, false, false},
+	{U8_MAX, U8_MAX, U8_MAX-1, 0, 1, true, false, true},
+
+	{U8_MAX, U8_MAX-1, U8_MAX-2, 1, 2, true, false, true},
+	{U8_MAX-1, U8_MAX, U8_MAX-2, U8_MAX, 2, true, true, true},
+
+	{1U << 3, 1U << 3, 1U << 4, 0, 1U << 6, false, false, false},
+	{1U << 4, 1U << 4, 1U << 5, 0, 0, false, false, true},
+	{1U << 4, 1U << 3, 3*(1U << 3), 1U << 3, 1U << 7, false, false, false},
+	{1U << 7, 1U << 7, 0, 0, 0, true, false, true},
+
+	{48, 32, 80, 16, 0, false, false, true},	/* 48 * 32 = 1536, a multiple of 256 */
+	{128, 128, 0, 0, 0, true, false, true},
+	{123, 234, 101, 145, 110, true, true, true},	/* 357, -111 and 28782, each reduced mod 256 */
+};
+DEFINE_TEST_ARRAY(u16) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U16_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U16_MAX, U16_MAX, 1, 0, false, true, false},
+	{U16_MAX, 0, U16_MAX, U16_MAX, 0, false, false, false},
+	{1, U16_MAX, 0, 2, U16_MAX, true, true, false},
+	{U16_MAX, 1, 0, U16_MAX-1, U16_MAX, true, false, false},
+	{U16_MAX, U16_MAX, U16_MAX-1, 0, 1, true, false, true},
+
+	{U16_MAX, U16_MAX-1, U16_MAX-2, 1, 2, true, false, true},
+	{U16_MAX-1, U16_MAX, U16_MAX-2, U16_MAX, 2, true, true, true},
+
+	{1U << 7, 1U << 7, 1U << 8, 0, 1U << 14, false, false, false},
+	{1U << 8, 1U << 8, 1U << 9, 0, 0, false, false, true},
+	{1U << 8, 1U << 7, 3*(1U << 7), 1U << 7, 1U << 15, false, false, false},
+	{1U << 15, 1U << 15, 0, 0, 0, true, false, true},
+
+	{123, 234, 357, 65425, 28782, false, true, false},	/* only 123 - 234 wraps in 16 bits */
+	{1234, 2345, 3579, 64425, 10146, false, true, true},	/* 1234 * 2345 = 2893730, reduced mod 65536 */
+};
+DEFINE_TEST_ARRAY(u32) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U32_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U32_MAX, U32_MAX, 1, 0, false, true, false},
+	{U32_MAX, 0, U32_MAX, U32_MAX, 0, false, false, false},
+	{1, U32_MAX, 0, 2, U32_MAX, true, true, false},
+	{U32_MAX, 1, 0, U32_MAX-1, U32_MAX, true, false, false},
+	{U32_MAX, U32_MAX, U32_MAX-1, 0, 1, true, false, true},
+
+	{U32_MAX, U32_MAX-1, U32_MAX-2, 1, 2, true, false, true},
+	{U32_MAX-1, U32_MAX, U32_MAX-2, U32_MAX, 2, true, true, true},
+
+	{1U << 15, 1U << 15, 1U << 16, 0, 1U << 30, false, false, false},
+	{1U << 16, 1U << 16, 1U << 17, 0, 0, false, false, true},
+	{1U << 16, 1U << 15, 3*(1U << 15), 1U << 15, 1U << 31, false, false, false},
+	{1U << 31, 1U << 31, 0, 0, 0, true, false, true},
+
+	{-2U, 1U, -1U, -3U, -2U, false, false, false},	/* -2U is U32_MAX - 1, so nothing wraps here */
+	{-4U, 5U, 1U, -9U, -20U, true, false, true},
+};
+
+DEFINE_TEST_ARRAY(u64) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U64_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U64_MAX, U64_MAX, 1, 0, false, true, false},
+	{U64_MAX, 0, U64_MAX, U64_MAX, 0, false, false, false},
+	{1, U64_MAX, 0, 2, U64_MAX, true, true, false},
+	{U64_MAX, 1, 0, U64_MAX-1, U64_MAX, true, false, false},
+	{U64_MAX, U64_MAX, U64_MAX-1, 0, 1, true, false, true},
+
+	{U64_MAX, U64_MAX-1, U64_MAX-2, 1, 2, true, false, true},
+	{U64_MAX-1, U64_MAX, U64_MAX-2, U64_MAX, 2, true, true, true},
+
+	{1ULL << 31, 1ULL << 31, 1ULL << 32, 0, 1ULL << 62, false, false, false},
+	{1ULL << 32, 1ULL << 32, 1ULL << 33, 0, 0, false, false, true},
+	{1ULL << 32, 1ULL << 31, 3*(1ULL << 31), 1ULL << 31, 1ULL << 63, false, false, false},
+	{1ULL << 63, 1ULL << 63, 0, 0, 0, true, false, true},
+	{1000000000ULL /* 10^9 */, 10000000000ULL /* 10^10 */,
+	 11000000000ULL, 18446744064709551616ULL, 10000000000000000000ULL,
+	 false, true, false},	/* diff is 2^64 - 9 * 10^9; the product 10^19 still fits in 64 bits */
+	{-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true},
+};
+
+DEFINE_TEST_ARRAY(s8) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S8_MAX, S8_MAX, -S8_MAX, 0, false, false, false},
+	{S8_MAX, 0, S8_MAX, S8_MAX, 0, false, false, false},
+	{0, S8_MIN, S8_MIN, S8_MIN, 0, false, true, false},	/* 0 - S8_MIN does not fit in s8 */
+	{S8_MIN, 0, S8_MIN, S8_MIN, 0, false, false, false},
+
+	{-1, S8_MIN, S8_MAX, S8_MAX, S8_MIN, true, false, true},	/* -1 * S8_MIN does not fit in s8 */
+	{S8_MIN, -1, S8_MAX, -S8_MAX, S8_MIN, true, false, true},
+	{-1, S8_MAX, S8_MAX-1, S8_MIN, -S8_MAX, false, false, false},
+	{S8_MAX, -1, S8_MAX-1, S8_MIN, -S8_MAX, false, true, false},
+	{-1, -S8_MAX, S8_MIN, S8_MAX-1, S8_MAX, false, false, false},
+	{-S8_MAX, -1, S8_MIN, S8_MIN+2, S8_MAX, false, false, false},
+
+	{1, S8_MIN, -S8_MAX, -S8_MAX, S8_MIN, false, true, false},
+	{S8_MIN, 1, -S8_MAX, S8_MAX, S8_MIN, false, true, false},
+	{1, S8_MAX, S8_MIN, S8_MIN+2, S8_MAX, true, false, false},
+	{S8_MAX, 1, S8_MIN, S8_MAX-1, S8_MAX, true, false, false},
+
+	{S8_MIN, S8_MIN, 0, 0, 0, true, false, true},
+	{S8_MAX, S8_MAX, -2, 0, 1, true, false, true},
+
+	{-4, -32, -36, 28, -128, false, false, true},	/* +128 is one past S8_MAX */
+	{-4, 32, 28, -36, -128, false, false, false},	/* -128 is exactly S8_MIN, so it fits */
+};
+
+DEFINE_TEST_ARRAY(s16) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S16_MAX, S16_MAX, -S16_MAX, 0, false, false, false},
+	{S16_MAX, 0, S16_MAX, S16_MAX, 0, false, false, false},
+	{0, S16_MIN, S16_MIN, S16_MIN, 0, false, true, false},	/* 0 - S16_MIN does not fit in s16 */
+	{S16_MIN, 0, S16_MIN, S16_MIN, 0, false, false, false},
+
+	{-1, S16_MIN, S16_MAX, S16_MAX, S16_MIN, true, false, true},	/* -1 * S16_MIN does not fit in s16 */
+	{S16_MIN, -1, S16_MAX, -S16_MAX, S16_MIN, true, false, true},
+	{-1, S16_MAX, S16_MAX-1, S16_MIN, -S16_MAX, false, false, false},
+	{S16_MAX, -1, S16_MAX-1, S16_MIN, -S16_MAX, false, true, false},
+	{-1, -S16_MAX, S16_MIN, S16_MAX-1, S16_MAX, false, false, false},
+	{-S16_MAX, -1, S16_MIN, S16_MIN+2, S16_MAX, false, false, false},
+
+	{1, S16_MIN, -S16_MAX, -S16_MAX, S16_MIN, false, true, false},
+	{S16_MIN, 1, -S16_MAX, S16_MAX, S16_MIN, false, true, false},
+	{1, S16_MAX, S16_MIN, S16_MIN+2, S16_MAX, true, false, false},
+	{S16_MAX, 1, S16_MIN, S16_MAX-1, S16_MAX, true, false, false},
+
+	{S16_MIN, S16_MIN, 0, 0, 0, true, false, true},
+	{S16_MAX, S16_MAX, -2, 0, 1, true, false, true},
+};
+DEFINE_TEST_ARRAY(s32) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S32_MAX, S32_MAX, -S32_MAX, 0, false, false, false},
+	{S32_MAX, 0, S32_MAX, S32_MAX, 0, false, false, false},
+	{0, S32_MIN, S32_MIN, S32_MIN, 0, false, true, false},	/* 0 - S32_MIN does not fit in s32 */
+	{S32_MIN, 0, S32_MIN, S32_MIN, 0, false, false, false},
+
+	{-1, S32_MIN, S32_MAX, S32_MAX, S32_MIN, true, false, true},	/* -1 * S32_MIN does not fit in s32 */
+	{S32_MIN, -1, S32_MAX, -S32_MAX, S32_MIN, true, false, true},
+	{-1, S32_MAX, S32_MAX-1, S32_MIN, -S32_MAX, false, false, false},
+	{S32_MAX, -1, S32_MAX-1, S32_MIN, -S32_MAX, false, true, false},
+	{-1, -S32_MAX, S32_MIN, S32_MAX-1, S32_MAX, false, false, false},
+	{-S32_MAX, -1, S32_MIN, S32_MIN+2, S32_MAX, false, false, false},
+
+	{1, S32_MIN, -S32_MAX, -S32_MAX, S32_MIN, false, true, false},
+	{S32_MIN, 1, -S32_MAX, S32_MAX, S32_MIN, false, true, false},
+	{1, S32_MAX, S32_MIN, S32_MIN+2, S32_MAX, true, false, false},
+	{S32_MAX, 1, S32_MIN, S32_MAX-1, S32_MAX, true, false, false},
+
+	{S32_MIN, S32_MIN, 0, 0, 0, true, false, true},
+	{S32_MAX, S32_MAX, -2, 0, 1, true, false, true},
+};
+DEFINE_TEST_ARRAY(s64) = {	/* columns: a, b, sum, diff, prod, s_of, d_of, p_of */
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S64_MAX, S64_MAX, -S64_MAX, 0, false, false, false},
+	{S64_MAX, 0, S64_MAX, S64_MAX, 0, false, false, false},
+	{0, S64_MIN, S64_MIN, S64_MIN, 0, false, true, false},	/* 0 - S64_MIN does not fit in s64 */
+	{S64_MIN, 0, S64_MIN, S64_MIN, 0, false, false, false},
+
+	{-1, S64_MIN, S64_MAX, S64_MAX, S64_MIN, true, false, true},	/* -1 * S64_MIN does not fit in s64 */
+	{S64_MIN, -1, S64_MAX, -S64_MAX, S64_MIN, true, false, true},
+	{-1, S64_MAX, S64_MAX-1, S64_MIN, -S64_MAX, false, false, false},
+	{S64_MAX, -1, S64_MAX-1, S64_MIN, -S64_MAX, false, true, false},
+	{-1, -S64_MAX, S64_MIN, S64_MAX-1, S64_MAX, false, false, false},
+	{-S64_MAX, -1, S64_MIN, S64_MIN+2, S64_MAX, false, false, false},
+
+	{1, S64_MIN, -S64_MAX, -S64_MAX, S64_MIN, false, true, false},
+	{S64_MIN, 1, -S64_MAX, S64_MAX, S64_MIN, false, true, false},
+	{1, S64_MAX, S64_MIN, S64_MIN+2, S64_MAX, true, false, false},
+	{S64_MAX, 1, S64_MIN, S64_MAX-1, S64_MAX, true, false, false},
+
+	{S64_MIN, S64_MIN, 0, 0, 0, true, false, true},
+	{S64_MAX, S64_MAX, -2, 0, 1, true, false, true},
+
+	{-1, -1, -2, 0, 1, false, false, false},	/* small magnitudes: nothing wraps in 64 bits */
+	{-1, -128, -129, 127, 128, false, false, false},
+	{-128, -1, -129, -127, 128, false, false, false},
+	{0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false},
+};
+
+#define check_one_op(t, fmt, op, sym, a, b, r, of) do {		\
+	t _r;			/* value the helper stored, in type t */	\
+	bool _of;		/* overflow indication the helper returned */	\
+	/* Needs an 'err' variable in the expanding scope; a, b, r and of are expanded more than once. */ \
+	_of = check_ ## op ## _overflow(a, b, &_r);		\
+	if (_of != of) {					\
+		pr_warn("expected "fmt" "sym" "fmt		\
+			" to%s overflow (type %s)\n",		\
+			a, b, of ? "" : " not", #t);		\
+		err = 1;					\
+	}							\
+	if (_r != r) {		/* the stored value is compared whether or not overflow was expected */ \
+		pr_warn("expected "fmt" "sym" "fmt" == "	\
+			fmt", got "fmt" (type %s)\n",		\
+			a, b, r, _r, #t);			\
+		err = 1;					\
+	}							\
+} while (0)
+
+#define DEFINE_TEST_FUNC(t, fmt)					\
+static int __init do_test_ ## t(const struct test_ ## t *p)		\
+{							   		\
+	int err = 0;							\
+	/* add and mul are also checked with a and b swapped. */	\
+	check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of);	\
+	check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of);	\
+	check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of);	\
+	check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of);	\
+	check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of);	\
+									\
+	return err;							\
+}									\
+/* Run every entry of <t>_tests; returns nonzero on any failure. */	\
+static int __init test_ ## t ## _overflow(void) {			\
+	int err = 0;							\
+	unsigned i;							\
+									\
+	pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests));	\
+	for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i)			\
+		err |= do_test_ ## t(&t ## _tests[i]);			\
+	return err;							\
+}
+
+DEFINE_TEST_FUNC(u8, "%d");
+DEFINE_TEST_FUNC(s8, "%d");
+DEFINE_TEST_FUNC(u16, "%d");
+DEFINE_TEST_FUNC(s16, "%d");
+DEFINE_TEST_FUNC(u32, "%u");
+DEFINE_TEST_FUNC(s32, "%d");
+#if BITS_PER_LONG == 64
+DEFINE_TEST_FUNC(u64, "%llu");
+DEFINE_TEST_FUNC(s64, "%lld");
+#endif /* BITS_PER_LONG == 64 */
+
+static int __init test_overflow_calculation(void)
+{
+	int err = 0;
+	/* Run all per-type tables; failures are OR-ed, none short-circuits. */
+	err |= test_u8_overflow();
+	err |= test_s8_overflow();
+	err |= test_u16_overflow();
+	err |= test_s16_overflow();
+	err |= test_u32_overflow();
+	err |= test_s32_overflow();
+#if BITS_PER_LONG == 64
+	err |= test_u64_overflow();
+	err |= test_s64_overflow();
+#endif /* BITS_PER_LONG == 64 */
+
+	return err;
+}
+
+/*
+ * Deal with the various forms of allocator arguments. The digits are the
+ * want_arg, want_gfp and want_node flags given to DEFINE_TEST_ALLOC().
+ */
+#define alloc010(alloc, arg, sz) alloc(sz, GFP_KERNEL)
+#define alloc011(alloc, arg, sz) alloc(sz, GFP_KERNEL, NUMA_NO_NODE)
+#define alloc000(alloc, arg, sz) alloc(sz)
+#define alloc001(alloc, arg, sz) alloc(sz, NUMA_NO_NODE)
+#define alloc110(alloc, arg, sz) alloc(arg, sz, GFP_KERNEL)
+#define free0(free, arg, ptr)	 free(ptr)
+#define free1(free, arg, ptr)	 free(arg, ptr)
+
+/* Wrap around to 8K (holds for 64-bit size_t with 4K pages) */
+#define TEST_SIZE		(9 << PAGE_SHIFT)
+
+#define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\
+static int __init test_ ## func (void *arg)				\
+{									\
+	volatile size_t a = TEST_SIZE;					\
+	volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1;			\
+	void *ptr;							\
+									\
+	/* Tiny allocation test: 1 byte must succeed. */		\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\
+	if (!ptr) {							\
+		pr_warn(#func " failed regular allocation?!\n");	\
+		return 1;						\
+	}								\
+	free ## want_arg (free_func, arg, ptr);				\
+									\
+	/* Wrapped test: a * b overflows, must still succeed. */	\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
+							  a * b);	\
+	if (!ptr) {							\
+		pr_warn(#func " unexpectedly failed bad wrapping?!\n");	\
+		return 1;						\
+	}								\
+	free ## want_arg (free_func, arg, ptr);				\
+									\
+	/* Saturated test: array_size(a, b) request must fail. */	\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
+						   array_size(a, b));	\
+	if (ptr) {							\
+		pr_warn(#func " missed saturation!\n");			\
+		free ## want_arg (free_func, arg, ptr);			\
+		return 1;						\
+	}								\
+	pr_info(#func " detected saturation\n");			\
+	return 0;							\
+}
+
+/*
+ * Allocator uses a trailing node argument --------+  (e.g. kmalloc_node())
+ * Allocator uses the gfp_t argument -----------+  |  (e.g. kmalloc())
+ * Allocator uses a special leading argument +  |  |  (e.g. devm_kmalloc())
+ * (want_arg, want_gfp, want_node, in order) |  |  |
+ */
+DEFINE_TEST_ALLOC(kmalloc,	 kfree,	     0, 1, 0);
+DEFINE_TEST_ALLOC(kmalloc_node,	 kfree,	     0, 1, 1);
+DEFINE_TEST_ALLOC(kzalloc,	 kfree,	     0, 1, 0);
+DEFINE_TEST_ALLOC(kzalloc_node,  kfree,	     0, 1, 1);
+DEFINE_TEST_ALLOC(vmalloc,	 vfree,	     0, 0, 0);
+DEFINE_TEST_ALLOC(vmalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(vzalloc,	 vfree,	     0, 0, 0);
+DEFINE_TEST_ALLOC(vzalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(kvmalloc,	 kvfree,     0, 1, 0);
+DEFINE_TEST_ALLOC(kvmalloc_node, kvfree,     0, 1, 1);
+DEFINE_TEST_ALLOC(kvzalloc,	 kvfree,     0, 1, 0);
+DEFINE_TEST_ALLOC(kvzalloc_node, kvfree,     0, 1, 1);
+DEFINE_TEST_ALLOC(devm_kmalloc,  devm_kfree, 1, 1, 0);
+DEFINE_TEST_ALLOC(devm_kzalloc,  devm_kfree, 1, 1, 0);
+
+static int __init test_overflow_allocation(void)
+{
+	const char device_name[] = "overflow-test";
+	struct device *dev;
+	int err = 0;
+
+	/* Create dummy device for devm_kmalloc()-family tests. */
+	dev = root_device_register(device_name);
+	if (IS_ERR(dev)) {
+		pr_warn("Cannot register test device\n");
+		return 1;
+	}
+	/* Allocators without a leading argument ignore it; pass NULL. */
+	err |= test_kmalloc(NULL);
+	err |= test_kmalloc_node(NULL);
+	err |= test_kzalloc(NULL);
+	err |= test_kzalloc_node(NULL);
+	err |= test_kvmalloc(NULL);
+	err |= test_kvmalloc_node(NULL);
+	err |= test_kvzalloc(NULL);
+	err |= test_kvzalloc_node(NULL);
+	err |= test_vmalloc(NULL);
+	err |= test_vmalloc_node(NULL);
+	err |= test_vzalloc(NULL);
+	err |= test_vzalloc_node(NULL);
+	err |= test_devm_kmalloc(dev);
+	err |= test_devm_kzalloc(dev);
+
+	device_unregister(dev);
+
+	return err;
+}
+
+static int __init test_module_init(void)
+{
+	int err = 0;
+
+	err |= test_overflow_calculation();
+	err |= test_overflow_allocation();
+	/* Fold any failure into -EINVAL for the init return value. */
+	if (err) {
+		pr_warn("FAIL!\n");
+		err = -EINVAL;
+	} else {
+		pr_info("all tests passed\n");
+	}
+
+	return err;
+}
+
+static void __exit test_module_exit(void)
+{ /* Nothing to undo: all tests run from test_module_init(). */ }
+
+module_init(test_module_init);
+module_exit(test_module_exit);
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 71ebfa43ad05..cea592f402ed 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -204,7 +204,7 @@ test_string(void)
 #if BITS_PER_LONG == 64
 
 #define PTR_WIDTH 16
-#define PTR ((void *)0xffff0123456789ab)
+#define PTR ((void *)0xffff0123456789abUL)
 #define PTR_STR "ffff0123456789ab"
 #define ZEROS "00000000"	/* hex 32 zero bits */
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f4000c137dbe..fb6968109113 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -285,12 +285,14 @@ static int __init test_rhltable(unsigned int entries)
 	if (entries == 0)
 		entries = 1;
 
-	rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries);
+	rhl_test_objects = vzalloc(array_size(entries,
+					      sizeof(*rhl_test_objects)));
 	if (!rhl_test_objects)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long));
+	obj_in_table = vzalloc(array_size(sizeof(unsigned long),
+					  BITS_TO_LONGS(entries)));
 	if (!obj_in_table)
 		goto out_free;
 
@@ -706,7 +708,8 @@ static int __init test_rht_init(void)
 	test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries);
 	test_rht_params.nelem_hint = size;
 
-	objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj));
+	objs = vzalloc(array_size(sizeof(struct test_obj),
+				  test_rht_params.max_size + 1));
 	if (!objs)
 		return -ENOMEM;
 
@@ -753,10 +756,10 @@ static int __init test_rht_init(void)
 	pr_info("Testing concurrent rhashtable access from %d threads\n",
 	        tcount);
 	sema_init(&prestart_sem, 1 - tcount);
-	tdata = vzalloc(tcount * sizeof(struct thread_data));
+	tdata = vzalloc(array_size(tcount, sizeof(struct thread_data)));
 	if (!tdata)
 		return -ENOMEM;
-	objs  = vzalloc(tcount * entries * sizeof(struct test_obj));
+	objs  = vzalloc(array3_size(sizeof(struct test_obj), tcount, entries));
 	if (!objs) {
 		vfree(tdata);
 		return -ENOMEM;
diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c
index 25ca2d4c1e19..597998169a96 100644
--- a/lib/ucmpdi2.c
+++ b/lib/ucmpdi2.c
@@ -17,7 +17,7 @@
 #include <linux/module.h>
 #include <linux/libgcc.h>
 
-word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b)
 {
 	const DWunion au = {.ll = a};
 	const DWunion bu = {.ll = b};
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index d7e06b28de38..0a559a42359b 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -112,3 +112,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
 	return j;
 }
 EXPORT_SYMBOL(ucs2_as_utf8);
+
+MODULE_LICENSE("GPL v2");
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 23920c5ff728..a48aaa79d352 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -703,6 +703,22 @@ char *symbol_string(char *buf, char *end, void *ptr,
 #endif
 }
 
+static const struct printf_spec default_str_spec = {
+	.field_width = -1,
+	.precision = -1,
+};
+
+static const struct printf_spec default_flag_spec = {
+	.base = 16,
+	.precision = -1,
+	.flags = SPECIAL | SMALL,
+};
+
+static const struct printf_spec default_dec_spec = {
+	.base = 10,
+	.precision = -1,
+};
+
 static noinline_for_stack
 char *resource_string(char *buf, char *end, struct resource *res,
 		      struct printf_spec spec, const char *fmt)
@@ -732,21 +748,11 @@ char *resource_string(char *buf, char *end, struct resource *res,
 		.precision = -1,
 		.flags = SMALL | ZEROPAD,
 	};
-	static const struct printf_spec dec_spec = {
-		.base = 10,
-		.precision = -1,
-		.flags = 0,
-	};
 	static const struct printf_spec str_spec = {
 		.field_width = -1,
 		.precision = 10,
 		.flags = LEFT,
 	};
-	static const struct printf_spec flag_spec = {
-		.base = 16,
-		.precision = -1,
-		.flags = SPECIAL | SMALL,
-	};
 
 	/* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8)
 	 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
@@ -770,10 +776,10 @@ char *resource_string(char *buf, char *end, struct resource *res,
 		specp = &mem_spec;
 	} else if (res->flags & IORESOURCE_IRQ) {
 		p = string(p, pend, "irq ", str_spec);
-		specp = &dec_spec;
+		specp = &default_dec_spec;
 	} else if (res->flags & IORESOURCE_DMA) {
 		p = string(p, pend, "dma ", str_spec);
-		specp = &dec_spec;
+		specp = &default_dec_spec;
 	} else if (res->flags & IORESOURCE_BUS) {
 		p = string(p, pend, "bus ", str_spec);
 		specp = &bus_spec;
@@ -803,7 +809,7 @@ char *resource_string(char *buf, char *end, struct resource *res,
 			p = string(p, pend, " disabled", str_spec);
 	} else {
 		p = string(p, pend, " flags ", str_spec);
-		p = number(p, pend, res->flags, flag_spec);
+		p = number(p, pend, res->flags, default_flag_spec);
 	}
 	*p++ = ']';
 	*p = '\0';
@@ -913,9 +919,6 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
 	int cur, rbot, rtop;
 	bool first = true;
 
-	/* reused to print numbers */
-	spec = (struct printf_spec){ .base = 10 };
-
 	rbot = cur = find_first_bit(bitmap, nr_bits);
 	while (cur < nr_bits) {
 		rtop = cur;
@@ -930,13 +933,13 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap,
 		}
 		first = false;
 
-		buf = number(buf, end, rbot, spec);
+		buf = number(buf, end, rbot, default_dec_spec);
 		if (rbot < rtop) {
 			if (buf < end)
 				*buf = '-';
 			buf++;
 
-			buf = number(buf, end, rtop, spec);
+			buf = number(buf, end, rtop, default_dec_spec);
 		}
 
 		rbot = cur;
@@ -1354,11 +1357,9 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
-int kptr_restrict __read_mostly;
-
 static noinline_for_stack
-char *restricted_pointer(char *buf, char *end, const void *ptr,
-			 struct printf_spec spec)
+char *pointer_string(char *buf, char *end, const void *ptr,
+		     struct printf_spec spec)
 {
 	spec.base = 16;
 	spec.flags |= SMALL;
@@ -1367,6 +1368,15 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
 		spec.flags |= ZEROPAD;
 	}
 
+	return number(buf, end, (unsigned long int)ptr, spec);
+}
+
+int kptr_restrict __read_mostly;
+
+static noinline_for_stack
+char *restricted_pointer(char *buf, char *end, const void *ptr,
+			 struct printf_spec spec)
+{
 	switch (kptr_restrict) {
 	case 0:
 		/* Always print %pK values */
@@ -1378,8 +1388,11 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
 		 * kptr_restrict==1 cannot be used in IRQ context
 		 * because its test for CAP_SYSLOG would be meaningless.
 		 */
-		if (in_irq() || in_serving_softirq() || in_nmi())
+		if (in_irq() || in_serving_softirq() || in_nmi()) {
+			if (spec.field_width == -1)
+				spec.field_width = 2 * sizeof(ptr);
 			return string(buf, end, "pK-error", spec);
+		}
 
 		/*
 		 * Only print the real pointer value if the current
@@ -1404,7 +1417,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
 		break;
 	}
 
-	return number(buf, end, (unsigned long)ptr, spec);
+	return pointer_string(buf, end, ptr, spec);
 }
 
 static noinline_for_stack
@@ -1456,9 +1469,6 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
 		return string(buf, end, NULL, spec);
 
 	switch (fmt[1]) {
-	case 'r':
-		return number(buf, end, clk_get_rate(clk), spec);
-
 	case 'n':
 	default:
 #ifdef CONFIG_COMMON_CLK
@@ -1474,23 +1484,13 @@ char *format_flags(char *buf, char *end, unsigned long flags,
 					const struct trace_print_flags *names)
 {
 	unsigned long mask;
-	const struct printf_spec strspec = {
-		.field_width = -1,
-		.precision = -1,
-	};
-	const struct printf_spec numspec = {
-		.flags = SPECIAL|SMALL,
-		.field_width = -1,
-		.precision = -1,
-		.base = 16,
-	};
 
 	for ( ; flags && names->name; names++) {
 		mask = names->mask;
 		if ((flags & mask) != mask)
 			continue;
 
-		buf = string(buf, end, names->name, strspec);
+		buf = string(buf, end, names->name, default_str_spec);
 
 		flags &= ~mask;
 		if (flags) {
@@ -1501,7 +1501,7 @@ char *format_flags(char *buf, char *end, unsigned long flags,
 	}
 
 	if (flags)
-		buf = number(buf, end, flags, numspec);
+		buf = number(buf, end, flags, default_flag_spec);
 
 	return buf;
 }
@@ -1548,22 +1548,18 @@ char *device_node_gen_full_name(const struct device_node *np, char *buf, char *e
 {
 	int depth;
 	const struct device_node *parent = np->parent;
-	static const struct printf_spec strspec = {
-		.field_width = -1,
-		.precision = -1,
-	};
 
 	/* special case for root node */
 	if (!parent)
-		return string(buf, end, "/", strspec);
+		return string(buf, end, "/", default_str_spec);
 
 	for (depth = 0; parent->parent; depth++)
 		parent = parent->parent;
 
 	for ( ; depth >= 0; depth--) {
-		buf = string(buf, end, "/", strspec);
+		buf = string(buf, end, "/", default_str_spec);
 		buf = string(buf, end, device_node_name_for_depth(np, depth),
-			     strspec);
+			     default_str_spec);
 	}
 	return buf;
 }
@@ -1655,20 +1651,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
 	return widen_string(buf, buf - buf_start, end, spec);
 }
 
-static noinline_for_stack
-char *pointer_string(char *buf, char *end, const void *ptr,
-		     struct printf_spec spec)
-{
-	spec.base = 16;
-	spec.flags |= SMALL;
-	if (spec.field_width == -1) {
-		spec.field_width = 2 * sizeof(ptr);
-		spec.flags |= ZEROPAD;
-	}
-
-	return number(buf, end, (unsigned long int)ptr, spec);
-}
-
 static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
 static siphash_key_t ptr_key __read_mostly;
 
@@ -1710,13 +1692,13 @@ early_initcall(initialize_ptr_random);
 /* Maps a pointer to a 32 bit unique identifier. */
 static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
 {
+	const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
 	unsigned long hashval;
-	const int default_width = 2 * sizeof(ptr);
 
 	if (static_branch_unlikely(&not_filled_random_ptr_key)) {
-		spec.field_width = default_width;
+		spec.field_width = 2 * sizeof(ptr);
 		/* string length must be less than default_width */
-		return string(buf, end, "(ptrval)", spec);
+		return string(buf, end, str, spec);
 	}
 
 #ifdef CONFIG_64BIT
@@ -1729,15 +1711,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
 #else
 	hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
 #endif
-
-	spec.flags |= SMALL;
-	if (spec.field_width == -1) {
-		spec.field_width = default_width;
-		spec.flags |= ZEROPAD;
-	}
-	spec.base = 16;
-
-	return number(buf, end, hashval, spec);
+	return pointer_string(buf, end, (const void *)hashval, spec);
 }
 
 /*
@@ -1750,10 +1724,10 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers with offset
- * - 'f' For simple symbolic function names without offset
- * - 'S' For symbolic direct pointers with offset
- * - 's' For symbolic direct pointers without offset
+ * - 'S' For symbolic direct pointers (or function descriptors) with offset
+ * - 's' For symbolic direct pointers (or function descriptors) without offset
+ * - 'F' Same as 'S'
+ * - 'f' Same as 's'
  * - '[FfSs]R' as above with __builtin_extract_return_addr() translation
  * - 'B' For backtraced symbolic direct pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
@@ -1850,10 +1824,6 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
  * ** When making changes please also update:
  *	Documentation/core-api/printk-formats.rst
  *
- * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
- * function pointers are really function descriptors, which contain a
- * pointer to the real address.
- *
  * Note: The default behaviour (unadorned %p) is to hash the address,
  * rendering it useful as a unique identifier.
  */
@@ -2129,6 +2099,7 @@ qualifier:
 
 	case 'x':
 		spec->flags |= SMALL;
+		/* fall through */
 
 	case 'X':
 		spec->base = 16;
@@ -3087,8 +3058,10 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 			break;
 		case 'i':
 			base = 0;
+			/* fall through */
 		case 'd':
 			is_sign = true;
+			/* fall through */
 		case 'u':
 			break;
 		case '%':