author | Florian Tobias Schandinat <FlorianSchandinat@gmx.de> | 2011-08-17 16:14:01 +0000
committer | Florian Tobias Schandinat <FlorianSchandinat@gmx.de> | 2011-08-17 16:14:01 +0000
commit | 94bd217e2d683719ab21a4ac117d8a1b91cbedc9
tree | f2d506818cc1643ca816f609518ab5d1cdb28e66 /lib
parent | 5dd72f12df00cbabc16de770b79b17ced63138be
parent | 93ee7a9340d64f20295aacc3fb6a22b759323280
Merge commit 'v3.1-rc2' into HEAD
Diffstat (limited to 'lib')
53 files changed, 4425 insertions(+), 776 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig index 0ee67e08ad3e..6c695ff9caba 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -19,13 +19,6 @@ config RATIONAL config GENERIC_FIND_FIRST_BIT bool -config GENERIC_FIND_NEXT_BIT - bool - -config GENERIC_FIND_LAST_BIT - bool - default y - config CRC_CCITT tristate "CRC-CCITT functions" help @@ -86,6 +79,13 @@ config LIBCRC32C require M here. See Castagnoli93. Module will be libcrc32c. +config CRC8 + tristate "CRC8 function" + help + This option provides CRC8 function. Drivers may select this + when they need to do cyclic redundancy check according CRC8 + algorithm. Module will be called crc8. + config AUDIT_GENERIC bool depends on AUDIT && !AUDIT_ARCH @@ -155,6 +155,45 @@ config REED_SOLOMON_DEC16 boolean # +# BCH support is selected if needed +# +config BCH + tristate + +config BCH_CONST_PARAMS + boolean + help + Drivers may select this option to force specific constant + values for parameters 'm' (Galois field order) and 't' + (error correction capability). Those specific values must + be set by declaring default values for symbols BCH_CONST_M + and BCH_CONST_T. + Doing so will enable extra compiler optimizations, + improving encoding and decoding performance up to 2x for + usual (m,t) values (typically such that m*t < 200). + When this option is selected, the BCH library supports + only a single (m,t) configuration. This is mainly useful + for NAND flash board drivers requiring known, fixed BCH + parameters. + +config BCH_CONST_M + int + range 5 15 + help + Constant value for Galois field order 'm'. If 'k' is the + number of data bits to protect, 'm' should be chosen such + that (k + m*t) <= 2**m - 1. + Drivers should declare a default value for this symbol if + they select option BCH_CONST_PARAMS. + +config BCH_CONST_T + int + help + Constant value for error correction capability in bits 't'. + Drivers should declare a default value for this symbol if + they select option BCH_CONST_PARAMS. + +# # Textsearch support is select'ed if needed # config TEXTSEARCH @@ -201,6 +240,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS depends on EXPERIMENTAL && BROKEN +config CPU_RMAP + bool + depends on SMP + # # Netlink attribute parsing support is select'ed if needed # @@ -217,6 +260,23 @@ config LRU_CACHE tristate config AVERAGE + bool "Averaging functions" + help + This option is provided for the case where no in-kernel-tree + modules require averaging functions, but a module built outside + the kernel tree does. Such modules that use library averaging + functions require Y here. + + If unsure, say N. + +config CORDIC + tristate "Cordic function" + help + The option provides arithmetic function using cordic algorithm + so its calculations are in fixed point. Modules can select this + when they require this function. Module will be called cordic. + +config LLIST bool endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2b97418c67e2..c0cb9c4bc46d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -7,7 +7,19 @@ config PRINTK_TIME included in printk output. This allows you to measure the interval between kernel operations, including bootup operations. This is useful for identifying long delays - in kernel startup. + in kernel startup. Or add printk.time=1 at boot-time. 
+ See Documentation/kernel-parameters.txt + +config DEFAULT_MESSAGE_LOGLEVEL + int "Default message log level (1-7)" + range 1 7 + default "4" + help + Default log level for printk statements with no specified priority. + + This was hard-coded to KERN_WARNING since at least 2.6.10 but folks + that are auditing their logs closely may want to set it to a lower + priority. config ENABLE_WARN_DEPRECATED bool "Enable __deprecated logic" @@ -102,11 +114,6 @@ config HEADERS_CHECK config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" - depends on UNDEFINED || (BLACKFIN) - default y - # This option is on purpose disabled for now. - # It will be enabled when we are down to a reasonable number - # of section mismatch warnings (< 10 for an allyesconfig build) help The section mismatch analysis checks if there are illegal references from one section to another section. @@ -176,6 +183,23 @@ config HARDLOCKUP_DETECTOR def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ !ARCH_HAS_NMI_WATCHDOG +config BOOTPARAM_HARDLOCKUP_PANIC + bool "Panic (Reboot) On Hard Lockups" + depends on LOCKUP_DETECTOR + help + Say Y here to enable the kernel to panic on "hard lockups", + which are bugs that cause the kernel to loop in kernel + mode with interrupts disabled for more than 60 seconds. + + Say N if unsure. + +config BOOTPARAM_HARDLOCKUP_PANIC_VALUE + int + depends on LOCKUP_DETECTOR + range 0 1 + default 0 if !BOOTPARAM_HARDLOCKUP_PANIC + default 1 if BOOTPARAM_HARDLOCKUP_PANIC + config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" depends on LOCKUP_DETECTOR @@ -203,7 +227,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL - default DETECT_SOFTLOCKUP + default LOCKUP_DETECTOR help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in @@ -215,6 +239,21 @@ config DETECT_HUNG_TASK enabled then all held locks will also be reported. This feature has negligible overhead. +config DEFAULT_HUNG_TASK_TIMEOUT + int "Default timeout for hung task detection (in seconds)" + depends on DETECT_HUNG_TASK + default 120 + help + This option controls the default timeout (in seconds) used + to determine when a task has become non-responsive and should + be considered hung. + + It can be adjusted at runtime via the kernel.hung_task_timeout + sysctl or by writing a value to /proc/sys/kernel/hung_task_timeout. + + A timeout of 0 disables the check. The default is two minutes. + Keeping the default should be fine in most cases. + config BOOTPARAM_HUNG_TASK_PANIC bool "Panic (Reboot) On Hung Tasks" depends on DETECT_HUNG_TASK @@ -314,7 +353,7 @@ config DEBUG_OBJECTS_WORK config DEBUG_OBJECTS_RCU_HEAD bool "Debug RCU callbacks objects" - depends on DEBUG_OBJECTS && PREEMPT + depends on DEBUG_OBJECTS help Enable this to turn on debugging of RCU list heads (call_rcu() usage). 
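The DEFAULT_MESSAGE_LOGLEVEL option added above only affects printk() calls that carry no explicit level prefix; a minimal sketch (hypothetical demo module, not part of this commit):

```c
#include <linux/kernel.h>
#include <linux/module.h>

static int __init loglevel_demo_init(void)
{
	/* Explicit level: logged at KERN_INFO regardless of the config. */
	printk(KERN_INFO "demo: explicit info message\n");

	/*
	 * No level prefix: logged at CONFIG_DEFAULT_MESSAGE_LOGLEVEL,
	 * which defaults to 4 (KERN_WARNING), matching the previously
	 * hard-coded behaviour.
	 */
	printk("demo: message at the default level\n");
	return 0;
}
module_init(loglevel_demo_init);
MODULE_LICENSE("GPL");
```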
@@ -375,9 +414,9 @@ config SLUB_STATS config DEBUG_KMEMLEAK bool "Kernel memory leak detector" depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ - (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE) + (X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE) - select DEBUG_FS if SYSFS + select DEBUG_FS select STACKTRACE if STACKTRACE_SUPPORT select KALLSYMS select CRC32 @@ -411,11 +450,9 @@ config DEBUG_KMEMLEAK_EARLY_LOG_SIZE config DEBUG_KMEMLEAK_TEST tristate "Simple test for the kernel memory leak detector" - depends on DEBUG_KMEMLEAK + depends on DEBUG_KMEMLEAK && m help - Say Y or M here to build a test for the kernel memory leak - detector. This option enables a module that explicitly leaks - memory. + This option enables a module that explicitly leaks memory. If unsure, say N. @@ -470,15 +507,6 @@ config DEBUG_MUTEXES This feature allows mutex semantics violations to be detected and reported. -config BKL - bool "Big Kernel Lock" if (SMP || PREEMPT) - default y - help - This is the traditional lock that is used in old code instead - of proper locking. All drivers that use the BKL should depend - on this symbol. - Say Y here unless you are working on removing the BKL. - config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -620,12 +648,15 @@ config TRACE_IRQFLAGS Enables hooks to interrupt enabling and disabling for either tracing or lock debugging. -config DEBUG_SPINLOCK_SLEEP - bool "Spinlock debugging: sleep-inside-spinlock checking" +config DEBUG_ATOMIC_SLEEP + bool "Sleep inside atomic section checking" + select PREEMPT_COUNT depends on DEBUG_KERNEL help If you say Y here, various routines which may sleep will become very - noisy if they are called with a spinlock held. + noisy if they are called inside atomic sections: when a spinlock is + held, inside an rcu read side critical section, inside preempt disabled + sections, inside an interrupt, etc... config DEBUG_LOCKING_API_SELFTESTS bool "Locking API boot-time self-tests" @@ -642,6 +673,15 @@ config STACKTRACE bool depends on STACKTRACE_SUPPORT +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL + help + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T and sysrq-P debug output. + + This option will slow down process creation somewhat. + config DEBUG_KOBJECT bool "kobject debugging" depends on DEBUG_KERNEL @@ -660,7 +700,7 @@ config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ - FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 + FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 || TILE default y help Say Y here to make BUG() panics output the file name and line number @@ -829,7 +869,7 @@ config BOOT_PRINTK_DELAY system, and then set "lpj=M" before setting "boot_delay=N". NOTE: Using this option may adversely affect SMP systems. I.e., processors other than the first one may not boot up. - BOOT_PRINTK_DELAY also may cause DETECT_SOFTLOCKUP to detect + BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect what it believes to be lockup conditions. config RCU_TORTURE_TEST @@ -863,22 +903,9 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. 
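To make the DEBUG_SPINLOCK_SLEEP to DEBUG_ATOMIC_SLEEP rename above concrete, this is the bug class it reports, shown as a deliberately broken sketch (hypothetical code, not from this diff):

```c
#include <linux/spinlock.h>
#include <linux/delay.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_buggy_update(void)
{
	spin_lock(&demo_lock);
	/*
	 * BUG: msleep() may sleep, but holding a spinlock puts us in an
	 * atomic section. With DEBUG_ATOMIC_SLEEP (which now selects
	 * PREEMPT_COUNT), the might_sleep() checks inside sleeping
	 * primitives print a noisy "sleeping function called from
	 * invalid context" warning here.
	 */
	msleep(10);
	spin_unlock(&demo_lock);
}
```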
-config RCU_CPU_STALL_DETECTOR - bool "Check for stalled CPUs delaying RCU grace periods" - depends on TREE_RCU || TREE_PREEMPT_RCU - default y - help - This option causes RCU to printk information on which - CPUs are delaying the current grace period, but only when - the grace period extends for excessive time periods. - - Say N if you want to disable such checks. - - Say Y if you are unsure. - config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" - depends on RCU_CPU_STALL_DETECTOR + depends on TREE_RCU || TREE_PREEMPT_RCU range 3 300 default 60 help @@ -887,22 +914,9 @@ config RCU_CPU_STALL_TIMEOUT RCU grace period persists, additional CPU stall warnings are printed at more widely spaced intervals. -config RCU_CPU_STALL_DETECTOR_RUNNABLE - bool "RCU CPU stall checking starts automatically at boot" - depends on RCU_CPU_STALL_DETECTOR - default y - help - If set, start checking for RCU CPU stalls immediately on - boot. Otherwise, RCU CPU stall checking must be manually - enabled. - - Say Y if you are unsure. - - Say N if you wish to suppress RCU CPU stall checking during boot. - config RCU_CPU_STALL_VERBOSE bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" - depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU + depends on TREE_PREEMPT_RCU default y help This option causes RCU to printk detailed per-task information @@ -981,6 +995,17 @@ config DEBUG_FORCE_WEAK_PER_CPU To ensure that generic code follows the above rules, this option forces all percpu variables to be defined as weak. +config DEBUG_PER_CPU_MAPS + bool "Debug access to per_cpu maps" + depends on DEBUG_KERNEL + depends on SMP + help + Say Y to verify that the per_cpu map being accessed has + been set up. This adds a fair amount of code to kernel memory + and decreases performance. + + Say N if unsure. 
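For reference, the accessors that DEBUG_PER_CPU_MAPS instruments look like this; a minimal sketch with a hypothetical counter:

```c
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static void demo_count_event(void)
{
	/* Access this CPU's copy; disables preemption while held. */
	get_cpu_var(demo_counter)++;
	put_cpu_var(demo_counter);
}

static unsigned long demo_read_counter(int cpu)
{
	/*
	 * per_cpu() indexes another CPU's copy; DEBUG_PER_CPU_MAPS adds
	 * run-time checks that the per-CPU area being addressed has
	 * actually been set up.
	 */
	return per_cpu(demo_counter, cpu);
}
```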
+ config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_FS @@ -1236,3 +1261,6 @@ source "samples/Kconfig" source "lib/Kconfig.kgdb" source "lib/Kconfig.kmemcheck" + +config TEST_KSTRTOX + tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index cbb774f7d41d..d5d175c8a6ca 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,9 +10,9 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o prio_tree.o \ - sha1.o irq_regs.o reciprocal_div.o argv_split.o \ + sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o find_next_bit.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -21,7 +21,10 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ + bsearch.o find_last_bit.o +obj-y += kstrtox.o +obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -36,14 +39,10 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o -lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o -lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o -obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o -obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o @@ -62,11 +61,13 @@ obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o +obj-$(CONFIG_CRC8) += crc8.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ +obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_XZ_DEC) += xz/ @@ -110,6 +111,12 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o obj-$(CONFIG_AVERAGE) += average.o +obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o + +obj-$(CONFIG_CORDIC) += cordic.o + +obj-$(CONFIG_LLIST) += llist.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/atomic64.c b/lib/atomic64.c index a21c12bc727c..e12ae0dd08a8 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -14,7 +14,7 @@ #include <linux/spinlock.h> #include <linux/init.h> #include <linux/module.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * We use a hashed array of spinlocks to provide exclusive access diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 44524cc8c32a..0c33cde2a1e6 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -10,7 +10,7 @@ */ #include <linux/init.h> #include <linux/kernel.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) static __init int test_atomic64(void) diff --git a/lib/audit.c 
b/lib/audit.c index 8e7dc1c63aa9..76bbed4a20e5 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -36,8 +36,10 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { switch(syscall) { +#ifdef __NR_open case __NR_open: return 2; +#endif #ifdef __NR_openat case __NR_openat: return 3; diff --git a/lib/bch.c b/lib/bch.c new file mode 100644 index 000000000000..bc89dfe4d1b3 --- /dev/null +++ b/lib/bch.c @@ -0,0 +1,1368 @@ +/* + * Generic binary BCH encoding/decoding library + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Copyright © 2011 Parrot S.A. + * + * Author: Ivan Djelic <ivan.djelic@parrot.com> + * + * Description: + * + * This library provides runtime configurable encoding/decoding of binary + * Bose-Chaudhuri-Hocquenghem (BCH) codes. + * + * Call init_bch to get a pointer to a newly allocated bch_control structure for + * the given m (Galois field order), t (error correction capability) and + * (optional) primitive polynomial parameters. + * + * Call encode_bch to compute and store ecc parity bytes to a given buffer. + * Call decode_bch to detect and locate errors in received data. + * + * On systems supporting hw BCH features, intermediate results may be provided + * to decode_bch in order to skip certain steps. See decode_bch() documentation + * for details. + * + * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of + * parameters m and t; thus allowing extra compiler optimizations and providing + * better (up to 2x) encoding performance. Using this option makes sense when + * (m,t) are fixed and known in advance, e.g. when using BCH error correction + * on a particular NAND flash device. + * + * Algorithmic details: + * + * Encoding is performed by processing 32 input bits in parallel, using 4 + * remainder lookup tables. + * + * The final stage of decoding involves the following internal steps: + * a. Syndrome computation + * b. Error locator polynomial computation using Berlekamp-Massey algorithm + * c. Error locator root finding (by far the most expensive step) + * + * In this implementation, step c is not performed using the usual Chien search. + * Instead, an alternative approach described in [1] is used. It consists in + * factoring the error locator polynomial using the Berlekamp Trace algorithm + * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial + * solving techniques [2] are used. The resulting algorithm, called BTZ, yields + * much better performance than Chien search for usual (m,t) values (typically + * m >= 13, t < 32, see [1]). + * + * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields + * of characteristic 2, in: Western European Workshop on Research in Cryptology + * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear. + * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over + * finite fields GF(2^q). 
In Rapport de recherche INRIA no 2829, 1996. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/bitops.h> +#include <asm/byteorder.h> +#include <linux/bch.h> + +#if defined(CONFIG_BCH_CONST_PARAMS) +#define GF_M(_p) (CONFIG_BCH_CONST_M) +#define GF_T(_p) (CONFIG_BCH_CONST_T) +#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) +#else +#define GF_M(_p) ((_p)->m) +#define GF_T(_p) ((_p)->t) +#define GF_N(_p) ((_p)->n) +#endif + +#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) +#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) + +#ifndef dbg +#define dbg(_fmt, args...) do {} while (0) +#endif + +/* + * represent a polynomial over GF(2^m) + */ +struct gf_poly { + unsigned int deg; /* polynomial degree */ + unsigned int c[0]; /* polynomial terms */ +}; + +/* given its degree, compute a polynomial size in bytes */ +#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int)) + +/* polynomial of degree 1 */ +struct gf_poly_deg1 { + struct gf_poly poly; + unsigned int c[2]; +}; + +/* + * same as encode_bch(), but process input data one byte at a time + */ +static void encode_bch_unaligned(struct bch_control *bch, + const unsigned char *data, unsigned int len, + uint32_t *ecc) +{ + int i; + const uint32_t *p; + const int l = BCH_ECC_WORDS(bch)-1; + + while (len--) { + p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff); + + for (i = 0; i < l; i++) + ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++); + + ecc[l] = (ecc[l] << 8)^(*p); + } +} + +/* + * convert ecc bytes to aligned, zero-padded 32-bit ecc words + */ +static void load_ecc8(struct bch_control *bch, uint32_t *dst, + const uint8_t *src) +{ + uint8_t pad[4] = {0, 0, 0, 0}; + unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; + + for (i = 0; i < nwords; i++, src += 4) + dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3]; + + memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords); + dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3]; +} + +/* + * convert 32-bit ecc words to ecc bytes + */ +static void store_ecc8(struct bch_control *bch, uint8_t *dst, + const uint32_t *src) +{ + uint8_t pad[4]; + unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; + + for (i = 0; i < nwords; i++) { + *dst++ = (src[i] >> 24); + *dst++ = (src[i] >> 16) & 0xff; + *dst++ = (src[i] >> 8) & 0xff; + *dst++ = (src[i] >> 0) & 0xff; + } + pad[0] = (src[nwords] >> 24); + pad[1] = (src[nwords] >> 16) & 0xff; + pad[2] = (src[nwords] >> 8) & 0xff; + pad[3] = (src[nwords] >> 0) & 0xff; + memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords); +} + +/** + * encode_bch - calculate BCH ecc parity of data + * @bch: BCH control structure + * @data: data to encode + * @len: data length in bytes + * @ecc: ecc parity data, must be initialized by caller + * + * The @ecc parity array is used both as input and output parameter, in order to + * allow incremental computations. It should be of the size indicated by member + * @ecc_bytes of @bch, and should be initialized to 0 before the first call. + * + * The exact number of computed ecc parity bits is given by member @ecc_bits of + * @bch; it may be less than m*t for large values of t. 
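Because @ecc is both input and output, parity can be accumulated across scattered buffers; a minimal sketch, assuming bch came from init_bch() and the fragment buffers are hypothetical:

```c
#include <linux/bch.h>
#include <linux/string.h>

/* Accumulate BCH parity over two data fragments (hypothetical helper). */
static void demo_encode_split(struct bch_control *bch,
			      const uint8_t *frag1, unsigned int len1,
			      const uint8_t *frag2, unsigned int len2,
			      uint8_t *ecc /* bch->ecc_bytes long */)
{
	memset(ecc, 0, bch->ecc_bytes);    /* required initial value */
	encode_bch(bch, frag1, len1, ecc); /* parity of frag1 */
	encode_bch(bch, frag2, len2, ecc); /* parity of frag1 || frag2 */
}
```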
+ */ +void encode_bch(struct bch_control *bch, const uint8_t *data, + unsigned int len, uint8_t *ecc) +{ + const unsigned int l = BCH_ECC_WORDS(bch)-1; + unsigned int i, mlen; + unsigned long m; + uint32_t w, r[l+1]; + const uint32_t * const tab0 = bch->mod8_tab; + const uint32_t * const tab1 = tab0 + 256*(l+1); + const uint32_t * const tab2 = tab1 + 256*(l+1); + const uint32_t * const tab3 = tab2 + 256*(l+1); + const uint32_t *pdata, *p0, *p1, *p2, *p3; + + if (ecc) { + /* load ecc parity bytes into internal 32-bit buffer */ + load_ecc8(bch, bch->ecc_buf, ecc); + } else { + memset(bch->ecc_buf, 0, sizeof(r)); + } + + /* process first unaligned data bytes */ + m = ((unsigned long)data) & 3; + if (m) { + mlen = (len < (4-m)) ? len : 4-m; + encode_bch_unaligned(bch, data, mlen, bch->ecc_buf); + data += mlen; + len -= mlen; + } + + /* process 32-bit aligned data words */ + pdata = (uint32_t *)data; + mlen = len/4; + data += 4*mlen; + len -= 4*mlen; + memcpy(r, bch->ecc_buf, sizeof(r)); + + /* + * split each 32-bit word into 4 polynomials of weight 8 as follows: + * + * 31 ...24 23 ...16 15 ... 8 7 ... 0 + * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt + * tttttttt mod g = r0 (precomputed) + * zzzzzzzz 00000000 mod g = r1 (precomputed) + * yyyyyyyy 00000000 00000000 mod g = r2 (precomputed) + * xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed) + * xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3 + */ + while (mlen--) { + /* input data is read in big-endian format */ + w = r[0]^cpu_to_be32(*pdata++); + p0 = tab0 + (l+1)*((w >> 0) & 0xff); + p1 = tab1 + (l+1)*((w >> 8) & 0xff); + p2 = tab2 + (l+1)*((w >> 16) & 0xff); + p3 = tab3 + (l+1)*((w >> 24) & 0xff); + + for (i = 0; i < l; i++) + r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i]; + + r[l] = p0[l]^p1[l]^p2[l]^p3[l]; + } + memcpy(bch->ecc_buf, r, sizeof(r)); + + /* process last unaligned bytes */ + if (len) + encode_bch_unaligned(bch, data, len, bch->ecc_buf); + + /* store ecc parity bytes into original parity buffer */ + if (ecc) + store_ecc8(bch, ecc, bch->ecc_buf); +} +EXPORT_SYMBOL_GPL(encode_bch); + +static inline int modulo(struct bch_control *bch, unsigned int v) +{ + const unsigned int n = GF_N(bch); + while (v >= n) { + v -= n; + v = (v & n) + (v >> GF_M(bch)); + } + return v; +} + +/* + * shorter and faster modulo function, only works when v < 2N. + */ +static inline int mod_s(struct bch_control *bch, unsigned int v) +{ + const unsigned int n = GF_N(bch); + return (v < n) ? v : v-n; +} + +static inline int deg(unsigned int poly) +{ + /* polynomial degree is the most-significant bit index */ + return fls(poly)-1; +} + +static inline int parity(unsigned int x) +{ + /* + * public domain code snippet, lifted from + * http://www-graphics.stanford.edu/~seander/bithacks.html + */ + x ^= x >> 1; + x ^= x >> 2; + x = (x & 0x11111111U) * 0x11111111U; + return (x >> 28) & 1; +} + +/* Galois field basic operations: multiply, divide, inverse, etc. */ + +static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a, + unsigned int b) +{ + return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+ + bch->a_log_tab[b])] : 0; +} + +static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a) +{ + return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0; +} + +static inline unsigned int gf_div(struct bch_control *bch, unsigned int a, + unsigned int b) +{ + return a ? 
bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+ + GF_N(bch)-bch->a_log_tab[b])] : 0; +} + +static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a) +{ + return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]]; +} + +static inline unsigned int a_pow(struct bch_control *bch, int i) +{ + return bch->a_pow_tab[modulo(bch, i)]; +} + +static inline int a_log(struct bch_control *bch, unsigned int x) +{ + return bch->a_log_tab[x]; +} + +static inline int a_ilog(struct bch_control *bch, unsigned int x) +{ + return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]); +} + +/* + * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t + */ +static void compute_syndromes(struct bch_control *bch, uint32_t *ecc, + unsigned int *syn) +{ + int i, j, s; + unsigned int m; + uint32_t poly; + const int t = GF_T(bch); + + s = bch->ecc_bits; + + /* make sure extra bits in last ecc word are cleared */ + m = ((unsigned int)s) & 31; + if (m) + ecc[s/32] &= ~((1u << (32-m))-1); + memset(syn, 0, 2*t*sizeof(*syn)); + + /* compute v(a^j) for j=1 .. 2t-1 */ + do { + poly = *ecc++; + s -= 32; + while (poly) { + i = deg(poly); + for (j = 0; j < 2*t; j += 2) + syn[j] ^= a_pow(bch, (j+1)*(i+s)); + + poly ^= (1 << i); + } + } while (s > 0); + + /* v(a^(2j)) = v(a^j)^2 */ + for (j = 0; j < t; j++) + syn[2*j+1] = gf_sqr(bch, syn[j]); +} + +static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src) +{ + memcpy(dst, src, GF_POLY_SZ(src->deg)); +} + +static int compute_error_locator_polynomial(struct bch_control *bch, + const unsigned int *syn) +{ + const unsigned int t = GF_T(bch); + const unsigned int n = GF_N(bch); + unsigned int i, j, tmp, l, pd = 1, d = syn[0]; + struct gf_poly *elp = bch->elp; + struct gf_poly *pelp = bch->poly_2t[0]; + struct gf_poly *elp_copy = bch->poly_2t[1]; + int k, pp = -1; + + memset(pelp, 0, GF_POLY_SZ(2*t)); + memset(elp, 0, GF_POLY_SZ(2*t)); + + pelp->deg = 0; + pelp->c[0] = 1; + elp->deg = 0; + elp->c[0] = 1; + + /* use simplified binary Berlekamp-Massey algorithm */ + for (i = 0; (i < t) && (elp->deg <= t); i++) { + if (d) { + k = 2*i-pp; + gf_poly_copy(elp_copy, elp); + /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */ + tmp = a_log(bch, d)+n-a_log(bch, pd); + for (j = 0; j <= pelp->deg; j++) { + if (pelp->c[j]) { + l = a_log(bch, pelp->c[j]); + elp->c[j+k] ^= a_pow(bch, tmp+l); + } + } + /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */ + tmp = pelp->deg+k; + if (tmp > elp->deg) { + elp->deg = tmp; + gf_poly_copy(pelp, elp_copy); + pd = d; + pp = 2*i; + } + } + /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */ + if (i < t-1) { + d = syn[2*i+2]; + for (j = 1; j <= elp->deg; j++) + d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]); + } + } + dbg("elp=%s\n", gf_poly_str(elp)); + return (elp->deg > t) ? 
-1 : (int)elp->deg; +} + +/* + * solve a m x m linear system in GF(2) with an expected number of solutions, + * and return the number of found solutions + */ +static int solve_linear_system(struct bch_control *bch, unsigned int *rows, + unsigned int *sol, int nsol) +{ + const int m = GF_M(bch); + unsigned int tmp, mask; + int rem, c, r, p, k, param[m]; + + k = 0; + mask = 1 << m; + + /* Gaussian elimination */ + for (c = 0; c < m; c++) { + rem = 0; + p = c-k; + /* find suitable row for elimination */ + for (r = p; r < m; r++) { + if (rows[r] & mask) { + if (r != p) { + tmp = rows[r]; + rows[r] = rows[p]; + rows[p] = tmp; + } + rem = r+1; + break; + } + } + if (rem) { + /* perform elimination on remaining rows */ + tmp = rows[p]; + for (r = rem; r < m; r++) { + if (rows[r] & mask) + rows[r] ^= tmp; + } + } else { + /* elimination not needed, store defective row index */ + param[k++] = c; + } + mask >>= 1; + } + /* rewrite system, inserting fake parameter rows */ + if (k > 0) { + p = k; + for (r = m-1; r >= 0; r--) { + if ((r > m-1-k) && rows[r]) + /* system has no solution */ + return 0; + + rows[r] = (p && (r == param[p-1])) ? + p--, 1u << (m-r) : rows[r-p]; + } + } + + if (nsol != (1 << k)) + /* unexpected number of solutions */ + return 0; + + for (p = 0; p < nsol; p++) { + /* set parameters for p-th solution */ + for (c = 0; c < k; c++) + rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1); + + /* compute unique solution */ + tmp = 0; + for (r = m-1; r >= 0; r--) { + mask = rows[r] & (tmp|1); + tmp |= parity(mask) << (m-r); + } + sol[p] = tmp >> 1; + } + return nsol; +} + +/* + * this function builds and solves a linear system for finding roots of a degree + * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m). + */ +static int find_affine4_roots(struct bch_control *bch, unsigned int a, + unsigned int b, unsigned int c, + unsigned int *roots) +{ + int i, j, k; + const int m = GF_M(bch); + unsigned int mask = 0xff, t, rows[16] = {0,}; + + j = a_log(bch, b); + k = a_log(bch, a); + rows[0] = c; + + /* buid linear system to solve X^4+aX^2+bX+c = 0 */ + for (i = 0; i < m; i++) { + rows[i+1] = bch->a_pow_tab[4*i]^ + (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^ + (b ? 
bch->a_pow_tab[mod_s(bch, j)] : 0); + j++; + k += 2; + } + /* + * transpose 16x16 matrix before passing it to linear solver + * warning: this code assumes m < 16 + */ + for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) { + for (k = 0; k < 16; k = (k+j+1) & ~j) { + t = ((rows[k] >> j)^rows[k+j]) & mask; + rows[k] ^= (t << j); + rows[k+j] ^= t; + } + } + return solve_linear_system(bch, rows, roots, 4); +} + +/* + * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r)) + */ +static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int n = 0; + + if (poly->c[0]) + /* poly[X] = bX+c with c!=0, root=c/b */ + roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+ + bch->a_log_tab[poly->c[1]]); + return n; +} + +/* + * compute roots of a degree 2 polynomial over GF(2^m) + */ +static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int n = 0, i, l0, l1, l2; + unsigned int u, v, r; + + if (poly->c[0] && poly->c[1]) { + + l0 = bch->a_log_tab[poly->c[0]]; + l1 = bch->a_log_tab[poly->c[1]]; + l2 = bch->a_log_tab[poly->c[2]]; + + /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */ + u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1)); + /* + * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi): + * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) = + * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u) + * i.e. r and r+1 are roots iff Tr(u)=0 + */ + r = 0; + v = u; + while (v) { + i = deg(v); + r ^= bch->xi_tab[i]; + v ^= (1 << i); + } + /* verify root */ + if ((gf_sqr(bch, r)^r) == u) { + /* reverse z=a/bX transformation and compute log(1/r) */ + roots[n++] = modulo(bch, 2*GF_N(bch)-l1- + bch->a_log_tab[r]+l2); + roots[n++] = modulo(bch, 2*GF_N(bch)-l1- + bch->a_log_tab[r^1]+l2); + } + } + return n; +} + +/* + * compute roots of a degree 3 polynomial over GF(2^m) + */ +static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int i, n = 0; + unsigned int a, b, c, a2, b2, c2, e3, tmp[4]; + + if (poly->c[0]) { + /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */ + e3 = poly->c[3]; + c2 = gf_div(bch, poly->c[0], e3); + b2 = gf_div(bch, poly->c[1], e3); + a2 = gf_div(bch, poly->c[2], e3); + + /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */ + c = gf_mul(bch, a2, c2); /* c = a2c2 */ + b = gf_mul(bch, a2, b2)^c2; /* b = a2b2 + c2 */ + a = gf_sqr(bch, a2)^b2; /* a = a2^2 + b2 */ + + /* find the 4 roots of this affine polynomial */ + if (find_affine4_roots(bch, a, b, c, tmp) == 4) { + /* remove a2 from final list of roots */ + for (i = 0; i < 4; i++) { + if (tmp[i] != a2) + roots[n++] = a_ilog(bch, tmp[i]); + } + } + } + return n; +} + +/* + * compute roots of a degree 4 polynomial over GF(2^m) + */ +static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly, + unsigned int *roots) +{ + int i, l, n = 0; + unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4; + + if (poly->c[0] == 0) + return 0; + + /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */ + e4 = poly->c[4]; + d = gf_div(bch, poly->c[0], e4); + c = gf_div(bch, poly->c[1], e4); + b = gf_div(bch, poly->c[2], e4); + a = gf_div(bch, poly->c[3], e4); + + /* use Y=1/X transformation to get an affine polynomial */ + if (a) { + /* first, eliminate cX by using z=X+e with ae^2+c=0 */ + if (c) { + /* compute e such that e^2 = c/a */ + f = gf_div(bch, c, a); + l = a_log(bch, f); + l += (l & 1) ? 
GF_N(bch) : 0; + e = a_pow(bch, l/2); + /* + * use transformation z=X+e: + * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d + * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d + * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d + * z^4 + az^3 + b'z^2 + d' + */ + d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d; + b = gf_mul(bch, a, e)^b; + } + /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */ + if (d == 0) + /* assume all roots have multiplicity 1 */ + return 0; + + c2 = gf_inv(bch, d); + b2 = gf_div(bch, a, d); + a2 = gf_div(bch, b, d); + } else { + /* polynomial is already affine */ + c2 = d; + b2 = c; + a2 = b; + } + /* find the 4 roots of this affine polynomial */ + if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) { + for (i = 0; i < 4; i++) { + /* post-process roots (reverse transformations) */ + f = a ? gf_inv(bch, roots[i]) : roots[i]; + roots[i] = a_ilog(bch, f^e); + } + n = 4; + } + return n; +} + +/* + * build monic, log-based representation of a polynomial + */ +static void gf_poly_logrep(struct bch_control *bch, + const struct gf_poly *a, int *rep) +{ + int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]); + + /* represent 0 values with -1; warning, rep[d] is not set to 1 */ + for (i = 0; i < d; i++) + rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1; +} + +/* + * compute polynomial Euclidean division remainder in GF(2^m)[X] + */ +static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a, + const struct gf_poly *b, int *rep) +{ + int la, p, m; + unsigned int i, j, *c = a->c; + const unsigned int d = b->deg; + + if (a->deg < d) + return; + + /* reuse or compute log representation of denominator */ + if (!rep) { + rep = bch->cache; + gf_poly_logrep(bch, b, rep); + } + + for (j = a->deg; j >= d; j--) { + if (c[j]) { + la = a_log(bch, c[j]); + p = j-d; + for (i = 0; i < d; i++, p++) { + m = rep[i]; + if (m >= 0) + c[p] ^= bch->a_pow_tab[mod_s(bch, + m+la)]; + } + } + } + a->deg = d-1; + while (!c[a->deg] && a->deg) + a->deg--; +} + +/* + * compute polynomial Euclidean division quotient in GF(2^m)[X] + */ +static void gf_poly_div(struct bch_control *bch, struct gf_poly *a, + const struct gf_poly *b, struct gf_poly *q) +{ + if (a->deg >= b->deg) { + q->deg = a->deg-b->deg; + /* compute a mod b (modifies a) */ + gf_poly_mod(bch, a, b, NULL); + /* quotient is stored in upper part of polynomial a */ + memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int)); + } else { + q->deg = 0; + q->c[0] = 0; + } +} + +/* + * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X] + */ +static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a, + struct gf_poly *b) +{ + struct gf_poly *tmp; + + dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b)); + + if (a->deg < b->deg) { + tmp = b; + b = a; + a = tmp; + } + + while (b->deg > 0) { + gf_poly_mod(bch, a, b, NULL); + tmp = b; + b = a; + a = tmp; + } + + dbg("%s\n", gf_poly_str(a)); + + return a; +} + +/* + * Given a polynomial f and an integer k, compute Tr(a^kX) mod f + * This is used in Berlekamp Trace algorithm for splitting polynomials + */ +static void compute_trace_bk_mod(struct bch_control *bch, int k, + const struct gf_poly *f, struct gf_poly *z, + struct gf_poly *out) +{ + const int m = GF_M(bch); + int i, j; + + /* z contains z^2j mod f */ + z->deg = 1; + z->c[0] = 0; + z->c[1] = bch->a_pow_tab[k]; + + out->deg = 0; + memset(out, 0, GF_POLY_SZ(f->deg)); + + /* compute f log representation only once */ + gf_poly_logrep(bch, f, bch->cache); + + for (i = 0; i < m; i++) { + /* add 
a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */ + for (j = z->deg; j >= 0; j--) { + out->c[j] ^= z->c[j]; + z->c[2*j] = gf_sqr(bch, z->c[j]); + z->c[2*j+1] = 0; + } + if (z->deg > out->deg) + out->deg = z->deg; + + if (i < m-1) { + z->deg *= 2; + /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */ + gf_poly_mod(bch, z, f, bch->cache); + } + } + while (!out->c[out->deg] && out->deg) + out->deg--; + + dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out)); +} + +/* + * factor a polynomial using Berlekamp Trace algorithm (BTA) + */ +static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f, + struct gf_poly **g, struct gf_poly **h) +{ + struct gf_poly *f2 = bch->poly_2t[0]; + struct gf_poly *q = bch->poly_2t[1]; + struct gf_poly *tk = bch->poly_2t[2]; + struct gf_poly *z = bch->poly_2t[3]; + struct gf_poly *gcd; + + dbg("factoring %s...\n", gf_poly_str(f)); + + *g = f; + *h = NULL; + + /* tk = Tr(a^k.X) mod f */ + compute_trace_bk_mod(bch, k, f, z, tk); + + if (tk->deg > 0) { + /* compute g = gcd(f, tk) (destructive operation) */ + gf_poly_copy(f2, f); + gcd = gf_poly_gcd(bch, f2, tk); + if (gcd->deg < f->deg) { + /* compute h=f/gcd(f,tk); this will modify f and q */ + gf_poly_div(bch, f, gcd, q); + /* store g and h in-place (clobbering f) */ + *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly; + gf_poly_copy(*g, gcd); + gf_poly_copy(*h, q); + } + } +} + +/* + * find roots of a polynomial, using BTZ algorithm; see the beginning of this + * file for details + */ +static int find_poly_roots(struct bch_control *bch, unsigned int k, + struct gf_poly *poly, unsigned int *roots) +{ + int cnt; + struct gf_poly *f1, *f2; + + switch (poly->deg) { + /* handle low degree polynomials with ad hoc techniques */ + case 1: + cnt = find_poly_deg1_roots(bch, poly, roots); + break; + case 2: + cnt = find_poly_deg2_roots(bch, poly, roots); + break; + case 3: + cnt = find_poly_deg3_roots(bch, poly, roots); + break; + case 4: + cnt = find_poly_deg4_roots(bch, poly, roots); + break; + default: + /* factor polynomial using Berlekamp Trace Algorithm (BTA) */ + cnt = 0; + if (poly->deg && (k <= GF_M(bch))) { + factor_polynomial(bch, k, poly, &f1, &f2); + if (f1) + cnt += find_poly_roots(bch, k+1, f1, roots); + if (f2) + cnt += find_poly_roots(bch, k+1, f2, roots+cnt); + } + break; + } + return cnt; +} + +#if defined(USE_CHIEN_SEARCH) +/* + * exhaustive root search (Chien) implementation - not used, included only for + * reference/comparison tests + */ +static int chien_search(struct bch_control *bch, unsigned int len, + struct gf_poly *p, unsigned int *roots) +{ + int m; + unsigned int i, j, syn, syn0, count = 0; + const unsigned int k = 8*len+bch->ecc_bits; + + /* use a log-based representation of polynomial */ + gf_poly_logrep(bch, p, bch->cache); + bch->cache[p->deg] = 0; + syn0 = gf_div(bch, p->c[0], p->c[p->deg]); + + for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) { + /* compute elp(a^i) */ + for (j = 1, syn = syn0; j <= p->deg; j++) { + m = bch->cache[j]; + if (m >= 0) + syn ^= a_pow(bch, m+j*i); + } + if (syn == 0) { + roots[count++] = GF_N(bch)-i; + if (count == p->deg) + break; + } + } + return (count == p->deg) ? 
count : 0; +} +#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc) +#endif /* USE_CHIEN_SEARCH */ + +/** + * decode_bch - decode received codeword and find bit error locations + * @bch: BCH control structure + * @data: received data, ignored if @calc_ecc is provided + * @len: data length in bytes, must always be provided + * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc + * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data + * @syn: hw computed syndrome data (if NULL, syndrome is calculated) + * @errloc: output array of error locations + * + * Returns: + * The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if + * invalid parameters were provided + * + * Depending on the available hw BCH support and the need to compute @calc_ecc + * separately (using encode_bch()), this function should be called with one of + * the following parameter configurations - + * + * by providing @data and @recv_ecc only: + * decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc) + * + * by providing @recv_ecc and @calc_ecc: + * decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc) + * + * by providing ecc = recv_ecc XOR calc_ecc: + * decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc) + * + * by providing syndrome results @syn: + * decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc) + * + * Once decode_bch() has successfully returned with a positive value, error + * locations returned in array @errloc should be interpreted as follows - + * + * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for + * data correction) + * + * if (errloc[n] < 8*len), then n-th error is located in data and can be + * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8); + * + * Note that this function does not perform any data correction by itself, it + * merely indicates error locations. + */ +int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len, + const uint8_t *recv_ecc, const uint8_t *calc_ecc, + const unsigned int *syn, unsigned int *errloc) +{ + const unsigned int ecc_words = BCH_ECC_WORDS(bch); + unsigned int nbits; + int i, err, nroots; + uint32_t sum; + + /* sanity check: make sure data length can be handled */ + if (8*len > (bch->n-bch->ecc_bits)) + return -EINVAL; + + /* if caller does not provide syndromes, compute them */ + if (!syn) { + if (!calc_ecc) { + /* compute received data ecc into an internal buffer */ + if (!data || !recv_ecc) + return -EINVAL; + encode_bch(bch, data, len, NULL); + } else { + /* load provided calculated ecc */ + load_ecc8(bch, bch->ecc_buf, calc_ecc); + } + /* load received ecc or assume it was XORed in calc_ecc */ + if (recv_ecc) { + load_ecc8(bch, bch->ecc_buf2, recv_ecc); + /* XOR received and calculated ecc */ + for (i = 0, sum = 0; i < (int)ecc_words; i++) { + bch->ecc_buf[i] ^= bch->ecc_buf2[i]; + sum |= bch->ecc_buf[i]; + } + if (!sum) + /* no error found */ + return 0; + } + compute_syndromes(bch, bch->ecc_buf, bch->syn); + syn = bch->syn; + } + + err = compute_error_locator_polynomial(bch, syn); + if (err > 0) { + nroots = find_poly_roots(bch, 1, bch->elp, errloc); + if (err != nroots) + err = -1; + } + if (err > 0) { + /* post-process raw error locations for easier correction */ + nbits = (len*8)+bch->ecc_bits; + for (i = 0; i < err; i++) { + if (errloc[i] >= nbits) { + err = -1; + break; + } + errloc[i] = nbits-1-errloc[i]; + errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7)); + } + } + return (err >= 0) ? 
err : -EBADMSG; +} +EXPORT_SYMBOL_GPL(decode_bch); + +/* + * generate Galois field lookup tables + */ +static int build_gf_tables(struct bch_control *bch, unsigned int poly) +{ + unsigned int i, x = 1; + const unsigned int k = 1 << deg(poly); + + /* primitive polynomial must be of degree m */ + if (k != (1u << GF_M(bch))) + return -1; + + for (i = 0; i < GF_N(bch); i++) { + bch->a_pow_tab[i] = x; + bch->a_log_tab[x] = i; + if (i && (x == 1)) + /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */ + return -1; + x <<= 1; + if (x & k) + x ^= poly; + } + bch->a_pow_tab[GF_N(bch)] = 1; + bch->a_log_tab[0] = 0; + + return 0; +} + +/* + * compute generator polynomial remainder tables for fast encoding + */ +static void build_mod8_tables(struct bch_control *bch, const uint32_t *g) +{ + int i, j, b, d; + uint32_t data, hi, lo, *tab; + const int l = BCH_ECC_WORDS(bch); + const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32); + const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32); + + memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab)); + + for (i = 0; i < 256; i++) { + /* p(X)=i is a small polynomial of weight <= 8 */ + for (b = 0; b < 4; b++) { + /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */ + tab = bch->mod8_tab + (b*256+i)*l; + data = i << (8*b); + while (data) { + d = deg(data); + /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */ + data ^= g[0] >> (31-d); + for (j = 0; j < ecclen; j++) { + hi = (d < 31) ? g[j] << (d+1) : 0; + lo = (j+1 < plen) ? + g[j+1] >> (31-d) : 0; + tab[j] ^= hi|lo; + } + } + } + } +} + +/* + * build a base for factoring degree 2 polynomials + */ +static int build_deg2_base(struct bch_control *bch) +{ + const int m = GF_M(bch); + int i, j, r; + unsigned int sum, x, y, remaining, ak = 0, xi[m]; + + /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */ + for (i = 0; i < m; i++) { + for (j = 0, sum = 0; j < m; j++) + sum ^= a_pow(bch, i*(1 << j)); + + if (sum) { + ak = bch->a_pow_tab[i]; + break; + } + } + /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */ + remaining = m; + memset(xi, 0, sizeof(xi)); + + for (x = 0; (x <= GF_N(bch)) && remaining; x++) { + y = gf_sqr(bch, x)^x; + for (i = 0; i < 2; i++) { + r = a_log(bch, y); + if (y && (r < m) && !xi[r]) { + bch->xi_tab[r] = x; + xi[r] = 1; + remaining--; + dbg("x%d = %x\n", r, x); + break; + } + y ^= ak; + } + } + /* should not happen but check anyway */ + return remaining ? -1 : 0; +} + +static void *bch_alloc(size_t size, int *err) +{ + void *ptr; + + ptr = kmalloc(size, GFP_KERNEL); + if (ptr == NULL) + *err = 1; + return ptr; +} + +/* + * compute generator polynomial for given (m,t) parameters. 
+ */ +static uint32_t *compute_generator_polynomial(struct bch_control *bch) +{ + const unsigned int m = GF_M(bch); + const unsigned int t = GF_T(bch); + int n, err = 0; + unsigned int i, j, nbits, r, word, *roots; + struct gf_poly *g; + uint32_t *genpoly; + + g = bch_alloc(GF_POLY_SZ(m*t), &err); + roots = bch_alloc((bch->n+1)*sizeof(*roots), &err); + genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err); + + if (err) { + kfree(genpoly); + genpoly = NULL; + goto finish; + } + + /* enumerate all roots of g(X) */ + memset(roots , 0, (bch->n+1)*sizeof(*roots)); + for (i = 0; i < t; i++) { + for (j = 0, r = 2*i+1; j < m; j++) { + roots[r] = 1; + r = mod_s(bch, 2*r); + } + } + /* build generator polynomial g(X) */ + g->deg = 0; + g->c[0] = 1; + for (i = 0; i < GF_N(bch); i++) { + if (roots[i]) { + /* multiply g(X) by (X+root) */ + r = bch->a_pow_tab[i]; + g->c[g->deg+1] = 1; + for (j = g->deg; j > 0; j--) + g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1]; + + g->c[0] = gf_mul(bch, g->c[0], r); + g->deg++; + } + } + /* store left-justified binary representation of g(X) */ + n = g->deg+1; + i = 0; + + while (n > 0) { + nbits = (n > 32) ? 32 : n; + for (j = 0, word = 0; j < nbits; j++) { + if (g->c[n-1-j]) + word |= 1u << (31-j); + } + genpoly[i++] = word; + n -= nbits; + } + bch->ecc_bits = g->deg; + +finish: + kfree(g); + kfree(roots); + + return genpoly; +} + +/** + * init_bch - initialize a BCH encoder/decoder + * @m: Galois field order, should be in the range 5-15 + * @t: maximum error correction capability, in bits + * @prim_poly: user-provided primitive polynomial (or 0 to use default) + * + * Returns: + * a newly allocated BCH control structure if successful, NULL otherwise + * + * This initialization can take some time, as lookup tables are built for fast + * encoding/decoding; make sure not to call this function from a time critical + * path. Usually, init_bch() should be called on module/driver init and + * free_bch() should be called to release memory on exit. + * + * You may provide your own primitive polynomial of degree @m in argument + * @prim_poly, or let init_bch() use its default polynomial. + * + * Once init_bch() has successfully returned a pointer to a newly allocated + * BCH control structure, ecc length in bytes is given by member @ecc_bytes of + * the structure. 
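Tying init_bch(), encode_bch() and decode_bch() together, an end-to-end sketch (error handling abbreviated; the (m,t) choice and buffer names are hypothetical):

```c
#include <linux/bch.h>
#include <linux/slab.h>

/* Round-trip a buffer through BCH encode/decode with m=13, t=4. */
static int demo_bch_roundtrip(uint8_t *data, unsigned int len)
{
	struct bch_control *bch;
	unsigned int *errloc;
	uint8_t *ecc;
	int i, nerr, ret = 0;

	bch = init_bch(13, 4, 0);           /* 0: use default poly */
	if (!bch)
		return -EINVAL;

	ecc = kzalloc(bch->ecc_bytes, GFP_KERNEL);
	errloc = kcalloc(4, sizeof(*errloc), GFP_KERNEL); /* t entries */
	if (!ecc || !errloc) {
		ret = -ENOMEM;
		goto out;
	}

	encode_bch(bch, data, len, ecc);

	/* ... data and ecc travel over a noisy channel here ... */

	nerr = decode_bch(bch, data, len, ecc, NULL, NULL, errloc);
	if (nerr < 0) {
		ret = nerr;                 /* -EBADMSG: uncorrectable */
		goto out;
	}
	for (i = 0; i < nerr; i++)
		if (errloc[i] < 8 * len)    /* error in data, not ecc */
			data[errloc[i] / 8] ^= 1 << (errloc[i] % 8);
out:
	kfree(errloc);
	kfree(ecc);
	free_bch(bch);
	return ret;
}
```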
+ */ +struct bch_control *init_bch(int m, int t, unsigned int prim_poly) +{ + int err = 0; + unsigned int i, words; + uint32_t *genpoly; + struct bch_control *bch = NULL; + + const int min_m = 5; + const int max_m = 15; + + /* default primitive polynomials */ + static const unsigned int prim_poly_tab[] = { + 0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b, + 0x402b, 0x8003, + }; + +#if defined(CONFIG_BCH_CONST_PARAMS) + if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) { + printk(KERN_ERR "bch encoder/decoder was configured to support " + "parameters m=%d, t=%d only!\n", + CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T); + goto fail; + } +#endif + if ((m < min_m) || (m > max_m)) + /* + * values of m greater than 15 are not currently supported; + * supporting m > 15 would require changing table base type + * (uint16_t) and a small patch in matrix transposition + */ + goto fail; + + /* sanity checks */ + if ((t < 1) || (m*t >= ((1 << m)-1))) + /* invalid t value */ + goto fail; + + /* select a primitive polynomial for generating GF(2^m) */ + if (prim_poly == 0) + prim_poly = prim_poly_tab[m-min_m]; + + bch = kzalloc(sizeof(*bch), GFP_KERNEL); + if (bch == NULL) + goto fail; + + bch->m = m; + bch->t = t; + bch->n = (1 << m)-1; + words = DIV_ROUND_UP(m*t, 32); + bch->ecc_bytes = DIV_ROUND_UP(m*t, 8); + bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err); + bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err); + bch->mod8_tab = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err); + bch->ecc_buf = bch_alloc(words*sizeof(*bch->ecc_buf), &err); + bch->ecc_buf2 = bch_alloc(words*sizeof(*bch->ecc_buf2), &err); + bch->xi_tab = bch_alloc(m*sizeof(*bch->xi_tab), &err); + bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err); + bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err); + bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err); + + for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++) + bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err); + + if (err) + goto fail; + + err = build_gf_tables(bch, prim_poly); + if (err) + goto fail; + + /* use generator polynomial for computing encoding tables */ + genpoly = compute_generator_polynomial(bch); + if (genpoly == NULL) + goto fail; + + build_mod8_tables(bch, genpoly); + kfree(genpoly); + + err = build_deg2_base(bch); + if (err) + goto fail; + + return bch; + +fail: + free_bch(bch); + return NULL; +} +EXPORT_SYMBOL_GPL(init_bch); + +/** + * free_bch - free the BCH control structure + * @bch: BCH control structure to release + */ +void free_bch(struct bch_control *bch) +{ + unsigned int i; + + if (bch) { + kfree(bch->a_pow_tab); + kfree(bch->a_log_tab); + kfree(bch->mod8_tab); + kfree(bch->ecc_buf); + kfree(bch->ecc_buf2); + kfree(bch->xi_tab); + kfree(bch->syn); + kfree(bch->cache); + kfree(bch->elp); + + for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++) + kfree(bch->poly_2t[i]); + + kfree(bch); + } +} +EXPORT_SYMBOL_GPL(free_bch); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>"); +MODULE_DESCRIPTION("Binary BCH encoder/decoder"); diff --git a/lib/bitmap.c b/lib/bitmap.c index 741fae905ae3..2f4412e4d071 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) - void bitmap_set(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); @@ -571,8 +569,11 @@ int 
bitmap_scnlistprintf(char *buf, unsigned int buflen, EXPORT_SYMBOL(bitmap_scnlistprintf); /** - * bitmap_parselist - convert list format ASCII string to bitmap - * @bp: read nul-terminated user string from this buffer + * __bitmap_parselist - convert list format ASCII string to bitmap + * @buf: read nul-terminated user string from this buffer + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space * @maskp: write resulting mask here * @nmaskbits: number of bits in mask to be written * @@ -587,20 +588,63 @@ EXPORT_SYMBOL(bitmap_scnlistprintf); * %-EINVAL: invalid character in string * %-ERANGE: bit number specified too large for mask */ -int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +static int __bitmap_parselist(const char *buf, unsigned int buflen, + int is_user, unsigned long *maskp, + int nmaskbits) { unsigned a, b; + int c, old_c, totaldigits; + const char __user *ubuf = buf; + int exp_digit, in_range; + totaldigits = c = 0; bitmap_zero(maskp, nmaskbits); do { - if (!isdigit(*bp)) - return -EINVAL; - b = a = simple_strtoul(bp, (char **)&bp, BASEDEC); - if (*bp == '-') { - bp++; - if (!isdigit(*bp)) + exp_digit = 1; + in_range = 0; + a = b = 0; + + /* Get the next cpu# or a range of cpu#'s */ + while (buflen) { + old_c = c; + if (is_user) { + if (__get_user(c, ubuf++)) + return -EFAULT; + } else + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) return -EINVAL; - b = simple_strtoul(bp, (char **)&bp, BASEDEC); + + /* A '\0' or a ',' signal the end of a cpu# or range */ + if (c == '\0' || c == ',') + break; + + if (c == '-') { + if (exp_digit || in_range) + return -EINVAL; + b = 0; + in_range = 1; + exp_digit = 1; + continue; + } + + if (!isdigit(c)) + return -EINVAL; + + b = b * 10 + (c - '0'); + if (!in_range) + a = b; + exp_digit = 0; + totaldigits++; } if (!(a <= b)) return -EINVAL; @@ -610,13 +654,52 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) set_bit(a, maskp); a++; } - if (*bp == ',') - bp++; - } while (*bp != '\0' && *bp != '\n'); + } while (buflen && c == ','); return 0; } + +int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +{ + char *nl = strchr(bp, '\n'); + int len; + + if (nl) + len = nl - bp; + else + len = strlen(bp); + + return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); +} EXPORT_SYMBOL(bitmap_parselist); + +/** + * bitmap_parselist_user() + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for bitmap_parselist(), providing it with user buffer. + * + * We cannot have this as an inline function in bitmap.h because it needs + * linux/uaccess.h to get the access_ok() declaration and this causes + * cyclic dependencies. 
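The list format parsed here is the usual cpulist syntax ("0-3,8"); a small sketch with hypothetical values:

```c
#include <linux/bitmap.h>
#include <linux/kernel.h>

#define DEMO_NBITS 16

static void demo_parselist(void)
{
	DECLARE_BITMAP(mask, DEMO_NBITS);

	/*
	 * "0-3,8" sets bits 0..3 and 8. Returns 0 on success, -EINVAL
	 * on a bad character or embedded whitespace, -ERANGE when a
	 * bit number does not fit in DEMO_NBITS.
	 */
	if (bitmap_parselist("0-3,8", mask, DEMO_NBITS) == 0)
		pr_info("bit 2 set: %d\n", test_bit(2, mask));
}
```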
+ */ +int bitmap_parselist_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + if (!access_ok(VERIFY_READ, ubuf, ulen)) + return -EFAULT; + return __bitmap_parselist((const char *)ubuf, + ulen, 1, maskp, nmaskbits); +} +EXPORT_SYMBOL(bitmap_parselist_user); + + /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap @@ -671,7 +754,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) * * The bit positions 0 through @bits are valid positions in @buf. */ -static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) { int pos = 0; @@ -830,7 +913,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * @orig (i.e. bits 3, 5, 7 and 9) were also set. * * When bit 11 is set in @orig, it means turn on the bit in - * @dst corresponding to whatever is the twelth bit that is + * @dst corresponding to whatever is the twelfth bit that is * turned on in @relmap. In the above example, there were * only ten bits turned on in @relmap (30..39), so that bit * 11 was set in @orig had no affect on @dst. diff --git a/lib/bsearch.c b/lib/bsearch.c new file mode 100644 index 000000000000..5b54758e2afb --- /dev/null +++ b/lib/bsearch.c @@ -0,0 +1,53 @@ +/* + * A generic implementation of binary search for the Linux kernel + * + * Copyright (C) 2008-2009 Ksplice, Inc. + * Author: Tim Abbott <tabbott@ksplice.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2. + */ + +#include <linux/module.h> +#include <linux/bsearch.h> + +/* + * bsearch - binary search an array of elements + * @key: pointer to item being searched for + * @base: pointer to first element to search + * @num: number of elements + * @size: size of each element + * @cmp: pointer to comparison function + * + * This function does a binary search on the given array. The + * contents of the array should already be in ascending sorted order + * under the provided comparison function. + * + * Note that the key need not have the same type as the elements in + * the array, e.g. key could be a string and the comparison function + * could compare the string with the struct's name field. However, if + * the key and elements in the array are of the same type, you can use + * the same comparison function for both sort() and bsearch(). + */ +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)) +{ + size_t start = 0, end = num; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = cmp(key, base + mid * size); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return (void *)base + mid * size; + } + + return NULL; +} +EXPORT_SYMBOL(bsearch); diff --git a/lib/btree.c b/lib/btree.c index c9c6f0351526..2a34392bcecc 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -11,7 +11,7 @@ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch * * A relatively simple B+Tree implementation. I have written it as a learning - * excercise to understand how B+Trees work. Turned out to be useful as well. + * exercise to understand how B+Trees work. Turned out to be useful as well. * * B+Trees can be used similar to Linux radix trees (which don't have anything * in common with textbook radix trees, beware). 
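A usage sketch for the bsearch() helper added above, searching a hypothetical sorted ID table:

```c
#include <linux/bsearch.h>
#include <linux/kernel.h>

static const int demo_ids[] = { 3, 17, 29, 42, 99 }; /* ascending */

static int demo_cmp(const void *key, const void *elt)
{
	int k = *(const int *)key, e = *(const int *)elt;

	return (k < e) ? -1 : (k > e);
}

static bool demo_has_id(int id)
{
	return bsearch(&id, demo_ids, ARRAY_SIZE(demo_ids),
		       sizeof(demo_ids[0]), demo_cmp) != NULL;
}
```

As the comment in bsearch.c notes, if key and elements share a type, the same comparison function can be reused to sort() the array before searching it.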
Prerequisite for them working @@ -541,7 +541,7 @@ static void rebalance(struct btree_head *head, struct btree_geo *geo, int i, no_left, no_right; if (fill == 0) { - /* Because we don't steal entries from a neigbour, this case + /* Because we don't steal entries from a neighbour, this case * can happen. Parent node contains a single child, this * node, so merging with a sibling never happens. */ diff --git a/lib/checksum.c b/lib/checksum.c index 097508732f34..8df2f91e6d98 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -49,7 +49,7 @@ static inline unsigned short from32to16(unsigned int x) static unsigned int do_csum(const unsigned char *buff, int len) { - int odd, count; + int odd; unsigned int result = 0; if (len <= 0) @@ -64,25 +64,22 @@ static unsigned int do_csum(const unsigned char *buff, int len) len--; buff++; } - count = len >> 1; /* nr of 16-bit words.. */ - if (count) { + if (len >= 2) { if (2 & (unsigned long) buff) { result += *(unsigned short *) buff; - count--; len -= 2; buff += 2; } - count >>= 1; /* nr of 32-bit words.. */ - if (count) { + if (len >= 4) { + const unsigned char *end = buff + ((unsigned)len & ~3); unsigned int carry = 0; do { unsigned int w = *(unsigned int *) buff; - count--; buff += 4; result += carry; result += w; carry = (w > result); - } while (count); + } while (buff < end); result += carry; result = (result & 0xffff) + (result >> 16); } diff --git a/lib/cordic.c b/lib/cordic.c new file mode 100644 index 000000000000..aa27a88d7e04 --- /dev/null +++ b/lib/cordic.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <linux/module.h> +#include <linux/cordic.h> + +#define CORDIC_ANGLE_GEN 39797 +#define CORDIC_PRECISION_SHIFT 16 +#define CORDIC_NUM_ITER (CORDIC_PRECISION_SHIFT + 2) + +#define FIXED(X) ((s32)((X) << CORDIC_PRECISION_SHIFT)) +#define FLOAT(X) (((X) >= 0) \ + ? ((((X) >> (CORDIC_PRECISION_SHIFT - 1)) + 1) >> 1) \ + : -((((-(X)) >> (CORDIC_PRECISION_SHIFT - 1)) + 1) >> 1)) + +static const s32 arctan_table[] = { + 2949120, + 1740967, + 919879, + 466945, + 234379, + 117304, + 58666, + 29335, + 14668, + 7334, + 3667, + 1833, + 917, + 458, + 229, + 115, + 57, + 29 +}; + +/* + * cordic_calc_iq() - calculates the i/q coordinate for given angle + * + * theta: angle in degrees for which i/q coordinate is to be calculated + * coord: function output parameter holding the i/q coordinate + */ +struct cordic_iq cordic_calc_iq(s32 theta) +{ + struct cordic_iq coord; + s32 angle, valtmp; + unsigned iter; + int signx = 1; + int signtheta; + + coord.i = CORDIC_ANGLE_GEN; + coord.q = 0; + angle = 0; + + theta = FIXED(theta); + signtheta = (theta < 0) ? 
-1 : 1; + theta = ((theta + FIXED(180) * signtheta) % FIXED(360)) - + FIXED(180) * signtheta; + + if (FLOAT(theta) > 90) { + theta -= FIXED(180); + signx = -1; + } else if (FLOAT(theta) < -90) { + theta += FIXED(180); + signx = -1; + } + + for (iter = 0; iter < CORDIC_NUM_ITER; iter++) { + if (theta > angle) { + valtmp = coord.i - (coord.q >> iter); + coord.q += (coord.i >> iter); + angle += arctan_table[iter]; + } else { + valtmp = coord.i + (coord.q >> iter); + coord.q -= (coord.i >> iter); + angle -= arctan_table[iter]; + } + coord.i = valtmp; + } + + coord.i *= signx; + coord.q *= signx; + return coord; +} +EXPORT_SYMBOL(cordic_calc_iq); + +MODULE_DESCRIPTION("Cordic functions"); +MODULE_AUTHOR("Broadcom Corporation"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c new file mode 100644 index 000000000000..987acfafeb83 --- /dev/null +++ b/lib/cpu_rmap.c @@ -0,0 +1,269 @@ +/* + * cpu_rmap.c: CPU affinity reverse-map support + * Copyright 2011 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <linux/cpu_rmap.h> +#ifdef CONFIG_GENERIC_HARDIRQS +#include <linux/interrupt.h> +#endif +#include <linux/module.h> + +/* + * These functions maintain a mapping from CPUs to some ordered set of + * objects with CPU affinities. This can be seen as a reverse-map of + * CPU affinity. However, we do not assume that the object affinities + * cover all CPUs in the system. For those CPUs not directly covered + * by object affinities, we attempt to find a nearest object based on + * CPU topology. + */ + +/** + * alloc_cpu_rmap - allocate CPU affinity reverse-map + * @size: Number of objects to be mapped + * @flags: Allocation flags e.g. %GFP_KERNEL + */ +struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) +{ + struct cpu_rmap *rmap; + unsigned int cpu; + size_t obj_offset; + + /* This is a silly number of objects, and we use u16 indices. */ + if (size > 0xffff) + return NULL; + + /* Offset of object pointer array from base structure */ + obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]), + sizeof(void *)); + + rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags); + if (!rmap) + return NULL; + + rmap->obj = (void **)((char *)rmap + obj_offset); + + /* Initially assign CPUs to objects on a rota, since we have + * no idea where the objects are. Use infinite distance, so + * any object with known distance is preferable. Include the + * CPUs that are not present/online, since we definitely want + * any newly-hotplugged CPUs to have some object assigned. + */ + for_each_possible_cpu(cpu) { + rmap->near[cpu].index = cpu % size; + rmap->near[cpu].dist = CPU_RMAP_DIST_INF; + } + + rmap->size = size; + return rmap; +} +EXPORT_SYMBOL(alloc_cpu_rmap); + +/* Reevaluate nearest object for given CPU, comparing with the given + * neighbours at the given distance. 
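As a rough usage sketch for cordic_calc_iq() (hypothetical caller; the expected magnitudes assume the 1.16 fixed-point scaling implied by CORDIC_PRECISION_SHIFT, with the CORDIC gain already folded into CORDIC_ANGLE_GEN):

#include <linux/cordic.h>
#include <linux/kernel.h>

static void example_cordic(void)
{
	/* 30 degrees: expect roughly i ~ cos(30deg) * 2^16 ~ 56756
	 * and q ~ sin(30deg) * 2^16 = 32768, within CORDIC error. */
	struct cordic_iq coord = cordic_calc_iq(30);

	pr_info("cordic(30): i=%d q=%d\n", coord.i, coord.q);
}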
+ */ +static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu, + const struct cpumask *mask, u16 dist) +{ + int neigh; + + for_each_cpu(neigh, mask) { + if (rmap->near[cpu].dist > dist && + rmap->near[neigh].dist <= dist) { + rmap->near[cpu].index = rmap->near[neigh].index; + rmap->near[cpu].dist = dist; + return true; + } + } + return false; +} + +#ifdef DEBUG +static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix) +{ + unsigned index; + unsigned int cpu; + + pr_info("cpu_rmap %p, %s:\n", rmap, prefix); + + for_each_possible_cpu(cpu) { + index = rmap->near[cpu].index; + pr_info("cpu %d -> obj %u (distance %u)\n", + cpu, index, rmap->near[cpu].dist); + } +} +#else +static inline void +debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix) +{ +} +#endif + +/** + * cpu_rmap_add - add object to a rmap + * @rmap: CPU rmap allocated with alloc_cpu_rmap() + * @obj: Object to add to rmap + * + * Return index of object. + */ +int cpu_rmap_add(struct cpu_rmap *rmap, void *obj) +{ + u16 index; + + BUG_ON(rmap->used >= rmap->size); + index = rmap->used++; + rmap->obj[index] = obj; + return index; +} +EXPORT_SYMBOL(cpu_rmap_add); + +/** + * cpu_rmap_update - update CPU rmap following a change of object affinity + * @rmap: CPU rmap to update + * @index: Index of object whose affinity changed + * @affinity: New CPU affinity of object + */ +int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, + const struct cpumask *affinity) +{ + cpumask_var_t update_mask; + unsigned int cpu; + + if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL))) + return -ENOMEM; + + /* Invalidate distance for all CPUs for which this used to be + * the nearest object. Mark those CPUs for update. + */ + for_each_online_cpu(cpu) { + if (rmap->near[cpu].index == index) { + rmap->near[cpu].dist = CPU_RMAP_DIST_INF; + cpumask_set_cpu(cpu, update_mask); + } + } + + debug_print_rmap(rmap, "after invalidating old distances"); + + /* Set distance to 0 for all CPUs in the new affinity mask. + * Mark all CPUs within their NUMA nodes for update. + */ + for_each_cpu(cpu, affinity) { + rmap->near[cpu].index = index; + rmap->near[cpu].dist = 0; + cpumask_or(update_mask, update_mask, + cpumask_of_node(cpu_to_node(cpu))); + } + + debug_print_rmap(rmap, "after updating neighbours"); + + /* Update distances based on topology */ + for_each_cpu(cpu, update_mask) { + if (cpu_rmap_copy_neigh(rmap, cpu, + topology_thread_cpumask(cpu), 1)) + continue; + if (cpu_rmap_copy_neigh(rmap, cpu, + topology_core_cpumask(cpu), 2)) + continue; + if (cpu_rmap_copy_neigh(rmap, cpu, + cpumask_of_node(cpu_to_node(cpu)), 3)) + continue; + /* We could continue into NUMA node distances, but for now + * we give up. + */ + } + + debug_print_rmap(rmap, "after copying neighbours"); + + free_cpumask_var(update_mask); + return 0; +} +EXPORT_SYMBOL(cpu_rmap_update); + +#ifdef CONFIG_GENERIC_HARDIRQS + +/* Glue between IRQ affinity notifiers and CPU rmaps */ + +struct irq_glue { + struct irq_affinity_notify notify; + struct cpu_rmap *rmap; + u16 index; +}; + +/** + * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs + * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL + * + * Must be called in process context, before freeing the IRQs, and + * without holding any locks required by global workqueue items. 
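To make the data flow of cpu_rmap_update() above concrete, here is a hypothetical producer with two receive queues, each pinned to one CPU; the queue pointers, count and names are invented:

#include <linux/cpu_rmap.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>

static struct cpu_rmap *example_build_rmap(void *queues[2])
{
	struct cpu_rmap *rmap = alloc_cpu_rmap(2, GFP_KERNEL);
	int i;

	if (!rmap)
		return NULL;

	for (i = 0; i < 2; i++) {
		int index = cpu_rmap_add(rmap, queues[i]);

		/* Declare queue i's affinity; every other CPU is then
		 * assigned a nearest queue by the topology walk above. */
		cpu_rmap_update(rmap, index, cpumask_of(i));
	}
	return rmap;
}

A consumer running on CPU c would then pick rmap->near[c].index, which the accompanying header appears to wrap as cpu_rmap_lookup_index().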
+ */ +void free_irq_cpu_rmap(struct cpu_rmap *rmap) +{ + struct irq_glue *glue; + u16 index; + + if (!rmap) + return; + + for (index = 0; index < rmap->used; index++) { + glue = rmap->obj[index]; + irq_set_affinity_notifier(glue->notify.irq, NULL); + } + irq_run_affinity_notifiers(); + + kfree(rmap); +} +EXPORT_SYMBOL(free_irq_cpu_rmap); + +static void +irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) +{ + struct irq_glue *glue = + container_of(notify, struct irq_glue, notify); + int rc; + + rc = cpu_rmap_update(glue->rmap, glue->index, mask); + if (rc) + pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); +} + +static void irq_cpu_rmap_release(struct kref *ref) +{ + struct irq_glue *glue = + container_of(ref, struct irq_glue, notify.kref); + kfree(glue); +} + +/** + * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map + * @rmap: The reverse-map + * @irq: The IRQ number + * + * This adds an IRQ affinity notifier that will update the reverse-map + * automatically. + * + * Must be called in process context, after the IRQ is allocated but + * before it is bound with request_irq(). + */ +int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) +{ + struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL); + int rc; + + if (!glue) + return -ENOMEM; + glue->notify.notify = irq_cpu_rmap_notify; + glue->notify.release = irq_cpu_rmap_release; + glue->rmap = rmap; + glue->index = cpu_rmap_add(rmap, glue); + rc = irq_set_affinity_notifier(irq, &glue->notify); + if (rc) + kfree(glue); + return rc; +} +EXPORT_SYMBOL(irq_cpu_rmap_add); + +#endif /* CONFIG_GENERIC_HARDIRQS */ diff --git a/lib/cpumask.c b/lib/cpumask.c index 05d6aca7fc19..af3e5817de98 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -30,7 +30,7 @@ int __any_online_cpu(const cpumask_t *mask) { int cpu; - for_each_cpu_mask(cpu, *mask) { + for_each_cpu(cpu, mask) { if (cpu_online(cpu)) break; } @@ -131,7 +131,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var_node); */ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { - return alloc_cpumask_var_node(mask, flags, numa_node_id()); + return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } EXPORT_SYMBOL(alloc_cpumask_var); diff --git a/lib/crc32.c b/lib/crc32.c index 4855995fcde9..a6e633a48cea 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -26,7 +26,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/init.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "crc32defs.h" #if CRC_LE_BITS == 8 # define tole(x) __constant_cpu_to_le32(x) diff --git a/lib/crc8.c b/lib/crc8.c new file mode 100644 index 000000000000..87b59cafdb83 --- /dev/null +++ b/lib/crc8.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
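Putting irq_cpu_rmap_add() and free_irq_cpu_rmap() together, a multi-queue driver's setup path might look like this sketch (the IRQ vector array, count and names are hypothetical):

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/gfp.h>

static struct cpu_rmap *example_setup_irq_rmap(const int *irq, unsigned int n)
{
	struct cpu_rmap *rmap = alloc_cpu_rmap(n, GFP_KERNEL);
	unsigned int i;

	if (!rmap)
		return NULL;

	/* Per the kernel-doc: after the IRQs are allocated, before
	 * they are bound with request_irq(). */
	for (i = 0; i < n; i++) {
		if (irq_cpu_rmap_add(rmap, irq[i])) {
			free_irq_cpu_rmap(rmap); /* unhooks notifiers added so far */
			return NULL;
		}
	}
	return rmap;
}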
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/crc8.h> +#include <linux/printk.h> + +/* + * crc8_populate_msb - fill crc table for given polynomial in reverse bit order. + * + * table: table to be filled. + * polynomial: polynomial for which table is to be filled. + */ +void crc8_populate_msb(u8 table[CRC8_TABLE_SIZE], u8 polynomial) +{ + int i, j; + const u8 msbit = 0x80; + u8 t = msbit; + + table[0] = 0; + + for (i = 1; i < CRC8_TABLE_SIZE; i *= 2) { + t = (t << 1) ^ (t & msbit ? polynomial : 0); + for (j = 0; j < i; j++) + table[i+j] = table[j] ^ t; + } +} +EXPORT_SYMBOL(crc8_populate_msb); + +/* + * crc8_populate_lsb - fill crc table for given polynomial in regular bit order. + * + * table: table to be filled. + * polynomial: polynomial for which table is to be filled. + */ +void crc8_populate_lsb(u8 table[CRC8_TABLE_SIZE], u8 polynomial) +{ + int i, j; + u8 t = 1; + + table[0] = 0; + + for (i = (CRC8_TABLE_SIZE >> 1); i; i >>= 1) { + t = (t >> 1) ^ (t & 1 ? polynomial : 0); + for (j = 0; j < CRC8_TABLE_SIZE; j += 2*i) + table[i+j] = table[j] ^ t; + } +} +EXPORT_SYMBOL(crc8_populate_lsb); + +/* + * crc8 - calculate a crc8 over the given input data. + * + * table: crc table used for calculation. + * pdata: pointer to data buffer. + * nbytes: number of bytes in data buffer. + * crc: previous returned crc8 value. + */ +u8 crc8(const u8 table[CRC8_TABLE_SIZE], u8 *pdata, size_t nbytes, u8 crc) +{ + /* loop over the buffer data */ + while (nbytes-- > 0) + crc = table[(crc ^ *pdata++) & 0xff]; + + return crc; +} +EXPORT_SYMBOL(crc8); + +MODULE_DESCRIPTION("CRC8 (by Williams, Ross N.) function"); +MODULE_AUTHOR("Broadcom Corporation"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index deebcc57d4e6..a78b7c6e042c 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -198,7 +198,7 @@ static void free_object(struct debug_obj *obj) * initialized: */ if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) - sched = !work_pending(&debug_obj_work); + sched = keventd_up() && !work_pending(&debug_obj_work); hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; obj_pool_used--; @@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr) static void debug_print_object(struct debug_obj *obj, char *msg) { + struct debug_obj_descr *descr = obj->descr; static int limit; - if (limit < 5 && obj->descr != descr_test) { + if (limit < 5 && descr != descr_test) { + void *hint = descr->debug_hint ? 
+ descr->debug_hint(obj->object) : NULL; limit++; WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " - "object type: %s\n", + "object type: %s hint: %pS\n", msg, obj_states[obj->state], obj->astate, - obj->descr->name); + descr->name, hint); } debug_objects_warnings++; } diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index e73822aa6e9a..b5257725daad 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -1,6 +1,6 @@ #include <linux/module.h> #include <linux/spinlock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * This is an implementation of the notion of "decrement a diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index cecd23df2b9a..9f34eb56854d 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -83,7 +83,7 @@ * safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536 * = 128 + (uncompressed_size >> 12) + 65536 * - * For comparision, according to arch/x86/boot/compressed/misc.c, the + * For comparison, according to arch/x86/boot/compressed/misc.c, the * equivalent formula for Deflate is this: * * safety_margin = 18 + (uncompressed_size >> 12) + 32768 diff --git a/lib/devres.c b/lib/devres.c index 6efddf53b90c..7c0e953a7486 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -79,9 +79,9 @@ EXPORT_SYMBOL(devm_ioremap_nocache); */ void devm_iounmap(struct device *dev, void __iomem *addr) { - iounmap(addr); WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, (void *)addr)); + iounmap(addr); } EXPORT_SYMBOL(devm_iounmap); diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 4bfb0471f106..db07bfd9298e 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -649,7 +649,7 @@ out_err: return -ENOMEM; } -static int device_dma_allocations(struct device *dev) +static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) { struct dma_debug_entry *entry; unsigned long flags; @@ -660,8 +660,10 @@ static int device_dma_allocations(struct device *dev) for (i = 0; i < HASH_SIZE; ++i) { spin_lock(&dma_entry_hash[i].lock); list_for_each_entry(entry, &dma_entry_hash[i].list, list) { - if (entry->dev == dev) + if (entry->dev == dev) { count += 1; + *out_entry = entry; + } } spin_unlock(&dma_entry_hash[i].lock); } @@ -674,6 +676,7 @@ static int device_dma_allocations(struct device *dev) static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; + struct dma_debug_entry *uninitialized_var(entry); int count; if (global_disable) @@ -681,12 +684,17 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: - count = device_dma_allocations(dev); + count = device_dma_allocations(dev, &entry); if (count == 0) break; - err_printk(dev, NULL, "DMA-API: device driver has pending " + err_printk(dev, entry, "DMA-API: device driver has pending " "DMA allocations while released from device " - "[count=%d]\n", count); + "[count=%d]\n" + "One of leaked entries details: " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [mapped as %s]\n", + count, entry->dev_addr, entry->size, + dir2name[entry->direction], type2name[entry->type]); break; default: break; diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index b335acb43be2..75ca78f3a8c9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -7,6 +7,7 @@ * Copyright (C) 2008 Jason Baron <jbaron@redhat.com> * By Greg Banks <gnb@melbourne.sgi.com> * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. 
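Returning to the crc8 helpers added a little earlier: usage is two steps, fill a table once, then feed buffers through it. The polynomial 0x07 and the 0xff seed below are illustrative; a real user takes both from its protocol spec, and would fill the table once at probe time rather than per call.

#include <linux/crc8.h>
#include <linux/types.h>

static u8 example_table[CRC8_TABLE_SIZE];

static u8 example_crc8(const u8 *buf, size_t len)
{
	crc8_populate_msb(example_table, 0x07);	/* x^8 + x^2 + x + 1 */

	/* crc8() takes a non-const buffer pointer in this version,
	 * hence the cast. */
	return crc8(example_table, (u8 *)buf, len, 0xff);
}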
+ * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. */ #include <linux/kernel.h> @@ -27,6 +28,8 @@ #include <linux/debugfs.h> #include <linux/slab.h> #include <linux/jump_label.h> +#include <linux/hardirq.h> +#include <linux/sched.h> extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -63,15 +66,25 @@ static inline const char *basename(const char *path) return tail ? tail+1 : path; } +static struct { unsigned flag:8; char opt_char; } opt_array[] = { + { _DPRINTK_FLAGS_PRINT, 'p' }, + { _DPRINTK_FLAGS_INCL_MODNAME, 'm' }, + { _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' }, + { _DPRINTK_FLAGS_INCL_LINENO, 'l' }, + { _DPRINTK_FLAGS_INCL_TID, 't' }, +}; + /* format a string into buf[] which describes the _ddebug's flags */ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, size_t maxlen) { char *p = buf; + int i; BUG_ON(maxlen < 4); - if (dp->flags & _DPRINTK_FLAGS_PRINT) - *p++ = 'p'; + for (i = 0; i < ARRAY_SIZE(opt_array); ++i) + if (dp->flags & opt_array[i].flag) + *p++ = opt_array[i].opt_char; if (p == buf) *p++ = '-'; *p = '\0'; @@ -343,7 +356,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, unsigned int *maskp) { unsigned flags = 0; - int op = '='; + int op = '=', i; switch (*str) { case '+': @@ -358,13 +371,14 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, printk(KERN_INFO "%s: op='%c'\n", __func__, op); for ( ; *str ; ++str) { - switch (*str) { - case 'p': - flags |= _DPRINTK_FLAGS_PRINT; - break; - default: - return -EINVAL; + for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { + if (*str == opt_array[i].opt_char) { + flags |= opt_array[i].flag; + break; + } } + if (i < 0) + return -EINVAL; } if (flags == 0) return -EINVAL; @@ -413,6 +427,35 @@ static int ddebug_exec_query(char *query_string) return 0; } +int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) +{ + va_list args; + int res; + + BUG_ON(!descriptor); + BUG_ON(!fmt); + + va_start(args, fmt); + res = printk(KERN_DEBUG); + if (descriptor->flags & _DPRINTK_FLAGS_INCL_TID) { + if (in_interrupt()) + res += printk(KERN_CONT "<intr> "); + else + res += printk(KERN_CONT "[%d] ", task_pid_vnr(current)); + } + if (descriptor->flags & _DPRINTK_FLAGS_INCL_MODNAME) + res += printk(KERN_CONT "%s:", descriptor->modname); + if (descriptor->flags & _DPRINTK_FLAGS_INCL_FUNCNAME) + res += printk(KERN_CONT "%s:", descriptor->function); + if (descriptor->flags & _DPRINTK_FLAGS_INCL_LINENO) + res += printk(KERN_CONT "%d ", descriptor->lineno); + res += vprintk(fmt, args); + va_end(args); + + return res; +} +EXPORT_SYMBOL(__dynamic_pr_debug); + static __initdata char ddebug_setup_string[1024]; static __init int ddebug_setup_query(char *str) { diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 7e65af70635e..f193b7796449 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -8,7 +8,6 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/stacktrace.h> -#include <linux/kallsyms.h> #include <linux/fault-inject.h> /* @@ -140,16 +139,6 @@ static int debugfs_ul_set(void *data, u64 val) return 0; } -#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER -static int debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val) -{ - *(unsigned long *)data = - val < MAX_STACK_TRACE_DEPTH ? 
- val : MAX_STACK_TRACE_DEPTH; - return 0; -} -#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - static int debugfs_ul_get(void *data, u64 *val) { *val = *(unsigned long *)data; @@ -165,16 +154,26 @@ static struct dentry *debugfs_create_ul(const char *name, mode_t mode, } #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER -DEFINE_SIMPLE_ATTRIBUTE(fops_ul_MAX_STACK_TRACE_DEPTH, debugfs_ul_get, - debugfs_ul_set_MAX_STACK_TRACE_DEPTH, "%llu\n"); -static struct dentry *debugfs_create_ul_MAX_STACK_TRACE_DEPTH( +static int debugfs_stacktrace_depth_set(void *data, u64 val) +{ + *(unsigned long *)data = + min_t(unsigned long, val, MAX_STACK_TRACE_DEPTH); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stacktrace_depth, debugfs_ul_get, + debugfs_stacktrace_depth_set, "%llu\n"); + +static struct dentry *debugfs_create_stacktrace_depth( const char *name, mode_t mode, struct dentry *parent, unsigned long *value) { return debugfs_create_file(name, mode, parent, value, - &fops_ul_MAX_STACK_TRACE_DEPTH); + &fops_stacktrace_depth); } + #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ static int debugfs_atomic_t_set(void *data, u64 val) @@ -198,118 +197,51 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -void cleanup_fault_attr_dentries(struct fault_attr *attr) -{ - debugfs_remove(attr->dentries.probability_file); - attr->dentries.probability_file = NULL; - - debugfs_remove(attr->dentries.interval_file); - attr->dentries.interval_file = NULL; - - debugfs_remove(attr->dentries.times_file); - attr->dentries.times_file = NULL; - - debugfs_remove(attr->dentries.space_file); - attr->dentries.space_file = NULL; - - debugfs_remove(attr->dentries.verbose_file); - attr->dentries.verbose_file = NULL; - - debugfs_remove(attr->dentries.task_filter_file); - attr->dentries.task_filter_file = NULL; - -#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER - - debugfs_remove(attr->dentries.stacktrace_depth_file); - attr->dentries.stacktrace_depth_file = NULL; - - debugfs_remove(attr->dentries.require_start_file); - attr->dentries.require_start_file = NULL; - - debugfs_remove(attr->dentries.require_end_file); - attr->dentries.require_end_file = NULL; - - debugfs_remove(attr->dentries.reject_start_file); - attr->dentries.reject_start_file = NULL; - - debugfs_remove(attr->dentries.reject_end_file); - attr->dentries.reject_end_file = NULL; - -#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - - if (attr->dentries.dir) - WARN_ON(!simple_empty(attr->dentries.dir)); - - debugfs_remove(attr->dentries.dir); - attr->dentries.dir = NULL; -} - -int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - memset(&attr->dentries, 0, sizeof(attr->dentries)); - - dir = debugfs_create_dir(name, NULL); + dir = debugfs_create_dir(name, parent); if (!dir) - goto fail; - attr->dentries.dir = dir; - - attr->dentries.probability_file = - debugfs_create_ul("probability", mode, dir, &attr->probability); + return ERR_PTR(-ENOMEM); - attr->dentries.interval_file = - debugfs_create_ul("interval", mode, dir, &attr->interval); - - attr->dentries.times_file = - debugfs_create_atomic_t("times", mode, dir, &attr->times); - - attr->dentries.space_file = - debugfs_create_atomic_t("space", mode, dir, &attr->space); - - attr->dentries.verbose_file = - 
debugfs_create_ul("verbose", mode, dir, &attr->verbose); - - attr->dentries.task_filter_file = debugfs_create_bool("task-filter", - mode, dir, &attr->task_filter); - - if (!attr->dentries.probability_file || !attr->dentries.interval_file || - !attr->dentries.times_file || !attr->dentries.space_file || - !attr->dentries.verbose_file || !attr->dentries.task_filter_file) + if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) + goto fail; + if (!debugfs_create_ul("interval", mode, dir, &attr->interval)) + goto fail; + if (!debugfs_create_atomic_t("times", mode, dir, &attr->times)) + goto fail; + if (!debugfs_create_atomic_t("space", mode, dir, &attr->space)) + goto fail; + if (!debugfs_create_ul("verbose", mode, dir, &attr->verbose)) + goto fail; + if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter)) goto fail; #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER - attr->dentries.stacktrace_depth_file = - debugfs_create_ul_MAX_STACK_TRACE_DEPTH( - "stacktrace-depth", mode, dir, &attr->stacktrace_depth); - - attr->dentries.require_start_file = - debugfs_create_ul("require-start", mode, dir, &attr->require_start); - - attr->dentries.require_end_file = - debugfs_create_ul("require-end", mode, dir, &attr->require_end); - - attr->dentries.reject_start_file = - debugfs_create_ul("reject-start", mode, dir, &attr->reject_start); - - attr->dentries.reject_end_file = - debugfs_create_ul("reject-end", mode, dir, &attr->reject_end); - - if (!attr->dentries.stacktrace_depth_file || - !attr->dentries.require_start_file || - !attr->dentries.require_end_file || - !attr->dentries.reject_start_file || - !attr->dentries.reject_end_file) + if (!debugfs_create_stacktrace_depth("stacktrace-depth", mode, dir, + &attr->stacktrace_depth)) + goto fail; + if (!debugfs_create_ul("require-start", mode, dir, + &attr->require_start)) + goto fail; + if (!debugfs_create_ul("require-end", mode, dir, &attr->require_end)) + goto fail; + if (!debugfs_create_ul("reject-start", mode, dir, &attr->reject_start)) + goto fail; + if (!debugfs_create_ul("reject-end", mode, dir, &attr->reject_end)) goto fail; #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return 0; + return dir; fail: - cleanup_fault_attr_dentries(attr); - return -ENOMEM; + debugfs_remove_recursive(dir); + + return ERR_PTR(-ENOMEM); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 5d202e36bdd8..d903959ad695 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -15,6 +15,8 @@ #include <asm/types.h> #include <asm/byteorder.h> +#ifndef find_last_bit + unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { unsigned long words; @@ -43,3 +45,5 @@ found: return size; } EXPORT_SYMBOL(find_last_bit); + +#endif diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 24c59ded47a0..4bd75a73ba00 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -16,7 +16,7 @@ #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) -#ifdef CONFIG_GENERIC_FIND_NEXT_BIT +#ifndef find_next_bit /* * Find the next set bit in a memory region. */ @@ -59,7 +59,9 @@ found_middle: return result + __ffs(tmp); } EXPORT_SYMBOL(find_next_bit); +#endif +#ifndef find_next_zero_bit /* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. 
@@ -103,9 +105,9 @@ found_middle: return result + ffz(tmp); } EXPORT_SYMBOL(find_next_zero_bit); -#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ +#endif -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT +#ifndef find_first_bit /* * Find the first set bit in a memory region. */ @@ -131,7 +133,9 @@ found: return result + __ffs(tmp); } EXPORT_SYMBOL(find_first_bit); +#endif +#ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. */ @@ -157,7 +161,7 @@ found: return result + ffz(tmp); } EXPORT_SYMBOL(find_first_zero_bit); -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#endif #ifdef __BIG_ENDIAN @@ -185,15 +189,17 @@ static inline unsigned long ext2_swab(const unsigned long y) #endif } -unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned +#ifndef find_next_zero_bit_le +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr; unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; if (offset >= size) return size; + p += BITOP_WORD(offset); size -= result; offset &= (BITS_PER_LONG - 1UL); if (offset) { @@ -226,18 +232,20 @@ found_middle: found_middle_swap: return result + ffz(ext2_swab(tmp)); } +EXPORT_SYMBOL(find_next_zero_bit_le); +#endif -EXPORT_SYMBOL(generic_find_next_zero_le_bit); - -unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned +#ifndef find_next_bit_le +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr; unsigned long result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; if (offset >= size) return size; + p += BITOP_WORD(offset); size -= result; offset &= (BITS_PER_LONG - 1UL); if (offset) { @@ -271,5 +279,7 @@ found_middle: found_middle_swap: return result + __ffs(ext2_swab(tmp)); } -EXPORT_SYMBOL(generic_find_next_le_bit); +EXPORT_SYMBOL(find_next_bit_le); +#endif + #endif /* __BIG_ENDIAN */ diff --git a/lib/flex_array.c b/lib/flex_array.c index c0ea40ba2082..9b8b89458c4c 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/stddef.h> #include <linux/module.h> +#include <linux/reciprocal_div.h> struct flex_array_part { char elements[FLEX_ARRAY_PART_SIZE]; @@ -70,15 +71,15 @@ static inline int elements_fit_in_base(struct flex_array *fa) * Element size | Objects | Objects | * PAGE_SIZE=4k | 32-bit | 64-bit | * ---------------------------------| - * 1 bytes | 4186112 | 2093056 | - * 2 bytes | 2093056 | 1046528 | - * 3 bytes | 1395030 | 697515 | - * 4 bytes | 1046528 | 523264 | - * 32 bytes | 130816 | 65408 | - * 33 bytes | 126728 | 63364 | - * 2048 bytes | 2044 | 1022 | - * 2049 bytes | 1022 | 511 | - * void * | 1046528 | 261632 | + * 1 bytes | 4177920 | 2088960 | + * 2 bytes | 2088960 | 1044480 | + * 3 bytes | 1392300 | 696150 | + * 4 bytes | 1044480 | 522240 | + * 32 bytes | 130560 | 65408 | + * 33 bytes | 126480 | 63240 | + * 2048 bytes | 2040 | 1020 | + * 2049 bytes | 1020 | 510 | + * void * | 1044480 | 261120 | * * Since 64-bit pointers are twice the size, we lose half the * capacity in the base structure. 
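The flex_array conversion just below caches reciprocal_value() at allocation time so that fa_element_to_part_nr() can use reciprocal_divide() instead of a hardware divide on every element access. A sketch of the idea, assuming the linux/reciprocal_div.h interface of this era (a multiply and shift whose rounding matches a true divide for the operand ranges flex_array uses):

#include <linux/reciprocal_div.h>

static u32 example_fast_div(u32 a, u32 d)
{
	u32 r = reciprocal_value(d);	/* costly, compute once per divisor */

	return reciprocal_divide(a, r);	/* cheap multiply + shift, ~ a / d */
}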
Also note that no effort is made @@ -88,8 +89,15 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags) { struct flex_array *ret; - int max_size = FLEX_ARRAY_NR_BASE_PTRS * - FLEX_ARRAY_ELEMENTS_PER_PART(element_size); + int elems_per_part = 0; + int reciprocal_elems = 0; + int max_size = 0; + + if (element_size) { + elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); + reciprocal_elems = reciprocal_value(elems_per_part); + max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; + } /* max_size will end up 0 if element_size > PAGE_SIZE */ if (total > max_size) @@ -99,6 +107,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, return NULL; ret->element_size = element_size; ret->total_nr_elements = total; + ret->elems_per_part = elems_per_part; + ret->reciprocal_elems = reciprocal_elems; if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) memset(&ret->parts[0], FLEX_ARRAY_FREE, FLEX_ARRAY_BASE_BYTES_LEFT); @@ -109,7 +119,7 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { - return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); + return reciprocal_divide(element_nr, fa->reciprocal_elems); } /** @@ -138,12 +148,12 @@ void flex_array_free(struct flex_array *fa) EXPORT_SYMBOL(flex_array_free); static unsigned int index_inside_part(struct flex_array *fa, - unsigned int element_nr) + unsigned int element_nr, + unsigned int part_nr) { unsigned int part_offset; - part_offset = element_nr % - FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); + part_offset = element_nr - part_nr * fa->elems_per_part; return part_offset * fa->element_size; } @@ -183,20 +193,23 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags) { - int part_nr = fa_element_to_part_nr(fa, element_nr); + int part_nr = 0; struct flex_array_part *part; void *dst; if (element_nr >= fa->total_nr_elements) return -ENOSPC; + if (!fa->element_size) + return 0; if (elements_fit_in_base(fa)) part = (struct flex_array_part *)&fa->parts[0]; else { + part_nr = fa_element_to_part_nr(fa, element_nr); part = __fa_get_part(fa, part_nr, flags); if (!part) return -ENOMEM; } - dst = &part->elements[index_inside_part(fa, element_nr)]; + dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; memcpy(dst, src, fa->element_size); return 0; } @@ -211,20 +224,23 @@ EXPORT_SYMBOL(flex_array_put); */ int flex_array_clear(struct flex_array *fa, unsigned int element_nr) { - int part_nr = fa_element_to_part_nr(fa, element_nr); + int part_nr = 0; struct flex_array_part *part; void *dst; if (element_nr >= fa->total_nr_elements) return -ENOSPC; + if (!fa->element_size) + return 0; if (elements_fit_in_base(fa)) part = (struct flex_array_part *)&fa->parts[0]; else { + part_nr = fa_element_to_part_nr(fa, element_nr); part = fa->parts[part_nr]; if (!part) return -EINVAL; } - dst = &part->elements[index_inside_part(fa, element_nr)]; + dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; memset(dst, FLEX_ARRAY_FREE, fa->element_size); return 0; } @@ -232,10 +248,10 @@ EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: 
the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -245,15 +261,27 @@ EXPORT_SYMBOL(flex_array_clear); * Locking must be provided by the caller. */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; - if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) + if (!start && !nr_elements) + return 0; + if (start >= fa->total_nr_elements) return -ENOSPC; + if (!nr_elements) + return 0; + + end = start + nr_elements - 1; + + if (end >= fa->total_nr_elements) + return -ENOSPC; + if (!fa->element_size) + return 0; if (elements_fit_in_base(fa)) return 0; start_part = fa_element_to_part_nr(fa, start); @@ -281,19 +309,22 @@ EXPORT_SYMBOL(flex_array_prealloc); */ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) { - int part_nr = fa_element_to_part_nr(fa, element_nr); + int part_nr = 0; struct flex_array_part *part; + if (!fa->element_size) + return NULL; if (element_nr >= fa->total_nr_elements) return NULL; if (elements_fit_in_base(fa)) part = (struct flex_array_part *)&fa->parts[0]; else { + part_nr = fa_element_to_part_nr(fa, element_nr); part = fa->parts[part_nr]; if (!part) return NULL; } - return &part->elements[index_inside_part(fa, element_nr)]; + return &part->elements[index_inside_part(fa, element_nr, part_nr)]; } EXPORT_SYMBOL(flex_array_get); @@ -343,6 +374,8 @@ int flex_array_shrink(struct flex_array *fa) int part_nr; int ret = 0; + if (!fa->total_nr_elements || !fa->element_size) + return 0; if (elements_fit_in_base(fa)) return ret; for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { diff --git a/lib/genalloc.c b/lib/genalloc.c index 1923f1490e72..f352cc42f4f8 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -1,8 +1,26 @@ /* - * Basic general purpose allocator for managing special purpose memory - * not managed by the regular kmalloc/kfree interface. - * Uses for this includes on-device special memory, uncached memory - * etc. + * Basic general purpose allocator for managing special purpose + * memory, for example, memory that is not managed by the regular + * kmalloc/kfree interface. Uses for this includes on-device special + * memory, uncached memory etc. + * + * It is safe to use the allocator in NMI handlers and other special + * unblockable contexts that could otherwise deadlock on locks. This + * is implemented by using atomic operations and retries on any + * conflicts. The disadvantage is that there may be livelocks in + * extreme cases. For better scalability, one allocator can be used + * for each CPU. + * + * The lockless operation only works if there is enough memory + * available. If new memory is added to the pool a lock has to be + * still taken. So any user relying on locklessness has to ensure + * that sufficient memory is preallocated. + * + * The basic atomic operation of this allocator is cmpxchg on long. + * On architectures that don't have NMI-safe cmpxchg implementation, + * the allocator can NOT be used in NMI handler. So code uses the + * allocator in NMI handler should depend on + * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. 
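A consolidated sketch of the flex_array API as it stands after this patch (element type, sizes and names are invented; note that flex_array_prealloc() now takes a number of elements, not an end index):

#include <linux/flex_array.h>
#include <linux/gfp.h>
#include <linux/types.h>

struct example_elem {
	u64 a, b;
};

static struct flex_array *example_make(unsigned int n)
{
	struct example_elem first = { .a = 1, .b = 2 };
	struct flex_array *fa;

	fa = flex_array_alloc(sizeof(struct example_elem), n, GFP_KERNEL);
	if (!fa)
		return NULL;

	/* Guarantees parts for elements 0..n-1 exist, so later puts
	 * from atomic context cannot fail with -ENOMEM. */
	if (flex_array_prealloc(fa, 0, n, GFP_KERNEL)) {
		flex_array_free(fa);
		return NULL;
	}

	flex_array_put(fa, 0, &first, GFP_ATOMIC);
	return fa;
}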
* * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> * @@ -13,8 +31,109 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/bitmap.h> +#include <linux/rculist.h> +#include <linux/interrupt.h> #include <linux/genalloc.h> +static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if (val & mask_to_set) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); + + return 0; +} + +static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if ((val & mask_to_clear) != mask_to_clear) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); + + return 0; +} + +/* + * bitmap_set_ll - set the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Set @nr bits start from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without lock. If two + * users set the same bit, one user will return remain bits, otherwise + * return 0. + */ +static int bitmap_set_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + if (set_bits_ll(p, mask_to_set)) + return nr; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + if (set_bits_ll(p, mask_to_set)) + return nr; + } + + return 0; +} + +/* + * bitmap_clear_ll - clear the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Clear @nr bits start from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without lock. If two + * users clear the same bit, one user will return remain bits, + * otherwise return 0. 
+ */ +static int bitmap_clear_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + if (clear_bits_ll(p, mask_to_clear)) + return nr; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + if (clear_bits_ll(p, mask_to_clear)) + return nr; + } + + return 0; +} /** * gen_pool_create - create a new special memory pool @@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); if (pool != NULL) { - rwlock_init(&pool->lock); + spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->chunks); pool->min_alloc_order = min_alloc_order; } @@ -39,17 +158,20 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) EXPORT_SYMBOL(gen_pool_create); /** - * gen_pool_add - add a new chunk of special memory to the pool + * gen_pool_add_virt - add a new chunk of special memory to the pool * @pool: pool to add new memory chunk to - * @addr: starting address of memory chunk to add to pool + * @virt: virtual starting address of memory chunk to add to pool + * @phys: physical starting address of memory chunk to add to pool * @size: size in bytes of the memory chunk to add to pool * @nid: node id of the node the chunk structure and bitmap should be * allocated on, or -1 * * Add a new chunk of special memory to the specified pool. + * + * Returns 0 on success or a -ve errno on failure. */ -int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, - int nid) +int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys, + size_t size, int nid) { struct gen_pool_chunk *chunk; int nbits = size >> pool->min_alloc_order; @@ -58,19 +180,45 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); if (unlikely(chunk == NULL)) - return -1; + return -ENOMEM; - spin_lock_init(&chunk->lock); - chunk->start_addr = addr; - chunk->end_addr = addr + size; + chunk->phys_addr = phys; + chunk->start_addr = virt; + chunk->end_addr = virt + size; + atomic_set(&chunk->avail, size); - write_lock(&pool->lock); - list_add(&chunk->next_chunk, &pool->chunks); - write_unlock(&pool->lock); + spin_lock(&pool->lock); + list_add_rcu(&chunk->next_chunk, &pool->chunks); + spin_unlock(&pool->lock); return 0; } -EXPORT_SYMBOL(gen_pool_add); +EXPORT_SYMBOL(gen_pool_add_virt); + +/** + * gen_pool_virt_to_phys - return the physical address of memory + * @pool: pool to allocate from + * @addr: starting address of memory + * + * Returns the physical address on success, or -1 on error. 
+ */ +phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) +{ + struct gen_pool_chunk *chunk; + phys_addr_t paddr = -1; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + paddr = chunk->phys_addr + (addr - chunk->start_addr); + break; + } + } + rcu_read_unlock(); + + return paddr; +} +EXPORT_SYMBOL(gen_pool_virt_to_phys); /** * gen_pool_destroy - destroy a special memory pool @@ -86,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool) int order = pool->min_alloc_order; int bit, end_bit; - list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); list_del(&chunk->next_chunk); @@ -108,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy); * @size: number of bytes to allocate from the pool * * Allocate the requested number of bytes from the specified pool. - * Uses a first-fit algorithm. + * Uses a first-fit algorithm. Can not be used in NMI handler on + * architectures without NMI-safe cmpxchg implementation. */ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long addr, flags; + unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit; + int nbits, start_bit = 0, end_bit, remain; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif if (size == 0) return 0; nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (size > atomic_read(&chunk->avail)) + continue; end_bit = (chunk->end_addr - chunk->start_addr) >> order; - - spin_lock_irqsave(&chunk->lock, flags); - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, - nbits, 0); - if (start_bit >= end_bit) { - spin_unlock_irqrestore(&chunk->lock, flags); +retry: + start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, + start_bit, nbits, 0); + if (start_bit >= end_bit) continue; + remain = bitmap_set_ll(chunk->bits, start_bit, nbits); + if (remain) { + remain = bitmap_clear_ll(chunk->bits, start_bit, + nbits - remain); + BUG_ON(remain); + goto retry; } addr = chunk->start_addr + ((unsigned long)start_bit << order); - - bitmap_set(chunk->bits, start_bit, nbits); - spin_unlock_irqrestore(&chunk->lock, flags); - read_unlock(&pool->lock); - return addr; + size = nbits << order; + atomic_sub(size, &chunk->avail); + break; } - read_unlock(&pool->lock); - return 0; + rcu_read_unlock(); + return addr; } EXPORT_SYMBOL(gen_pool_alloc); @@ -155,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc); * @addr: starting address of memory to free back to pool * @size: size in bytes of memory to free * - * Free previously allocated special memory back to the specified pool. + * Free previously allocated special memory back to the specified + * pool. Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. 
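Pulling the reworked gen_pool interface together, a driver managing a hypothetical 64 KiB block of on-chip SRAM at 32-byte granularity might do the following; the addresses come from the caller and all names are illustrative:

#include <linux/genalloc.h>
#include <linux/types.h>

static unsigned long example_sram_alloc(unsigned long virt, phys_addr_t phys)
{
	struct gen_pool *pool = gen_pool_create(5, -1);	/* 2^5 = 32 B */
	unsigned long addr;

	if (!pool)
		return 0;

	if (gen_pool_add_virt(pool, virt, phys, 65536, -1)) {
		gen_pool_destroy(pool);
		return 0;
	}

	addr = gen_pool_alloc(pool, 128);	/* 0 means no space */
	/* ... later: gen_pool_free(pool, addr, 128); and
	 * gen_pool_virt_to_phys(pool, addr) for DMA addresses. */
	return addr;
}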
*/ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long flags; int order = pool->min_alloc_order; - int bit, nbits; + int start_bit, nbits, remain; - nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + nbits = (size + (1UL << order) - 1) >> order; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { if (addr >= chunk->start_addr && addr < chunk->end_addr) { BUG_ON(addr + size > chunk->end_addr); - spin_lock_irqsave(&chunk->lock, flags); - bit = (addr - chunk->start_addr) >> order; - while (nbits--) - __clear_bit(bit++, chunk->bits); - spin_unlock_irqrestore(&chunk->lock, flags); - break; + start_bit = (addr - chunk->start_addr) >> order; + remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); + BUG_ON(remain); + size = nbits << order; + atomic_add(size, &chunk->avail); + rcu_read_unlock(); + return; } } - BUG_ON(nbits > 0); - read_unlock(&pool->lock); + rcu_read_unlock(); + BUG(); } EXPORT_SYMBOL(gen_pool_free); + +/** + * gen_pool_for_each_chunk - call func for every chunk of generic memory pool + * @pool: the generic memory pool + * @func: func to call + * @data: additional data used by @func + * + * Call @func for every chunk of generic memory pool. The @func is + * called with rcu_read_lock held. + */ +void gen_pool_for_each_chunk(struct gen_pool *pool, + void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), + void *data) +{ + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) + func(pool, chunk, data); + rcu_read_unlock(); +} +EXPORT_SYMBOL(gen_pool_for_each_chunk); + +/** + * gen_pool_avail - get available free space of the pool + * @pool: pool to get available free space + * + * Return available free space of the specified pool. + */ +size_t gen_pool_avail(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t avail = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + avail += atomic_read(&chunk->avail); + rcu_read_unlock(); + return avail; +} +EXPORT_SYMBOL_GPL(gen_pool_avail); + +/** + * gen_pool_size - get size in bytes of memory managed by the pool + * @pool: pool to get size + * + * Return size in bytes of memory managed by the pool. + */ +size_t gen_pool_size(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t size = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + size += chunk->end_addr - chunk->start_addr; + rcu_read_unlock(); + return size; +} +EXPORT_SYMBOL_GPL(gen_pool_size); diff --git a/lib/idr.c b/lib/idr.c index e15502e8b21e..db040ce3fa73 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -34,8 +34,10 @@ #include <linux/err.h> #include <linux/string.h> #include <linux/idr.h> +#include <linux/spinlock.h> static struct kmem_cache *idr_layer_cache; +static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida) EXPORT_SYMBOL(ida_destroy); /** + * ida_simple_get - get a new id. + * @ida: the (initialized) ida. 
+ * @start: the minimum id (inclusive, < 0x8000000) + * @end: the maximum id (exclusive, < 0x8000000 or 0) + * @gfp_mask: memory allocation flags + * + * Allocates an id in the range start <= id < end, or returns -ENOSPC. + * On memory allocation failure, returns -ENOMEM. + * + * Use ida_simple_remove() to get rid of an id. + */ +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask) +{ + int ret, id; + unsigned int max; + + BUG_ON((int)start < 0); + BUG_ON((int)end < 0); + + if (end == 0) + max = 0x80000000; + else { + BUG_ON(end < start); + max = end - 1; + } + +again: + if (!ida_pre_get(ida, gfp_mask)) + return -ENOMEM; + + spin_lock(&simple_ida_lock); + ret = ida_get_new_above(ida, start, &id); + if (!ret) { + if (id > max) { + ida_remove(ida, id); + ret = -ENOSPC; + } else { + ret = id; + } + } + spin_unlock(&simple_ida_lock); + + if (unlikely(ret == -EAGAIN)) + goto again; + + return ret; +} +EXPORT_SYMBOL(ida_simple_get); + +/** + * ida_simple_remove - remove an allocated id. + * @ida: the (initialized) ida. + * @id: the id returned by ida_simple_get. + */ +void ida_simple_remove(struct ida *ida, unsigned int id) +{ + BUG_ON((int)id < 0); + spin_lock(&simple_ida_lock); + ida_remove(ida, id); + spin_unlock(&simple_ida_lock); +} +EXPORT_SYMBOL(ida_simple_remove); + +/** * ida_init - initialize ida handle * @ida: ida handle * diff --git a/lib/iomap.c b/lib/iomap.c index d32229385151..5dbcb4b2d864 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -224,6 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); +#ifdef CONFIG_HAS_IOPORT /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -238,7 +239,9 @@ void ioport_unmap(void __iomem *addr) } EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#endif /* CONFIG_HAS_IOPORT */ +#ifdef CONFIG_PCI /** * pci_iomap - create a virtual mapping cookie for a PCI BAR * @dev: PCI device that owns the BAR @@ -280,3 +283,4 @@ void pci_iounmap(struct pci_dev *dev, void __iomem * addr) } EXPORT_SYMBOL(pci_iomap); EXPORT_SYMBOL(pci_iounmap); +#endif /* CONFIG_PCI */ diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c deleted file mode 100644 index b135d04aa48a..000000000000 --- a/lib/kernel_lock.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * lib/kernel_lock.c - * - * This is the traditional BKL - big kernel lock. Largely - * relegated to obsolescence, but used by various less - * important (or lazy) subsystems. - */ -#include <linux/module.h> -#include <linux/kallsyms.h> -#include <linux/semaphore.h> -#include <linux/smp_lock.h> - -#define CREATE_TRACE_POINTS -#include <trace/events/bkl.h> - -/* - * The 'big kernel lock' - * - * This spinlock is taken and released recursively by lock_kernel() - * and unlock_kernel(). It is transparently dropped and reacquired - * over schedule(). It is used to protect legacy code that hasn't - * been migrated to a proper locking design yet. - * - * Don't use in new code. - */ -static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag); - - -/* - * Acquire/release the underlying lock from the scheduler. - * - * This is called with preemption disabled, and should - * return an error value if it cannot get the lock and - * TIF_NEED_RESCHED gets set. - * - * If it successfully gets the lock, it should increment - * the preemption count like any spinlock does. 
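The ida_simple_*() pair above reduces the usual ida_pre_get()/ida_get_new_above()/retry dance to two calls. A typical minor-number allocator would look like this (the range and names are invented):

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(example_ida);

static int example_get_minor(void)
{
	/* Lowest free id in [0, 256); -ENOSPC when the range is full,
	 * -ENOMEM on allocation failure. */
	return ida_simple_get(&example_ida, 0, 256, GFP_KERNEL);
}

static void example_put_minor(int minor)
{
	ida_simple_remove(&example_ida, minor);
}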
- * - * (This works on UP too - do_raw_spin_trylock will never - * return false in that case) - */ -int __lockfunc __reacquire_kernel_lock(void) -{ - while (!do_raw_spin_trylock(&kernel_flag)) { - if (need_resched()) - return -EAGAIN; - cpu_relax(); - } - preempt_disable(); - return 0; -} - -void __lockfunc __release_kernel_lock(void) -{ - do_raw_spin_unlock(&kernel_flag); - preempt_enable_no_resched(); -} - -/* - * These are the BKL spinlocks - we try to be polite about preemption. - * If SMP is not on (ie UP preemption), this all goes away because the - * do_raw_spin_trylock() will always succeed. - */ -#ifdef CONFIG_PREEMPT -static inline void __lock_kernel(void) -{ - preempt_disable(); - if (unlikely(!do_raw_spin_trylock(&kernel_flag))) { - /* - * If preemption was disabled even before this - * was called, there's nothing we can be polite - * about - just spin. - */ - if (preempt_count() > 1) { - do_raw_spin_lock(&kernel_flag); - return; - } - - /* - * Otherwise, let's wait for the kernel lock - * with preemption enabled.. - */ - do { - preempt_enable(); - while (raw_spin_is_locked(&kernel_flag)) - cpu_relax(); - preempt_disable(); - } while (!do_raw_spin_trylock(&kernel_flag)); - } -} - -#else - -/* - * Non-preemption case - just get the spinlock - */ -static inline void __lock_kernel(void) -{ - do_raw_spin_lock(&kernel_flag); -} -#endif - -static inline void __unlock_kernel(void) -{ - /* - * the BKL is not covered by lockdep, so we open-code the - * unlocking sequence (and thus avoid the dep-chain ops): - */ - do_raw_spin_unlock(&kernel_flag); - preempt_enable(); -} - -/* - * Getting the big kernel lock. - * - * This cannot happen asynchronously, so we only need to - * worry about other CPU's. - */ -void __lockfunc _lock_kernel(const char *func, const char *file, int line) -{ - int depth = current->lock_depth + 1; - - trace_lock_kernel(func, file, line); - - if (likely(!depth)) { - might_sleep(); - __lock_kernel(); - } - current->lock_depth = depth; -} - -void __lockfunc _unlock_kernel(const char *func, const char *file, int line) -{ - BUG_ON(current->lock_depth < 0); - if (likely(--current->lock_depth < 0)) - __unlock_kernel(); - - trace_unlock_kernel(func, file, line); -} - -EXPORT_SYMBOL(_lock_kernel); -EXPORT_SYMBOL(_unlock_kernel); - diff --git a/lib/kobject.c b/lib/kobject.c index 82dc34c095c2..640bd98a4c8a 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) } -const void *kobj_ns_current(enum kobj_ns_type type) +void *kobj_ns_grab_current(enum kobj_ns_type type) { - const void *ns = NULL; + void *ns = NULL; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->current_ns(); + ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; @@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } -/* - * kobj_ns_exit - invalidate a namespace tag - * - * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) - * @ns: the actual namespace being invalidated - * - * This is called when a tag is no longer valid. For instance, - * when a network namespace exits, it uses this helper to - * make sure no sb's sysfs_info points to the now-invalidated - * netns. 
- */ -void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +void kobj_ns_drop(enum kobj_ns_type type, void *ns) { - sysfs_exit_ns(type, ns); + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) + kobj_ns_ops_tbl[type]->drop_ns(ns); + spin_unlock(&kobj_ns_type_lock); } - EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/lib/kstrtox.c b/lib/kstrtox.c new file mode 100644 index 000000000000..5e066759f551 --- /dev/null +++ b/lib/kstrtox.c @@ -0,0 +1,245 @@ +/* + * Convert integer string representation to an integer. + * If an integer doesn't fit into specified type, -E is returned. + * + * Integer starts with optional sign. + * kstrtou*() functions do not accept sign "-". + * + * Radix 0 means autodetection: leading "0x" implies radix 16, + * leading "0" implies radix 8, otherwise radix is 10. + * Autodetection hints work after optional sign, but not before. + * + * If -E is returned, result is not touched. + */ +#include <linux/ctype.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/types.h> +#include <asm/uaccess.h> + +static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + unsigned long long acc; + int ok; + + if (base == 0) { + if (s[0] == '0') { + if (_tolower(s[1]) == 'x' && isxdigit(s[2])) + base = 16; + else + base = 8; + } else + base = 10; + } + if (base == 16 && s[0] == '0' && _tolower(s[1]) == 'x') + s += 2; + + acc = 0; + ok = 0; + while (*s) { + unsigned int val; + + if ('0' <= *s && *s <= '9') + val = *s - '0'; + else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f') + val = _tolower(*s) - 'a' + 10; + else if (*s == '\n' && *(s + 1) == '\0') + break; + else + return -EINVAL; + + if (val >= base) + return -EINVAL; + if (acc > div_u64(ULLONG_MAX - val, base)) + return -ERANGE; + acc = acc * base + val; + ok = 1; + + s++; + } + if (!ok) + return -EINVAL; + *res = acc; + return 0; +} + +int kstrtoull(const char *s, unsigned int base, unsigned long long *res) +{ + if (s[0] == '+') + s++; + return _kstrtoull(s, base, res); +} +EXPORT_SYMBOL(kstrtoull); + +int kstrtoll(const char *s, unsigned int base, long long *res) +{ + unsigned long long tmp; + int rv; + + if (s[0] == '-') { + rv = _kstrtoull(s + 1, base, &tmp); + if (rv < 0) + return rv; + if ((long long)(-tmp) >= 0) + return -ERANGE; + *res = -tmp; + } else { + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if ((long long)tmp < 0) + return -ERANGE; + *res = tmp; + } + return 0; +} +EXPORT_SYMBOL(kstrtoll); + +/* Internal, do not use. */ +int _kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(unsigned long)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(_kstrtoul); + +/* Internal, do not use. 
*/ +int _kstrtol(const char *s, unsigned int base, long *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(long)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(_kstrtol); + +int kstrtouint(const char *s, unsigned int base, unsigned int *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(unsigned int)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtouint); + +int kstrtoint(const char *s, unsigned int base, int *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(int)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtoint); + +int kstrtou16(const char *s, unsigned int base, u16 *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(u16)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtou16); + +int kstrtos16(const char *s, unsigned int base, s16 *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(s16)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtos16); + +int kstrtou8(const char *s, unsigned int base, u8 *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(u8)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtou8); + +int kstrtos8(const char *s, unsigned int base, s8 *res) +{ + long long tmp; + int rv; + + rv = kstrtoll(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (long long)(s8)tmp) + return -ERANGE; + *res = tmp; + return 0; +} +EXPORT_SYMBOL(kstrtos8); + +#define kstrto_from_user(f, g, type) \ +int f(const char __user *s, size_t count, unsigned int base, type *res) \ +{ \ + /* sign, base 2 representation, newline, terminator */ \ + char buf[1 + sizeof(type) * 8 + 1 + 1]; \ + \ + count = min(count, sizeof(buf) - 1); \ + if (copy_from_user(buf, s, count)) \ + return -EFAULT; \ + buf[count] = '\0'; \ + return g(buf, base, res); \ +} \ +EXPORT_SYMBOL(f) + +kstrto_from_user(kstrtoull_from_user, kstrtoull, unsigned long long); +kstrto_from_user(kstrtoll_from_user, kstrtoll, long long); +kstrto_from_user(kstrtoul_from_user, kstrtoul, unsigned long); +kstrto_from_user(kstrtol_from_user, kstrtol, long); +kstrto_from_user(kstrtouint_from_user, kstrtouint, unsigned int); +kstrto_from_user(kstrtoint_from_user, kstrtoint, int); +kstrto_from_user(kstrtou16_from_user, kstrtou16, u16); +kstrto_from_user(kstrtos16_from_user, kstrtos16, s16); +kstrto_from_user(kstrtou8_from_user, kstrtou8, u8); +kstrto_from_user(kstrtos8_from_user, kstrtos8, s8); diff --git a/lib/lcm.c b/lib/lcm.c index 157cd88a6ffc..10b5cfcacf6b 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -1,6 +1,7 @@ #include <linux/kernel.h> #include <linux/gcd.h> #include <linux/module.h> +#include <linux/lcm.h> /* Lowest common multiple */ unsigned long lcm(unsigned long a, unsigned long b) diff --git a/lib/llist.c b/lib/llist.c new file mode 100644 index 000000000000..da445724fa1f --- /dev/null +++ b/lib/llist.c @@ -0,0 +1,129 @@ +/* + * Lock-less NULL terminated single linked list + * + * The basic atomic operation of this list is cmpxchg on long. 
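The buf[] sizing in the kstrto_from_user() template above covers the worst case exactly: one sign character, one character per bit for a base-2 representation (sizeof(type) * 8), an optional trailing newline, and the NUL terminator; for long long that is 1 + 64 + 1 + 1 = 67 bytes, so truncating the user copy at sizeof(buf) - 1 can only discard input that would fail parsing anyway. A sketch of a typical consumer; the debugfs-style handler shape and names are illustrative:

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/types.h>

static ssize_t my_store(struct file *file, const char __user *ubuf,
			size_t count, loff_t *ppos)
{
	int val, ret;

	ret = kstrtoint_from_user(ubuf, count, 0, &val); /* base 0: autodetect */
	if (ret)
		return ret;	/* -EFAULT, -EINVAL or -ERANGE; val untouched */
	/* ... apply val ... */
	return count;
}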
On + * architectures that don't have NMI-safe cmpxchg implementation, the + * list can NOT be used in NMI handler. So code uses the list in NMI + * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/llist.h> + +#include <asm/system.h> + +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + */ +void llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add); + +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + */ +void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new_last->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add_batch); + +/** + * llist_del_first - delete the first entry of lock-less list + * @head: the head for your lock-less list + * + * If list is empty, return NULL, otherwise, return the first entry + * deleted, this is the newest added one. + * + * Only one llist_del_first user can be used simultaneously with + * multiple llist_add users without lock. Because otherwise + * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, + * llist_add) sequence in another user may change @head->first->next, + * but keep @head->first. If multiple consumers are needed, please + * use llist_del_all or use lock between consumers. 
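The single-consumer rule spelled out above is the ABA hazard in disguise: a concurrent delete/add pair can put the same node back at head->first with a different ->next, and the cmpxchg in llist_del_first() cannot tell the difference. A sketch of the intended shape, many producers feeding one consumer; struct my_work and the function names are illustrative:

#include <linux/llist.h>

struct my_work {
	struct llist_node node;
	int payload;
};

static LLIST_HEAD(my_list);

static void producer(struct my_work *w)	/* any context, any CPU */
{
	llist_add(&w->node, &my_list);
}

static void consumer(void)		/* strictly one thread at a time */
{
	struct llist_node *n = llist_del_all(&my_list);

	while (n) {
		struct my_work *w = llist_entry(n, struct my_work, node);

		n = n->next;
		/* process w; entries arrive newest-first */
	}
}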
+ */ +struct llist_node *llist_del_first(struct llist_head *head) +{ + struct llist_node *entry, *old_entry, *next; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + if (entry == NULL) + return NULL; + old_entry = entry; + next = entry->next; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + + return entry; +} +EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. + */ +struct llist_node *llist_del_all(struct llist_head *head) +{ +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + return xchg(&head->first, NULL); +} +EXPORT_SYMBOL_GPL(llist_del_all); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 619313ed6c46..507a22fab738 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -144,7 +144,7 @@ static void init_shared_classes(void) #define HARDIRQ_ENTER() \ local_irq_disable(); \ - irq_enter(); \ + __irq_enter(); \ WARN_ON(!in_irq()); #define HARDIRQ_EXIT() \ diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 270de9d31b8c..a07e7268d7ed 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -84,7 +84,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, if (e_count > LC_MAX_ACTIVE) return NULL; - slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL); if (!slot) goto out_fail; element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); diff --git a/lib/md5.c b/lib/md5.c new file mode 100644 index 000000000000..c777180e1f2f --- /dev/null +++ b/lib/md5.c @@ -0,0 +1,95 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cryptohash.h> + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x) + +void md5_transform(__u32 *hash, __u32 const *in) +{ + u32 a, b, c, d; + + a = hash[0]; + b = hash[1]; + c = hash[2]; + d = hash[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + 
MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + hash[0] += a; + hash[1] += b; + hash[2] += c; + hash[3] += d; +} +EXPORT_SYMBOL(md5_transform); diff --git a/lib/nlattr.c b/lib/nlattr.c index 5021cbc34411..ac09f2226dc7 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -148,7 +148,7 @@ nla_policy_len(const struct nla_policy *p, int n) { int i, len = 0; - for (i = 0; i < n; i++) { + for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); else if (nla_attr_minlen[p->type]) diff --git a/lib/parser.c b/lib/parser.c index 6e89eca5cca0..dcbaaef6cf11 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -13,7 +13,7 @@ /** * match_one: - Determines if a string matches a simple pattern - * @s: the string to examine for presense of the pattern + * @s: the string to examine for presence of the pattern * @p: the string containing the pattern * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match * locations. 
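md5_transform() above is deliberately bare: it mixes exactly one 512-bit block into a caller-seeded 4-word state, with no padding, length encoding or finalization (its in-kernel users want a fast one-block mix, not a general MD5 API). A hedged usage sketch; the wrapper name is illustrative:

#include <linux/cryptohash.h>
#include <linux/types.h>

static void my_md5_block(u32 hash[MD5_DIGEST_WORDS], const u32 *block)
{
	/* hash[0..3] must already hold the caller's IV or secret;
	 * block must point at 16 words (MD5_MESSAGE_BYTES of data);
	 * any MD5-style padding is the caller's responsibility */
	md5_transform(hash, block);
}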
diff --git a/lib/plist.c b/lib/plist.c index 1471988d9190..a0a4da489c22 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -28,6 +28,8 @@ #ifdef CONFIG_DEBUG_PI_LIST +static struct plist_head test_head; + static void plist_check_prev_next(struct list_head *t, struct list_head *p, struct list_head *n) { @@ -54,12 +56,8 @@ static void plist_check_list(struct list_head *top) static void plist_check_head(struct plist_head *head) { - WARN_ON(!head->rawlock && !head->spinlock); - if (head->rawlock) - WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); - if (head->spinlock) - WARN_ON_SMP(!spin_is_locked(head->spinlock)); - plist_check_list(&head->prio_list); + if (!plist_head_empty(head)) + plist_check_list(&plist_first(head)->prio_list); plist_check_list(&head->node_list); } @@ -75,25 +73,33 @@ static void plist_check_head(struct plist_head *head) */ void plist_add(struct plist_node *node, struct plist_head *head) { - struct plist_node *iter; + struct plist_node *first, *iter, *prev = NULL; + struct list_head *node_next = &head->node_list; plist_check_head(head); WARN_ON(!plist_node_empty(node)); + WARN_ON(!list_empty(&node->prio_list)); + + if (plist_head_empty(head)) + goto ins_node; - list_for_each_entry(iter, &head->prio_list, plist.prio_list) { - if (node->prio < iter->prio) - goto lt_prio; - else if (node->prio == iter->prio) { - iter = list_entry(iter->plist.prio_list.next, - struct plist_node, plist.prio_list); - goto eq_prio; + first = iter = plist_first(head); + + do { + if (node->prio < iter->prio) { + node_next = &iter->node_list; + break; } - } -lt_prio: - list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); -eq_prio: - list_add_tail(&node->plist.node_list, &iter->plist.node_list); + prev = iter; + iter = list_entry(iter->prio_list.next, + struct plist_node, prio_list); + } while (iter != first); + + if (!prev || prev->prio != node->prio) + list_add_tail(&node->prio_list, &iter->prio_list); +ins_node: + list_add_tail(&node->node_list, node_next); plist_check_head(head); } @@ -108,14 +114,98 @@ void plist_del(struct plist_node *node, struct plist_head *head) { plist_check_head(head); - if (!list_empty(&node->plist.prio_list)) { - struct plist_node *next = plist_first(&node->plist); + if (!list_empty(&node->prio_list)) { + if (node->node_list.next != &head->node_list) { + struct plist_node *next; + + next = list_entry(node->node_list.next, + struct plist_node, node_list); - list_move_tail(&next->plist.prio_list, &node->plist.prio_list); - list_del_init(&node->plist.prio_list); + /* add the next plist_node into prio_list */ + if (list_empty(&next->prio_list)) + list_add(&next->prio_list, &node->prio_list); + } + list_del_init(&node->prio_list); } - list_del_init(&node->plist.node_list); + list_del_init(&node->node_list); plist_check_head(head); } + +#ifdef CONFIG_DEBUG_PI_LIST +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/init.h> + +static struct plist_node __initdata test_node[241]; + +static void __init plist_test_check(int nr_expect) +{ + struct plist_node *first, *prio_pos, *node_pos; + + if (plist_head_empty(&test_head)) { + BUG_ON(nr_expect != 0); + return; + } + + prio_pos = first = plist_first(&test_head); + plist_for_each(node_pos, &test_head) { + if (nr_expect-- < 0) + break; + if (node_pos == first) + continue; + if (node_pos->prio == prio_pos->prio) { + BUG_ON(!list_empty(&node_pos->prio_list)); + continue; + } + + BUG_ON(prio_pos->prio > node_pos->prio); + BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list); + prio_pos = node_pos; + } + + 
BUG_ON(nr_expect != 0); + BUG_ON(prio_pos->prio_list.next != &first->prio_list); +} + +static int __init plist_test(void) +{ + int nr_expect = 0, i, loop; + unsigned int r = local_clock(); + + printk(KERN_INFO "start plist test\n"); + plist_head_init(&test_head); + for (i = 0; i < ARRAY_SIZE(test_node); i++) + plist_node_init(test_node + i, 0); + + for (loop = 0; loop < 1000; loop++) { + r = r * 193939 % 47629; + i = r % ARRAY_SIZE(test_node); + if (plist_node_empty(test_node + i)) { + r = r * 193939 % 47629; + test_node[i].prio = r % 99; + plist_add(test_node + i, &test_head); + nr_expect++; + } else { + plist_del(test_node + i, &test_head); + nr_expect--; + } + plist_test_check(nr_expect); + } + + for (i = 0; i < ARRAY_SIZE(test_node); i++) { + if (plist_node_empty(test_node + i)) + continue; + plist_del(test_node + i, &test_head); + nr_expect--; + plist_test_check(nr_expect); + } + + printk(KERN_INFO "end plist test\n"); + return 0; +} + +module_init(plist_test); + +#endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7ea2e033d715..a2f9da59c197 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, + unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; if (slot->slots[i]) { - results[nr_found++] = &(slot->slots[i]); - if (nr_found == max_items) + results[nr_found] = &(slot->slots[i]); + if (indices) + indices[nr_found] = index; + if (++nr_found == max_items) { + index++; goto out; + } } + index++; } out: *next_index = index; @@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, NULL, + cur_index, max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the lookup are placed + * @indices: where their indices should be placed (but usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. 
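The new @indices argument to radix_tree_gang_lookup_slot() exists for callers, apparently shmem in this series, that need to know where each returned slot lives; passing NULL keeps the old behaviour. A usage sketch under RCU; the function name and the batch size of 16 are illustrative:

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

static void scan_batch(struct radix_tree_root *tree)
{
	void **slots[16];
	unsigned long indices[16];
	unsigned int i, nr;

	rcu_read_lock();
	nr = radix_tree_gang_lookup_slot(tree, slots, indices, 0, 16);
	for (i = 0; i < nr; i++) {
		void *item = radix_tree_deref_slot(slots[i]);

		if (!item || radix_tree_deref_retry(item))
			continue;	/* raced; a real caller restarts */
		/* item was found at index indices[i] */
	}
	rcu_read_unlock();
}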
*/ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; + if (indices) + indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, + indices ? indices + ret : NULL, + cur_index, max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; @@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); +#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) +#include <linux/sched.h> /* for cond_resched() */ + +/* + * This linear search is at present only useful to shmem_unuse_inode(). + */ +static unsigned long __locate(struct radix_tree_node *slot, void *item, + unsigned long index, unsigned long *found_index) +{ + unsigned int shift, height; + unsigned long i; + + height = slot->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: check items */ + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] == item) { + *found_index = index + i; + index = 0; + goto out; + } + } + index += RADIX_TREE_MAP_SIZE; +out: + return index; +} + +/** + * radix_tree_locate_item - search through radix tree for item + * @root: radix tree root + * @item: item to be found + * + * Returns index where item was found, or -1 if not found. + * Caller must hold no lock (since this time-consuming function needs + * to be preemptible), and must check afterwards if item is still there. 
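radix_tree_locate_item() is a preemptible linear scan (note the cond_resched() in its loop below) and is documented as useful only to shmem_unuse_inode(); since no lock is held during the scan, the caller must revalidate the result afterwards. A sketch of that revalidation step; the function shape and the lock are illustrative:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static bool item_still_at(struct radix_tree_root *tree, void *item,
			  spinlock_t *lock)
{
	unsigned long index = radix_tree_locate_item(tree, item); /* unlocked */
	bool found;

	if (index == (unsigned long)-1)
		return false;
	spin_lock_irq(lock);
	found = (radix_tree_lookup(tree, index) == item); /* recheck under lock */
	spin_unlock_irq(lock);
	return found;
}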
+ */ +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = 0; + unsigned long found_index = -1; + + do { + rcu_read_lock(); + node = rcu_dereference_raw(root->rnode); + if (!radix_tree_is_indirect_ptr(node)) { + rcu_read_unlock(); + if (node == item) + found_index = 0; + break; + } + + node = indirect_to_ptr(node); + max_index = radix_tree_maxindex(node->height); + if (cur_index > max_index) + break; + + cur_index = __locate(node, item, cur_index, &found_index); + rcu_read_unlock(); + cond_resched(); + } while (cur_index != 0 && cur_index <= max_index); + + return found_index; +} +#else +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + return -1; +} +#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal diff --git a/lib/rwsem.c b/lib/rwsem.c index f236d7cd5cf3..aa7c3052261f 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* * wait for the read lock to be granted */ -asmregparm struct rw_semaphore __sched * -rwsem_down_read_failed(struct rw_semaphore *sem) +struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, -RWSEM_ACTIVE_READ_BIAS); @@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem) /* * wait for the write lock to be granted */ -asmregparm struct rw_semaphore __sched * -rwsem_down_write_failed(struct rw_semaphore *sem) +struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, -RWSEM_ACTIVE_WRITE_BIAS); @@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem) * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here */ -asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) +struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; @@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) * - caller incremented waiting part of count and discovered it still negative * - just wake up any readers at the front of the queue */ -asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) +struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; diff --git a/lib/sha1.c b/lib/sha1.c index 4c45fd50e913..f33271dd00cb 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -1,31 +1,72 @@ /* - * SHA transform algorithm, originally taken from code written by - * Peter Gutmann, and placed in the public domain. + * SHA1 routine optimized to do word accesses rather than byte accesses, + * and to avoid unnecessary copies into the context array. + * + * This was based on the git SHA1 implementation. */ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/cryptohash.h> +#include <linux/bitops.h> +#include <asm/unaligned.h> -/* The SHA f()-functions. */ +/* + * If you have 32 registers or more, the compiler can (and should) + * try to change the array[] accesses into registers. However, on + * machines with less than ~25 registers, that won't really work, + * and at least gcc will make an unholy mess of it. 
+ * + * So to avoid that mess which just slows things down, we force + * the stores to memory to actually happen (we might be better off + * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as + * suggested by Artur Skawina - that will also make gcc unable to + * try to do the silly "optimize away loads" part because it won't + * see what the value will be). + * + * Ben Herrenschmidt reports that on PPC, the C version comes close + * to the optimized asm with this (ie on PPC you don't want that + * 'volatile', since there are lots of registers). + * + * On ARM we get the best code generation by forcing a full memory barrier + * between each SHA_ROUND, otherwise gcc happily get wild with spilling and + * the stack frame size simply explode and performance goes down the drain. + */ -#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */ -#define f2(x,y,z) (x ^ y ^ z) /* XOR */ -#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */ +#ifdef CONFIG_X86 + #define setW(x, val) (*(volatile __u32 *)&W(x) = (val)) +#elif defined(CONFIG_ARM) + #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0) +#else + #define setW(x, val) (W(x) = (val)) +#endif -/* The SHA Mysterious Constants */ +/* This "rolls" over the 512-bit array */ +#define W(x) (array[(x)&15]) -#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ -#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ -#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ -#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ +/* + * Where do we get the source from? The first 16 iterations get it from + * the input data, the next mix it from the 512-bit array. + */ +#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t) +#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1) + +#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ + __u32 TEMP = input(t); setW(t, TEMP); \ + E += TEMP + rol32(A,5) + (fn) + (constant); \ + B = ror32(B, 2); } while (0) + +#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E ) +#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E ) +#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E ) /** * sha_transform - single block SHA1 transform * * @digest: 160 bit digest to update * @data: 512 bits of data to hash - * @W: 80 words of workspace (see note) + * @array: 16 words of workspace (see note) * * This function generates a SHA1 digest for a single 512-bit block. * Be warned, it does not handle padding and message digest, do not @@ -36,47 +77,111 @@ * to clear the workspace. This is left to the caller to avoid * unnecessary clears between chained hashing operations. 
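The W(x) macro defined below is what lets 16 words replace the old 80-word workspace: SHA_MIX(t) reads offsets t+13, t+8, t+2 and t, which modulo 16 are exactly the textbook t-3, t-8, t-14 and t-16, so no schedule word is needed more than 16 rounds after it is written. A small host-side check of that equivalence (ordinary C, not kernel code; the seed constant is arbitrary):

#include <stdint.h>
#include <stdio.h>

static uint32_t rol32(uint32_t w, int s)
{
	return (w << s) | (w >> (32 - s));
}

int main(void)
{
	uint32_t full[80], ring[16];
	int t;

	for (t = 0; t < 16; t++)
		full[t] = ring[t] = 0x01000193u * (t + 1);

	for (t = 16; t < 80; t++) {
		uint32_t mix = rol32(ring[(t + 13) & 15] ^ ring[(t + 8) & 15] ^
				     ring[(t + 2) & 15] ^ ring[t & 15], 1);

		full[t] = rol32(full[t - 3] ^ full[t - 8] ^
				full[t - 14] ^ full[t - 16], 1);
		ring[t & 15] = mix;	/* overwrite the slot just consumed */
		if (mix != full[t]) {
			printf("mismatch at t=%d\n", t);
			return 1;
		}
	}
	puts("16-word ring matches the 80-word schedule");
	return 0;
}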
*/ -void sha_transform(__u32 *digest, const char *in, __u32 *W) +void sha_transform(__u32 *digest, const char *data, __u32 *array) { - __u32 a, b, c, d, e, t, i; - - for (i = 0; i < 16; i++) - W[i] = be32_to_cpu(((const __be32 *)in)[i]); - - for (i = 0; i < 64; i++) - W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - for (i = 0; i < 20; i++) { - t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 40; i ++) { - t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 60; i ++) { - t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 80; i ++) { - t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; + __u32 A, B, C, D, E; + + A = digest[0]; + B = digest[1]; + C = digest[2]; + D = digest[3]; + E = digest[4]; + + /* Round 1 - iterations 0-16 take their input from 'data' */ + T_0_15( 0, A, B, C, D, E); + T_0_15( 1, E, A, B, C, D); + T_0_15( 2, D, E, A, B, C); + T_0_15( 3, C, D, E, A, B); + T_0_15( 4, B, C, D, E, A); + T_0_15( 5, A, B, C, D, E); + T_0_15( 6, E, A, B, C, D); + T_0_15( 7, D, E, A, B, C); + T_0_15( 8, C, D, E, A, B); + T_0_15( 9, B, C, D, E, A); + T_0_15(10, A, B, C, D, E); + T_0_15(11, E, A, B, C, D); + T_0_15(12, D, E, A, B, C); + T_0_15(13, C, D, E, A, B); + T_0_15(14, B, C, D, E, A); + T_0_15(15, A, B, C, D, E); + + /* Round 1 - tail. Input from 512-bit mixing array */ + T_16_19(16, E, A, B, C, D); + T_16_19(17, D, E, A, B, C); + T_16_19(18, C, D, E, A, B); + T_16_19(19, B, C, D, E, A); + + /* Round 2 */ + T_20_39(20, A, B, C, D, E); + T_20_39(21, E, A, B, C, D); + T_20_39(22, D, E, A, B, C); + T_20_39(23, C, D, E, A, B); + T_20_39(24, B, C, D, E, A); + T_20_39(25, A, B, C, D, E); + T_20_39(26, E, A, B, C, D); + T_20_39(27, D, E, A, B, C); + T_20_39(28, C, D, E, A, B); + T_20_39(29, B, C, D, E, A); + T_20_39(30, A, B, C, D, E); + T_20_39(31, E, A, B, C, D); + T_20_39(32, D, E, A, B, C); + T_20_39(33, C, D, E, A, B); + T_20_39(34, B, C, D, E, A); + T_20_39(35, A, B, C, D, E); + T_20_39(36, E, A, B, C, D); + T_20_39(37, D, E, A, B, C); + T_20_39(38, C, D, E, A, B); + T_20_39(39, B, C, D, E, A); + + /* Round 3 */ + T_40_59(40, A, B, C, D, E); + T_40_59(41, E, A, B, C, D); + T_40_59(42, D, E, A, B, C); + T_40_59(43, C, D, E, A, B); + T_40_59(44, B, C, D, E, A); + T_40_59(45, A, B, C, D, E); + T_40_59(46, E, A, B, C, D); + T_40_59(47, D, E, A, B, C); + T_40_59(48, C, D, E, A, B); + T_40_59(49, B, C, D, E, A); + T_40_59(50, A, B, C, D, E); + T_40_59(51, E, A, B, C, D); + T_40_59(52, D, E, A, B, C); + T_40_59(53, C, D, E, A, B); + T_40_59(54, B, C, D, E, A); + T_40_59(55, A, B, C, D, E); + T_40_59(56, E, A, B, C, D); + T_40_59(57, D, E, A, B, C); + T_40_59(58, C, D, E, A, B); + T_40_59(59, B, C, D, E, A); + + /* Round 4 */ + T_60_79(60, A, B, C, D, E); + T_60_79(61, E, A, B, C, D); + T_60_79(62, D, E, A, B, C); + T_60_79(63, C, D, E, A, B); + T_60_79(64, B, C, D, E, A); + T_60_79(65, A, B, C, D, E); + T_60_79(66, E, A, B, C, D); + T_60_79(67, D, E, A, B, C); + T_60_79(68, C, D, E, A, B); + T_60_79(69, B, C, D, E, A); + T_60_79(70, A, B, C, D, E); + T_60_79(71, E, A, B, C, D); + T_60_79(72, D, E, A, B, C); + T_60_79(73, C, D, E, A, B); + T_60_79(74, B, C, D, E, A); + T_60_79(75, 
A, B, C, D, E); + T_60_79(76, E, A, B, C, D); + T_60_79(77, D, E, A, B, C); + T_60_79(78, C, D, E, A, B); + T_60_79(79, B, C, D, E, A); + + digest[0] += A; + digest[1] += B; + digest[2] += C; + digest[3] += D; + digest[4] += E; } EXPORT_SYMBOL(sha_transform); @@ -92,4 +197,3 @@ void sha_init(__u32 *buf) buf[3] = 0x10325476; buf[4] = 0xc3d2e1f0; } - diff --git a/lib/show_mem.c b/lib/show_mem.c index fdc77c82f922..4407f8c9b1f7 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -9,14 +9,14 @@ #include <linux/nmi.h> #include <linux/quicklist.h> -void show_mem(void) +void show_mem(unsigned int filter) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, shared = 0, nonshared = 0, highmem = 0; printk("Mem-Info:\n"); - show_free_areas(); + show_free_areas(filter); for_each_online_pgdat(pgdat) { unsigned long i, flags; diff --git a/lib/string.c b/lib/string.c index f71bead1be3e..01fad9b203e1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -535,6 +535,35 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * strtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int strtobool(const char *s, bool *res) +{ + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + break; + case 'n': + case 'N': + case '0': + *res = false; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(strtobool); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 93ca08b8a451..99093b396145 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -110,6 +110,11 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? 
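strtobool(), added to lib/string.c in this hunk, trades completeness for predictability: only the first character is examined, so "y", "Yes" and "1\n" (what `echo 1` produces) all parse, while anything else returns -EINVAL and leaves *res alone. A store-handler sketch; the handler shape is illustrative and the surrounding sysfs plumbing is assumed:

#include <linux/string.h>
#include <linux/types.h>

static bool my_enabled;

static ssize_t enabled_store(const char *buf, size_t count)
{
	int err = strtobool(buf, &my_enabled);

	if (err)
		return err;	/* first char not in "Yy1Nn0": -EINVAL */
	return count;		/* "y\n" from `echo y` works: only buf[0] counts */
}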
*/ +unsigned long swioltb_nr_tbl(void) +{ + return io_tlb_nslabs; +} + /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c new file mode 100644 index 000000000000..d55769d63cb8 --- /dev/null +++ b/lib/test-kstrtox.c @@ -0,0 +1,739 @@ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#define for_each_test(i, test) \ + for (i = 0; i < sizeof(test) / sizeof(test[0]); i++) + +struct test_fail { + const char *str; + unsigned int base; +}; + +#define DEFINE_TEST_FAIL(test) \ + const struct test_fail test[] __initdata + +#define DECLARE_TEST_OK(type, test_type) \ + test_type { \ + const char *str; \ + unsigned int base; \ + type expected_res; \ + } + +#define DEFINE_TEST_OK(type, test) \ + const type test[] __initdata + +#define TEST_FAIL(fn, type, fmt, test) \ +{ \ + unsigned int i; \ + \ + for_each_test(i, test) { \ + const struct test_fail *t = &test[i]; \ + type tmp; \ + int rv; \ + \ + tmp = 0; \ + rv = fn(t->str, t->base, &tmp); \ + if (rv >= 0) { \ + WARN(1, "str '%s', base %u, expected -E, got %d/" fmt "\n", \ + t->str, t->base, rv, tmp); \ + continue; \ + } \ + } \ +} + +#define TEST_OK(fn, type, fmt, test) \ +{ \ + unsigned int i; \ + \ + for_each_test(i, test) { \ + const typeof(test[0]) *t = &test[i]; \ + type res; \ + int rv; \ + \ + rv = fn(t->str, t->base, &res); \ + if (rv != 0) { \ + WARN(1, "str '%s', base %u, expected 0/" fmt ", got %d\n", \ + t->str, t->base, t->expected_res, rv); \ + continue; \ + } \ + if (res != t->expected_res) { \ + WARN(1, "str '%s', base %u, expected " fmt ", got " fmt "\n", \ + t->str, t->base, t->expected_res, res); \ + continue; \ + } \ + } \ +} + +static void __init test_kstrtoull_ok(void) +{ + DECLARE_TEST_OK(unsigned long long, struct test_ull); + static DEFINE_TEST_OK(struct test_ull, test_ull_ok) = { + {"0", 10, 0ULL}, + {"1", 10, 1ULL}, + {"127", 10, 127ULL}, + {"128", 10, 128ULL}, + {"129", 10, 129ULL}, + {"255", 10, 255ULL}, + {"256", 10, 256ULL}, + {"257", 10, 257ULL}, + {"32767", 10, 32767ULL}, + {"32768", 10, 32768ULL}, + {"32769", 10, 32769ULL}, + {"65535", 10, 65535ULL}, + {"65536", 10, 65536ULL}, + {"65537", 10, 65537ULL}, + {"2147483647", 10, 2147483647ULL}, + {"2147483648", 10, 2147483648ULL}, + {"2147483649", 10, 2147483649ULL}, + {"4294967295", 10, 4294967295ULL}, + {"4294967296", 10, 4294967296ULL}, + {"4294967297", 10, 4294967297ULL}, + {"9223372036854775807", 10, 9223372036854775807ULL}, + {"9223372036854775808", 10, 9223372036854775808ULL}, + {"9223372036854775809", 10, 9223372036854775809ULL}, + {"18446744073709551614", 10, 18446744073709551614ULL}, + {"18446744073709551615", 10, 18446744073709551615ULL}, + + {"00", 8, 00ULL}, + {"01", 8, 01ULL}, + {"0177", 8, 0177ULL}, + {"0200", 8, 0200ULL}, + {"0201", 8, 0201ULL}, + {"0377", 8, 0377ULL}, + {"0400", 8, 0400ULL}, + {"0401", 8, 0401ULL}, + {"077777", 8, 077777ULL}, + {"0100000", 8, 0100000ULL}, + {"0100001", 8, 0100001ULL}, + {"0177777", 8, 0177777ULL}, + {"0200000", 8, 0200000ULL}, + {"0200001", 8, 0200001ULL}, + {"017777777777", 8, 017777777777ULL}, + {"020000000000", 8, 020000000000ULL}, + {"020000000001", 8, 020000000001ULL}, + {"037777777777", 8, 037777777777ULL}, + {"040000000000", 8, 040000000000ULL}, + {"040000000001", 8, 040000000001ULL}, + {"0777777777777777777777", 8, 0777777777777777777777ULL}, + {"01000000000000000000000", 8, 01000000000000000000000ULL}, + {"01000000000000000000001", 8, 
01000000000000000000001ULL}, + {"01777777777777777777776", 8, 01777777777777777777776ULL}, + {"01777777777777777777777", 8, 01777777777777777777777ULL}, + + {"0x0", 16, 0x0ULL}, + {"0x1", 16, 0x1ULL}, + {"0x7f", 16, 0x7fULL}, + {"0x80", 16, 0x80ULL}, + {"0x81", 16, 0x81ULL}, + {"0xff", 16, 0xffULL}, + {"0x100", 16, 0x100ULL}, + {"0x101", 16, 0x101ULL}, + {"0x7fff", 16, 0x7fffULL}, + {"0x8000", 16, 0x8000ULL}, + {"0x8001", 16, 0x8001ULL}, + {"0xffff", 16, 0xffffULL}, + {"0x10000", 16, 0x10000ULL}, + {"0x10001", 16, 0x10001ULL}, + {"0x7fffffff", 16, 0x7fffffffULL}, + {"0x80000000", 16, 0x80000000ULL}, + {"0x80000001", 16, 0x80000001ULL}, + {"0xffffffff", 16, 0xffffffffULL}, + {"0x100000000", 16, 0x100000000ULL}, + {"0x100000001", 16, 0x100000001ULL}, + {"0x7fffffffffffffff", 16, 0x7fffffffffffffffULL}, + {"0x8000000000000000", 16, 0x8000000000000000ULL}, + {"0x8000000000000001", 16, 0x8000000000000001ULL}, + {"0xfffffffffffffffe", 16, 0xfffffffffffffffeULL}, + {"0xffffffffffffffff", 16, 0xffffffffffffffffULL}, + + {"0\n", 0, 0ULL}, + }; + TEST_OK(kstrtoull, unsigned long long, "%llu", test_ull_ok); +} + +static void __init test_kstrtoull_fail(void) +{ + static DEFINE_TEST_FAIL(test_ull_fail) = { + {"", 0}, + {"", 8}, + {"", 10}, + {"", 16}, + {"\n", 0}, + {"\n", 8}, + {"\n", 10}, + {"\n", 16}, + {"\n0", 0}, + {"\n0", 8}, + {"\n0", 10}, + {"\n0", 16}, + {"+", 0}, + {"+", 8}, + {"+", 10}, + {"+", 16}, + {"-", 0}, + {"-", 8}, + {"-", 10}, + {"-", 16}, + {"0x", 0}, + {"0x", 16}, + {"0X", 0}, + {"0X", 16}, + {"0 ", 0}, + {"1+", 0}, + {"1-", 0}, + {" 2", 0}, + /* base autodetection */ + {"0x0z", 0}, + {"0z", 0}, + {"a", 0}, + /* digit >= base */ + {"2", 2}, + {"8", 8}, + {"a", 10}, + {"A", 10}, + {"g", 16}, + {"G", 16}, + /* overflow */ + {"10000000000000000000000000000000000000000000000000000000000000000", 2}, + {"2000000000000000000000", 8}, + {"18446744073709551616", 10}, + {"10000000000000000", 16}, + /* negative */ + {"-0", 0}, + {"-0", 8}, + {"-0", 10}, + {"-0", 16}, + {"-1", 0}, + {"-1", 8}, + {"-1", 10}, + {"-1", 16}, + /* sign is first character if any */ + {"-+1", 0}, + {"-+1", 8}, + {"-+1", 10}, + {"-+1", 16}, + /* nothing after \n */ + {"0\n0", 0}, + {"0\n0", 8}, + {"0\n0", 10}, + {"0\n0", 16}, + {"0\n+", 0}, + {"0\n+", 8}, + {"0\n+", 10}, + {"0\n+", 16}, + {"0\n-", 0}, + {"0\n-", 8}, + {"0\n-", 10}, + {"0\n-", 16}, + {"0\n ", 0}, + {"0\n ", 8}, + {"0\n ", 10}, + {"0\n ", 16}, + }; + TEST_FAIL(kstrtoull, unsigned long long, "%llu", test_ull_fail); +} + +static void __init test_kstrtoll_ok(void) +{ + DECLARE_TEST_OK(long long, struct test_ll); + static DEFINE_TEST_OK(struct test_ll, test_ll_ok) = { + {"0", 10, 0LL}, + {"1", 10, 1LL}, + {"127", 10, 127LL}, + {"128", 10, 128LL}, + {"129", 10, 129LL}, + {"255", 10, 255LL}, + {"256", 10, 256LL}, + {"257", 10, 257LL}, + {"32767", 10, 32767LL}, + {"32768", 10, 32768LL}, + {"32769", 10, 32769LL}, + {"65535", 10, 65535LL}, + {"65536", 10, 65536LL}, + {"65537", 10, 65537LL}, + {"2147483647", 10, 2147483647LL}, + {"2147483648", 10, 2147483648LL}, + {"2147483649", 10, 2147483649LL}, + {"4294967295", 10, 4294967295LL}, + {"4294967296", 10, 4294967296LL}, + {"4294967297", 10, 4294967297LL}, + {"9223372036854775807", 10, 9223372036854775807LL}, + + {"-1", 10, -1LL}, + {"-2", 10, -2LL}, + {"-9223372036854775808", 10, LLONG_MIN}, + }; + TEST_OK(kstrtoll, long long, "%lld", test_ll_ok); +} + +static void __init test_kstrtoll_fail(void) +{ + static DEFINE_TEST_FAIL(test_ll_fail) = { + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + 
{"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"-9223372036854775809", 10}, + {"-18446744073709551614", 10}, + {"-18446744073709551615", 10}, + /* negative zero isn't an integer in Linux */ + {"-0", 0}, + {"-0", 8}, + {"-0", 10}, + {"-0", 16}, + /* sign is first character if any */ + {"-+1", 0}, + {"-+1", 8}, + {"-+1", 10}, + {"-+1", 16}, + }; + TEST_FAIL(kstrtoll, long long, "%lld", test_ll_fail); +} + +static void __init test_kstrtou64_ok(void) +{ + DECLARE_TEST_OK(u64, struct test_u64); + static DEFINE_TEST_OK(struct test_u64, test_u64_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + {"2147483648", 10, 2147483648ULL}, + {"2147483649", 10, 2147483649ULL}, + {"4294967294", 10, 4294967294ULL}, + {"4294967295", 10, 4294967295ULL}, + {"4294967296", 10, 4294967296ULL}, + {"4294967297", 10, 4294967297ULL}, + {"9223372036854775806", 10, 9223372036854775806ULL}, + {"9223372036854775807", 10, 9223372036854775807ULL}, + {"9223372036854775808", 10, 9223372036854775808ULL}, + {"9223372036854775809", 10, 9223372036854775809ULL}, + {"18446744073709551614", 10, 18446744073709551614ULL}, + {"18446744073709551615", 10, 18446744073709551615ULL}, + }; + TEST_OK(kstrtou64, u64, "%llu", test_u64_ok); +} + +static void __init test_kstrtou64_fail(void) +{ + static DEFINE_TEST_FAIL(test_u64_fail) = { + {"-2", 10}, + {"-1", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou64, u64, "%llu", test_u64_fail); +} + +static void __init test_kstrtos64_ok(void) +{ + DECLARE_TEST_OK(s64, struct test_s64); + static DEFINE_TEST_OK(struct test_s64, test_s64_ok) = { + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + {"2147483648", 10, 2147483648LL}, + {"2147483649", 10, 2147483649LL}, + {"4294967294", 10, 4294967294LL}, + {"4294967295", 10, 4294967295LL}, + {"4294967296", 10, 4294967296LL}, + {"4294967297", 10, 4294967297LL}, + {"9223372036854775806", 10, 9223372036854775806LL}, + {"9223372036854775807", 10, 9223372036854775807LL}, + }; + TEST_OK(kstrtos64, s64, "%lld", test_s64_ok); +} + +static void __init test_kstrtos64_fail(void) +{ + static DEFINE_TEST_FAIL(test_s64_fail) = { + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos64, s64, "%lld", test_s64_fail); +} + +static void __init test_kstrtou32_ok(void) +{ + DECLARE_TEST_OK(u32, struct test_u32); + static DEFINE_TEST_OK(struct test_u32, test_u32_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + 
{"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + {"2147483648", 10, 2147483648U}, + {"2147483649", 10, 2147483649U}, + {"4294967294", 10, 4294967294U}, + {"4294967295", 10, 4294967295U}, + }; + TEST_OK(kstrtou32, u32, "%u", test_u32_ok); +} + +static void __init test_kstrtou32_fail(void) +{ + static DEFINE_TEST_FAIL(test_u32_fail) = { + {"-2", 10}, + {"-1", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou32, u32, "%u", test_u32_fail); +} + +static void __init test_kstrtos32_ok(void) +{ + DECLARE_TEST_OK(s32, struct test_s32); + static DEFINE_TEST_OK(struct test_s32, test_s32_ok) = { + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + {"65536", 10, 65536}, + {"65537", 10, 65537}, + {"2147483646", 10, 2147483646}, + {"2147483647", 10, 2147483647}, + }; + TEST_OK(kstrtos32, s32, "%d", test_s32_ok); +} + +static void __init test_kstrtos32_fail(void) +{ + static DEFINE_TEST_FAIL(test_s32_fail) = { + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos32, s32, "%d", test_s32_fail); +} + +static void __init test_kstrtou16_ok(void) +{ + DECLARE_TEST_OK(u16, struct test_u16); + static DEFINE_TEST_OK(struct test_u16, test_u16_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + {"32768", 10, 32768}, + {"32769", 10, 32769}, + {"65534", 10, 65534}, + {"65535", 10, 65535}, + }; + TEST_OK(kstrtou16, u16, "%hu", test_u16_ok); +} + +static void __init test_kstrtou16_fail(void) +{ + static DEFINE_TEST_FAIL(test_u16_fail) = { + {"-2", 10}, + {"-1", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou16, u16, "%hu", test_u16_fail); +} + +static void __init test_kstrtos16_ok(void) +{ + DECLARE_TEST_OK(s16, struct test_s16); + static DEFINE_TEST_OK(struct 
test_s16, test_s16_ok) = { + {"-130", 10, -130}, + {"-129", 10, -129}, + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + {"256", 10, 256}, + {"257", 10, 257}, + {"32766", 10, 32766}, + {"32767", 10, 32767}, + }; + TEST_OK(kstrtos16, s16, "%hd", test_s16_ok); +} + +static void __init test_kstrtos16_fail(void) +{ + static DEFINE_TEST_FAIL(test_s16_fail) = { + {"32768", 10}, + {"32769", 10}, + {"65534", 10}, + {"65535", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos16, s16, "%hd", test_s16_fail); +} + +static void __init test_kstrtou8_ok(void) +{ + DECLARE_TEST_OK(u8, struct test_u8); + static DEFINE_TEST_OK(struct test_u8, test_u8_ok) = { + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + {"128", 10, 128}, + {"129", 10, 129}, + {"254", 10, 254}, + {"255", 10, 255}, + }; + TEST_OK(kstrtou8, u8, "%hhu", test_u8_ok); +} + +static void __init test_kstrtou8_fail(void) +{ + static DEFINE_TEST_FAIL(test_u8_fail) = { + {"-2", 10}, + {"-1", 10}, + {"256", 10}, + {"257", 10}, + {"32766", 10}, + {"32767", 10}, + {"32768", 10}, + {"32769", 10}, + {"65534", 10}, + {"65535", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtou8, u8, "%hhu", test_u8_fail); +} + +static void __init test_kstrtos8_ok(void) +{ + DECLARE_TEST_OK(s8, struct test_s8); + static DEFINE_TEST_OK(struct test_s8, test_s8_ok) = { + {"-128", 10, -128}, + {"-127", 10, -127}, + {"-1", 10, -1}, + {"0", 10, 0}, + {"1", 10, 1}, + {"126", 10, 126}, + {"127", 10, 127}, + }; + TEST_OK(kstrtos8, s8, "%hhd", test_s8_ok); +} + +static void __init test_kstrtos8_fail(void) +{ + static DEFINE_TEST_FAIL(test_s8_fail) = { + {"-130", 10}, + {"-129", 10}, + {"128", 10}, + {"129", 10}, + {"254", 10}, + {"255", 10}, + {"256", 10}, + {"257", 10}, + {"32766", 10}, + {"32767", 10}, + {"32768", 10}, + {"32769", 10}, + {"65534", 10}, + {"65535", 10}, + {"65536", 10}, + {"65537", 10}, + {"2147483646", 10}, + {"2147483647", 10}, + {"2147483648", 10}, + {"2147483649", 10}, + {"4294967294", 10}, + {"4294967295", 10}, + {"4294967296", 10}, + {"4294967297", 10}, + {"9223372036854775806", 10}, + {"9223372036854775807", 10}, + {"9223372036854775808", 10}, + {"9223372036854775809", 10}, + {"18446744073709551614", 10}, + {"18446744073709551615", 10}, + {"18446744073709551616", 10}, + {"18446744073709551617", 10}, + }; + TEST_FAIL(kstrtos8, s8, "%hhd", test_s8_fail); +} + +static int __init test_kstrtox_init(void) +{ + test_kstrtoull_ok(); + test_kstrtoull_fail(); + test_kstrtoll_ok(); + test_kstrtoll_fail(); + + 
test_kstrtou64_ok(); + test_kstrtou64_fail(); + test_kstrtos64_ok(); + test_kstrtos64_fail(); + + test_kstrtou32_ok(); + test_kstrtou32_fail(); + test_kstrtos32_ok(); + test_kstrtos32_fail(); + + test_kstrtou16_ok(); + test_kstrtou16_fail(); + test_kstrtos16_ok(); + test_kstrtos16_fail(); + + test_kstrtou8_ok(); + test_kstrtou8_fail(); + test_kstrtos8_ok(); + test_kstrtos8_fail(); + return -EINVAL; +} +module_init(test_kstrtox_init); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/timerqueue.c b/lib/timerqueue.c index e3a1050e6820..191176a43e9a 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -5,7 +5,7 @@ * Uses rbtrees for quick list adds and expiration. * * NOTE: All of the following functions need to be serialized - * to avoid races. No locking is done by this libary code. + * to avoid races. No locking is done by this library code. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d3023df8477f..d7222a9c8267 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -31,13 +31,10 @@ #include <asm/div64.h> #include <asm/sections.h> /* for dereference_function_descriptor() */ -/* Works only for digits and letters, but small and fast */ -#define TOLOWER(x) ((x) | 0x20) - static unsigned int simple_guess_base(const char *cp) { if (cp[0] == '0') { - if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + if (_tolower(cp[1]) == 'x' && isxdigit(cp[2])) return 16; else return 8; @@ -59,13 +56,13 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas if (!base) base = simple_guess_base(cp); - if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + if (base == 16 && cp[0] == '0' && _tolower(cp[1]) == 'x') cp += 2; while (isxdigit(*cp)) { unsigned int value; - value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; + value = isdigit(*cp) ? *cp - '0' : _tolower(*cp) - 'a' + 10; if (value >= base) break; result = result * base + value; @@ -120,147 +117,6 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base) } EXPORT_SYMBOL(simple_strtoll); -/** - * strict_strtoul - convert a string to an unsigned long strictly - * @cp: The string to be converted - * @base: The number base to use - * @res: The converted result value - * - * strict_strtoul converts a string to an unsigned long only if the - * string is really an unsigned long string, any string containing - * any invalid char at the tail will be rejected and -EINVAL is returned, - * only a newline char at the tail is acceptible because people generally - * change a module parameter in the following way: - * - * echo 1024 > /sys/module/e1000/parameters/copybreak - * - * echo will append a newline to the tail. - * - * It returns 0 if conversion is successful and *res is set to the converted - * value, otherwise it returns -EINVAL and *res is set to 0. - * - * simple_strtoul just ignores the successive invalid characters and - * return the converted value of prefix part of the string. 
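Two notes on this hunk. First, test_kstrtox_init() above returns -EINVAL on purpose: loading the test module runs the checks and then fails the insmod, so nothing stays resident. Second, the strict_strtoul()/strict_strtol()/strict_strtoull()/strict_strtoll() helpers whose removal begins here are superseded by the kstrto*() family added earlier in this patch, which keeps the same strict-tail rules (optional trailing newline only) while actually detecting overflow; simple_strtoull, which strict_strtoull was built on, silently wraps. A migration sketch; the wrapper name is illustrative:

#include <linux/kernel.h>

static int parse_copybreak(const char *buf, unsigned long *val)
{
	/* previously: strict_strtoul(buf, 10, val) -- same tail rules,
	 * but overflow wrapped silently and *val was zeroed on failure */
	return kstrtoul(buf, 10, val);	/* -ERANGE on overflow; *val is
					 * untouched unless parsing succeeds */
}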
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d3023df8477f..d7222a9c8267 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -31,13 +31,10 @@
 #include <asm/div64.h>
 #include <asm/sections.h>	/* for dereference_function_descriptor() */
 
-/* Works only for digits and letters, but small and fast */
-#define TOLOWER(x) ((x) | 0x20)
-
 static unsigned int simple_guess_base(const char *cp)
 {
 	if (cp[0] == '0') {
-		if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+		if (_tolower(cp[1]) == 'x' && isxdigit(cp[2]))
 			return 16;
 		else
 			return 8;
@@ -59,13 +56,13 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
 	if (!base)
 		base = simple_guess_base(cp);
 
-	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+	if (base == 16 && cp[0] == '0' && _tolower(cp[1]) == 'x')
 		cp += 2;
 
 	while (isxdigit(*cp)) {
 		unsigned int value;
 
-		value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
+		value = isdigit(*cp) ? *cp - '0' : _tolower(*cp) - 'a' + 10;
 		if (value >= base)
 			break;
 		result = result * base + value;
@@ -120,147 +117,6 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 }
 EXPORT_SYMBOL(simple_strtoll);
 
-/**
- * strict_strtoul - convert a string to an unsigned long strictly
- * @cp: The string to be converted
- * @base: The number base to use
- * @res: The converted result value
- *
- * strict_strtoul converts a string to an unsigned long only if the
- * string is really an unsigned long string, any string containing
- * any invalid char at the tail will be rejected and -EINVAL is returned,
- * only a newline char at the tail is acceptible because people generally
- * change a module parameter in the following way:
- *
- * 	echo 1024 > /sys/module/e1000/parameters/copybreak
- *
- * echo will append a newline to the tail.
- *
- * It returns 0 if conversion is successful and *res is set to the converted
- * value, otherwise it returns -EINVAL and *res is set to 0.
- *
- * simple_strtoul just ignores the successive invalid characters and
- * return the converted value of prefix part of the string.
- */
-int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
-{
-	char *tail;
-	unsigned long val;
-
-	*res = 0;
-	if (!*cp)
-		return -EINVAL;
-
-	val = simple_strtoul(cp, &tail, base);
-	if (tail == cp)
-		return -EINVAL;
-
-	if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
-		*res = val;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL(strict_strtoul);
-
-/**
- * strict_strtol - convert a string to a long strictly
- * @cp: The string to be converted
- * @base: The number base to use
- * @res: The converted result value
- *
- * strict_strtol is similiar to strict_strtoul, but it allows the first
- * character of a string is '-'.
- *
- * It returns 0 if conversion is successful and *res is set to the converted
- * value, otherwise it returns -EINVAL and *res is set to 0.
- */
-int strict_strtol(const char *cp, unsigned int base, long *res)
-{
-	int ret;
-	if (*cp == '-') {
-		ret = strict_strtoul(cp + 1, base, (unsigned long *)res);
-		if (!ret)
-			*res = -(*res);
-	} else {
-		ret = strict_strtoul(cp, base, (unsigned long *)res);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(strict_strtol);
-
-/**
- * strict_strtoull - convert a string to an unsigned long long strictly
- * @cp: The string to be converted
- * @base: The number base to use
- * @res: The converted result value
- *
- * strict_strtoull converts a string to an unsigned long long only if the
- * string is really an unsigned long long string, any string containing
- * any invalid char at the tail will be rejected and -EINVAL is returned,
- * only a newline char at the tail is acceptible because people generally
- * change a module parameter in the following way:
- *
- * 	echo 1024 > /sys/module/e1000/parameters/copybreak
- *
- * echo will append a newline to the tail of the string.
- *
- * It returns 0 if conversion is successful and *res is set to the converted
- * value, otherwise it returns -EINVAL and *res is set to 0.
- *
- * simple_strtoull just ignores the successive invalid characters and
- * return the converted value of prefix part of the string.
- */
-int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
-{
-	char *tail;
-	unsigned long long val;
-
-	*res = 0;
-	if (!*cp)
-		return -EINVAL;
-
-	val = simple_strtoull(cp, &tail, base);
-	if (tail == cp)
-		return -EINVAL;
-	if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
-		*res = val;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL(strict_strtoull);
-
-/**
- * strict_strtoll - convert a string to a long long strictly
- * @cp: The string to be converted
- * @base: The number base to use
- * @res: The converted result value
- *
- * strict_strtoll is similiar to strict_strtoull, but it allows the first
- * character of a string is '-'.
- *
- * It returns 0 if conversion is successful and *res is set to the converted
- * value, otherwise it returns -EINVAL and *res is set to 0.
- */
-int strict_strtoll(const char *cp, unsigned int base, long long *res)
-{
-	int ret;
-	if (*cp == '-') {
-		ret = strict_strtoull(cp + 1, base, (unsigned long long *)res);
-		if (!ret)
-			*res = -(*res);
-	} else {
-		ret = strict_strtoull(cp, base, (unsigned long long *)res);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(strict_strtoll);
-
 static noinline_for_stack
 int skip_atoi(const char **s)
 {
@@ -574,7 +430,9 @@ char *symbol_string(char *buf, char *end, void *ptr,
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
-	if (ext != 'f' && ext != 's')
+	if (ext == 'B')
+		sprint_backtrace(sym, value);
+	else if (ext != 'f' && ext != 's')
 		sprint_symbol(sym, value);
 	else
 		kallsyms_lookup(value, NULL, NULL, NULL, sym);
@@ -805,6 +663,8 @@ char *ip6_compressed_string(char *p, const char *addr)
 			colonpos = i;
 		}
 	}
+	if (longest == 1)		/* don't compress a single 0 */
+		colonpos = -1;
 
 	/* emit address */
 	for (i = 0; i < range; i++) {
@@ -936,7 +796,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
-int kptr_restrict = 1;
+int kptr_restrict __read_mostly;
 
 /*
  * Show a '%p' thing. A kernel extension is that the '%p' is followed
@@ -949,6 +809,7 @@ int kptr_restrict = 1;
  * - 'f' For simple symbolic function names without offset
  * - 'S' For symbolic direct pointers with offset
  * - 's' For symbolic direct pointers without offset
+ * - 'B' For backtraced symbolic direct pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
 * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
 * - 'M' For a 6-byte MAC address, it prints the address in the
@@ -964,7 +825,7 @@
 *       IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
 * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
 * - 'I6c' for IPv6 addresses printed as specified by
- *       http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
+ *       http://tools.ietf.org/html/rfc5952
 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
 *       "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
 *       Options for %pU are:
@@ -991,7 +852,7 @@ static noinline_for_stack
 char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 	      struct printf_spec spec)
 {
-	if (!ptr) {
+	if (!ptr && *fmt != 'K') {
 		/*
 		 * Print (null) with the same width as a pointer so it makes
 		 * tabular output look nice.
@@ -1008,6 +869,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		/* Fallthrough */
 	case 'S':
 	case 's':
+	case 'B':
 		return symbol_string(buf, end, ptr, spec, *fmt);
 	case 'R':
 	case 'r':
@@ -1035,7 +897,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 	case 'U':
 		return uuid_string(buf, end, ptr, spec, fmt);
 	case 'V':
-		return buf + vsnprintf(buf, end - buf,
+		return buf + vsnprintf(buf, end > buf ? end - buf : 0,
 				       ((struct va_format *)ptr)->fmt,
 				       *(((struct va_format *)ptr)->va));
 	case 'K':
@@ -1047,16 +909,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 			if (spec.field_width == -1)
 				spec.field_width = 2 * sizeof(void *);
 			return string(buf, end, "pK-error", spec);
-		} else if ((kptr_restrict == 0) ||
-			 (kptr_restrict == 1 &&
-			  has_capability_noaudit(current, CAP_SYSLOG)))
-			break;
-
-		if (spec.field_width == -1) {
-			spec.field_width = 2 * sizeof(void *);
-			spec.flags |= ZEROPAD;
 		}
-		return number(buf, end, 0, spec);
+		if (!((kptr_restrict == 0) ||
+		      (kptr_restrict == 1 &&
+		       has_capability_noaudit(current, CAP_SYSLOG))))
+			ptr = NULL;
+		break;
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
@@ -1175,8 +1033,8 @@ precision:
 qualifier:
 	/* get the conversion qualifier */
 	spec->qualifier = -1;
-	if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
-	    TOLOWER(*fmt) == 'z' || *fmt == 't') {
+	if (*fmt == 'h' || _tolower(*fmt) == 'l' ||
+	    _tolower(*fmt) == 'z' || *fmt == 't') {
 		spec->qualifier = *fmt++;
 		if (unlikely(spec->qualifier == *fmt)) {
 			if (spec->qualifier == 'l') {
@@ -1243,7 +1101,7 @@ qualifier:
 			spec->type = FORMAT_TYPE_LONG;
 		else
 			spec->type = FORMAT_TYPE_ULONG;
-	} else if (TOLOWER(spec->qualifier) == 'z') {
+	} else if (_tolower(spec->qualifier) == 'z') {
 		spec->type = FORMAT_TYPE_SIZE_T;
 	} else if (spec->qualifier == 't') {
 		spec->type = FORMAT_TYPE_PTRDIFF;
@@ -1279,6 +1137,7 @@ qualifier:
 * %ps output the name of a text symbol without offset
 * %pF output the name of a function pointer with its offset
 * %pf output the name of a function pointer without its offset
+ * %pB output the name of a backtrace symbol with its offset
 * %pR output the address range in a struct resource with decoded flags
 * %pr output the address range in a struct resource with raw flags
 * %pM output a 6-byte MAC address with colons
@@ -1287,8 +1146,7 @@
 * %pi4 print an IPv4 address with leading zeros
 * %pI6 print an IPv6 address with colons
 * %pi6 print an IPv6 address without colons
- * %pI6c print an IPv6 address as specified by
- *   http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
+ * %pI6c print an IPv6 address as specified by RFC 5952
 * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper
 *   case.
 * %n is ignored
@@ -1301,8 +1159,7 @@
 * return is greater than or equal to @size, the resulting
 * string is truncated.
 *
- * Call this function if you are already dealing with a va_list.
- * You probably want snprintf() instead.
+ * If you're not already dealing with a va_list consider using snprintf().
 */
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
@@ -1402,7 +1259,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			if (qualifier == 'l') {
 				long *ip = va_arg(args, long *);
 				*ip = (str - buf);
-			} else if (TOLOWER(qualifier) == 'z') {
+			} else if (_tolower(qualifier) == 'z') {
 				size_t *ip = va_arg(args, size_t *);
 				*ip = (str - buf);
 			} else {
@@ -1476,8 +1333,7 @@ EXPORT_SYMBOL(vsnprintf);
 * the @buf not including the trailing '\0'. If @size is == 0 the function
 * returns 0.
 *
- * Call this function if you are already dealing with a va_list.
- * You probably want scnprintf() instead.
+ * If you're not already dealing with a va_list consider using scnprintf().
 *
 * See the vsnprintf() documentation for format string extensions over C99.
 */
@@ -1556,8 +1412,7 @@ EXPORT_SYMBOL(scnprintf);
 * into @buf. Use vsnprintf() or vscnprintf() in order to avoid
 * buffer overflows.
 *
- * Call this function if you are already dealing with a va_list.
- * You probably want sprintf() instead.
+ * If you're not already dealing with a va_list consider using sprintf().
 *
 * See the vsnprintf() documentation for format string extensions over C99.
 */
@@ -1691,7 +1546,7 @@ do {									\
 			void *skip_arg;
 			if (qualifier == 'l')
 				skip_arg = va_arg(args, long *);
-			else if (TOLOWER(qualifier) == 'z')
+			else if (_tolower(qualifier) == 'z')
 				skip_arg = va_arg(args, size_t *);
 			else
 				skip_arg = va_arg(args, int *);
@@ -1997,8 +1852,8 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 
 	/* get conversion qualifier */
 	qualifier = -1;
-	if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
-	    TOLOWER(*fmt) == 'z') {
+	if (*fmt == 'h' || _tolower(*fmt) == 'l' ||
+	    _tolower(*fmt) == 'z') {
 		qualifier = *fmt++;
 		if (unlikely(qualifier == *fmt)) {
 			if (qualifier == 'h') {
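Two of the vsprintf.c hunks deserve a note. The %pK rework makes a restricted pointer come out as a zeroed pointer value (ptr is cleared before the normal numeric path), and kptr_restrict now defaults to off. The %pV fix is subtler: once the output cursor has passed 'end', 'end - buf' is negative, and converted to the unsigned size argument of the nested vsnprintf() it would claim an enormous buffer. A hedged userspace sketch of that clamp (emit_nested is an invented name):

	/*
	 * If the cursor is already past 'end', end - buf is negative; as a
	 * size_t it would look like a huge buffer.  Clamping to 0 turns the
	 * nested vsnprintf() into a pure "count only" call.
	 */
	#include <stdarg.h>
	#include <stdio.h>

	static char *emit_nested(char *buf, char *end, const char *fmt, ...)
	{
		va_list ap;
		int len;

		va_start(ap, fmt);
		len = vsnprintf(buf, end > buf ? (size_t)(end - buf) : 0, fmt, ap);
		va_end(ap);
		return buf + len;	/* may point past 'end', as in vsprintf.c */
	}

	int main(void)
	{
		char buf[8];
		char *pos = emit_nested(buf, buf + sizeof(buf), "%s", "truncated output");

		printf("\"%s\" (wanted %td bytes)\n", buf, pos - buf);
		return 0;
	}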
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index ea5fa4fe9d67..a6cdc969ea42 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -969,6 +969,9 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 			 */
 			tmp = b->in[b->in_pos++];
 
+			if (tmp == 0x00)
+				return XZ_STREAM_END;
+
 			if (tmp >= 0xE0 || tmp == 0x01) {
 				s->lzma2.need_props = true;
 				s->lzma2.need_dict_reset = false;
@@ -1001,9 +1004,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 					lzma_reset(s);
 				}
 			} else {
-				if (tmp == 0x00)
-					return XZ_STREAM_END;
-
 				if (tmp > 0x02)
 					return XZ_DATA_ERROR;
 
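The reordering above matters because the end-of-stream marker must be recognized before the chunk-header validation that follows, not only on one branch of the dispatch. For orientation, a sketch of the LZMA2 control-byte ranges involved (the enum and function are illustrative, not code from xz_dec_lzma2.c):

	/*
	 * LZMA2 frames data in chunks whose first byte says what follows;
	 * 0x00 ends the stream, which is why the fixed code tests it first.
	 */
	enum chunk_kind { CHUNK_END, CHUNK_UNCOMPRESSED, CHUNK_LZMA, CHUNK_BAD };

	static enum chunk_kind classify_control_byte(unsigned char tmp)
	{
		if (tmp == 0x00)
			return CHUNK_END;		/* end-of-stream marker */
		if (tmp >= 0x80)
			return CHUNK_LZMA;		/* compressed chunk */
		if (tmp == 0x01 || tmp == 0x02)
			return CHUNK_UNCOMPRESSED;	/* 0x01 also resets the dict */
		return CHUNK_BAD;			/* 0x03..0x7f: data error */
	}

	int main(void)
	{
		return classify_control_byte(0x00) == CHUNK_END ? 0 : 1;
	}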
diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h
index a65633e06962..482b90f363fe 100644
--- a/lib/xz/xz_private.h
+++ b/lib/xz/xz_private.h
@@ -12,7 +12,7 @@
 
 #ifdef __KERNEL__
 #	include <linux/xz.h>
-#	include <asm/byteorder.h>
+#	include <linux/kernel.h>
 #	include <asm/unaligned.h>
 	/* XZ_PREBOOT may be defined only via decompress_unxz.c. */
 #	ifndef XZ_PREBOOT
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index 46a31e5f49c3..d63381e8e333 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -176,6 +176,7 @@ int zlib_deflateInit2(
     deflate_state *s;
     int noheader = 0;
     deflate_workspace *mem;
+    char *next;
     ush *overlay;
 
     /* We overlay pending_buf and d_buf+l_buf. This works since the average
@@ -199,6 +200,21 @@ int zlib_deflateInit2(
         strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
         return Z_STREAM_ERROR;
     }
+
+    /*
+     * Direct the workspace's pointers to the chunks that were allocated
+     * along with the deflate_workspace struct.
+     */
+    next = (char *) mem;
+    next += sizeof(*mem);
+    mem->window_memory = (Byte *) next;
+    next += zlib_deflate_window_memsize(windowBits);
+    mem->prev_memory = (Pos *) next;
+    next += zlib_deflate_prev_memsize(windowBits);
+    mem->head_memory = (Pos *) next;
+    next += zlib_deflate_head_memsize(memLevel);
+    mem->overlay_memory = next;
+
     s = (deflate_state *) &(mem->deflate_memory);
     strm->state = (struct internal_state *)s;
     s->strm = strm;
@@ -1247,7 +1263,18 @@ static block_state deflate_slow(
     return flush == Z_FINISH ? finish_done : block_done;
 }
 
-int zlib_deflate_workspacesize(void)
+int zlib_deflate_workspacesize(int windowBits, int memLevel)
 {
-    return sizeof(deflate_workspace);
+    if (windowBits < 0) /* undocumented feature: suppress zlib header */
+        windowBits = -windowBits;
+
+    /* Since the return value is typically passed to vmalloc() unchecked... */
+    BUG_ON(memLevel < 1 || memLevel > MAX_MEM_LEVEL || windowBits < 9 ||
+           windowBits > 15);
+
+    return sizeof(deflate_workspace)
+        + zlib_deflate_window_memsize(windowBits)
+        + zlib_deflate_prev_memsize(windowBits)
+        + zlib_deflate_head_memsize(memLevel)
+        + zlib_deflate_overlay_memsize(memLevel);
 }
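The zlib_deflateInit2() hunk above carves the trailing chunks out of a single allocation and aims the workspace pointers at them, and zlib_deflate_workspacesize() now sizes that allocation from the windowBits/memLevel actually used instead of the MAX_WBITS/MAX_MEM_LEVEL worst case. A sketch of the resulting caller pattern (kernel context, so not standalone-runnable; my_deflate_setup() is an invented wrapper and the error handling is abbreviated):

	#include <linux/errno.h>
	#include <linux/vmalloc.h>
	#include <linux/zlib.h>

	static int my_deflate_setup(z_stream *stream, int wbits, int memlevel)
	{
		/* size the workspace for these parameters only */
		stream->workspace = vmalloc(zlib_deflate_workspacesize(wbits, memlevel));
		if (!stream->workspace)
			return -ENOMEM;
		if (zlib_deflateInit2(stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
				      wbits, memlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
			vfree(stream->workspace);
			return -EINVAL;
		}
		return 0;
	}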
diff --git a/lib/zlib_deflate/defutil.h b/lib/zlib_deflate/defutil.h
index 6b15a909ca3f..b640b6402e99 100644
--- a/lib/zlib_deflate/defutil.h
+++ b/lib/zlib_deflate/defutil.h
@@ -241,12 +241,21 @@ typedef struct deflate_state {
 typedef struct deflate_workspace {
     /* State memory for the deflator */
     deflate_state deflate_memory;
-    Byte window_memory[2 * (1 << MAX_WBITS)];
-    Pos prev_memory[1 << MAX_WBITS];
-    Pos head_memory[1 << (MAX_MEM_LEVEL + 7)];
-    char overlay_memory[(1 << (MAX_MEM_LEVEL + 6)) * (sizeof(ush)+2)];
+    Byte *window_memory;
+    Pos *prev_memory;
+    Pos *head_memory;
+    char *overlay_memory;
 } deflate_workspace;
 
+#define zlib_deflate_window_memsize(windowBits) \
+	(2 * (1 << (windowBits)) * sizeof(Byte))
+#define zlib_deflate_prev_memsize(windowBits) \
+	((1 << (windowBits)) * sizeof(Pos))
+#define zlib_deflate_head_memsize(memLevel) \
+	((1 << ((memLevel)+7)) * sizeof(Pos))
+#define zlib_deflate_overlay_memsize(memLevel) \
+	((1 << ((memLevel)+6)) * (sizeof(ush)+2))
+
 /* Output a byte on the stream.
  * IN assertion: there is enough room in pending_buf.
  */
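To make the savings concrete: with the defaults (windowBits = 15, memLevel = 8) each of the four chunks comes to 64 KiB, i.e. 256 KiB on top of the deflate_state, while a small configuration such as windowBits = 9, memLevel = 1 gets by with about 3 KiB. A standalone check of the macros, assuming zlib's usual typedefs (Byte = unsigned char, Pos = ush = unsigned short):

	#include <stdio.h>

	typedef unsigned char  Byte;
	typedef unsigned short ush;
	typedef ush            Pos;

	#define zlib_deflate_window_memsize(windowBits) \
		(2 * (1 << (windowBits)) * sizeof(Byte))
	#define zlib_deflate_prev_memsize(windowBits) \
		((1 << (windowBits)) * sizeof(Pos))
	#define zlib_deflate_head_memsize(memLevel) \
		((1 << ((memLevel)+7)) * sizeof(Pos))
	#define zlib_deflate_overlay_memsize(memLevel) \
		((1 << ((memLevel)+6)) * (sizeof(ush)+2))

	int main(void)
	{
		printf("window  %zu\n", zlib_deflate_window_memsize(15));	/* 65536 */
		printf("prev    %zu\n", zlib_deflate_prev_memsize(15));	/* 65536 */
		printf("head    %zu\n", zlib_deflate_head_memsize(8));		/* 65536 */
		printf("overlay %zu\n", zlib_deflate_overlay_memsize(8));	/* 65536 */
		return 0;
	}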