13 files changed, 162 insertions, 120 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 17670573dda8..b908048f8d6a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1233,6 +1233,7 @@ config RCU_TORTURE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
+	select TASKS_RCU
 	default n
 	help
 	  This option provides a kernel module that runs torture tests
@@ -1261,12 +1262,38 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_TORTURE_TEST_SLOW_PREINIT
+	bool "Slow down RCU grace-period pre-initialization to expose races"
+	depends on RCU_TORTURE_TEST
+	help
+	  This option delays grace-period pre-initialization (the
+	  propagation of CPU-hotplug changes up the rcu_node combining
+	  tree) for a few jiffies between initializing each pair of
+	  consecutive rcu_node structures.  This helps to expose races
+	  involving grace-period pre-initialization, in other words, it
+	  makes your kernel less stable.  It can also greatly increase
+	  grace-period latency, especially on systems with large numbers
+	  of CPUs.  This is useful when torture-testing RCU, but in
+	  almost no other circumstance.
+
+	  Say Y here if you want your system to crash and hang more often.
+	  Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
+	int "How much to slow down RCU grace-period pre-initialization"
+	range 0 5
+	default 3
+	depends on RCU_TORTURE_TEST_SLOW_PREINIT
+	help
+	  This option specifies the number of jiffies to wait between
+	  each rcu_node structure pre-initialization step.
+
 config RCU_TORTURE_TEST_SLOW_INIT
 	bool "Slow down RCU grace-period initialization to expose races"
 	depends on RCU_TORTURE_TEST
 	help
-	  This option makes grace-period initialization block for a
-	  few jiffies between initializing each pair of consecutive
+	  This option delays grace-period initialization for a few
+	  jiffies between initializing each pair of consecutive
 	  rcu_node structures.	This helps to expose races involving
 	  grace-period initialization, in other words, it makes your
 	  kernel less stable.  It can also greatly increase grace-period
@@ -1281,10 +1308,35 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY
 	int "How much to slow down RCU grace-period initialization"
 	range 0 5
 	default 3
+	depends on RCU_TORTURE_TEST_SLOW_INIT
 	help
 	  This option specifies the number of jiffies to wait between
 	  each rcu_node structure initialization.
 
+config RCU_TORTURE_TEST_SLOW_CLEANUP
+	bool "Slow down RCU grace-period cleanup to expose races"
+	depends on RCU_TORTURE_TEST
+	help
+	  This option delays grace-period cleanup for a few jiffies
+	  between cleaning up each pair of consecutive rcu_node
+	  structures.  This helps to expose races involving grace-period
+	  cleanup, in other words, it makes your kernel less stable.
+	  It can also greatly increase grace-period latency, especially
+	  on systems with large numbers of CPUs.  This is useful when
+	  torture-testing RCU, but in almost no other circumstance.
+
+	  Say Y here if you want your system to crash and hang more often.
+	  Say N if you want a sane system.
+
+config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
+	int "How much to slow down RCU grace-period cleanup"
+	range 0 5
+	default 3
+	depends on RCU_TORTURE_TEST_SLOW_CLEANUP
+	help
+	  This option specifies the number of jiffies to wait between
+	  each rcu_node structure cleanup operation.
+
 config RCU_CPU_STALL_TIMEOUT
 	int "RCU CPU stall timeout in seconds"
 	depends on RCU_STALL_COMMON
@@ -1321,6 +1373,17 @@ config RCU_TRACE
 	  Say Y here if you want to enable RCU tracing
 	  Say N if you are unsure.
 
+config RCU_EQS_DEBUG
+	bool "Use this when adding any sort of NO_HZ support to your arch"
+	depends on DEBUG_KERNEL
+	help
+	  This option provides consistency checks in RCU's handling of
+	  NO_HZ.  These checks have proven quite helpful in detecting
+	  bugs in arch-specific NO_HZ code.
+
+	  Say N here if you need ultimate kernel/user switch latencies
+	  Say Y if you are unsure
+
 endmenu # "RCU Debugging"
 
 config DEBUG_BLOCK_EXT_DEVT
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 4fecaedc80a2..777eda7d1ab4 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -10,8 +10,11 @@ config KASAN
 	help
 	  Enables kernel address sanitizer - runtime memory debugger,
 	  designed to find out-of-bounds accesses and use-after-free bugs.
-	  This is strictly debugging feature. It consumes about 1/8
-	  of available memory and brings about ~x3 performance slowdown.
+	  This is strictly a debugging feature and it requires a gcc version
+	  of 4.9.2 or later. Detection of out of bounds accesses to stack or
+	  global variables requires gcc 5.0 or later.
+	  This feature consumes about 1/8 of available memory and brings about
+	  ~x3 performance slowdown.
 	  For better error detection enable CONFIG_STACKTRACE,
 	  and add slub_debug=U to boot cmdline.
 
@@ -40,6 +43,7 @@ config KASAN_INLINE
 	  memory accesses. This is faster than outline (in some workloads
 	  it gives about x2 boost over outline instrumentation), but
 	  make kernel's .text size much bigger.
+	  This requires a gcc version of 5.0 or later.
 
 endchoice
 
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 4f134d8907a7..f610b2a10b3e 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -191,7 +191,7 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
 	/* Update distances based on topology */
 	for_each_cpu(cpu, update_mask) {
 		if (cpu_rmap_copy_neigh(rmap, cpu,
-					topology_thread_cpumask(cpu), 1))
+					topology_sibling_cpumask(cpu), 1))
 			continue;
 		if (cpu_rmap_copy_neigh(rmap, cpu,
 					topology_core_cpumask(cpu), 2))
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 830dd5dec40f..5a70f6196f57 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -16,11 +16,10 @@
 int cpumask_next_and(int n, const struct cpumask *src1p,
 		     const struct cpumask *src2p)
 {
-	struct cpumask tmp;
-
-	if (cpumask_and(&tmp, src1p, src2p))
-		return cpumask_next(n, &tmp);
-	return nr_cpu_ids;
+	while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
+		if (cpumask_test_cpu(n, src2p))
+			break;
+	return n;
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
@@ -139,64 +138,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 #endif
 
 /**
- * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
- *
+ * cpumask_local_spread - select the i'th cpu with local numa cpu's first
  * @i: index number
- * @numa_node: local numa_node
- * @dstp: cpumask with the relevant cpu bit set according to the policy
+ * @node: local numa_node
  *
- * This function sets the cpumask according to a numa aware policy.
- * cpumask could be used as an affinity hint for the IRQ related to a
- * queue. When the policy is to spread queues across cores - local cores
- * first.
+ * This function selects an online CPU according to a numa aware policy;
+ * local cpus are returned first, followed by non-local ones, then it
+ * wraps around.
  *
- * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
- * the cpu bit and need to re-call the function.
+ * It's not very efficient, but useful for setup.
  */
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-	cpumask_var_t mask;
 	int cpu;
-	int ret = 0;
-
-	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-		return -ENOMEM;
 
+	/* Wrap: we always want a cpu. */
 	i %= num_online_cpus();
 
-	if (numa_node == -1 || !cpumask_of_node(numa_node)) {
-		/* Use all online cpu's for non numa aware system */
-		cpumask_copy(mask, cpu_online_mask);
+	if (node == -1) {
+		for_each_cpu(cpu, cpu_online_mask)
+			if (i-- == 0)
+				return cpu;
 	} else {
-		int n;
-
-		cpumask_and(mask,
-			    cpumask_of_node(numa_node), cpu_online_mask);
-
-		n = cpumask_weight(mask);
-		if (i >= n) {
-			i -= n;
-
-			/* If index > number of local cpu's, mask out local
-			 * cpu's
-			 */
-			cpumask_andnot(mask, cpu_online_mask, mask);
+		/* NUMA first. */
+		for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
+			if (i-- == 0)
+				return cpu;
+
+		for_each_cpu(cpu, cpu_online_mask) {
+			/* Skip NUMA nodes, done above. */
+			if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
+				continue;
+
+			if (i-- == 0)
+				return cpu;
 		}
 	}
-
-	for_each_cpu(cpu, mask) {
-		if (--i < 0)
-			goto out;
-	}
-
-	ret = -EAGAIN;
-
-out:
-	free_cpumask_var(mask);
-
-	if (!ret)
-		cpumask_set_cpu(cpu, dstp);
-
-	return ret;
+	BUG();
 }
-EXPORT_SYMBOL(cpumask_set_cpu_local_first);
+EXPORT_SYMBOL(cpumask_local_spread);
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
deleted file mode 100644
index 3e3be40c6a6e..000000000000
--- a/lib/find_last_bit.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* find_last_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2008 IBM Corporation
- * Written by Rusty Russell <rusty@rustcorp.com.au>
- * (Inspired by David Howell's find_next_bit implementation)
- *
- * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
- * size and improve performance, 2015.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/bitops.h>
-#include <linux/bitmap.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-
-#ifndef find_last_bit
-
-unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
-{
-	if (size) {
-		unsigned long val = BITMAP_LAST_WORD_MASK(size);
-		unsigned long idx = (size-1) / BITS_PER_LONG;
-
-		do {
-			val &= addr[idx];
-			if (val)
-				return idx * BITS_PER_LONG + __fls(val);
-
-			val = ~0ul;
-		} while (idx--);
-	}
-	return size;
-}
-EXPORT_SYMBOL(find_last_bit);
-
-#endif
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index aac511417ad1..a89d041592c8 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -639,7 +639,7 @@ do { \
 	**************  MIPS  *****************
 	***************************************/
 #if defined(__mips__) && W_TYPE_SIZE == 32
-#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v)			\
 do {						\
 	UDItype __ll = (UDItype)(u) * (v);	\
@@ -671,7 +671,7 @@ do {						\
 	**************  MIPS/64  **************
 	***************************************/
 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v) \
 do {									\
 	typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 48144cdae819..f051d69f0910 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
  * Compare counter against given value.
  * Return 1 if greater, 0 if equal and -1 if less
  */
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 {
 	s64	count;
 
 	count = percpu_counter_read(fbc);
 	/* Check to see if rough count will be sufficient for comparison */
-	if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+	if (abs(count - rhs) > (batch * num_online_cpus())) {
 		if (count > rhs)
 			return 1;
 		else
@@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	else
 		return 0;
 }
-EXPORT_SYMBOL(percpu_counter_compare);
+EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 3d2aa27b845b..061550de77bc 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -33,7 +33,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
-#include <linux/preempt_mask.h>		/* in_interrupt() */
+#include <linux/preempt.h>		/* in_interrupt() */
 
 
 /*
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index b7595484a815..8fe9d9662abb 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -23,7 +23,7 @@
 
 #ifdef __KERNEL__ /* Real code */
 
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 
 #else /* Dummy code for user space testing */
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 4898442b837f..8609378e6505 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -14,6 +14,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/log2.h>
@@ -25,6 +26,7 @@
 #include <linux/random.h>
 #include <linux/rhashtable.h>
 #include <linux/err.h>
+#include <linux/export.h>
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
@@ -405,13 +407,18 @@ int rhashtable_insert_rehash(struct rhashtable *ht)
 
 	if (rht_grow_above_75(ht, tbl))
 		size *= 2;
-	/* More than two rehashes (not resizes) detected. */
-	else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
+	/* Do not schedule more than one rehash */
+	else if (old_tbl != tbl)
 		return -EBUSY;
 
 	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
-	if (new_tbl == NULL)
+	if (new_tbl == NULL) {
+		/* Schedule async resize/rehash to try allocation
+		 * non-atomic context.
+		 */
+		schedule_work(&ht->run_work);
 		return -ENOMEM;
+	}
 
 	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
 	if (err) {
@@ -441,6 +448,10 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 	if (key && rhashtable_lookup_fast(ht, key, ht->p))
 		goto exit;
 
+	err = -E2BIG;
+	if (unlikely(rht_grow_above_max(ht, tbl)))
+		goto exit;
+
 	err = -EAGAIN;
 	if (rhashtable_check_elasticity(ht, tbl, hash) ||
 	    rht_grow_above_100(ht, tbl))
@@ -733,6 +744,12 @@ int rhashtable_init(struct rhashtable *ht,
 	if (params->max_size)
 		ht->p.max_size = rounddown_pow_of_two(params->max_size);
 
+	if (params->insecure_max_entries)
+		ht->p.insecure_max_entries =
+			rounddown_pow_of_two(params->insecure_max_entries);
+	else
+		ht->p.insecure_max_entries = ht->p.max_size * 2;
+
 	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
 	/* The maximum (not average) chain length grows with the
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index a28df5206d95..3a5f2b366d84 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -57,7 +57,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 			return res + find_zero(data) + 1 - align;
 		}
 		res += sizeof(unsigned long);
-		if (unlikely(max < sizeof(unsigned long)))
+		/* We already handled 'unsigned long' bytes. Did we do it all ? */
+		if (unlikely(max <= sizeof(unsigned long)))
 			break;
 		max -= sizeof(unsigned long);
 		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
@@ -84,13 +85,21 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
  * @str: The string to measure.
  * @count: Maximum count (including NUL character)
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
  * Returns the size of the string INCLUDING the terminating NUL.
- * If the string is too long, returns 'count+1'.
+ * If the string is too long, returns a number larger than @count. User
+ * has to check the return value against "> count".
  * On exception (or invalid count), returns 0.
+ *
+ * NOTE! You should basically never use this function. There is
+ * almost never any valid case for using the length of a user space
+ * string, since the string can be changed at any time by other
+ * threads. Use "strncpy_from_user()" instead to get a stable copy
+ * of the string.
  */
 long strnlen_user(const char __user *str, long count)
 {
@@ -113,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user);
  * strlen_user: - Get the size of a user string INCLUDING final NUL.
  * @str: The string to measure.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 4abda074ea45..42e192decbfd 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -537,8 +537,9 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 
-phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-		       enum dma_data_direction dir)
+static phys_addr_t
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+	   enum dma_data_direction dir)
 {
 	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 
@@ -655,7 +656,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 */
 		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
 		if (paddr == SWIOTLB_MAP_ERROR)
-			return NULL;
+			goto err_warn;
 
 		ret = phys_to_virt(paddr);
 		dev_addr = phys_to_dma(hwdev, paddr);
@@ -669,7 +670,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
 			swiotlb_tbl_unmap_single(hwdev, paddr,
 						 size, DMA_TO_DEVICE);
-			return NULL;
+			goto err_warn;
 		}
 	}
 
@@ -677,6 +678,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	memset(ret, 0, size);
 
 	return ret;
+
+err_warn:
+	pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n",
+		dev_name(hwdev), size);
+	dump_stack();
+
+	return NULL;
 }
 EXPORT_SYMBOL(swiotlb_alloc_coherent);
 
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index a382e4a32609..782ae8ca2c06 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -36,7 +36,7 @@
  * Adds the timer node to the timerqueue, sorted by the
  * node's expires value.
  */
-void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
 	struct rb_node **p = &head->head.rb_node;
 	struct rb_node *parent = NULL;
@@ -56,8 +56,11 @@ void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 	rb_link_node(&node->node, parent, p);
 	rb_insert_color(&node->node, &head->head);
 
-	if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+	if (!head->next || node->expires.tv64 < head->next->expires.tv64) {
 		head->next = node;
+		return true;
+	}
+	return false;
 }
 EXPORT_SYMBOL_GPL(timerqueue_add);
 
@@ -69,7 +72,7 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
  *
  * Removes the timer node from the timerqueue.
  */
-void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 {
 	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
 
@@ -82,6 +85,7 @@ void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 	}
 	rb_erase(&node->node, &head->head);
 	RB_CLEAR_NODE(&node->node);
+	return head->next != NULL;
 }
 EXPORT_SYMBOL_GPL(timerqueue_del);