diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 67 | ||||
-rw-r--r-- | lib/Kconfig.kasan | 8 | ||||
-rw-r--r-- | lib/cpu_rmap.c | 2 | ||||
-rw-r--r-- | lib/cpumask.c | 83 | ||||
-rw-r--r-- | lib/find_last_bit.c | 41 | ||||
-rw-r--r-- | lib/mpi/longlong.h | 4 | ||||
-rw-r--r-- | lib/percpu_counter.c | 6 | ||||
-rw-r--r-- | lib/radix-tree.c | 2 | ||||
-rw-r--r-- | lib/raid6/x86.h | 2 | ||||
-rw-r--r-- | lib/rhashtable.c | 23 | ||||
-rw-r--r-- | lib/strnlen_user.c | 18 | ||||
-rw-r--r-- | lib/swiotlb.c | 16 | ||||
-rw-r--r-- | lib/timerqueue.c | 10 |
13 files changed, 162 insertions, 120 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 17670573dda8..b908048f8d6a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1233,6 +1233,7 @@ config RCU_TORTURE_TEST depends on DEBUG_KERNEL select TORTURE_TEST select SRCU + select TASKS_RCU default n help This option provides a kernel module that runs torture tests @@ -1261,12 +1262,38 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. +config RCU_TORTURE_TEST_SLOW_PREINIT + bool "Slow down RCU grace-period pre-initialization to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period pre-initialization (the + propagation of CPU-hotplug changes up the rcu_node combining + tree) for a few jiffies between initializing each pair of + consecutive rcu_node structures. This helps to expose races + involving grace-period pre-initialization, in other words, it + makes your kernel less stable. It can also greatly increase + grace-period latency, especially on systems with large numbers + of CPUs. This is useful when torture-testing RCU, but in + almost no other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY + int "How much to slow down RCU grace-period pre-initialization" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_PREINIT + help + This option specifies the number of jiffies to wait between + each rcu_node structure pre-initialization step. + config RCU_TORTURE_TEST_SLOW_INIT bool "Slow down RCU grace-period initialization to expose races" depends on RCU_TORTURE_TEST help - This option makes grace-period initialization block for a - few jiffies between initializing each pair of consecutive + This option delays grace-period initialization for a few + jiffies between initializing each pair of consecutive rcu_node structures. This helps to expose races involving grace-period initialization, in other words, it makes your kernel less stable. It can also greatly increase grace-period @@ -1281,10 +1308,35 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY int "How much to slow down RCU grace-period initialization" range 0 5 default 3 + depends on RCU_TORTURE_TEST_SLOW_INIT help This option specifies the number of jiffies to wait between each rcu_node structure initialization. +config RCU_TORTURE_TEST_SLOW_CLEANUP + bool "Slow down RCU grace-period cleanup to expose races" + depends on RCU_TORTURE_TEST + help + This option delays grace-period cleanup for a few jiffies + between cleaning up each pair of consecutive rcu_node + structures. This helps to expose races involving grace-period + cleanup, in other words, it makes your kernel less stable. + It can also greatly increase grace-period latency, especially + on systems with large numbers of CPUs. This is useful when + torture-testing RCU, but in almost no other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY + int "How much to slow down RCU grace-period cleanup" + range 0 5 + default 3 + depends on RCU_TORTURE_TEST_SLOW_CLEANUP + help + This option specifies the number of jiffies to wait between + each rcu_node structure cleanup operation. + config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON @@ -1321,6 +1373,17 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing Say N if you are unsure. +config RCU_EQS_DEBUG + bool "Use this when adding any sort of NO_HZ support to your arch" + depends on DEBUG_KERNEL + help + This option provides consistency checks in RCU's handling of + NO_HZ. These checks have proven quite helpful in detecting + bugs in arch-specific NO_HZ code. + + Say N here if you need ultimate kernel/user switch latencies + Say Y if you are unsure + endmenu # "RCU Debugging" config DEBUG_BLOCK_EXT_DEVT diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4fecaedc80a2..777eda7d1ab4 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -10,8 +10,11 @@ config KASAN help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. - This is strictly debugging feature. It consumes about 1/8 - of available memory and brings about ~x3 performance slowdown. + This is strictly a debugging feature and it requires a gcc version + of 4.9.2 or later. Detection of out of bounds accesses to stack or + global variables requires gcc 5.0 or later. + This feature consumes about 1/8 of available memory and brings about + ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. @@ -40,6 +43,7 @@ config KASAN_INLINE memory accesses. This is faster than outline (in some workloads it gives about x2 boost over outline instrumentation), but make kernel's .text size much bigger. + This requires a gcc version of 5.0 or later. endchoice diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 4f134d8907a7..f610b2a10b3e 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -191,7 +191,7 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, /* Update distances based on topology */ for_each_cpu(cpu, update_mask) { if (cpu_rmap_copy_neigh(rmap, cpu, - topology_thread_cpumask(cpu), 1)) + topology_sibling_cpumask(cpu), 1)) continue; if (cpu_rmap_copy_neigh(rmap, cpu, topology_core_cpumask(cpu), 2)) diff --git a/lib/cpumask.c b/lib/cpumask.c index 830dd5dec40f..5a70f6196f57 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -16,11 +16,10 @@ int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - struct cpumask tmp; - - if (cpumask_and(&tmp, src1p, src2p)) - return cpumask_next(n, &tmp); - return nr_cpu_ids; + while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) + if (cpumask_test_cpu(n, src2p)) + break; + return n; } EXPORT_SYMBOL(cpumask_next_and); @@ -139,64 +138,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * + * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy + * @node: local numa_node * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. + * This function selects an online CPU according to a numa aware policy; + * local cpus are returned first, followed by non-local ones, then it + * wraps around. * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. + * It's not very efficient, but useful for setup. */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +unsigned int cpumask_local_spread(unsigned int i, int node) { - cpumask_var_t mask; int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (numa_node == -1 || !cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); + if (node == -1) { + for_each_cpu(cpu, cpu_online_mask) + if (i-- == 0) + return cpu; } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); + /* NUMA first. */ + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + if (i-- == 0) + return cpu; + + for_each_cpu(cpu, cpu_online_mask) { + /* Skip NUMA nodes, done above. */ + if (cpumask_test_cpu(cpu, cpumask_of_node(node))) + continue; + + if (i-- == 0) + return cpu; } } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; + BUG(); } -EXPORT_SYMBOL(cpumask_set_cpu_local_first); +EXPORT_SYMBOL(cpumask_local_spread); diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c deleted file mode 100644 index 3e3be40c6a6e..000000000000 --- a/lib/find_last_bit.c +++ /dev/null @@ -1,41 +0,0 @@ -/* find_last_bit.c: fallback find next bit implementation - * - * Copyright (C) 2008 IBM Corporation - * Written by Rusty Russell <rusty@rustcorp.com.au> - * (Inspired by David Howell's find_next_bit implementation) - * - * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease - * size and improve performance, 2015. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/bitops.h> -#include <linux/bitmap.h> -#include <linux/export.h> -#include <linux/kernel.h> - -#ifndef find_last_bit - -unsigned long find_last_bit(const unsigned long *addr, unsigned long size) -{ - if (size) { - unsigned long val = BITMAP_LAST_WORD_MASK(size); - unsigned long idx = (size-1) / BITS_PER_LONG; - - do { - val &= addr[idx]; - if (val) - return idx * BITS_PER_LONG + __fls(val); - - val = ~0ul; - } while (idx--); - } - return size; -} -EXPORT_SYMBOL(find_last_bit); - -#endif diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index aac511417ad1..a89d041592c8 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -639,7 +639,7 @@ do { \ ************** MIPS ***************** ***************************************/ #if defined(__mips__) && W_TYPE_SIZE == 32 -#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 +#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ UDItype __ll = (UDItype)(u) * (v); \ @@ -671,7 +671,7 @@ do { \ ************** MIPS/64 ************** ***************************************/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 +#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 48144cdae819..f051d69f0910 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else @@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) else return 0; } -EXPORT_SYMBOL(percpu_counter_compare); +EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3d2aa27b845b..061550de77bc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -33,7 +33,7 @@ #include <linux/string.h> #include <linux/bitops.h> #include <linux/rcupdate.h> -#include <linux/preempt_mask.h> /* in_interrupt() */ +#include <linux/preempt.h> /* in_interrupt() */ /* diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index b7595484a815..8fe9d9662abb 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -23,7 +23,7 @@ #ifdef __KERNEL__ /* Real code */ -#include <asm/i387.h> +#include <asm/fpu/api.h> #else /* Dummy code for user space testing */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4898442b837f..8609378e6505 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -14,6 +14,7 @@ * published by the Free Software Foundation. */ +#include <linux/atomic.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/log2.h> @@ -25,6 +26,7 @@ #include <linux/random.h> #include <linux/rhashtable.h> #include <linux/err.h> +#include <linux/export.h> #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U @@ -405,13 +407,18 @@ int rhashtable_insert_rehash(struct rhashtable *ht) if (rht_grow_above_75(ht, tbl)) size *= 2; - /* More than two rehashes (not resizes) detected. */ - else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) + /* Do not schedule more than one rehash */ + else if (old_tbl != tbl) return -EBUSY; new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); - if (new_tbl == NULL) + if (new_tbl == NULL) { + /* Schedule async resize/rehash to try allocation + * non-atomic context. + */ + schedule_work(&ht->run_work); return -ENOMEM; + } err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { @@ -441,6 +448,10 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, if (key && rhashtable_lookup_fast(ht, key, ht->p)) goto exit; + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto exit; + err = -EAGAIN; if (rhashtable_check_elasticity(ht, tbl, hash) || rht_grow_above_100(ht, tbl)) @@ -733,6 +744,12 @@ int rhashtable_init(struct rhashtable *ht, if (params->max_size) ht->p.max_size = rounddown_pow_of_two(params->max_size); + if (params->insecure_max_entries) + ht->p.insecure_max_entries = + rounddown_pow_of_two(params->insecure_max_entries); + else + ht->p.insecure_max_entries = ht->p.max_size * 2; + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); /* The maximum (not average) chain length grows with the diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index a28df5206d95..3a5f2b366d84 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -57,7 +57,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, return res + find_zero(data) + 1 - align; } res += sizeof(unsigned long); - if (unlikely(max < sizeof(unsigned long))) + /* We already handled 'unsigned long' bytes. Did we do it all ? */ + if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) @@ -84,13 +85,21 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, * @str: The string to measure. * @count: Maximum count (including NUL character) * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * * Returns the size of the string INCLUDING the terminating NUL. - * If the string is too long, returns 'count+1'. + * If the string is too long, returns a number larger than @count. User + * has to check the return value against "> count". * On exception (or invalid count), returns 0. + * + * NOTE! You should basically never use this function. There is + * almost never any valid case for using the length of a user space + * string, since the string can be changed at any time by other + * threads. Use "strncpy_from_user()" instead to get a stable copy + * of the string. */ long strnlen_user(const char __user *str, long count) { @@ -113,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user); * strlen_user: - Get the size of a user string INCLUDING final NUL. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 4abda074ea45..42e192decbfd 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -537,8 +537,9 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); * Allocates bounce buffer and returns its kernel virtual address. */ -phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir) +static phys_addr_t +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) { dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); @@ -655,7 +656,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); if (paddr == SWIOTLB_MAP_ERROR) - return NULL; + goto err_warn; ret = phys_to_virt(paddr); dev_addr = phys_to_dma(hwdev, paddr); @@ -669,7 +670,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); - return NULL; + goto err_warn; } } @@ -677,6 +678,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, memset(ret, 0, size); return ret; + +err_warn: + pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + dev_name(hwdev), size); + dump_stack(); + + return NULL; } EXPORT_SYMBOL(swiotlb_alloc_coherent); diff --git a/lib/timerqueue.c b/lib/timerqueue.c index a382e4a32609..782ae8ca2c06 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -36,7 +36,7 @@ * Adds the timer node to the timerqueue, sorted by the * node's expires value. */ -void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) +bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { struct rb_node **p = &head->head.rb_node; struct rb_node *parent = NULL; @@ -56,8 +56,11 @@ void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &head->head); - if (!head->next || node->expires.tv64 < head->next->expires.tv64) + if (!head->next || node->expires.tv64 < head->next->expires.tv64) { head->next = node; + return true; + } + return false; } EXPORT_SYMBOL_GPL(timerqueue_add); @@ -69,7 +72,7 @@ EXPORT_SYMBOL_GPL(timerqueue_add); * * Removes the timer node from the timerqueue. */ -void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) +bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) { WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); @@ -82,6 +85,7 @@ void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) } rb_erase(&node->node, &head->head); RB_CLEAR_NODE(&node->node); + return head->next != NULL; } EXPORT_SYMBOL_GPL(timerqueue_del); |