diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 102 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/crc64.c | 1 | ||||
-rw-r--r-- | lib/debugobjects.c | 46 | ||||
-rw-r--r-- | lib/devres.c | 20 | ||||
-rw-r--r-- | lib/fdt_addresses.c | 2 | ||||
-rw-r--r-- | lib/iov_iter.c | 3 | ||||
-rw-r--r-- | lib/livepatch/test_klp_shadow_vars.c | 119 | ||||
-rw-r--r-- | lib/raid6/algos.c | 63 | ||||
-rw-r--r-- | lib/raid6/mktables.c | 2 | ||||
-rw-r--r-- | lib/raid6/unroll.awk | 2 | ||||
-rw-r--r-- | lib/sbitmap.c | 2 | ||||
-rw-r--r-- | lib/strncpy_from_user.c | 14 | ||||
-rw-r--r-- | lib/strnlen_user.c | 14 | ||||
-rw-r--r-- | lib/test_xarray.c | 78 | ||||
-rw-r--r-- | lib/vdso/Kconfig | 6 | ||||
-rw-r--r-- | lib/vdso/gettimeofday.c | 205 | ||||
-rw-r--r-- | lib/xarray.c | 41 |
18 files changed, 473 insertions, 249 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d1842fe756d5..6859f523517b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1025,7 +1025,7 @@ config DEBUG_TIMEKEEPING config DEBUG_PREEMPT bool "Debug preemptible kernel" - depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT + depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT default y help If you say Y here then the kernel will use a debug variant of the @@ -1483,6 +1483,55 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. +source "samples/Kconfig" + +config ARCH_HAS_DEVMEM_IS_ALLOWED + bool + +config STRICT_DEVMEM + bool "Filter access to /dev/mem" + depends on MMU && DEVMEM + depends on ARCH_HAS_DEVMEM_IS_ALLOWED + default y if PPC || X86 || ARM64 + help + If this option is disabled, you allow userspace (root) access to all + of memory, including kernel and userspace memory. Accidental + access to this is obviously disastrous, but specific access can + be used by people debugging the kernel. Note that with PAT support + enabled, even in this case there are restrictions on /dev/mem + use due to the cache aliasing requirements. + + If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem + file only allows userspace access to PCI space and the BIOS code and + data regions. This is sufficient for dosemu and X and all common + users of /dev/mem. + + If in doubt, say Y. + +config IO_STRICT_DEVMEM + bool "Filter I/O access to /dev/mem" + depends on STRICT_DEVMEM + help + If this option is disabled, you allow userspace (root) access to all + io-memory regardless of whether a driver is actively using that + range. Accidental access to this is obviously disastrous, but + specific access can be used by people debugging kernel drivers. + + If this option is switched on, the /dev/mem file only allows + userspace access to *idle* io-memory ranges (see /proc/iomem) This + may break traditional users of /dev/mem (dosemu, legacy X, etc...) + if the driver using a given range cannot be disabled. + + If in doubt, say Y. + +menu "$(SRCARCH) Debugging" + +source "arch/$(SRCARCH)/Kconfig.debug" + +endmenu + +menu "Kernel Testing and Coverage" + source "lib/kunit/Kconfig" config NOTIFIER_ERROR_INJECTION @@ -1643,10 +1692,6 @@ config FAULT_INJECTION_STACKTRACE_FILTER help Provide stacktrace filter for fault-injection capabilities -endmenu # "Kernel Testing and Coverage" - -menu "Kernel Testing and Coverage" - config ARCH_HAS_KCOV bool help @@ -2130,52 +2175,7 @@ config MEMTEST memtest=17, mean do 17 test patterns. If you are unsure how to answer this question, answer N. -source "samples/Kconfig" - -config ARCH_HAS_DEVMEM_IS_ALLOWED - bool - -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - depends on MMU && DEVMEM - depends on ARCH_HAS_DEVMEM_IS_ALLOWED - default y if PPC || X86 || ARM64 - ---help--- - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. Note that with PAT support - enabled, even in this case there are restrictions on /dev/mem - use due to the cache aliasing requirements. - - If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem - file only allows userspace access to PCI space and the BIOS code and - data regions. This is sufficient for dosemu and X and all common - users of /dev/mem. - - If in doubt, say Y. -config IO_STRICT_DEVMEM - bool "Filter I/O access to /dev/mem" - depends on STRICT_DEVMEM - ---help--- - If this option is disabled, you allow userspace (root) access to all - io-memory regardless of whether a driver is actively using that - range. Accidental access to this is obviously disastrous, but - specific access can be used by people debugging kernel drivers. - - If this option is switched on, the /dev/mem file only allows - userspace access to *idle* io-memory ranges (see /proc/iomem) This - may break traditional users of /dev/mem (dosemu, legacy X, etc...) - if the driver using a given range cannot be disabled. - - If in doubt, say Y. - -menu "$(SRCARCH) Debugging" - -source "arch/$(SRCARCH)/Kconfig.debug" - -endmenu config HYPERV_TESTING bool "Microsoft Hyper-V driver testing" @@ -2184,4 +2184,6 @@ config HYPERV_TESTING help Select this option to enable Hyper-V vmbus testing. +endmenu # "Kernel Testing and Coverage" + endmenu # Kernel hacking diff --git a/lib/Makefile b/lib/Makefile index 93217d44237f..c20b1debe9b4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -223,7 +223,7 @@ KASAN_SANITIZE_stackdepot.o := n KCOV_INSTRUMENT_stackdepot.o := n libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ - fdt_empty_tree.o + fdt_empty_tree.o fdt_addresses.o $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) diff --git a/lib/crc64.c b/lib/crc64.c index 0ef8ae6ac047..f8928ce28280 100644 --- a/lib/crc64.c +++ b/lib/crc64.c @@ -28,6 +28,7 @@ #include <linux/module.h> #include <linux/types.h> +#include <linux/crc64.h> #include "crc64table.h" MODULE_DESCRIPTION("CRC64 calculations"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 61261195f5b6..48054dbf1b51 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -132,14 +132,18 @@ static void fill_pool(void) struct debug_obj *obj; unsigned long flags; - if (likely(obj_pool_free >= debug_objects_pool_min_level)) + if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level)) return; /* * Reuse objs from the global free list; they will be reinitialized * when allocating. + * + * Both obj_nr_tofree and obj_pool_free are checked locklessly; the + * READ_ONCE()s pair with the WRITE_ONCE()s in pool_lock critical + * sections. */ - while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { + while (READ_ONCE(obj_nr_tofree) && (READ_ONCE(obj_pool_free) < obj_pool_min_free)) { raw_spin_lock_irqsave(&pool_lock, flags); /* * Recheck with the lock held as the worker thread might have @@ -148,9 +152,9 @@ static void fill_pool(void) while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { obj = hlist_entry(obj_to_free.first, typeof(*obj), node); hlist_del(&obj->node); - obj_nr_tofree--; + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); } raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -158,7 +162,7 @@ static void fill_pool(void) if (unlikely(!obj_cache)) return; - while (obj_pool_free < debug_objects_pool_min_level) { + while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { struct debug_obj *new[ODEBUG_BATCH_SIZE]; int cnt; @@ -174,7 +178,7 @@ static void fill_pool(void) while (cnt) { hlist_add_head(&new[--cnt]->node, &obj_pool); debug_objects_allocated++; - obj_pool_free++; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); } raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -236,7 +240,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) obj = __alloc_object(&obj_pool); if (obj) { obj_pool_used++; - obj_pool_free--; + WRITE_ONCE(obj_pool_free, obj_pool_free - 1); /* * Looking ahead, allocate one batch of debug objects and @@ -255,7 +259,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) &percpu_pool->free_objs); percpu_pool->obj_free++; obj_pool_used++; - obj_pool_free--; + WRITE_ONCE(obj_pool_free, obj_pool_free - 1); } } @@ -309,8 +313,8 @@ static void free_obj_work(struct work_struct *work) obj = hlist_entry(obj_to_free.first, typeof(*obj), node); hlist_del(&obj->node); hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; - obj_nr_tofree--; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); } raw_spin_unlock_irqrestore(&pool_lock, flags); return; @@ -324,7 +328,7 @@ free_objs: if (obj_nr_tofree) { hlist_move_list(&obj_to_free, &tofree); debug_objects_freed += obj_nr_tofree; - obj_nr_tofree = 0; + WRITE_ONCE(obj_nr_tofree, 0); } raw_spin_unlock_irqrestore(&pool_lock, flags); @@ -375,10 +379,10 @@ free_to_obj_pool: obj_pool_used--; if (work) { - obj_nr_tofree++; + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); hlist_add_head(&obj->node, &obj_to_free); if (lookahead_count) { - obj_nr_tofree += lookahead_count; + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count); obj_pool_used -= lookahead_count; while (lookahead_count) { hlist_add_head(&objs[--lookahead_count]->node, @@ -396,15 +400,15 @@ free_to_obj_pool: for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { obj = __alloc_object(&obj_pool); hlist_add_head(&obj->node, &obj_to_free); - obj_pool_free--; - obj_nr_tofree++; + WRITE_ONCE(obj_pool_free, obj_pool_free - 1); + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); } } } else { - obj_pool_free++; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); hlist_add_head(&obj->node, &obj_pool); if (lookahead_count) { - obj_pool_free += lookahead_count; + WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count); obj_pool_used -= lookahead_count; while (lookahead_count) { hlist_add_head(&objs[--lookahead_count]->node, @@ -423,7 +427,7 @@ free_to_obj_pool: static void free_object(struct debug_obj *obj) { __free_object(obj); - if (!obj_freeing && obj_nr_tofree) { + if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -982,7 +986,7 @@ repeat: debug_objects_maxchecked = objs_checked; /* Schedule work to actually kmem_cache_free() objects */ - if (!obj_freeing && obj_nr_tofree) { + if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -1008,12 +1012,12 @@ static int debug_stats_show(struct seq_file *m, void *v) seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); seq_printf(m, "warnings :%d\n", debug_objects_warnings); seq_printf(m, "fixups :%d\n", debug_objects_fixups); - seq_printf(m, "pool_free :%d\n", obj_pool_free + obj_percpu_free); + seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free); seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free); seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free); seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); - seq_printf(m, "on_free_list :%d\n", obj_nr_tofree); + seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree)); seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); seq_printf(m, "objs_freed :%d\n", debug_objects_freed); return 0; diff --git a/lib/devres.c b/lib/devres.c index f56070cf970b..6ef51f159c54 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -8,7 +8,6 @@ enum devm_ioremap_type { DEVM_IOREMAP = 0, - DEVM_IOREMAP_NC, DEVM_IOREMAP_UC, DEVM_IOREMAP_WC, }; @@ -37,9 +36,6 @@ static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset, case DEVM_IOREMAP: addr = ioremap(offset, size); break; - case DEVM_IOREMAP_NC: - addr = ioremap_nocache(offset, size); - break; case DEVM_IOREMAP_UC: addr = ioremap_uc(offset, size); break; @@ -88,22 +84,6 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, EXPORT_SYMBOL_GPL(devm_ioremap_uc); /** - * devm_ioremap_nocache - Managed ioremap_nocache() - * @dev: Generic device to remap IO address for - * @offset: Resource address to map - * @size: Size of map - * - * Managed ioremap_nocache(). Map is automatically unmapped on driver - * detach. - */ -void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, - resource_size_t size) -{ - return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NC); -} -EXPORT_SYMBOL(devm_ioremap_nocache); - -/** * devm_ioremap_wc - Managed ioremap_wc() * @dev: Generic device to remap IO address for * @offset: Resource address to map diff --git a/lib/fdt_addresses.c b/lib/fdt_addresses.c new file mode 100644 index 000000000000..23610bcf390b --- /dev/null +++ b/lib/fdt_addresses.c @@ -0,0 +1,2 @@ +#include <linux/libfdt_env.h> +#include "../scripts/dtc/libfdt/fdt_addresses.c" diff --git a/lib/iov_iter.c b/lib/iov_iter.c index fb29c02c6a3c..51595bf3af85 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1222,11 +1222,12 @@ EXPORT_SYMBOL(iov_iter_discard); unsigned long iov_iter_alignment(const struct iov_iter *i) { - unsigned int p_mask = i->pipe->ring_size - 1; unsigned long res = 0; size_t size = i->count; if (unlikely(iov_iter_is_pipe(i))) { + unsigned int p_mask = i->pipe->ring_size - 1; + if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) return size | i->iov_offset; return size; diff --git a/lib/livepatch/test_klp_shadow_vars.c b/lib/livepatch/test_klp_shadow_vars.c index fe5c413efe96..f0b5a1d24e55 100644 --- a/lib/livepatch/test_klp_shadow_vars.c +++ b/lib/livepatch/test_klp_shadow_vars.c @@ -60,36 +60,43 @@ static int ptr_id(void *ptr) */ static void *shadow_get(void *obj, unsigned long id) { - void *ret = klp_shadow_get(obj, id); + int **sv; + sv = klp_shadow_get(obj, id); pr_info("klp_%s(obj=PTR%d, id=0x%lx) = PTR%d\n", - __func__, ptr_id(obj), id, ptr_id(ret)); + __func__, ptr_id(obj), id, ptr_id(sv)); - return ret; + return sv; } static void *shadow_alloc(void *obj, unsigned long id, size_t size, gfp_t gfp_flags, klp_shadow_ctor_t ctor, void *ctor_data) { - void *ret = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, - ctor_data); + int **var = ctor_data; + int **sv; + + sv = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, var); pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n", __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor), - ptr_id(ctor_data), ptr_id(ret)); - return ret; + ptr_id(*var), ptr_id(sv)); + + return sv; } static void *shadow_get_or_alloc(void *obj, unsigned long id, size_t size, gfp_t gfp_flags, klp_shadow_ctor_t ctor, void *ctor_data) { - void *ret = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, - ctor_data); + int **var = ctor_data; + int **sv; + + sv = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, var); pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n", __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor), - ptr_id(ctor_data), ptr_id(ret)); - return ret; + ptr_id(*var), ptr_id(sv)); + + return sv; } static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor) @@ -110,58 +117,70 @@ static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor) /* Shadow variable constructor - remember simple pointer data */ static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data) { - int **shadow_int = shadow_data; - *shadow_int = ctor_data; + int **sv = shadow_data; + int **var = ctor_data; + + if (!var) + return -EINVAL; + + *sv = *var; pr_info("%s: PTR%d -> PTR%d\n", - __func__, ptr_id(shadow_int), ptr_id(ctor_data)); + __func__, ptr_id(sv), ptr_id(*var)); return 0; } static void shadow_dtor(void *obj, void *shadow_data) { + int **sv = shadow_data; + pr_info("%s(obj=PTR%d, shadow_data=PTR%d)\n", - __func__, ptr_id(obj), ptr_id(shadow_data)); + __func__, ptr_id(obj), ptr_id(sv)); } static int test_klp_shadow_vars_init(void) { void *obj = THIS_MODULE; int id = 0x1234; - size_t size = sizeof(int *); gfp_t gfp_flags = GFP_KERNEL; int var1, var2, var3, var4; + int *pv1, *pv2, *pv3, *pv4; int **sv1, **sv2, **sv3, **sv4; - void *ret; + int **sv; + + pv1 = &var1; + pv2 = &var2; + pv3 = &var3; + pv4 = &var4; ptr_id(NULL); - ptr_id(&var1); - ptr_id(&var2); - ptr_id(&var3); - ptr_id(&var4); + ptr_id(pv1); + ptr_id(pv2); + ptr_id(pv3); + ptr_id(pv4); /* * With an empty shadow variable hash table, expect not to find * any matches. */ - ret = shadow_get(obj, id); - if (!ret) + sv = shadow_get(obj, id); + if (!sv) pr_info(" got expected NULL result\n"); /* * Allocate a few shadow variables with different <obj> and <id>. */ - sv1 = shadow_alloc(obj, id, size, gfp_flags, shadow_ctor, &var1); + sv1 = shadow_alloc(obj, id, sizeof(pv1), gfp_flags, shadow_ctor, &pv1); if (!sv1) return -ENOMEM; - sv2 = shadow_alloc(obj + 1, id, size, gfp_flags, shadow_ctor, &var2); + sv2 = shadow_alloc(obj + 1, id, sizeof(pv2), gfp_flags, shadow_ctor, &pv2); if (!sv2) return -ENOMEM; - sv3 = shadow_alloc(obj, id + 1, size, gfp_flags, shadow_ctor, &var3); + sv3 = shadow_alloc(obj, id + 1, sizeof(pv3), gfp_flags, shadow_ctor, &pv3); if (!sv3) return -ENOMEM; @@ -169,23 +188,23 @@ static int test_klp_shadow_vars_init(void) * Verify we can find our new shadow variables and that they point * to expected data. */ - ret = shadow_get(obj, id); - if (!ret) + sv = shadow_get(obj, id); + if (!sv) return -EINVAL; - if (ret == sv1 && *sv1 == &var1) + if (sv == sv1 && *sv1 == pv1) pr_info(" got expected PTR%d -> PTR%d result\n", ptr_id(sv1), ptr_id(*sv1)); - ret = shadow_get(obj + 1, id); - if (!ret) + sv = shadow_get(obj + 1, id); + if (!sv) return -EINVAL; - if (ret == sv2 && *sv2 == &var2) + if (sv == sv2 && *sv2 == pv2) pr_info(" got expected PTR%d -> PTR%d result\n", ptr_id(sv2), ptr_id(*sv2)); - ret = shadow_get(obj, id + 1); - if (!ret) + sv = shadow_get(obj, id + 1); + if (!sv) return -EINVAL; - if (ret == sv3 && *sv3 == &var3) + if (sv == sv3 && *sv3 == pv3) pr_info(" got expected PTR%d -> PTR%d result\n", ptr_id(sv3), ptr_id(*sv3)); @@ -193,14 +212,14 @@ static int test_klp_shadow_vars_init(void) * Allocate or get a few more, this time with the same <obj>, <id>. * The second invocation should return the same shadow var. */ - sv4 = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4); + sv4 = shadow_get_or_alloc(obj + 2, id, sizeof(pv4), gfp_flags, shadow_ctor, &pv4); if (!sv4) return -ENOMEM; - ret = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4); - if (!ret) + sv = shadow_get_or_alloc(obj + 2, id, sizeof(pv4), gfp_flags, shadow_ctor, &pv4); + if (!sv) return -EINVAL; - if (ret == sv4 && *sv4 == &var4) + if (sv == sv4 && *sv4 == pv4) pr_info(" got expected PTR%d -> PTR%d result\n", ptr_id(sv4), ptr_id(*sv4)); @@ -209,27 +228,27 @@ static int test_klp_shadow_vars_init(void) * longer find them. */ shadow_free(obj, id, shadow_dtor); /* sv1 */ - ret = shadow_get(obj, id); - if (!ret) + sv = shadow_get(obj, id); + if (!sv) pr_info(" got expected NULL result\n"); shadow_free(obj + 1, id, shadow_dtor); /* sv2 */ - ret = shadow_get(obj + 1, id); - if (!ret) + sv = shadow_get(obj + 1, id); + if (!sv) pr_info(" got expected NULL result\n"); shadow_free(obj + 2, id, shadow_dtor); /* sv4 */ - ret = shadow_get(obj + 2, id); - if (!ret) + sv = shadow_get(obj + 2, id); + if (!sv) pr_info(" got expected NULL result\n"); /* * We should still find an <id+1> variable. */ - ret = shadow_get(obj, id + 1); - if (!ret) + sv = shadow_get(obj, id + 1); + if (!sv) return -EINVAL; - if (ret == sv3 && *sv3 == &var3) + if (sv == sv3 && *sv3 == pv3) pr_info(" got expected PTR%d -> PTR%d result\n", ptr_id(sv3), ptr_id(*sv3)); @@ -237,8 +256,8 @@ static int test_klp_shadow_vars_init(void) * Free all the <id+1> variables, too. */ shadow_free_all(id + 1, shadow_dtor); /* sv3 */ - ret = shadow_get(obj, id); - if (!ret) + sv = shadow_get(obj, id); + if (!sv) pr_info(" shadow_get() got expected NULL result\n"); diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 17417eee0866..bf1b4765c8f6 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -124,6 +124,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #define time_before(x, y) ((x) < (y)) #endif +#define RAID6_TEST_DISKS 8 +#define RAID6_TEST_DISKS_ORDER 3 + static inline const struct raid6_recov_calls *raid6_choose_recov(void) { const struct raid6_recov_calls *const *algo; @@ -146,7 +149,7 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) } static inline const struct raid6_calls *raid6_choose_gen( - void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) + void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { unsigned long perf, bestgenperf, bestxorperf, j0, j1; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ @@ -181,7 +184,8 @@ static inline const struct raid6_calls *raid6_choose_gen( best = *algo; } pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, - (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + (perf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); if (!(*algo)->xor_syndrome) continue; @@ -204,17 +208,24 @@ static inline const struct raid6_calls *raid6_choose_gen( bestxorperf = perf; pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, - (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); + (perf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); } } if (best) { - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", - best->name, - (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); - if (best->xor_syndrome) - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", - (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); + if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", + best->name, + (bestgenperf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2)); + if (best->xor_syndrome) + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (bestxorperf * HZ * (disks-2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + } else + pr_info("raid6: skip pq benchmark and using algorithm %s\n", + best->name); raid6_call = *best; } else pr_err("raid6: Yikes! No algorithm found!\n"); @@ -228,27 +239,33 @@ static inline const struct raid6_calls *raid6_choose_gen( int __init raid6_select_algo(void) { - const int disks = (65536/PAGE_SIZE)+2; + const int disks = RAID6_TEST_DISKS; const struct raid6_calls *gen_best; const struct raid6_recov_calls *rec_best; - char *syndromes; - void *dptrs[(65536/PAGE_SIZE)+2]; - int i; - - for (i = 0; i < disks-2; i++) - dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; - - /* Normal code - use a 2-page allocation to avoid D$ conflict */ - syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + char *disk_ptr, *p; + void *dptrs[RAID6_TEST_DISKS]; + int i, cycle; - if (!syndromes) { + /* prepare the buffer and fill it circularly with gfmul table */ + disk_ptr = (char *)__get_free_pages(GFP_KERNEL, RAID6_TEST_DISKS_ORDER); + if (!disk_ptr) { pr_err("raid6: Yikes! No memory available.\n"); return -ENOMEM; } - dptrs[disks-2] = syndromes; - dptrs[disks-1] = syndromes + PAGE_SIZE; + p = disk_ptr; + for (i = 0; i < disks; i++) + dptrs[i] = p + PAGE_SIZE * i; + + cycle = ((disks - 2) * PAGE_SIZE) / 65536; + for (i = 0; i < cycle; i++) { + memcpy(p, raid6_gfmul, 65536); + p += 65536; + } + + if ((disks - 2) * PAGE_SIZE % 65536) + memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536); /* select raid gen_syndrome function */ gen_best = raid6_choose_gen(&dptrs, disks); @@ -256,7 +273,7 @@ int __init raid6_select_algo(void) /* select raid recover functions */ rec_best = raid6_choose_recov(); - free_pages((unsigned long)syndromes, 1); + free_pages((unsigned long)disk_ptr, RAID6_TEST_DISKS_ORDER); return gen_best && rec_best ? 0 : -EINVAL; } diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 9c485df1308f..f02e10fa6238 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -56,8 +56,8 @@ int main(int argc, char *argv[]) uint8_t v; uint8_t exptbl[256], invtbl[256]; - printf("#include <linux/raid/pq.h>\n"); printf("#include <linux/export.h>\n"); + printf("#include <linux/raid/pq.h>\n"); /* Compute multiplication table */ printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk index c6aa03631df8..0809805a7e23 100644 --- a/lib/raid6/unroll.awk +++ b/lib/raid6/unroll.awk @@ -13,7 +13,7 @@ BEGIN { for (i = 0; i < rep; ++i) { tmp = $0 gsub(/\$\$/, i, tmp) - gsub(/\$\#/, n, tmp) + gsub(/\$#/, n, tmp) gsub(/\$\*/, "$", tmp) print tmp } diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 33feec8989f1..af88d1346dd7 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -650,8 +650,8 @@ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, if (!sbq_wait->sbq) { sbq_wait->sbq = sbq; atomic_inc(&sbq->ws_active); + add_wait_queue(&ws->wait, &sbq_wait->wait); } - add_wait_queue(&ws->wait, &sbq_wait->wait); } EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index dccb95af6003..706020b06617 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -30,13 +30,6 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; unsigned long res = 0; - /* - * Truncate 'max' to the user-specified limit, so that - * we only have one limit we need to check in the loop - */ - if (max > count) - max = count; - if (IS_UNALIGNED(src, dst)) goto byte_at_a_time; @@ -114,6 +107,13 @@ long strncpy_from_user(char *dst, const char __user *src, long count) unsigned long max = max_addr - src_addr; long retval; + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + kasan_check_write(dst, count); check_object_size(dst, count, false); if (user_access_begin(src, max)) { diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 6c0005d5dd5c..41670d4a5816 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -27,13 +27,6 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long c; /* - * Truncate 'max' to the user-specified limit, so that - * we only have one limit we need to check in the loop - */ - if (max > count) - max = count; - - /* * Do everything aligned. But that means that we * need to also expand the maximum.. */ @@ -109,6 +102,13 @@ long strnlen_user(const char __user *str, long count) unsigned long max = max_addr - src_addr; long retval; + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + if (user_access_begin(str, max)) { retval = do_strnlen_user(str, count, max); user_access_end(); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 7df4f7f395bf..55c14e8c8859 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -2,6 +2,7 @@ /* * test_xarray.c: Test the XArray API * Copyright (c) 2017-2018 Microsoft Corporation + * Copyright (c) 2019-2020 Oracle * Author: Matthew Wilcox <willy@infradead.org> */ @@ -902,28 +903,34 @@ static noinline void check_store_iter(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_find(struct xarray *xa) +static noinline void check_multi_find_1(struct xarray *xa, unsigned order) { #ifdef CONFIG_XARRAY_MULTI + unsigned long multi = 3 << order; + unsigned long next = 4 << order; unsigned long index; - xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL); - XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL); + xa_store_order(xa, multi, order, xa_mk_value(multi), GFP_KERNEL); + XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store_index(xa, next + 1, GFP_KERNEL) != NULL); index = 0; XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) != - xa_mk_value(12)); - XA_BUG_ON(xa, index != 12); - index = 13; + xa_mk_value(multi)); + XA_BUG_ON(xa, index != multi); + index = multi + 1; XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) != - xa_mk_value(12)); - XA_BUG_ON(xa, (index < 12) || (index >= 16)); + xa_mk_value(multi)); + XA_BUG_ON(xa, (index < multi) || (index >= next)); XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) != - xa_mk_value(16)); - XA_BUG_ON(xa, index != 16); - - xa_erase_index(xa, 12); - xa_erase_index(xa, 16); + xa_mk_value(next)); + XA_BUG_ON(xa, index != next); + XA_BUG_ON(xa, xa_find_after(xa, &index, next, XA_PRESENT) != NULL); + XA_BUG_ON(xa, index != next); + + xa_erase_index(xa, multi); + xa_erase_index(xa, next); + xa_erase_index(xa, next + 1); XA_BUG_ON(xa, !xa_empty(xa)); #endif } @@ -1046,12 +1053,33 @@ static noinline void check_find_3(struct xarray *xa) xa_destroy(xa); } +static noinline void check_find_4(struct xarray *xa) +{ + unsigned long index = 0; + void *entry; + + xa_store_index(xa, ULONG_MAX, GFP_KERNEL); + + entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT); + XA_BUG_ON(xa, entry != xa_mk_index(ULONG_MAX)); + + entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT); + XA_BUG_ON(xa, entry); + + xa_erase_index(xa, ULONG_MAX); +} + static noinline void check_find(struct xarray *xa) { + unsigned i; + check_find_1(xa); check_find_2(xa); check_find_3(xa); - check_multi_find(xa); + check_find_4(xa); + + for (i = 2; i < 10; i++) + check_multi_find_1(xa, i); check_multi_find_2(xa); } @@ -1132,6 +1160,27 @@ static noinline void check_move_tiny(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_move_max(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + + xa_store_index(xa, ULONG_MAX, GFP_KERNEL); + rcu_read_lock(); + XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_index(ULONG_MAX)); + XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL); + rcu_read_unlock(); + + xas_set(&xas, 0); + rcu_read_lock(); + XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_index(ULONG_MAX)); + xas_pause(&xas); + XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL); + rcu_read_unlock(); + + xa_erase_index(xa, ULONG_MAX); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_move_small(struct xarray *xa, unsigned long idx) { XA_STATE(xas, xa, 0); @@ -1240,6 +1289,7 @@ static noinline void check_move(struct xarray *xa) xa_destroy(xa); check_move_tiny(xa); + check_move_max(xa); for (i = 0; i < 16; i++) check_move_small(xa, 1UL << i); diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 9fe698ff62ec..d883ac299508 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -24,4 +24,10 @@ config GENERIC_COMPAT_VDSO help This config option enables the compat VDSO layer. +config GENERIC_VDSO_TIME_NS + bool + help + Selected by architectures which support time namespaces in the + VDSO + endif diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 9ecfd3b547ba..f8b8ec5e63ac 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -38,12 +38,22 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) } #endif -static int do_hres(const struct vdso_data *vd, clockid_t clk, - struct __kernel_timespec *ts) +#ifdef CONFIG_TIME_NS +static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { - const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; - u64 cycles, last, sec, ns; + const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct timens_offset *offs = &vdns->offset[clk]; + const struct vdso_timestamp *vdso_ts; + u64 cycles, last, ns; u32 seq; + s64 sec; + + if (clk != CLOCK_MONOTONIC_RAW) + vd = &vd[CS_HRES_COARSE]; + else + vd = &vd[CS_RAW]; + vdso_ts = &vd->basetime[clk]; do { seq = vdso_read_begin(vd); @@ -58,6 +68,10 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vd, seq))); + /* Add the namespace offset */ + sec += offs->sec; + ns += offs->nsec; + /* * Do this outside the loop: a race inside the loop could result * in __iter_div_u64_rem() being extremely slow. @@ -67,18 +81,128 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, return 0; } +#else +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return NULL; +} + +static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + return -EINVAL; +} +#endif -static void do_coarse(const struct vdso_data *vd, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, + struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + u64 cycles, last, sec, ns; u32 seq; do { + /* + * Open coded to handle VCLOCK_TIMENS. Time namespace + * enabled tasks have a special VVAR page installed which + * has vd->seq set to 1 and vd->clock_mode set to + * VCLOCK_TIMENS. For non time namespace affected tasks + * this does not affect performance because if vd->seq is + * odd, i.e. a concurrent update is in progress the extra + * check for vd->clock_mode is just a few extra + * instructions while spin waiting for vd->seq to become + * even again. + */ + while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VCLOCK_TIMENS) + return do_hres_timens(vd, clk, ts); + cpu_relax(); + } + smp_rmb(); + + cycles = __arch_get_hw_counter(vd->clock_mode); + ns = vdso_ts->nsec; + last = vd->cycle_last; + if (unlikely((s64)cycles < 0)) + return -1; + + ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); + ns >>= vd->shift; + sec = vdso_ts->sec; + } while (unlikely(vdso_read_retry(vd, seq))); + + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +#ifdef CONFIG_TIME_NS +static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + const struct timens_offset *offs = &vdns->offset[clk]; + u64 nsec; + s64 sec; + s32 seq; + + do { seq = vdso_read_begin(vd); + sec = vdso_ts->sec; + nsec = vdso_ts->nsec; + } while (unlikely(vdso_read_retry(vd, seq))); + + /* Add the namespace offset */ + sec += offs->sec; + nsec += offs->nsec; + + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); + ts->tv_nsec = nsec; + return 0; +} +#else +static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) +{ + return -1; +} +#endif + +static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + u32 seq; + + do { + /* + * Open coded to handle VCLOCK_TIMENS. See comment in + * do_hres(). + */ + while ((seq = READ_ONCE(vd->seq)) & 1) { + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VCLOCK_TIMENS) + return do_coarse_timens(vd, clk, ts); + cpu_relax(); + } + smp_rmb(); + ts->tv_sec = vdso_ts->sec; ts->tv_nsec = vdso_ts->nsec; } while (unlikely(vdso_read_retry(vd, seq))); + + return 0; } static __maybe_unused int @@ -96,15 +220,16 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts) * clocks are handled in the VDSO directly. */ msk = 1U << clock; - if (likely(msk & VDSO_HRES)) { - return do_hres(&vd[CS_HRES_COARSE], clock, ts); - } else if (msk & VDSO_COARSE) { - do_coarse(&vd[CS_HRES_COARSE], clock, ts); - return 0; - } else if (msk & VDSO_RAW) { - return do_hres(&vd[CS_RAW], clock, ts); - } - return -1; + if (likely(msk & VDSO_HRES)) + vd = &vd[CS_HRES_COARSE]; + else if (msk & VDSO_COARSE) + return do_coarse(&vd[CS_HRES_COARSE], clock, ts); + else if (msk & VDSO_RAW) + vd = &vd[CS_RAW]; + else + return -1; + + return do_hres(vd, clock, ts); } static __maybe_unused int @@ -117,6 +242,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) return 0; } +#ifdef BUILD_VDSO32 static __maybe_unused int __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) { @@ -125,20 +251,16 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) ret = __cvdso_clock_gettime_common(clock, &ts); -#ifdef VDSO_HAS_32BIT_FALLBACK if (unlikely(ret)) return clock_gettime32_fallback(clock, res); -#else - if (unlikely(ret)) - ret = clock_gettime_fallback(clock, &ts); -#endif - if (likely(!ret)) { - res->tv_sec = ts.tv_sec; - res->tv_nsec = ts.tv_nsec; - } + /* For ret == 0 */ + res->tv_sec = ts.tv_sec; + res->tv_nsec = ts.tv_nsec; + return ret; } +#endif /* BUILD_VDSO32 */ static __maybe_unused int __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) @@ -156,6 +278,10 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) } if (unlikely(tz != NULL)) { + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VCLOCK_TIMENS) + vd = __arch_get_timens_vdso_data(); + tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; } @@ -167,7 +293,12 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time) { const struct vdso_data *vd = __arch_get_vdso_data(); - __kernel_old_time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); + __kernel_old_time_t t; + + if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS) + vd = __arch_get_timens_vdso_data(); + + t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); if (time) *time = t; @@ -181,7 +312,6 @@ static __maybe_unused int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) { const struct vdso_data *vd = __arch_get_vdso_data(); - u64 hrtimer_res; u32 msk; u64 ns; @@ -189,27 +319,24 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1; - hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); + if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS) + vd = __arch_get_timens_vdso_data(); + /* * Convert the clockid to a bitmask and use it to check which * clocks are handled in the VDSO directly. */ msk = 1U << clock; - if (msk & VDSO_HRES) { + if (msk & (VDSO_HRES | VDSO_RAW)) { /* * Preserves the behaviour of posix_get_hrtimer_res(). */ - ns = hrtimer_res; + ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); } else if (msk & VDSO_COARSE) { /* * Preserves the behaviour of posix_get_coarse_res(). */ ns = LOW_RES_NSEC; - } else if (msk & VDSO_RAW) { - /* - * Preserves the behaviour of posix_get_hrtimer_res(). - */ - ns = hrtimer_res; } else { return -1; } @@ -221,6 +348,7 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) return 0; } +static __maybe_unused int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) { int ret = __cvdso_clock_getres_common(clock, res); @@ -230,6 +358,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) return 0; } +#ifdef BUILD_VDSO32 static __maybe_unused int __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) { @@ -238,18 +367,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) ret = __cvdso_clock_getres_common(clock, &ts); -#ifdef VDSO_HAS_32BIT_FALLBACK if (unlikely(ret)) return clock_getres32_fallback(clock, res); -#else - if (unlikely(ret)) - ret = clock_getres_fallback(clock, &ts); -#endif - if (likely(!ret && res)) { + if (likely(res)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } return ret; } +#endif /* BUILD_VDSO32 */ #endif /* VDSO_HAS_CLOCK_GETRES */ diff --git a/lib/xarray.c b/lib/xarray.c index 1237c213f52b..1d9fab7db8da 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0+ /* * XArray implementation - * Copyright (c) 2017 Microsoft Corporation + * Copyright (c) 2017-2018 Microsoft Corporation + * Copyright (c) 2018-2020 Oracle * Author: Matthew Wilcox <willy@infradead.org> */ @@ -967,6 +968,7 @@ void xas_pause(struct xa_state *xas) if (xas_invalid(xas)) return; + xas->xa_node = XAS_RESTART; if (node) { unsigned int offset = xas->xa_offset; while (++offset < XA_CHUNK_SIZE) { @@ -974,10 +976,11 @@ void xas_pause(struct xa_state *xas) break; } xas->xa_index += (offset - xas->xa_offset) << node->shift; + if (xas->xa_index == 0) + xas->xa_node = XAS_BOUNDS; } else { xas->xa_index++; } - xas->xa_node = XAS_RESTART; } EXPORT_SYMBOL_GPL(xas_pause); @@ -1079,13 +1082,15 @@ void *xas_find(struct xa_state *xas, unsigned long max) { void *entry; - if (xas_error(xas)) + if (xas_error(xas) || xas->xa_node == XAS_BOUNDS) return NULL; + if (xas->xa_index > max) + return set_bounds(xas); if (!xas->xa_node) { xas->xa_index = 1; return set_bounds(xas); - } else if (xas_top(xas->xa_node)) { + } else if (xas->xa_node == XAS_RESTART) { entry = xas_load(xas); if (entry || xas_not_node(xas->xa_node)) return entry; @@ -1150,6 +1155,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) if (xas_error(xas)) return NULL; + if (xas->xa_index > max) + goto max; if (!xas->xa_node) { xas->xa_index = 1; @@ -1824,6 +1831,17 @@ void *xa_find(struct xarray *xa, unsigned long *indexp, } EXPORT_SYMBOL(xa_find); +static bool xas_sibling(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + unsigned long mask; + + if (!node) + return false; + mask = (XA_CHUNK_SIZE << node->shift) - 1; + return (xas->xa_index & mask) > (xas->xa_offset << node->shift); +} + /** * xa_find_after() - Search the XArray for a present entry. * @xa: XArray. @@ -1847,21 +1865,20 @@ void *xa_find_after(struct xarray *xa, unsigned long *indexp, XA_STATE(xas, xa, *indexp + 1); void *entry; + if (xas.xa_index == 0) + return NULL; + rcu_read_lock(); for (;;) { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); - if (xas.xa_node == XAS_BOUNDS) + + if (xas_invalid(&xas)) break; - if (xas.xa_shift) { - if (xas.xa_index & ((1UL << xas.xa_shift) - 1)) - continue; - } else { - if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK)) - continue; - } + if (xas_sibling(&xas)) + continue; if (!xas_retry(&xas, entry)) break; } |