summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig.debug102
-rw-r--r--lib/Makefile2
-rw-r--r--lib/crc64.c1
-rw-r--r--lib/debugobjects.c46
-rw-r--r--lib/devres.c20
-rw-r--r--lib/fdt_addresses.c2
-rw-r--r--lib/iov_iter.c3
-rw-r--r--lib/livepatch/test_klp_shadow_vars.c119
-rw-r--r--lib/raid6/algos.c63
-rw-r--r--lib/raid6/mktables.c2
-rw-r--r--lib/raid6/unroll.awk2
-rw-r--r--lib/sbitmap.c2
-rw-r--r--lib/strncpy_from_user.c14
-rw-r--r--lib/strnlen_user.c14
-rw-r--r--lib/test_xarray.c78
-rw-r--r--lib/vdso/Kconfig6
-rw-r--r--lib/vdso/gettimeofday.c205
-rw-r--r--lib/xarray.c41
18 files changed, 473 insertions, 249 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d1842fe756d5..6859f523517b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1025,7 +1025,7 @@ config DEBUG_TIMEKEEPING
config DEBUG_PREEMPT
bool "Debug preemptible kernel"
- depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
+ depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
default y
help
If you say Y here then the kernel will use a debug variant of the
@@ -1483,6 +1483,55 @@ config PROVIDE_OHCI1394_DMA_INIT
See Documentation/debugging-via-ohci1394.txt for more information.
+source "samples/Kconfig"
+
+config ARCH_HAS_DEVMEM_IS_ALLOWED
+ bool
+
+config STRICT_DEVMEM
+ bool "Filter access to /dev/mem"
+ depends on MMU && DEVMEM
+ depends on ARCH_HAS_DEVMEM_IS_ALLOWED
+ default y if PPC || X86 || ARM64
+ help
+ If this option is disabled, you allow userspace (root) access to all
+ of memory, including kernel and userspace memory. Accidental
+ access to this is obviously disastrous, but specific access can
+ be used by people debugging the kernel. Note that with PAT support
+ enabled, even in this case there are restrictions on /dev/mem
+ use due to the cache aliasing requirements.
+
+ If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
+ file only allows userspace access to PCI space and the BIOS code and
+ data regions. This is sufficient for dosemu and X and all common
+ users of /dev/mem.
+
+ If in doubt, say Y.
+
+config IO_STRICT_DEVMEM
+ bool "Filter I/O access to /dev/mem"
+ depends on STRICT_DEVMEM
+ help
+ If this option is disabled, you allow userspace (root) access to all
+ io-memory regardless of whether a driver is actively using that
+ range. Accidental access to this is obviously disastrous, but
+ specific access can be used by people debugging kernel drivers.
+
+ If this option is switched on, the /dev/mem file only allows
+ userspace access to *idle* io-memory ranges (see /proc/iomem) This
+ may break traditional users of /dev/mem (dosemu, legacy X, etc...)
+ if the driver using a given range cannot be disabled.
+
+ If in doubt, say Y.
+
+menu "$(SRCARCH) Debugging"
+
+source "arch/$(SRCARCH)/Kconfig.debug"
+
+endmenu
+
+menu "Kernel Testing and Coverage"
+
source "lib/kunit/Kconfig"
config NOTIFIER_ERROR_INJECTION
@@ -1643,10 +1692,6 @@ config FAULT_INJECTION_STACKTRACE_FILTER
help
Provide stacktrace filter for fault-injection capabilities
-endmenu # "Kernel Testing and Coverage"
-
-menu "Kernel Testing and Coverage"
-
config ARCH_HAS_KCOV
bool
help
@@ -2130,52 +2175,7 @@ config MEMTEST
memtest=17, mean do 17 test patterns.
If you are unsure how to answer this question, answer N.
-source "samples/Kconfig"
-
-config ARCH_HAS_DEVMEM_IS_ALLOWED
- bool
-
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- depends on MMU && DEVMEM
- depends on ARCH_HAS_DEVMEM_IS_ALLOWED
- default y if PPC || X86 || ARM64
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel. Note that with PAT support
- enabled, even in this case there are restrictions on /dev/mem
- use due to the cache aliasing requirements.
-
- If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
- file only allows userspace access to PCI space and the BIOS code and
- data regions. This is sufficient for dosemu and X and all common
- users of /dev/mem.
-
- If in doubt, say Y.
-config IO_STRICT_DEVMEM
- bool "Filter I/O access to /dev/mem"
- depends on STRICT_DEVMEM
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- io-memory regardless of whether a driver is actively using that
- range. Accidental access to this is obviously disastrous, but
- specific access can be used by people debugging kernel drivers.
-
- If this option is switched on, the /dev/mem file only allows
- userspace access to *idle* io-memory ranges (see /proc/iomem) This
- may break traditional users of /dev/mem (dosemu, legacy X, etc...)
- if the driver using a given range cannot be disabled.
-
- If in doubt, say Y.
-
-menu "$(SRCARCH) Debugging"
-
-source "arch/$(SRCARCH)/Kconfig.debug"
-
-endmenu
config HYPERV_TESTING
bool "Microsoft Hyper-V driver testing"
@@ -2184,4 +2184,6 @@ config HYPERV_TESTING
help
Select this option to enable Hyper-V vmbus testing.
+endmenu # "Kernel Testing and Coverage"
+
endmenu # Kernel hacking
diff --git a/lib/Makefile b/lib/Makefile
index 93217d44237f..c20b1debe9b4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -223,7 +223,7 @@ KASAN_SANITIZE_stackdepot.o := n
KCOV_INSTRUMENT_stackdepot.o := n
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
- fdt_empty_tree.o
+ fdt_empty_tree.o fdt_addresses.o
$(foreach file, $(libfdt_files), \
$(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt))
lib-$(CONFIG_LIBFDT) += $(libfdt_files)
diff --git a/lib/crc64.c b/lib/crc64.c
index 0ef8ae6ac047..f8928ce28280 100644
--- a/lib/crc64.c
+++ b/lib/crc64.c
@@ -28,6 +28,7 @@
#include <linux/module.h>
#include <linux/types.h>
+#include <linux/crc64.h>
#include "crc64table.h"
MODULE_DESCRIPTION("CRC64 calculations");
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 61261195f5b6..48054dbf1b51 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -132,14 +132,18 @@ static void fill_pool(void)
struct debug_obj *obj;
unsigned long flags;
- if (likely(obj_pool_free >= debug_objects_pool_min_level))
+ if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level))
return;
/*
* Reuse objs from the global free list; they will be reinitialized
* when allocating.
+ *
+ * Both obj_nr_tofree and obj_pool_free are checked locklessly; the
+ * READ_ONCE()s pair with the WRITE_ONCE()s in pool_lock critical
+ * sections.
*/
- while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
+ while (READ_ONCE(obj_nr_tofree) && (READ_ONCE(obj_pool_free) < obj_pool_min_free)) {
raw_spin_lock_irqsave(&pool_lock, flags);
/*
* Recheck with the lock held as the worker thread might have
@@ -148,9 +152,9 @@ static void fill_pool(void)
while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
hlist_del(&obj->node);
- obj_nr_tofree--;
+ WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
hlist_add_head(&obj->node, &obj_pool);
- obj_pool_free++;
+ WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
}
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
@@ -158,7 +162,7 @@ static void fill_pool(void)
if (unlikely(!obj_cache))
return;
- while (obj_pool_free < debug_objects_pool_min_level) {
+ while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) {
struct debug_obj *new[ODEBUG_BATCH_SIZE];
int cnt;
@@ -174,7 +178,7 @@ static void fill_pool(void)
while (cnt) {
hlist_add_head(&new[--cnt]->node, &obj_pool);
debug_objects_allocated++;
- obj_pool_free++;
+ WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
}
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
@@ -236,7 +240,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
obj = __alloc_object(&obj_pool);
if (obj) {
obj_pool_used++;
- obj_pool_free--;
+ WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
/*
* Looking ahead, allocate one batch of debug objects and
@@ -255,7 +259,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
&percpu_pool->free_objs);
percpu_pool->obj_free++;
obj_pool_used++;
- obj_pool_free--;
+ WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
}
}
@@ -309,8 +313,8 @@ static void free_obj_work(struct work_struct *work)
obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
hlist_del(&obj->node);
hlist_add_head(&obj->node, &obj_pool);
- obj_pool_free++;
- obj_nr_tofree--;
+ WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
+ WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
}
raw_spin_unlock_irqrestore(&pool_lock, flags);
return;
@@ -324,7 +328,7 @@ free_objs:
if (obj_nr_tofree) {
hlist_move_list(&obj_to_free, &tofree);
debug_objects_freed += obj_nr_tofree;
- obj_nr_tofree = 0;
+ WRITE_ONCE(obj_nr_tofree, 0);
}
raw_spin_unlock_irqrestore(&pool_lock, flags);
@@ -375,10 +379,10 @@ free_to_obj_pool:
obj_pool_used--;
if (work) {
- obj_nr_tofree++;
+ WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
hlist_add_head(&obj->node, &obj_to_free);
if (lookahead_count) {
- obj_nr_tofree += lookahead_count;
+ WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count);
obj_pool_used -= lookahead_count;
while (lookahead_count) {
hlist_add_head(&objs[--lookahead_count]->node,
@@ -396,15 +400,15 @@ free_to_obj_pool:
for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
obj = __alloc_object(&obj_pool);
hlist_add_head(&obj->node, &obj_to_free);
- obj_pool_free--;
- obj_nr_tofree++;
+ WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
+ WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
}
}
} else {
- obj_pool_free++;
+ WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
hlist_add_head(&obj->node, &obj_pool);
if (lookahead_count) {
- obj_pool_free += lookahead_count;
+ WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count);
obj_pool_used -= lookahead_count;
while (lookahead_count) {
hlist_add_head(&objs[--lookahead_count]->node,
@@ -423,7 +427,7 @@ free_to_obj_pool:
static void free_object(struct debug_obj *obj)
{
__free_object(obj);
- if (!obj_freeing && obj_nr_tofree) {
+ if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) {
WRITE_ONCE(obj_freeing, true);
schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
}
@@ -982,7 +986,7 @@ repeat:
debug_objects_maxchecked = objs_checked;
/* Schedule work to actually kmem_cache_free() objects */
- if (!obj_freeing && obj_nr_tofree) {
+ if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) {
WRITE_ONCE(obj_freeing, true);
schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
}
@@ -1008,12 +1012,12 @@ static int debug_stats_show(struct seq_file *m, void *v)
seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
seq_printf(m, "warnings :%d\n", debug_objects_warnings);
seq_printf(m, "fixups :%d\n", debug_objects_fixups);
- seq_printf(m, "pool_free :%d\n", obj_pool_free + obj_percpu_free);
+ seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free);
seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free);
seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free);
seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
- seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
+ seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree));
seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
seq_printf(m, "objs_freed :%d\n", debug_objects_freed);
return 0;
diff --git a/lib/devres.c b/lib/devres.c
index f56070cf970b..6ef51f159c54 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -8,7 +8,6 @@
enum devm_ioremap_type {
DEVM_IOREMAP = 0,
- DEVM_IOREMAP_NC,
DEVM_IOREMAP_UC,
DEVM_IOREMAP_WC,
};
@@ -37,9 +36,6 @@ static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset,
case DEVM_IOREMAP:
addr = ioremap(offset, size);
break;
- case DEVM_IOREMAP_NC:
- addr = ioremap_nocache(offset, size);
- break;
case DEVM_IOREMAP_UC:
addr = ioremap_uc(offset, size);
break;
@@ -88,22 +84,6 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset,
EXPORT_SYMBOL_GPL(devm_ioremap_uc);
/**
- * devm_ioremap_nocache - Managed ioremap_nocache()
- * @dev: Generic device to remap IO address for
- * @offset: Resource address to map
- * @size: Size of map
- *
- * Managed ioremap_nocache(). Map is automatically unmapped on driver
- * detach.
- */
-void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
- resource_size_t size)
-{
- return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NC);
-}
-EXPORT_SYMBOL(devm_ioremap_nocache);
-
-/**
* devm_ioremap_wc - Managed ioremap_wc()
* @dev: Generic device to remap IO address for
* @offset: Resource address to map
diff --git a/lib/fdt_addresses.c b/lib/fdt_addresses.c
new file mode 100644
index 000000000000..23610bcf390b
--- /dev/null
+++ b/lib/fdt_addresses.c
@@ -0,0 +1,2 @@
+#include <linux/libfdt_env.h>
+#include "../scripts/dtc/libfdt/fdt_addresses.c"
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index fb29c02c6a3c..51595bf3af85 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1222,11 +1222,12 @@ EXPORT_SYMBOL(iov_iter_discard);
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
- unsigned int p_mask = i->pipe->ring_size - 1;
unsigned long res = 0;
size_t size = i->count;
if (unlikely(iov_iter_is_pipe(i))) {
+ unsigned int p_mask = i->pipe->ring_size - 1;
+
if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
return size | i->iov_offset;
return size;
diff --git a/lib/livepatch/test_klp_shadow_vars.c b/lib/livepatch/test_klp_shadow_vars.c
index fe5c413efe96..f0b5a1d24e55 100644
--- a/lib/livepatch/test_klp_shadow_vars.c
+++ b/lib/livepatch/test_klp_shadow_vars.c
@@ -60,36 +60,43 @@ static int ptr_id(void *ptr)
*/
static void *shadow_get(void *obj, unsigned long id)
{
- void *ret = klp_shadow_get(obj, id);
+ int **sv;
+ sv = klp_shadow_get(obj, id);
pr_info("klp_%s(obj=PTR%d, id=0x%lx) = PTR%d\n",
- __func__, ptr_id(obj), id, ptr_id(ret));
+ __func__, ptr_id(obj), id, ptr_id(sv));
- return ret;
+ return sv;
}
static void *shadow_alloc(void *obj, unsigned long id, size_t size,
gfp_t gfp_flags, klp_shadow_ctor_t ctor,
void *ctor_data)
{
- void *ret = klp_shadow_alloc(obj, id, size, gfp_flags, ctor,
- ctor_data);
+ int **var = ctor_data;
+ int **sv;
+
+ sv = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, var);
pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n",
__func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor),
- ptr_id(ctor_data), ptr_id(ret));
- return ret;
+ ptr_id(*var), ptr_id(sv));
+
+ return sv;
}
static void *shadow_get_or_alloc(void *obj, unsigned long id, size_t size,
gfp_t gfp_flags, klp_shadow_ctor_t ctor,
void *ctor_data)
{
- void *ret = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor,
- ctor_data);
+ int **var = ctor_data;
+ int **sv;
+
+ sv = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, var);
pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n",
__func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor),
- ptr_id(ctor_data), ptr_id(ret));
- return ret;
+ ptr_id(*var), ptr_id(sv));
+
+ return sv;
}
static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor)
@@ -110,58 +117,70 @@ static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor)
/* Shadow variable constructor - remember simple pointer data */
static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data)
{
- int **shadow_int = shadow_data;
- *shadow_int = ctor_data;
+ int **sv = shadow_data;
+ int **var = ctor_data;
+
+ if (!var)
+ return -EINVAL;
+
+ *sv = *var;
pr_info("%s: PTR%d -> PTR%d\n",
- __func__, ptr_id(shadow_int), ptr_id(ctor_data));
+ __func__, ptr_id(sv), ptr_id(*var));
return 0;
}
static void shadow_dtor(void *obj, void *shadow_data)
{
+ int **sv = shadow_data;
+
pr_info("%s(obj=PTR%d, shadow_data=PTR%d)\n",
- __func__, ptr_id(obj), ptr_id(shadow_data));
+ __func__, ptr_id(obj), ptr_id(sv));
}
static int test_klp_shadow_vars_init(void)
{
void *obj = THIS_MODULE;
int id = 0x1234;
- size_t size = sizeof(int *);
gfp_t gfp_flags = GFP_KERNEL;
int var1, var2, var3, var4;
+ int *pv1, *pv2, *pv3, *pv4;
int **sv1, **sv2, **sv3, **sv4;
- void *ret;
+ int **sv;
+
+ pv1 = &var1;
+ pv2 = &var2;
+ pv3 = &var3;
+ pv4 = &var4;
ptr_id(NULL);
- ptr_id(&var1);
- ptr_id(&var2);
- ptr_id(&var3);
- ptr_id(&var4);
+ ptr_id(pv1);
+ ptr_id(pv2);
+ ptr_id(pv3);
+ ptr_id(pv4);
/*
* With an empty shadow variable hash table, expect not to find
* any matches.
*/
- ret = shadow_get(obj, id);
- if (!ret)
+ sv = shadow_get(obj, id);
+ if (!sv)
pr_info(" got expected NULL result\n");
/*
* Allocate a few shadow variables with different <obj> and <id>.
*/
- sv1 = shadow_alloc(obj, id, size, gfp_flags, shadow_ctor, &var1);
+ sv1 = shadow_alloc(obj, id, sizeof(pv1), gfp_flags, shadow_ctor, &pv1);
if (!sv1)
return -ENOMEM;
- sv2 = shadow_alloc(obj + 1, id, size, gfp_flags, shadow_ctor, &var2);
+ sv2 = shadow_alloc(obj + 1, id, sizeof(pv2), gfp_flags, shadow_ctor, &pv2);
if (!sv2)
return -ENOMEM;
- sv3 = shadow_alloc(obj, id + 1, size, gfp_flags, shadow_ctor, &var3);
+ sv3 = shadow_alloc(obj, id + 1, sizeof(pv3), gfp_flags, shadow_ctor, &pv3);
if (!sv3)
return -ENOMEM;
@@ -169,23 +188,23 @@ static int test_klp_shadow_vars_init(void)
* Verify we can find our new shadow variables and that they point
* to expected data.
*/
- ret = shadow_get(obj, id);
- if (!ret)
+ sv = shadow_get(obj, id);
+ if (!sv)
return -EINVAL;
- if (ret == sv1 && *sv1 == &var1)
+ if (sv == sv1 && *sv1 == pv1)
pr_info(" got expected PTR%d -> PTR%d result\n",
ptr_id(sv1), ptr_id(*sv1));
- ret = shadow_get(obj + 1, id);
- if (!ret)
+ sv = shadow_get(obj + 1, id);
+ if (!sv)
return -EINVAL;
- if (ret == sv2 && *sv2 == &var2)
+ if (sv == sv2 && *sv2 == pv2)
pr_info(" got expected PTR%d -> PTR%d result\n",
ptr_id(sv2), ptr_id(*sv2));
- ret = shadow_get(obj, id + 1);
- if (!ret)
+ sv = shadow_get(obj, id + 1);
+ if (!sv)
return -EINVAL;
- if (ret == sv3 && *sv3 == &var3)
+ if (sv == sv3 && *sv3 == pv3)
pr_info(" got expected PTR%d -> PTR%d result\n",
ptr_id(sv3), ptr_id(*sv3));
@@ -193,14 +212,14 @@ static int test_klp_shadow_vars_init(void)
* Allocate or get a few more, this time with the same <obj>, <id>.
* The second invocation should return the same shadow var.
*/
- sv4 = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4);
+ sv4 = shadow_get_or_alloc(obj + 2, id, sizeof(pv4), gfp_flags, shadow_ctor, &pv4);
if (!sv4)
return -ENOMEM;
- ret = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4);
- if (!ret)
+ sv = shadow_get_or_alloc(obj + 2, id, sizeof(pv4), gfp_flags, shadow_ctor, &pv4);
+ if (!sv)
return -EINVAL;
- if (ret == sv4 && *sv4 == &var4)
+ if (sv == sv4 && *sv4 == pv4)
pr_info(" got expected PTR%d -> PTR%d result\n",
ptr_id(sv4), ptr_id(*sv4));
@@ -209,27 +228,27 @@ static int test_klp_shadow_vars_init(void)
* longer find them.
*/
shadow_free(obj, id, shadow_dtor); /* sv1 */
- ret = shadow_get(obj, id);
- if (!ret)
+ sv = shadow_get(obj, id);
+ if (!sv)
pr_info(" got expected NULL result\n");
shadow_free(obj + 1, id, shadow_dtor); /* sv2 */
- ret = shadow_get(obj + 1, id);
- if (!ret)
+ sv = shadow_get(obj + 1, id);
+ if (!sv)
pr_info(" got expected NULL result\n");
shadow_free(obj + 2, id, shadow_dtor); /* sv4 */
- ret = shadow_get(obj + 2, id);
- if (!ret)
+ sv = shadow_get(obj + 2, id);
+ if (!sv)
pr_info(" got expected NULL result\n");
/*
* We should still find an <id+1> variable.
*/
- ret = shadow_get(obj, id + 1);
- if (!ret)
+ sv = shadow_get(obj, id + 1);
+ if (!sv)
return -EINVAL;
- if (ret == sv3 && *sv3 == &var3)
+ if (sv == sv3 && *sv3 == pv3)
pr_info(" got expected PTR%d -> PTR%d result\n",
ptr_id(sv3), ptr_id(*sv3));
@@ -237,8 +256,8 @@ static int test_klp_shadow_vars_init(void)
* Free all the <id+1> variables, too.
*/
shadow_free_all(id + 1, shadow_dtor); /* sv3 */
- ret = shadow_get(obj, id);
- if (!ret)
+ sv = shadow_get(obj, id);
+ if (!sv)
pr_info(" shadow_get() got expected NULL result\n");
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 17417eee0866..bf1b4765c8f6 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -124,6 +124,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
#define time_before(x, y) ((x) < (y))
#endif
+#define RAID6_TEST_DISKS 8
+#define RAID6_TEST_DISKS_ORDER 3
+
static inline const struct raid6_recov_calls *raid6_choose_recov(void)
{
const struct raid6_recov_calls *const *algo;
@@ -146,7 +149,7 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
}
static inline const struct raid6_calls *raid6_choose_gen(
- void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
+ void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
{
unsigned long perf, bestgenperf, bestxorperf, j0, j1;
int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
@@ -181,7 +184,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
best = *algo;
}
pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
- (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ (perf * HZ * (disks-2)) >>
+ (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
if (!(*algo)->xor_syndrome)
continue;
@@ -204,17 +208,24 @@ static inline const struct raid6_calls *raid6_choose_gen(
bestxorperf = perf;
pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
- (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
+ (perf * HZ * (disks-2)) >>
+ (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
}
}
if (best) {
- pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
- best->name,
- (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
- if (best->xor_syndrome)
- pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
- (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
+ if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
+ pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
+ best->name,
+ (bestgenperf * HZ * (disks-2)) >>
+ (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2));
+ if (best->xor_syndrome)
+ pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+ (bestxorperf * HZ * (disks-2)) >>
+ (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+ } else
+ pr_info("raid6: skip pq benchmark and using algorithm %s\n",
+ best->name);
raid6_call = *best;
} else
pr_err("raid6: Yikes! No algorithm found!\n");
@@ -228,27 +239,33 @@ static inline const struct raid6_calls *raid6_choose_gen(
int __init raid6_select_algo(void)
{
- const int disks = (65536/PAGE_SIZE)+2;
+ const int disks = RAID6_TEST_DISKS;
const struct raid6_calls *gen_best;
const struct raid6_recov_calls *rec_best;
- char *syndromes;
- void *dptrs[(65536/PAGE_SIZE)+2];
- int i;
-
- for (i = 0; i < disks-2; i++)
- dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
-
- /* Normal code - use a 2-page allocation to avoid D$ conflict */
- syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+ char *disk_ptr, *p;
+ void *dptrs[RAID6_TEST_DISKS];
+ int i, cycle;
- if (!syndromes) {
+ /* prepare the buffer and fill it circularly with gfmul table */
+ disk_ptr = (char *)__get_free_pages(GFP_KERNEL, RAID6_TEST_DISKS_ORDER);
+ if (!disk_ptr) {
pr_err("raid6: Yikes! No memory available.\n");
return -ENOMEM;
}
- dptrs[disks-2] = syndromes;
- dptrs[disks-1] = syndromes + PAGE_SIZE;
+ p = disk_ptr;
+ for (i = 0; i < disks; i++)
+ dptrs[i] = p + PAGE_SIZE * i;
+
+ cycle = ((disks - 2) * PAGE_SIZE) / 65536;
+ for (i = 0; i < cycle; i++) {
+ memcpy(p, raid6_gfmul, 65536);
+ p += 65536;
+ }
+
+ if ((disks - 2) * PAGE_SIZE % 65536)
+ memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536);
/* select raid gen_syndrome function */
gen_best = raid6_choose_gen(&dptrs, disks);
@@ -256,7 +273,7 @@ int __init raid6_select_algo(void)
/* select raid recover functions */
rec_best = raid6_choose_recov();
- free_pages((unsigned long)syndromes, 1);
+ free_pages((unsigned long)disk_ptr, RAID6_TEST_DISKS_ORDER);
return gen_best && rec_best ? 0 : -EINVAL;
}
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
index 9c485df1308f..f02e10fa6238 100644
--- a/lib/raid6/mktables.c
+++ b/lib/raid6/mktables.c
@@ -56,8 +56,8 @@ int main(int argc, char *argv[])
uint8_t v;
uint8_t exptbl[256], invtbl[256];
- printf("#include <linux/raid/pq.h>\n");
printf("#include <linux/export.h>\n");
+ printf("#include <linux/raid/pq.h>\n");
/* Compute multiplication table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
index c6aa03631df8..0809805a7e23 100644
--- a/lib/raid6/unroll.awk
+++ b/lib/raid6/unroll.awk
@@ -13,7 +13,7 @@ BEGIN {
for (i = 0; i < rep; ++i) {
tmp = $0
gsub(/\$\$/, i, tmp)
- gsub(/\$\#/, n, tmp)
+ gsub(/\$#/, n, tmp)
gsub(/\$\*/, "$", tmp)
print tmp
}
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 33feec8989f1..af88d1346dd7 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -650,8 +650,8 @@ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
if (!sbq_wait->sbq) {
sbq_wait->sbq = sbq;
atomic_inc(&sbq->ws_active);
+ add_wait_queue(&ws->wait, &sbq_wait->wait);
}
- add_wait_queue(&ws->wait, &sbq_wait->wait);
}
EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index dccb95af6003..706020b06617 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -30,13 +30,6 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
unsigned long res = 0;
- /*
- * Truncate 'max' to the user-specified limit, so that
- * we only have one limit we need to check in the loop
- */
- if (max > count)
- max = count;
-
if (IS_UNALIGNED(src, dst))
goto byte_at_a_time;
@@ -114,6 +107,13 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
unsigned long max = max_addr - src_addr;
long retval;
+ /*
+ * Truncate 'max' to the user-specified limit, so that
+ * we only have one limit we need to check in the loop
+ */
+ if (max > count)
+ max = count;
+
kasan_check_write(dst, count);
check_object_size(dst, count, false);
if (user_access_begin(src, max)) {
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 6c0005d5dd5c..41670d4a5816 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -27,13 +27,6 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
unsigned long c;
/*
- * Truncate 'max' to the user-specified limit, so that
- * we only have one limit we need to check in the loop
- */
- if (max > count)
- max = count;
-
- /*
* Do everything aligned. But that means that we
* need to also expand the maximum..
*/
@@ -109,6 +102,13 @@ long strnlen_user(const char __user *str, long count)
unsigned long max = max_addr - src_addr;
long retval;
+ /*
+ * Truncate 'max' to the user-specified limit, so that
+ * we only have one limit we need to check in the loop
+ */
+ if (max > count)
+ max = count;
+
if (user_access_begin(str, max)) {
retval = do_strnlen_user(str, count, max);
user_access_end();
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 7df4f7f395bf..55c14e8c8859 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -2,6 +2,7 @@
/*
* test_xarray.c: Test the XArray API
* Copyright (c) 2017-2018 Microsoft Corporation
+ * Copyright (c) 2019-2020 Oracle
* Author: Matthew Wilcox <willy@infradead.org>
*/
@@ -902,28 +903,34 @@ static noinline void check_store_iter(struct xarray *xa)
XA_BUG_ON(xa, !xa_empty(xa));
}
-static noinline void check_multi_find(struct xarray *xa)
+static noinline void check_multi_find_1(struct xarray *xa, unsigned order)
{
#ifdef CONFIG_XARRAY_MULTI
+ unsigned long multi = 3 << order;
+ unsigned long next = 4 << order;
unsigned long index;
- xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL);
- XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL);
+ xa_store_order(xa, multi, order, xa_mk_value(multi), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_store_index(xa, next + 1, GFP_KERNEL) != NULL);
index = 0;
XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
- xa_mk_value(12));
- XA_BUG_ON(xa, index != 12);
- index = 13;
+ xa_mk_value(multi));
+ XA_BUG_ON(xa, index != multi);
+ index = multi + 1;
XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
- xa_mk_value(12));
- XA_BUG_ON(xa, (index < 12) || (index >= 16));
+ xa_mk_value(multi));
+ XA_BUG_ON(xa, (index < multi) || (index >= next));
XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) !=
- xa_mk_value(16));
- XA_BUG_ON(xa, index != 16);
-
- xa_erase_index(xa, 12);
- xa_erase_index(xa, 16);
+ xa_mk_value(next));
+ XA_BUG_ON(xa, index != next);
+ XA_BUG_ON(xa, xa_find_after(xa, &index, next, XA_PRESENT) != NULL);
+ XA_BUG_ON(xa, index != next);
+
+ xa_erase_index(xa, multi);
+ xa_erase_index(xa, next);
+ xa_erase_index(xa, next + 1);
XA_BUG_ON(xa, !xa_empty(xa));
#endif
}
@@ -1046,12 +1053,33 @@ static noinline void check_find_3(struct xarray *xa)
xa_destroy(xa);
}
+static noinline void check_find_4(struct xarray *xa)
+{
+ unsigned long index = 0;
+ void *entry;
+
+ xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
+
+ entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT);
+ XA_BUG_ON(xa, entry != xa_mk_index(ULONG_MAX));
+
+ entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT);
+ XA_BUG_ON(xa, entry);
+
+ xa_erase_index(xa, ULONG_MAX);
+}
+
static noinline void check_find(struct xarray *xa)
{
+ unsigned i;
+
check_find_1(xa);
check_find_2(xa);
check_find_3(xa);
- check_multi_find(xa);
+ check_find_4(xa);
+
+ for (i = 2; i < 10; i++)
+ check_multi_find_1(xa, i);
check_multi_find_2(xa);
}
@@ -1132,6 +1160,27 @@ static noinline void check_move_tiny(struct xarray *xa)
XA_BUG_ON(xa, !xa_empty(xa));
}
+static noinline void check_move_max(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+
+ xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_index(ULONG_MAX));
+ XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL);
+ rcu_read_unlock();
+
+ xas_set(&xas, 0);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_index(ULONG_MAX));
+ xas_pause(&xas);
+ XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL);
+ rcu_read_unlock();
+
+ xa_erase_index(xa, ULONG_MAX);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
static noinline void check_move_small(struct xarray *xa, unsigned long idx)
{
XA_STATE(xas, xa, 0);
@@ -1240,6 +1289,7 @@ static noinline void check_move(struct xarray *xa)
xa_destroy(xa);
check_move_tiny(xa);
+ check_move_max(xa);
for (i = 0; i < 16; i++)
check_move_small(xa, 1UL << i);
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index 9fe698ff62ec..d883ac299508 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -24,4 +24,10 @@ config GENERIC_COMPAT_VDSO
help
This config option enables the compat VDSO layer.
+config GENERIC_VDSO_TIME_NS
+ bool
+ help
+ Selected by architectures which support time namespaces in the
+ VDSO
+
endif
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 9ecfd3b547ba..f8b8ec5e63ac 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -38,12 +38,22 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
}
#endif
-static int do_hres(const struct vdso_data *vd, clockid_t clk,
- struct __kernel_timespec *ts)
+#ifdef CONFIG_TIME_NS
+static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+ struct __kernel_timespec *ts)
{
- const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
- u64 cycles, last, sec, ns;
+ const struct vdso_data *vd = __arch_get_timens_vdso_data();
+ const struct timens_offset *offs = &vdns->offset[clk];
+ const struct vdso_timestamp *vdso_ts;
+ u64 cycles, last, ns;
u32 seq;
+ s64 sec;
+
+ if (clk != CLOCK_MONOTONIC_RAW)
+ vd = &vd[CS_HRES_COARSE];
+ else
+ vd = &vd[CS_RAW];
+ vdso_ts = &vd->basetime[clk];
do {
seq = vdso_read_begin(vd);
@@ -58,6 +68,10 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
sec = vdso_ts->sec;
} while (unlikely(vdso_read_retry(vd, seq)));
+ /* Add the namespace offset */
+ sec += offs->sec;
+ ns += offs->nsec;
+
/*
* Do this outside the loop: a race inside the loop could result
* in __iter_div_u64_rem() being extremely slow.
@@ -67,18 +81,128 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
return 0;
}
+#else
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+ return NULL;
+}
+
+static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ return -EINVAL;
+}
+#endif
-static void do_coarse(const struct vdso_data *vd, clockid_t clk,
- struct __kernel_timespec *ts)
+static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
+ struct __kernel_timespec *ts)
{
const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ u64 cycles, last, sec, ns;
u32 seq;
do {
+ /*
+ * Open coded to handle VCLOCK_TIMENS. Time namespace
+ * enabled tasks have a special VVAR page installed which
+ * has vd->seq set to 1 and vd->clock_mode set to
+ * VCLOCK_TIMENS. For non time namespace affected tasks
+ * this does not affect performance because if vd->seq is
+ * odd, i.e. a concurrent update is in progress the extra
+ * check for vd->clock_mode is just a few extra
+ * instructions while spin waiting for vd->seq to become
+ * even again.
+ */
+ while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
+ if (IS_ENABLED(CONFIG_TIME_NS) &&
+ vd->clock_mode == VCLOCK_TIMENS)
+ return do_hres_timens(vd, clk, ts);
+ cpu_relax();
+ }
+ smp_rmb();
+
+ cycles = __arch_get_hw_counter(vd->clock_mode);
+ ns = vdso_ts->nsec;
+ last = vd->cycle_last;
+ if (unlikely((s64)cycles < 0))
+ return -1;
+
+ ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+ ns >>= vd->shift;
+ sec = vdso_ts->sec;
+ } while (unlikely(vdso_read_retry(vd, seq)));
+
+ /*
+ * Do this outside the loop: a race inside the loop could result
+ * in __iter_div_u64_rem() being extremely slow.
+ */
+ ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ const struct vdso_data *vd = __arch_get_timens_vdso_data();
+ const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ const struct timens_offset *offs = &vdns->offset[clk];
+ u64 nsec;
+ s64 sec;
+ s32 seq;
+
+ do {
seq = vdso_read_begin(vd);
+ sec = vdso_ts->sec;
+ nsec = vdso_ts->nsec;
+ } while (unlikely(vdso_read_retry(vd, seq)));
+
+ /* Add the namespace offset */
+ sec += offs->sec;
+ nsec += offs->nsec;
+
+ /*
+ * Do this outside the loop: a race inside the loop could result
+ * in __iter_div_u64_rem() being extremely slow.
+ */
+ ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
+ ts->tv_nsec = nsec;
+ return 0;
+}
+#else
+static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ return -1;
+}
+#endif
+
+static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ u32 seq;
+
+ do {
+ /*
+ * Open coded to handle VCLOCK_TIMENS. See comment in
+ * do_hres().
+ */
+ while ((seq = READ_ONCE(vd->seq)) & 1) {
+ if (IS_ENABLED(CONFIG_TIME_NS) &&
+ vd->clock_mode == VCLOCK_TIMENS)
+ return do_coarse_timens(vd, clk, ts);
+ cpu_relax();
+ }
+ smp_rmb();
+
ts->tv_sec = vdso_ts->sec;
ts->tv_nsec = vdso_ts->nsec;
} while (unlikely(vdso_read_retry(vd, seq)));
+
+ return 0;
}
static __maybe_unused int
@@ -96,15 +220,16 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
* clocks are handled in the VDSO directly.
*/
msk = 1U << clock;
- if (likely(msk & VDSO_HRES)) {
- return do_hres(&vd[CS_HRES_COARSE], clock, ts);
- } else if (msk & VDSO_COARSE) {
- do_coarse(&vd[CS_HRES_COARSE], clock, ts);
- return 0;
- } else if (msk & VDSO_RAW) {
- return do_hres(&vd[CS_RAW], clock, ts);
- }
- return -1;
+ if (likely(msk & VDSO_HRES))
+ vd = &vd[CS_HRES_COARSE];
+ else if (msk & VDSO_COARSE)
+ return do_coarse(&vd[CS_HRES_COARSE], clock, ts);
+ else if (msk & VDSO_RAW)
+ vd = &vd[CS_RAW];
+ else
+ return -1;
+
+ return do_hres(vd, clock, ts);
}
static __maybe_unused int
@@ -117,6 +242,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
return 0;
}
+#ifdef BUILD_VDSO32
static __maybe_unused int
__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
{
@@ -125,20 +251,16 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
ret = __cvdso_clock_gettime_common(clock, &ts);
-#ifdef VDSO_HAS_32BIT_FALLBACK
if (unlikely(ret))
return clock_gettime32_fallback(clock, res);
-#else
- if (unlikely(ret))
- ret = clock_gettime_fallback(clock, &ts);
-#endif
- if (likely(!ret)) {
- res->tv_sec = ts.tv_sec;
- res->tv_nsec = ts.tv_nsec;
- }
+ /* For ret == 0 */
+ res->tv_sec = ts.tv_sec;
+ res->tv_nsec = ts.tv_nsec;
+
return ret;
}
+#endif /* BUILD_VDSO32 */
static __maybe_unused int
__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
@@ -156,6 +278,10 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
}
if (unlikely(tz != NULL)) {
+ if (IS_ENABLED(CONFIG_TIME_NS) &&
+ vd->clock_mode == VCLOCK_TIMENS)
+ vd = __arch_get_timens_vdso_data();
+
tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
}
@@ -167,7 +293,12 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
{
const struct vdso_data *vd = __arch_get_vdso_data();
- __kernel_old_time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
+ __kernel_old_time_t t;
+
+ if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS)
+ vd = __arch_get_timens_vdso_data();
+
+ t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
if (time)
*time = t;
@@ -181,7 +312,6 @@ static __maybe_unused
int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
{
const struct vdso_data *vd = __arch_get_vdso_data();
- u64 hrtimer_res;
u32 msk;
u64 ns;
@@ -189,27 +319,24 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
if (unlikely((u32) clock >= MAX_CLOCKS))
return -1;
- hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
+ if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS)
+ vd = __arch_get_timens_vdso_data();
+
/*
* Convert the clockid to a bitmask and use it to check which
* clocks are handled in the VDSO directly.
*/
msk = 1U << clock;
- if (msk & VDSO_HRES) {
+ if (msk & (VDSO_HRES | VDSO_RAW)) {
/*
* Preserves the behaviour of posix_get_hrtimer_res().
*/
- ns = hrtimer_res;
+ ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
} else if (msk & VDSO_COARSE) {
/*
* Preserves the behaviour of posix_get_coarse_res().
*/
ns = LOW_RES_NSEC;
- } else if (msk & VDSO_RAW) {
- /*
- * Preserves the behaviour of posix_get_hrtimer_res().
- */
- ns = hrtimer_res;
} else {
return -1;
}
@@ -221,6 +348,7 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
return 0;
}
+static __maybe_unused
int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
{
int ret = __cvdso_clock_getres_common(clock, res);
@@ -230,6 +358,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
return 0;
}
+#ifdef BUILD_VDSO32
static __maybe_unused int
__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
{
@@ -238,18 +367,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
ret = __cvdso_clock_getres_common(clock, &ts);
-#ifdef VDSO_HAS_32BIT_FALLBACK
if (unlikely(ret))
return clock_getres32_fallback(clock, res);
-#else
- if (unlikely(ret))
- ret = clock_getres_fallback(clock, &ts);
-#endif
- if (likely(!ret && res)) {
+ if (likely(res)) {
res->tv_sec = ts.tv_sec;
res->tv_nsec = ts.tv_nsec;
}
return ret;
}
+#endif /* BUILD_VDSO32 */
#endif /* VDSO_HAS_CLOCK_GETRES */
diff --git a/lib/xarray.c b/lib/xarray.c
index 1237c213f52b..1d9fab7db8da 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* XArray implementation
- * Copyright (c) 2017 Microsoft Corporation
+ * Copyright (c) 2017-2018 Microsoft Corporation
+ * Copyright (c) 2018-2020 Oracle
* Author: Matthew Wilcox <willy@infradead.org>
*/
@@ -967,6 +968,7 @@ void xas_pause(struct xa_state *xas)
if (xas_invalid(xas))
return;
+ xas->xa_node = XAS_RESTART;
if (node) {
unsigned int offset = xas->xa_offset;
while (++offset < XA_CHUNK_SIZE) {
@@ -974,10 +976,11 @@ void xas_pause(struct xa_state *xas)
break;
}
xas->xa_index += (offset - xas->xa_offset) << node->shift;
+ if (xas->xa_index == 0)
+ xas->xa_node = XAS_BOUNDS;
} else {
xas->xa_index++;
}
- xas->xa_node = XAS_RESTART;
}
EXPORT_SYMBOL_GPL(xas_pause);
@@ -1079,13 +1082,15 @@ void *xas_find(struct xa_state *xas, unsigned long max)
{
void *entry;
- if (xas_error(xas))
+ if (xas_error(xas) || xas->xa_node == XAS_BOUNDS)
return NULL;
+ if (xas->xa_index > max)
+ return set_bounds(xas);
if (!xas->xa_node) {
xas->xa_index = 1;
return set_bounds(xas);
- } else if (xas_top(xas->xa_node)) {
+ } else if (xas->xa_node == XAS_RESTART) {
entry = xas_load(xas);
if (entry || xas_not_node(xas->xa_node))
return entry;
@@ -1150,6 +1155,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
if (xas_error(xas))
return NULL;
+ if (xas->xa_index > max)
+ goto max;
if (!xas->xa_node) {
xas->xa_index = 1;
@@ -1824,6 +1831,17 @@ void *xa_find(struct xarray *xa, unsigned long *indexp,
}
EXPORT_SYMBOL(xa_find);
+static bool xas_sibling(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_node;
+ unsigned long mask;
+
+ if (!node)
+ return false;
+ mask = (XA_CHUNK_SIZE << node->shift) - 1;
+ return (xas->xa_index & mask) > (xas->xa_offset << node->shift);
+}
+
/**
* xa_find_after() - Search the XArray for a present entry.
* @xa: XArray.
@@ -1847,21 +1865,20 @@ void *xa_find_after(struct xarray *xa, unsigned long *indexp,
XA_STATE(xas, xa, *indexp + 1);
void *entry;
+ if (xas.xa_index == 0)
+ return NULL;
+
rcu_read_lock();
for (;;) {
if ((__force unsigned int)filter < XA_MAX_MARKS)
entry = xas_find_marked(&xas, max, filter);
else
entry = xas_find(&xas, max);
- if (xas.xa_node == XAS_BOUNDS)
+
+ if (xas_invalid(&xas))
break;
- if (xas.xa_shift) {
- if (xas.xa_index & ((1UL << xas.xa_shift) - 1))
- continue;
- } else {
- if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK))
- continue;
- }
+ if (xas_sibling(&xas))
+ continue;
if (!xas_retry(&xas, entry))
break;
}