summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2017-07-03 16:54:58 -0700
committerDan Williams <dan.j.williams@intel.com>2017-07-03 16:54:58 -0700
commit9d92573fff3ec70785ef1815cc80573f70e7a921 (patch)
treebce6e6bbad56f805d1adcebddabf9dd9e8072ce4 /arch
parent2de5148ffb12ff6b4088125f44818771e78e6830 (diff)
parent0b277961f4484fb3f142caaa1dd1748cb0b2cbee (diff)
downloadlinux-9d92573fff3ec70785ef1815cc80573f70e7a921.tar.bz2
Merge branch 'for-4.13/dax' into libnvdimm-for-next
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/sysdev/axonram.c8
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/pmem.h136
-rw-r--r--arch/x86/include/asm/string_64.h5
-rw-r--r--arch/x86/include/asm/uaccess_64.h11
-rw-r--r--arch/x86/lib/usercopy_64.c134
-rw-r--r--arch/x86/mm/pageattr.c6
7 files changed, 165 insertions, 136 deletions
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index a7fe5fee744f..2799706106c6 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -45,6 +45,7 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/pfn_t.h>
+#include <linux/uio.h>
#include <asm/page.h>
#include <asm/prom.h>
@@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa
return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
}
+static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ return copy_from_iter(addr, bytes, i);
+}
+
static const struct dax_operations axon_ram_dax_ops = {
.direct_access = axon_ram_dax_direct_access,
+ .copy_from_iter = axon_ram_copy_from_iter,
};
/**
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4ccfacc7232a..bb273b2f50b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,6 +54,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64
+ select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
deleted file mode 100644
index 0ff8fe71b255..000000000000
--- a/arch/x86/include/asm/pmem.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#ifndef __ASM_X86_PMEM_H__
-#define __ASM_X86_PMEM_H__
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/cpufeature.h>
-#include <asm/special_insns.h>
-
-#ifdef CONFIG_ARCH_HAS_PMEM_API
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent pmem driver flush operation will drain posted write queues.
- */
-static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
- int rem;
-
- /*
- * We are copying between two kernel buffers, if
- * __copy_from_user_inatomic_nocache() returns an error (page
- * fault) we would have already reported a general protection fault
- * before the WARN+BUG.
- */
- rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
- if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
- __func__, dst, src, rem))
- BUG();
-}
-
-/**
- * arch_wb_cache_pmem - write back a cache range with CLWB
- * @vaddr: virtual start address
- * @size: number of bytes to write back
- *
- * Write back a cache range using the CLWB (cache line write back)
- * instruction. Note that @size is internally rounded up to be cache
- * line size aligned.
- */
-static inline void arch_wb_cache_pmem(void *addr, size_t size)
-{
- u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
- unsigned long clflush_mask = x86_clflush_size - 1;
- void *vend = addr + size;
- void *p;
-
- for (p = (void *)((unsigned long)addr & ~clflush_mask);
- p < vend; p += x86_clflush_size)
- clwb(p);
-}
-
-/**
- * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
- * @addr: PMEM destination address
- * @bytes: number of bytes to copy
- * @i: iterator with source data
- *
- * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- */
-static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
- struct iov_iter *i)
-{
- size_t len;
-
- /* TODO: skip the write-back by always using non-temporal stores */
- len = copy_from_iter_nocache(addr, bytes, i);
-
- /*
- * In the iovec case on x86_64 copy_from_iter_nocache() uses
- * non-temporal stores for the bulk of the transfer, but we need
- * to manually flush if the transfer is unaligned. A cached
- * memory copy is used when destination or size is not naturally
- * aligned. That is:
- * - Require 8-byte alignment when size is 8 bytes or larger.
- * - Require 4-byte alignment when size is 4 bytes.
- *
- * In the non-iovec case the entire destination needs to be
- * flushed.
- */
- if (iter_is_iovec(i)) {
- unsigned long flushed, dest = (unsigned long) addr;
-
- if (bytes < 8) {
- if (!IS_ALIGNED(dest, 4) || (bytes != 4))
- arch_wb_cache_pmem(addr, bytes);
- } else {
- if (!IS_ALIGNED(dest, 8)) {
- dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
- arch_wb_cache_pmem(addr, 1);
- }
-
- flushed = dest - (unsigned long) addr;
- if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
- arch_wb_cache_pmem(addr + bytes - 1, 1);
- }
- } else
- arch_wb_cache_pmem(addr, bytes);
-
- return len;
-}
-
-/**
- * arch_clear_pmem - zero a PMEM memory range
- * @addr: virtual start address
- * @size: number of bytes to zero
- *
- * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- */
-static inline void arch_clear_pmem(void *addr, size_t size)
-{
- memset(addr, 0, size);
- arch_wb_cache_pmem(addr, size);
-}
-
-static inline void arch_invalidate_pmem(void *addr, size_t size)
-{
- clflush_cache_range(addr, size);
-}
-#endif /* CONFIG_ARCH_HAS_PMEM_API */
-#endif /* __ASM_X86_PMEM_H__ */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 733bae07fb29..1f22bc277c45 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
return 0;
}
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
+void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index c5504b9a472e..b16f6a1d8b26 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne
extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest);
+extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
+extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+ size_t len);
+
static inline int
__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
unsigned size)
@@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
return __copy_user_nocache(dst, src, size, 0);
}
+static inline int
+__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+ kasan_check_write(dst, size);
+ return __copy_user_flushcache(dst, src, size);
+}
+
unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 3b7c40a2e3e1..75d3776123cc 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -7,6 +7,7 @@
*/
#include <linux/export.h>
#include <linux/uaccess.h>
+#include <linux/highmem.h>
/*
* Zero Userspace
@@ -73,3 +74,136 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
clac();
return len;
}
+
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+/**
+ * clean_cache_range - write back a cache range with CLWB
+ * @vaddr: virtual start address
+ * @size: number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
+ */
+static void clean_cache_range(void *addr, size_t size)
+{
+ u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+ unsigned long clflush_mask = x86_clflush_size - 1;
+ void *vend = addr + size;
+ void *p;
+
+ for (p = (void *)((unsigned long)addr & ~clflush_mask);
+ p < vend; p += x86_clflush_size)
+ clwb(p);
+}
+
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+ clean_cache_range(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+ unsigned long flushed, dest = (unsigned long) dst;
+ long rc = __copy_user_nocache(dst, src, size, 0);
+
+ /*
+ * __copy_user_nocache() uses non-temporal stores for the bulk
+ * of the transfer, but we need to manually flush if the
+ * transfer is unaligned. A cached memory copy is used when
+ * destination or size is not naturally aligned. That is:
+ * - Require 8-byte alignment when size is 8 bytes or larger.
+ * - Require 4-byte alignment when size is 4 bytes.
+ */
+ if (size < 8) {
+ if (!IS_ALIGNED(dest, 4) || size != 4)
+ clean_cache_range(dst, 1);
+ } else {
+ if (!IS_ALIGNED(dest, 8)) {
+ dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+ clean_cache_range(dst, 1);
+ }
+
+ flushed = dest - (unsigned long) dst;
+ if (size > flushed && !IS_ALIGNED(size - flushed, 8))
+ clean_cache_range(dst + size - 1, 1);
+ }
+
+ return rc;
+}
+
+void memcpy_flushcache(void *_dst, const void *_src, size_t size)
+{
+ unsigned long dest = (unsigned long) _dst;
+ unsigned long source = (unsigned long) _src;
+
+ /* cache copy and flush to align dest */
+ if (!IS_ALIGNED(dest, 8)) {
+ unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
+
+ memcpy((void *) dest, (void *) source, len);
+ clean_cache_range((void *) dest, len);
+ dest += len;
+ source += len;
+ size -= len;
+ if (!size)
+ return;
+ }
+
+ /* 4x8 movnti loop */
+ while (size >= 32) {
+ asm("movq (%0), %%r8\n"
+ "movq 8(%0), %%r9\n"
+ "movq 16(%0), %%r10\n"
+ "movq 24(%0), %%r11\n"
+ "movnti %%r8, (%1)\n"
+ "movnti %%r9, 8(%1)\n"
+ "movnti %%r10, 16(%1)\n"
+ "movnti %%r11, 24(%1)\n"
+ :: "r" (source), "r" (dest)
+ : "memory", "r8", "r9", "r10", "r11");
+ dest += 32;
+ source += 32;
+ size -= 32;
+ }
+
+ /* 1x8 movnti loop */
+ while (size >= 8) {
+ asm("movq (%0), %%r8\n"
+ "movnti %%r8, (%1)\n"
+ :: "r" (source), "r" (dest)
+ : "memory", "r8");
+ dest += 8;
+ source += 8;
+ size -= 8;
+ }
+
+ /* 1x4 movnti loop */
+ while (size >= 4) {
+ asm("movl (%0), %%r8d\n"
+ "movnti %%r8d, (%1)\n"
+ :: "r" (source), "r" (dest)
+ : "memory", "r8");
+ dest += 4;
+ source += 4;
+ size -= 4;
+ }
+
+ /* cache copy for remaining bytes */
+ if (size) {
+ memcpy((void *) dest, (void *) source, size);
+ clean_cache_range((void *) dest, size);
+ }
+}
+EXPORT_SYMBOL_GPL(memcpy_flushcache);
+
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+ size_t len)
+{
+ char *from = kmap_atomic(page);
+
+ memcpy_flushcache(to, from + offset, len);
+ kunmap_atomic(from);
+}
+#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c8520b2c62d2..757b0bcdf712 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -150,6 +150,12 @@ void clflush_cache_range(void *vaddr, unsigned int size)
}
EXPORT_SYMBOL_GPL(clflush_cache_range);
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+ clflush_cache_range(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
static void __cpa_flush_all(void *arg)
{
unsigned long cache = (unsigned long)arg;