From bf3eeb9b5f2a1a05b3a68c6d82112babd58d6a39 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:02 -0700 Subject: lib/iov_iter: Document _copy_to_iter_mcsafe() Add some theory of operation documentation to _copy_to_iter_mcsafe(). Reported-by: Al Viro Signed-off-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/153108276256.37979.1689794213845539316.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- lib/iov_iter.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e43cd54c84c..94fa361be7bb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,6 +596,32 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +/** + * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_to_iter() for protecting read/write to persistent memory. + * Unless / until an architecture can guarantee identical performance + * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a + * performance regression to switch more users to the mcsafe version. + * + * Otherwise, the main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine + * checks is potentially fatal so the implementation uses source + * alignment and poison alignment assumptions to avoid re-triggering + * hardware exceptions. + * + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. + * + * See MCSAFE_TEST for self-test. + */ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; -- cgit v1.2.3 From abd08d7d245397bcbded8c6c29ff79a36b3875b0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:07 -0700 Subject: lib/iov_iter: Document _copy_to_iter_flushcache() Add some theory of operation documentation to _copy_to_iter_flushcache(). Reported-by: Al Viro Signed-off-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/153108276767.37979.9462477994086841699.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- lib/iov_iter.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 94fa361be7bb..09fb73ad9d54 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -727,6 +727,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/** + * _copy_from_iter_flushcache - write destination through cpu cache + * @addr: destination kernel address + * @bytes: total transfer length + * @iter: source iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_from_iter() for ensuring that writes to persistent memory + * are flushed through the CPU cache. It is differentiated from + * _copy_from_iter_nocache() in that guarantees all data is flushed for + * all iterator types. The _copy_from_iter_nocache() only attempts to + * bypass the cache for the ITER_IOVEC case, and on some archs may use + * instructions that strand dirty-data in the cache. + */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; -- cgit v1.2.3 From ca146f6f091e47b3fd18d6a7e76ec0297d202e0f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:12 -0700 Subject: lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe() By mistake the ITER_PIPE early-exit / warning from copy_from_iter() was cargo-culted in _copy_to_iter_mcsafe() rather than a machine-check-safe version of copy_to_iter_pipe(). Implement copy_pipe_to_iter_mcsafe() being careful to return the indication of short copies due to a CPU exception. Without this regression-fix all splice reads to dax-mode files fail. Reported-by: Ross Zwisler Tested-by: Ross Zwisler Signed-off-by: Dan Williams Acked-by: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Fixes: 8780356ef630 ("x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe()") Link: http://lkml.kernel.org/r/153108277278.37979.3327916996902264102.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- lib/iov_iter.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 09fb73ad9d54..8be175df3075 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,6 +596,37 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t n, off, xfer = 0; + int idx; + + if (!sanity(i)) + return 0; + + bytes = n = push_pipe(i, bytes, &idx, &off); + if (unlikely(!n)) + return 0; + for ( ; n; idx = next_idx(idx, pipe), off = 0) { + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + + rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, + chunk); + i->idx = idx; + i->iov_offset = off + chunk - rem; + xfer += chunk - rem; + if (rem) + break; + n -= chunk; + addr += chunk; + } + i->count -= xfer; + return xfer; +} + /** * _copy_to_iter_mcsafe - copy to user with source-read error exception handling * @addr: source kernel address @@ -627,10 +658,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) { - WARN_ON(1); - return 0; - } + if (unlikely(i->type & ITER_PIPE)) + return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, -- cgit v1.2.3