From 0a5f9b382c6131381f9f2ed64ae6fdd994328d0d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 13 Jul 2022 14:56:42 +0200 Subject: s390/cpufeature: rework to allow more than only hwcap bits Rework the cpufeature implementation to allow for various cpu feature indications, which are not limited to hwcap bits. This is achieved by adding a sequential list of cpu feature numbers, where each number is mapped to an entry that describes the feature. Each entry contains a type member, which indicates which feature namespace to look into (e.g. hwcap or cpu facility). If wanted, this also allows modules to be loaded automatically only in, e.g., z/VM configurations. Signed-off-by: Heiko Carstens Signed-off-by: Steffen Eiden Reviewed-by: Claudio Imbrenda Reviewed-by: Hendrik Brueckner Link: https://lore.kernel.org/r/20220713125644.16121-2-seiden@linux.ibm.com Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/cpufeature.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 14cfd48d598e..771caf5281e5 100644 --- a/arch/s390/include/asm/cpufeature.h +++ b/arch/s390/include/asm/cpufeature.h @@ -2,28 +2,20 @@ /* * Module interface for CPU features * - * Copyright IBM Corp. 2015 + * Copyright IBM Corp. 2015, 2022 * Author(s): Hendrik Brueckner */ #ifndef __ASM_S390_CPUFEATURE_H #define __ASM_S390_CPUFEATURE_H -#include +enum { + S390_CPU_FEATURE_MSA, + S390_CPU_FEATURE_VXRS, + MAX_CPU_FEATURES +}; -/* Hardware features on Linux on z Systems are indicated by facility bits that - * are mapped to the so-called machine flags. Particular machine flags are - * then used to define ELF hardware capabilities; most notably hardware flags - * that are essential for user space / glibc. - * - * Restrict the set of exposed CPU features to ELF hardware capabilities for - * now. Additional machine flags can be indicated by values larger than - * MAX_ELF_HWCAP_FEATURES. - */ -#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap)) -#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES - -#define cpu_feature(feat) ilog2(HWCAP_ ## feat) +#define cpu_feature(feature) (feature) int cpu_have_feature(unsigned int nr); -- cgit v1.2.3 From 5fcd0d8ae23a3d26f21bc191c32283a5ba0bdaf9 Mon Sep 17 00:00:00 2001 From: Steffen Eiden Date: Wed, 13 Jul 2022 14:56:44 +0200 Subject: s390/uvdevice: autoload module based on CPU facility Make sure the uvdevice driver will be automatically loaded when facility 158 is available.
Signed-off-by: Steffen Eiden Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220713125644.16121-4-seiden@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/cpufeature.h | 1 + arch/s390/kernel/cpufeature.c | 1 + drivers/s390/char/uvdevice.c | 5 ++--- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 771caf5281e5..931204613753 100644 --- a/arch/s390/include/asm/cpufeature.h +++ b/arch/s390/include/asm/cpufeature.h @@ -12,6 +12,7 @@ enum { S390_CPU_FEATURE_MSA, S390_CPU_FEATURE_VXRS, + S390_CPU_FEATURE_UV, MAX_CPU_FEATURES }; diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c index 8b5072b335f4..1b2ae42a0c15 100644 --- a/arch/s390/kernel/cpufeature.c +++ b/arch/s390/kernel/cpufeature.c @@ -20,6 +20,7 @@ struct s390_cpu_feature { static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, + [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, }; /* diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c index 66505d7166a6..1d40457c7b10 100644 --- a/drivers/s390/char/uvdevice.c +++ b/drivers/s390/char/uvdevice.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -244,12 +245,10 @@ static void __exit uvio_dev_exit(void) static int __init uvio_dev_init(void) { - if (!test_facility(158)) - return -ENXIO; return misc_register(&uvio_dev_miscdev); } -module_init(uvio_dev_init); +module_cpu_feature_match(S390_CPU_FEATURE_UV, uvio_dev_init); module_exit(uvio_dev_exit); MODULE_AUTHOR("IBM Corporation"); -- cgit v1.2.3 From 7190d84966b34de3892cd4eb8698a2229ceb8d82 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 15 Jul 2022 06:43:32 +0200 Subject: s390/mm: remove unused tprot() function Since commit 461e0da7ddbb ("s390: remove broken hibernate / power management support") there are no users of tprot() left. Remove the function itself as well. Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/mmu.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 82aae78e1315..bc254bce48fa 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -42,18 +42,4 @@ typedef struct { .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), -static inline int tprot(unsigned long addr) -{ - int rc = -EFAULT; - - asm volatile( - " tprot 0(%1),0\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "a" (addr) : "cc"); - return rc; -} - #endif -- cgit v1.2.3 From d6da67378198e4caa37404f87851659553b936b9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 19 Jul 2022 07:16:34 +0200 Subject: s390/crash: move copy_to_user_real() to crash_dump.c Function copy_to_user_real() does not really belong in maccess.c. It is only used for copying oldmem to user space, so move it to crash_dump.c, next to the only code that uses it.
Acked-by: Heiko Carstens Tested-by: Alexander Egorenkov Link: https://lore.kernel.org/r/e8de968d40202d87caa09aef12e9c67ec23a1c1a.1658206891.git.agordeev@linux.ibm.com Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 1 - arch/s390/kernel/crash_dump.c | 26 ++++++++++++++++++++++++++ arch/s390/mm/maccess.c | 26 -------------------------- 3 files changed, 26 insertions(+), 27 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f4511e21d646..2a067315fe59 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -285,7 +285,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count); void *s390_kernel_write(void *dst, const void *src, size_t size); int __noreturn __put_kernel_bad(void); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 0efee5c49b1e..8d7332d4444c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -173,6 +173,32 @@ int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) return 0; } +/* + * Copy memory from kernel (real) to user (virtual) + */ +static int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long)buf); + return rc; +} + /* * Copy memory of the old, dumped system to a user space virtual address */ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 421efa46946b..d6d84e02f35a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -171,32 +171,6 @@ void memcpy_absolute(void *dest, void *src, size_t count) arch_local_irq_restore(flags); } -/* - * Copy memory from kernel (real) to user (virtual) - */ -int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count) -{ - int offs = 0, size, rc; - char *buf; - - buf = (char *) __get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = -EFAULT; - while (offs < count) { - size = min(PAGE_SIZE, count - offs); - if (memcpy_real(buf, src + offs, size)) - goto out; - if (copy_to_user(dest + offs, buf, size)) - goto out; - offs += size; - } - rc = 0; -out: - free_page((unsigned long) buf); - return rc; -} - /* * Check if physical address is within prefix or zero page */ -- cgit v1.2.3 From ebbc9570169147740aa39aee1d61b4cc5a631644 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 19 Jul 2022 07:16:36 +0200 Subject: s390/crash: support multi-segment iterators Make it possible to handle not only single-, but also multi-segment iterators in the copy_oldmem_iter() callback. Change the semantics of the called functions to match the iterator model: instead of an error code, the exact number of bytes copied is returned. The swap page used to copy data to user space is adopted for kernel space too; this has no performance impact.
Suggested-by: Matthew Wilcox Fixes: cc02e6e21aa5 ("s390/crash: add missing iterator advance in copy_oldmem_page()") Acked-by: Heiko Carstens Tested-by: Alexander Egorenkov Link: https://lore.kernel.org/r/5af6da3a0bffe48a90b0b7139ecf6a818b2d18e8.1658206891.git.agordeev@linux.ibm.com Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/os_info.h | 17 +++++- arch/s390/include/asm/sclp.h | 4 +- arch/s390/kernel/crash_dump.c | 128 +++++++--------------------------------- drivers/s390/char/zcore.c | 58 ++++++++---------- 4 files changed, 63 insertions(+), 144 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 147a8d547ef9..85248d8fee0c 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -8,6 +8,8 @@ #ifndef _ASM_S390_OS_INFO_H #define _ASM_S390_OS_INFO_H +#include + #define OS_INFO_VERSION_MAJOR 1 #define OS_INFO_VERSION_MINOR 1 #define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */ @@ -39,7 +41,20 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -int copy_oldmem_kernel(void *dst, unsigned long src, size_t count); +size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count); + +static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) +{ + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dst; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (copy_oldmem_iter(&iter, src, count) < count) + return -EFAULT; + return 0; +} #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 236b34b75ddb..24ee532fab84 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -17,6 +17,7 @@ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) #ifndef __ASSEMBLY__ +#include #include #include @@ -142,8 +143,7 @@ int sclp_pci_deconfigure(u32 fid); int sclp_ap_configure(u32 apid); int sclp_ap_deconfigure(u32 apid); int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid); -int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); +size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count); void sclp_ocf_cpc_name_copy(char *dst); static inline int sclp_get_core_info(struct sclp_core_info *info, int early) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 1662f1d81abe..bad8f47fc5d6 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -116,102 +116,35 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); } -/* - * Return physical address for virtual address - */ -static inline void *load_real_addr(void *addr) -{ - unsigned long real_addr; - - asm volatile( - " lra %0,0(%1)\n" - " jz 0f\n" - " la %0,0\n" - "0:" - : "=a" (real_addr) : "a" (addr) : "cc"); - return (void *)real_addr; -} - -/* - * Copy memory of the old, dumped system to a kernel space virtual address - */ -int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) -{ - unsigned long len; - void *ra; - int rc; - - while (count) { - if (!oldmem_data.start && src < sclp.hsa_size) { - /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - src); - rc = memcpy_hsa_kernel(dst, src, len); - if 
(rc) - return rc; - } else { - /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { - src -= oldmem_data.start; - len = min(count, oldmem_data.size - src); - } else if (oldmem_data.start && src < oldmem_data.size) { - len = min(count, oldmem_data.size - src); - src += oldmem_data.start; - } else { - len = count; - } - if (is_vmalloc_or_module_addr(dst)) { - ra = load_real_addr(dst); - len = min(PAGE_SIZE - offset_in_page(ra), len); - } else { - ra = dst; - } - if (memcpy_real(ra, src, len)) - return -EFAULT; - } - dst += len; - src += len; - count -= len; - } - return 0; -} - -/* - * Copy memory from kernel (real) to user (virtual) - */ -static int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count) +static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count) { - unsigned long offs = 0, size; + size_t len, copied, res = 0; mutex_lock(&memcpy_real_mutex); - while (offs < count) { - size = min(PAGE_SIZE, count - offs); - if (memcpy_real(memcpy_real_buf, src + offs, size)) + while (count) { + len = min(PAGE_SIZE, count); + if (memcpy_real(memcpy_real_buf, src, len)) break; - if (copy_to_user(dest + offs, memcpy_real_buf, size)) + copied = copy_to_iter(memcpy_real_buf, len, iter); + count -= copied; + src += copied; + res += copied; + if (copied < len) break; - offs += size; } mutex_unlock(&memcpy_real_mutex); - if (offs < count) - return -EFAULT; - return 0; + return res; } -/* - * Copy memory of the old, dumped system to a user space virtual address - */ -static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) +size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) { - unsigned long len; - int rc; + size_t len, copied, res = 0; while (count) { if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - src); - rc = memcpy_hsa_user(dst, src, len); - if (rc) - return rc; + copied = memcpy_hsa_iter(iter, src, len); } else { /* Check for swapped kdump oldmem areas */ if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { @@ -223,15 +156,15 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) } else { len = count; } - rc = copy_to_user_real(dst, src, len); - if (rc) - return rc; + copied = copy_to_iter_real(iter, src, len); } - dst += len; - src += len; - count -= len; + count -= copied; + src += copied; + res += copied; + if (copied < len) + break; } - return 0; + return res; } /* @@ -241,26 +174,9 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, unsigned long offset) { unsigned long src; - int rc; - if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter))) - return -EINVAL; - /* Multi-segment iterators are not supported */ - if (iter->nr_segs > 1) - return -EINVAL; - if (!csize) - return 0; src = pfn_to_phys(pfn) + offset; - - /* XXX: pass the iov_iter down to a common function */ - if (iter_is_iovec(iter)) - rc = copy_oldmem_user(iter->iov->iov_base, src, csize); - else - rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize); - if (rc < 0) - return rc; - iov_iter_advance(iter, csize); - return csize; + return copy_oldmem_iter(iter, src, csize); } /* diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 92b32ce645b9..f6da215ccf9f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ 
-54,38 +55,37 @@ static DEFINE_MUTEX(hsa_buf_mutex); static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE); /* - * Copy memory from HSA to user memory (not reentrant): + * Copy memory from HSA to iterator (not reentrant): * - * @dest: User buffer where memory should be copied to + * @iter: Iterator where memory should be copied to * @src: Start address within HSA where data should be copied * @count: Size of buffer, which should be copied */ -int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count) +size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count) { - unsigned long offset, bytes; + size_t bytes, copied, res = 0; + unsigned long offset; if (!hsa_available) - return -ENODATA; + return 0; mutex_lock(&hsa_buf_mutex); while (count) { if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) { TRACE("sclp_sdias_copy() failed\n"); - mutex_unlock(&hsa_buf_mutex); - return -EIO; + break; } offset = src % PAGE_SIZE; bytes = min(PAGE_SIZE - offset, count); - if (copy_to_user(dest, hsa_buf + offset, bytes)) { - mutex_unlock(&hsa_buf_mutex); - return -EFAULT; - } - src += bytes; - dest += bytes; - count -= bytes; + copied = copy_to_iter(hsa_buf + offset, bytes, iter); + count -= copied; + src += copied; + res += copied; + if (copied < bytes) + break; } mutex_unlock(&hsa_buf_mutex); - return 0; + return res; } /* @@ -95,28 +95,16 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count) * @src: Start address within HSA where data should be copied * @count: Size of buffer, which should be copied */ -int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) +static inline int memcpy_hsa_kernel(void *dst, unsigned long src, size_t count) { - unsigned long offset, bytes; + struct iov_iter iter; + struct kvec kvec; - if (!hsa_available) - return -ENODATA; - - mutex_lock(&hsa_buf_mutex); - while (count) { - if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) { - TRACE("sclp_sdias_copy() failed\n"); - mutex_unlock(&hsa_buf_mutex); - return -EIO; - } - offset = src % PAGE_SIZE; - bytes = min(PAGE_SIZE - offset, count); - memcpy(dest, hsa_buf + offset, bytes); - src += bytes; - dest += bytes; - count -= bytes; - } - mutex_unlock(&hsa_buf_mutex); + kvec.iov_base = dst; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (memcpy_hsa_iter(&iter, src, count) < count) + return -EIO; return 0; } -- cgit v1.2.3 From 7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 20 Jul 2022 08:22:01 +0200 Subject: s390/smp: rework absolute lowcore access Temporarily unsetting the prefix page in the memcpy_absolute() routine poses a risk of executing a code path with an unexpectedly disabled prefix page. This rework avoids uninstalling the prefix page and disabling normal and machine check interrupts when accessing absolute zero memory. Although the memcpy_absolute() routine can access all of memory, it is only used to update the absolute zero lowcore. This rework therefore introduces a new mechanism for absolute zero lowcore access and scraps the memcpy_absolute() routine for good. Instead, an area is reserved in virtual memory that is used for absolute lowcore access only. That area holds an array of 8KB virtual mappings - one per CPU. Whenever a CPU is brought online, the corresponding item is mapped to the real address of the previously installed prefix page.
The absolute zero lowcore access works like this: a CPU calls the new primitive get_abs_lowcore() to obtain its 8KB mapping as a pointer to the struct lowcore. Virtual address references to that pointer get translated to the real addresses of the prefix page, which in turn gets swapped with the absolute zero memory addresses due to prefixing. Once the pointer is no longer needed, it must be released with the put_abs_lowcore() primitive: struct lowcore *abs_lc; unsigned long flags; abs_lc = get_abs_lowcore(&flags); abs_lc->... = ...; put_abs_lowcore(abs_lc, flags); To ensure the described mechanism works, large segment- and region-table entries must be avoided for the 8KB mappings. Failure to do so results in usage of Region-Frame Absolute Address (RFAA) or Segment-Frame Absolute Address (SFAA) large page fields. In that case, absolute addresses would be used to address the prefix page instead of the real ones, and prefixing would get bypassed. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 5 ++- arch/s390/include/asm/abs_lowcore.h | 17 +++++++ arch/s390/include/asm/pgtable.h | 2 + arch/s390/include/asm/processor.h | 15 ------- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/abs_lowcore.c | 88 +++++++++++++++++++++++++++++++++++++ arch/s390/kernel/ipl.c | 9 +++- arch/s390/kernel/machine_kexec.c | 8 +++- arch/s390/kernel/os_info.c | 9 ++-- arch/s390/kernel/setup.c | 34 ++++++++------ arch/s390/kernel/smp.c | 34 ++++++++++---- arch/s390/mm/init.c | 2 +- arch/s390/mm/maccess.c | 67 +++++++++++++--------------- arch/s390/mm/vmem.c | 85 +++++++++++++++++++++++++++++++++++ 14 files changed, 294 insertions(+), 83 deletions(-) create mode 100644 arch/s390/include/asm/abs_lowcore.h create mode 100644 arch/s390/kernel/abs_lowcore.c (limited to 'arch/s390/include') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index bc48fe82d949..41b7af7a9365 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -10,11 +10,13 @@ #include #include #include +#include #include "decompressor.h" #include "boot.h" #include "uv.h" unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); @@ -180,7 +182,8 @@ static void setup_kernel_memory_layout(void) /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - MODULES_END = vmax; + __abs_lowcore = round_down(vmax - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h new file mode 100644 index 000000000000..bdef8d24d237 --- /dev/null +++ b/arch/s390/include/asm/abs_lowcore.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_ABS_LOWCORE_H +#define _ASM_S390_ABS_LOWCORE_H + +#include + +#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) + +extern unsigned long __abs_lowcore; +extern bool abs_lowcore_mapped; + +struct lowcore *get_abs_lowcore(unsigned long *flags); +void put_abs_lowcore(struct lowcore *lc, unsigned long flags); +int abs_lowcore_map(int cpu, struct lowcore *lc); +void abs_lowcore_unmap(int cpu); + +#endif /* _ASM_ABS_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index
a397b072a580..82506ebd544b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1781,6 +1781,8 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); +extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot); +extern void vmem_unmap_4k_page(unsigned long addr); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bd66f8e34949..93677ae89e7e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -307,21 +307,6 @@ static __always_inline void __noreturn disabled_wait(void) #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL extern int memcpy_real(void *, unsigned long, size_t); -extern void memcpy_absolute(void *, void *, size_t); - -#define put_abs_lowcore(member, x) do { \ - unsigned long __abs_address = offsetof(struct lowcore, member); \ - __typeof__(((struct lowcore *)0)->member) __tmp = (x); \ - \ - memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \ -} while (0) - -#define get_abs_lowcore(x, member) do { \ - unsigned long __abs_address = offsetof(struct lowcore, member); \ - __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \ - \ - memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \ -} while (0) extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 3cbfa9fddd9a..45e4b2f41e05 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c new file mode 100644 index 000000000000..dc9f0ecd4695 --- /dev/null +++ b/arch/s390/kernel/abs_lowcore.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#define ABS_LOWCORE_UNMAPPED 1 +#define ABS_LOWCORE_LAP_ON 2 +#define ABS_LOWCORE_IRQS_ON 4 + +unsigned long __bootdata_preserved(__abs_lowcore); +bool __ro_after_init abs_lowcore_mapped; + +int abs_lowcore_map(int cpu, struct lowcore *lc) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + unsigned long phys = __pa(lc); + int rc, i; + + for (i = 0; i < LC_PAGES; i++) { + rc = vmem_map_4k_page(addr, phys, PAGE_KERNEL); + if (rc) { + for (--i; i >= 0; i--) { + addr -= PAGE_SIZE; + vmem_unmap_4k_page(addr); + } + return rc; + } + addr += PAGE_SIZE; + phys += PAGE_SIZE; + } + return 0; +} + +void abs_lowcore_unmap(int cpu) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + int i; + + for (i = 0; i < LC_PAGES; i++) { + vmem_unmap_4k_page(addr); + addr += PAGE_SIZE; + } +} + +struct lowcore *get_abs_lowcore(unsigned long *flags) +{ + unsigned long irq_flags; + union ctlreg0 cr0; + int cpu; + + *flags = 0; + cpu = get_cpu(); + if (abs_lowcore_mapped) { + return ((struct lowcore *)__abs_lowcore) + 
cpu; + } else { + if (cpu != 0) + panic("Invalid unmapped absolute lowcore access\n"); + local_irq_save(irq_flags); + if (!irqs_disabled_flags(irq_flags)) + *flags |= ABS_LOWCORE_IRQS_ON; + __ctl_store(cr0.val, 0, 0); + if (cr0.lap) { + *flags |= ABS_LOWCORE_LAP_ON; + __ctl_clear_bit(0, 28); + } + *flags |= ABS_LOWCORE_UNMAPPED; + return lowcore_ptr[0]; + } +} + +void put_abs_lowcore(struct lowcore *lc, unsigned long flags) +{ + if (abs_lowcore_mapped) { + if (flags) + panic("Invalid mapped absolute lowcore release\n"); + } else { + if (smp_processor_id() != 0) + panic("Invalid mapped absolute lowcore access\n"); + if (!(flags & ABS_LOWCORE_UNMAPPED)) + panic("Invalid unmapped absolute lowcore release\n"); + if (flags & ABS_LOWCORE_LAP_ON) + __ctl_set_bit(0, 28); + if (flags & ABS_LOWCORE_IRQS_ON) + local_irq_enable(); + } + put_cpu(); +} diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 1cc85b8ff42e..325cbf69ebbd 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { unsigned long ipib = (unsigned long) reipl_block_actual; + struct lowcore *abs_lc; + unsigned long flags; unsigned int csum; csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); - put_abs_lowcore(ipib, ipib); - put_abs_lowcore(ipib_checksum, csum); + abs_lc = get_abs_lowcore(&flags); + abs_lc->ipib = ipib; + abs_lc->ipib_checksum = csum; + put_abs_lowcore(abs_lc, flags); dump_run(trigger); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index ab761c008f98..4579b42286d5 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { + struct lowcore *abs_lc; + unsigned long flags; + VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note()); + abs_lc = get_abs_lowcore(&flags); + abs_lc->vmcore_info = paddr_vmcoreinfo_note(); + put_abs_lowcore(abs_lc, flags); } void machine_shutdown(void) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 1acc2e05d70f..506ccb74d2d0 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -57,13 +57,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size) */ void __init os_info_init(void) { - void *ptr = &os_info; + struct lowcore *abs_lc; + unsigned long flags; os_info.version_major = OS_INFO_VERSION_MAJOR; os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - put_abs_lowcore(os_info, __pa(ptr)); + abs_lc = get_abs_lowcore(&flags); + abs_lc->os_info = __pa(&os_info); + put_abs_lowcore(abs_lc, flags); } #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8f483132901e..91139a16a44f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -58,7 +58,7 @@ 
#include #include #include -#include +#include #include #include #include @@ -411,8 +411,9 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { unsigned long int_psw_mask = PSW_KERNEL_BITS; + struct lowcore *abs_lc, *lc; unsigned long mcck_stack; - struct lowcore *lc; + unsigned long flags; if (IS_ENABLED(CONFIG_KASAN)) int_psw_mask |= PSW_MASK_DAT; @@ -474,11 +475,13 @@ static void __init setup_lowcore_dat_off(void) lc->restart_data = 0; lc->restart_source = -1U; - put_abs_lowcore(restart_stack, lc->restart_stack); - put_abs_lowcore(restart_fn, lc->restart_fn); - put_abs_lowcore(restart_data, lc->restart_data); - put_abs_lowcore(restart_source, lc->restart_source); - put_abs_lowcore(restart_psw, lc->restart_psw); + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_stack = lc->restart_stack; + abs_lc->restart_fn = lc->restart_fn; + abs_lc->restart_data = lc->restart_data; + abs_lc->restart_source = lc->restart_source; + abs_lc->restart_psw = lc->restart_psw; + put_abs_lowcore(abs_lc, flags); mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -499,8 +502,8 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { - struct lowcore *lc = lowcore_ptr[0]; - int cr; + struct lowcore *abs_lc; + unsigned long flags; __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; @@ -509,10 +512,15 @@ static void __init setup_lowcore_dat_on(void) S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; __ctl_set_bit(0, 28); __ctl_store(S390_lowcore.cregs_save_area, 0, 15); - put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS); - put_abs_lowcore(program_new_psw, lc->program_new_psw); - for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++) - put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]); + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; + abs_lc->program_new_psw = S390_lowcore.program_new_psw; + memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area, + sizeof(abs_lc->cregs_save_area)); + put_abs_lowcore(abs_lc, flags); + if (abs_lowcore_map(0, lowcore_ptr[0])) + panic("Couldn't setup absolute lowcore"); + abs_lowcore_mapped = true; } static struct resource code_resource = { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 30c91d565933..40876d809ea6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include #include @@ -212,10 +212,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->preempt_count = PREEMPT_DISABLED; if (nmi_alloc_mcesa(&lc->mcesad)) goto out; + if (abs_lowcore_map(cpu, lc)) + goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); return 0; +out_mcesa: + nmi_free_mcesa(&lc->mcesad); out: stack_free(mcck_stack); stack_free(async_stack); @@ -237,6 +241,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[cpu] = NULL; + abs_lowcore_unmap(cpu); nmi_free_mcesa(&lc->mcesad); stack_free(async_stack); stack_free(mcck_stack); @@ -315,9 +320,12 @@ static void pcpu_delegate(struct pcpu *pcpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { - struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; - unsigned int source_cpu = stap(); + struct lowcore *lc, *abs_lc; + unsigned int source_cpu; + unsigned long flags; + lc = lowcore_ptr[pcpu - pcpu_devices]; + 
source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) { call_on_stack(2, stack, void, __pcpu_delegate, @@ -332,10 +340,12 @@ static void pcpu_delegate(struct pcpu *pcpu, lc->restart_data = (unsigned long)data; lc->restart_source = source_cpu; } else { - put_abs_lowcore(restart_stack, stack); - put_abs_lowcore(restart_fn, (unsigned long)func); - put_abs_lowcore(restart_data, (unsigned long)data); - put_abs_lowcore(restart_source, source_cpu); + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_stack = stack; + abs_lc->restart_fn = (unsigned long)func; + abs_lc->restart_data = (unsigned long)data; + abs_lc->restart_source = source_cpu; + put_abs_lowcore(abs_lc, flags); } __bpon(); asm volatile( @@ -581,6 +591,8 @@ static DEFINE_SPINLOCK(ctl_lock); void smp_ctl_set_clear_bit(int cr, int bit, bool set) { struct ec_creg_mask_parms parms = { .cr = cr, }; + struct lowcore *abs_lc; + unsigned long flags; u64 ctlreg; if (set) { @@ -591,9 +603,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) parms.andval = ~(1UL << bit); } spin_lock(&ctl_lock); - get_abs_lowcore(ctlreg, cregs_save_area[cr]); + abs_lc = get_abs_lowcore(&flags); + ctlreg = abs_lc->cregs_save_area[cr]; ctlreg = (ctlreg & parms.andval) | parms.orval; - put_abs_lowcore(cregs_save_area[cr], ctlreg); + abs_lc->cregs_save_area[cr] = ctlreg; + put_abs_lowcore(abs_lc, flags); spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } @@ -1281,6 +1295,8 @@ static int __init smp_reinit_ipl_cpu(void) __ctl_clear_bit(0, 28); /* disable lowcore protection */ S390_lowcore.mcesad = mcesad; __ctl_load(cr0, 0, 0); + if (abs_lowcore_map(0, lc)) + panic("Couldn't remap absolute lowcore"); lowcore_ptr[0] = lc; local_mcck_enable(); local_irq_restore(flags); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6a0ac00d5a42..7b6873ac99d1 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d6d84e02f35a..b8451ddbb3d6 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) @@ -148,46 +149,20 @@ int memcpy_real(void *dest, unsigned long src, size_t count) } /* - * Copy memory in absolute mode (kernel to kernel) + * Find CPU that owns swapped prefix page */ -void memcpy_absolute(void *dest, void *src, size_t count) -{ - unsigned long cr0, flags, prefix; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); /* disable lowcore protection */ - prefix = store_prefix(); - if (prefix) { - local_mcck_disable(); - set_prefix(0); - memcpy(dest, src, count); - set_prefix(prefix); - local_mcck_enable(); - } else { - memcpy(dest, src, count); - } - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} - -/* - * Check if physical address is within prefix or zero page - */ -static int is_swapped(phys_addr_t addr) +static int get_swapped_owner(phys_addr_t addr) { phys_addr_t lc; int cpu; - if (addr < sizeof(struct lowcore)) - return 1; for_each_online_cpu(cpu) { lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; - return 1; + return cpu; } - return 0; + return -1; } /* @@ -200,17 +175,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) { void *ptr = phys_to_virt(addr); void *bounce = ptr; + 
struct lowcore *abs_lc; + unsigned long flags; unsigned long size; + int this_cpu, cpu; cpus_read_lock(); - preempt_disable(); - if (is_swapped(addr)) { - size = PAGE_SIZE - (addr & ~PAGE_MASK); - bounce = (void *) __get_free_page(GFP_ATOMIC); - if (bounce) - memcpy_absolute(bounce, ptr, size); + this_cpu = get_cpu(); + if (addr >= sizeof(struct lowcore)) { + cpu = get_swapped_owner(addr); + if (cpu < 0) + goto out; + } + bounce = (void *)__get_free_page(GFP_ATOMIC); + if (!bounce) + goto out; + size = PAGE_SIZE - (addr & ~PAGE_MASK); + if (addr < sizeof(struct lowcore)) { + abs_lc = get_abs_lowcore(&flags); + ptr = (void *)abs_lc + addr; + memcpy(bounce, ptr, size); + put_abs_lowcore(abs_lc, flags); + } else if (cpu == this_cpu) { + ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); + memcpy(bounce, ptr, size); + } else { + memcpy(bounce, ptr, size); } - preempt_enable(); +out: + put_cpu(); cpus_read_unlock(); return bounce; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c2583f921ca8..203ba2bfea59 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -560,6 +560,91 @@ int vmem_add_mapping(unsigned long start, unsigned long size) return ret; } +/* + * Allocate new or return existing page-table entry, but do not map it + * to any physical address. If missing, allocate segment- and region- + * table entries along. Meeting a large segment- or region-table entry + * while traversing is an error, since the function is expected to be + * called against virtual regions reserverd for 4KB mappings only. + */ +static pte_t *vmem_get_alloc_pte(unsigned long addr) +{ + pte_t *ptep = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) + goto out; + pgd_populate(&init_mm, pgd, p4d); + } + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) + goto out; + p4d_populate(&init_mm, p4d, pud); + } + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); + } else if (WARN_ON_ONCE(pud_large(*pud))) { + goto out; + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte = vmem_pte_alloc(); + if (!pte) + goto out; + pmd_populate(&init_mm, pmd, pte); + } else if (WARN_ON_ONCE(pmd_large(*pmd))) { + goto out; + } + ptep = pte_offset_kernel(pmd, addr); +out: + return ptep; +} + +int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) +{ + pte_t *ptep, pte; + int rc = 0; + + if (!IS_ALIGNED(addr, PAGE_SIZE)) + return -EINVAL; + mutex_lock(&vmem_mutex); + ptep = vmem_get_alloc_pte(addr); + if (!ptep) { + rc = -ENOMEM; + goto out; + } + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte = mk_pte_phys(phys, prot); + set_pte(ptep, pte); +out: + mutex_unlock(&vmem_mutex); + return rc; +} + +void vmem_unmap_4k_page(unsigned long addr) +{ + pte_t *ptep; + + mutex_lock(&vmem_mutex); + ptep = virt_to_kpte(addr); + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte_clear(&init_mm, addr, ptep); + mutex_unlock(&vmem_mutex); +} + /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug -- cgit v1.2.3 From ded466e1806686794b403ebf031133bbaca76bb2 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Tue, 26 Jul 2022 18:57:59 +0200 Subject: s390/unwind: fix fgraph return address recovery When 
HAVE_FUNCTION_GRAPH_RET_ADDR_PTR is defined, the return address to the fgraph caller is recovered by tagging it along with the stack pointer of the ftrace stack. This makes the stack unwinding more reliable. When the fgraph return address is modified to return_to_handler, ftrace_graph_ret_addr tries to restore it to the original value using the tagged stack pointer. Fix this by passing the tagged sp to ftrace_graph_ret_addr. Fixes: d81675b60d09 ("s390/unwind: recover kretprobe modified return address in stacktrace") Cc: # 5.18 Reviewed-by: Vasily Gorbik Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/unwind.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 0bf06f1682d8..02462e7100c1 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -47,7 +47,7 @@ struct unwind_state { static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long ip) { - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp); if (is_kretprobe_trampoline(ip)) ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur); return ip; -- cgit v1.2.3 From 5e441f61f509617a3f57fcb156b7aa2870cc8752 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 6 Aug 2022 09:24:07 +0200 Subject: Revert "s390/smp: rework absolute lowcore access" This reverts commit 7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8. It introduced vmem_mutex locking in the vmem_map_4k_page() function, which is called from smp_reinit_ipl_cpu() with interrupts disabled. While this is a pre-SMP early initcall, with no other CPUs running in parallel and no other code taking vmem_mutex at this boot stage, it still needs to be fixed.
Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 5 +-- arch/s390/include/asm/abs_lowcore.h | 17 ------- arch/s390/include/asm/pgtable.h | 2 - arch/s390/include/asm/processor.h | 15 +++++++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/abs_lowcore.c | 88 ------------------------------------- arch/s390/kernel/ipl.c | 9 +--- arch/s390/kernel/machine_kexec.c | 8 +--- arch/s390/kernel/os_info.c | 9 ++-- arch/s390/kernel/setup.c | 34 ++++++-------- arch/s390/kernel/smp.c | 34 ++++---------- arch/s390/mm/init.c | 2 +- arch/s390/mm/maccess.c | 67 +++++++++++++++------------- arch/s390/mm/vmem.c | 85 ----------------------------------- 14 files changed, 83 insertions(+), 294 deletions(-) delete mode 100644 arch/s390/include/asm/abs_lowcore.h delete mode 100644 arch/s390/kernel/abs_lowcore.c (limited to 'arch/s390/include') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 41b7af7a9365..bc48fe82d949 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -10,13 +10,11 @@ #include #include #include -#include #include "decompressor.h" #include "boot.h" #include "uv.h" unsigned long __bootdata_preserved(__kaslr_offset); -unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); @@ -182,8 +180,7 @@ static void setup_kernel_memory_layout(void) /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - __abs_lowcore = round_down(vmax - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); - MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); + MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h deleted file mode 100644 index bdef8d24d237..000000000000 --- a/arch/s390/include/asm/abs_lowcore.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_ABS_LOWCORE_H -#define _ASM_S390_ABS_LOWCORE_H - -#include - -#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) - -extern unsigned long __abs_lowcore; -extern bool abs_lowcore_mapped; - -struct lowcore *get_abs_lowcore(unsigned long *flags); -void put_abs_lowcore(struct lowcore *lc, unsigned long flags); -int abs_lowcore_map(int cpu, struct lowcore *lc); -void abs_lowcore_unmap(int cpu); - -#endif /* _ASM_ABS_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 82506ebd544b..a397b072a580 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1781,8 +1781,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); -extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot); -extern void vmem_unmap_4k_page(unsigned long addr); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 93677ae89e7e..bd66f8e34949 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -307,6 +307,21 @@ static __always_inline void __noreturn disabled_wait(void) #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL extern int memcpy_real(void *, 
unsigned long, size_t); +extern void memcpy_absolute(void *, void *, size_t); + +#define put_abs_lowcore(member, x) do { \ + unsigned long __abs_address = offsetof(struct lowcore, member); \ + __typeof__(((struct lowcore *)0)->member) __tmp = (x); \ + \ + memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \ +} while (0) + +#define get_abs_lowcore(x, member) do { \ + unsigned long __abs_address = offsetof(struct lowcore, member); \ + __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \ + \ + memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \ +} while (0) extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 45e4b2f41e05..3cbfa9fddd9a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o +obj-y += smp.o text_amode31.o stacktrace.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c deleted file mode 100644 index dc9f0ecd4695..000000000000 --- a/arch/s390/kernel/abs_lowcore.c +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include - -#define ABS_LOWCORE_UNMAPPED 1 -#define ABS_LOWCORE_LAP_ON 2 -#define ABS_LOWCORE_IRQS_ON 4 - -unsigned long __bootdata_preserved(__abs_lowcore); -bool __ro_after_init abs_lowcore_mapped; - -int abs_lowcore_map(int cpu, struct lowcore *lc) -{ - unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); - unsigned long phys = __pa(lc); - int rc, i; - - for (i = 0; i < LC_PAGES; i++) { - rc = vmem_map_4k_page(addr, phys, PAGE_KERNEL); - if (rc) { - for (--i; i >= 0; i--) { - addr -= PAGE_SIZE; - vmem_unmap_4k_page(addr); - } - return rc; - } - addr += PAGE_SIZE; - phys += PAGE_SIZE; - } - return 0; -} - -void abs_lowcore_unmap(int cpu) -{ - unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); - int i; - - for (i = 0; i < LC_PAGES; i++) { - vmem_unmap_4k_page(addr); - addr += PAGE_SIZE; - } -} - -struct lowcore *get_abs_lowcore(unsigned long *flags) -{ - unsigned long irq_flags; - union ctlreg0 cr0; - int cpu; - - *flags = 0; - cpu = get_cpu(); - if (abs_lowcore_mapped) { - return ((struct lowcore *)__abs_lowcore) + cpu; - } else { - if (cpu != 0) - panic("Invalid unmapped absolute lowcore access\n"); - local_irq_save(irq_flags); - if (!irqs_disabled_flags(irq_flags)) - *flags |= ABS_LOWCORE_IRQS_ON; - __ctl_store(cr0.val, 0, 0); - if (cr0.lap) { - *flags |= ABS_LOWCORE_LAP_ON; - __ctl_clear_bit(0, 28); - } - *flags |= ABS_LOWCORE_UNMAPPED; - return lowcore_ptr[0]; - } -} - -void put_abs_lowcore(struct lowcore *lc, unsigned long flags) -{ - if (abs_lowcore_mapped) { - if (flags) - panic("Invalid mapped absolute lowcore release\n"); - } else { - if (smp_processor_id() != 0) - panic("Invalid mapped absolute lowcore access\n"); - if (!(flags & ABS_LOWCORE_UNMAPPED)) - panic("Invalid unmapped absolute lowcore release\n"); - if (flags & ABS_LOWCORE_LAP_ON) - __ctl_set_bit(0, 28); - if (flags & ABS_LOWCORE_IRQS_ON) - local_irq_enable(); - } - put_cpu(); -} diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 325cbf69ebbd..1cc85b8ff42e 100644 --- 
a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -1643,16 +1642,12 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { unsigned long ipib = (unsigned long) reipl_block_actual; - struct lowcore *abs_lc; - unsigned long flags; unsigned int csum; csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); - abs_lc = get_abs_lowcore(&flags); - abs_lc->ipib = ipib; - abs_lc->ipib_checksum = csum; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(ipib, ipib); + put_abs_lowcore(ipib_checksum, csum); dump_run(trigger); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 4579b42286d5..ab761c008f98 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -223,18 +222,13 @@ void machine_kexec_cleanup(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { - struct lowcore *abs_lc; - unsigned long flags; - VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - abs_lc = get_abs_lowcore(&flags); - abs_lc->vmcore_info = paddr_vmcoreinfo_note(); - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note()); } void machine_shutdown(void) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 506ccb74d2d0..1acc2e05d70f 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -57,16 +57,13 @@ void os_info_entry_add(int nr, void *ptr, u64 size) */ void __init os_info_init(void) { - struct lowcore *abs_lc; - unsigned long flags; + void *ptr = &os_info; os_info.version_major = OS_INFO_VERSION_MAJOR; os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - abs_lc = get_abs_lowcore(&flags); - abs_lc->os_info = __pa(&os_info); - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(os_info, __pa(ptr)); } #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 91139a16a44f..8f483132901e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include #include #include @@ -411,9 +411,8 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { unsigned long int_psw_mask = PSW_KERNEL_BITS; - struct lowcore *abs_lc, *lc; unsigned long mcck_stack; - unsigned long flags; + struct lowcore *lc; if (IS_ENABLED(CONFIG_KASAN)) int_psw_mask |= PSW_MASK_DAT; @@ -475,13 +474,11 @@ static void __init setup_lowcore_dat_off(void) lc->restart_data = 0; lc->restart_source = -1U; - abs_lc = get_abs_lowcore(&flags); - abs_lc->restart_stack = lc->restart_stack; - abs_lc->restart_fn = lc->restart_fn; - abs_lc->restart_data = lc->restart_data; - abs_lc->restart_source = lc->restart_source; - abs_lc->restart_psw = lc->restart_psw; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(restart_stack, lc->restart_stack); + put_abs_lowcore(restart_fn, lc->restart_fn); + put_abs_lowcore(restart_data, lc->restart_data); + 
put_abs_lowcore(restart_source, lc->restart_source); + put_abs_lowcore(restart_psw, lc->restart_psw); mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -502,8 +499,8 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { - struct lowcore *abs_lc; - unsigned long flags; + struct lowcore *lc = lowcore_ptr[0]; + int cr; __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; @@ -512,15 +509,10 @@ static void __init setup_lowcore_dat_on(void) S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; __ctl_set_bit(0, 28); __ctl_store(S390_lowcore.cregs_save_area, 0, 15); - abs_lc = get_abs_lowcore(&flags); - abs_lc->restart_flags = RESTART_FLAG_CTLREGS; - abs_lc->program_new_psw = S390_lowcore.program_new_psw; - memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area, - sizeof(abs_lc->cregs_save_area)); - put_abs_lowcore(abs_lc, flags); - if (abs_lowcore_map(0, lowcore_ptr[0])) - panic("Couldn't setup absolute lowcore"); - abs_lowcore_mapped = true; + put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS); + put_abs_lowcore(program_new_psw, lc->program_new_psw); + for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++) + put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]); } static struct resource code_resource = { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 40876d809ea6..30c91d565933 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include #include @@ -212,14 +212,10 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->preempt_count = PREEMPT_DISABLED; if (nmi_alloc_mcesa(&lc->mcesad)) goto out; - if (abs_lowcore_map(cpu, lc)) - goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); return 0; -out_mcesa: - nmi_free_mcesa(&lc->mcesad); out: stack_free(mcck_stack); stack_free(async_stack); @@ -241,7 +237,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[cpu] = NULL; - abs_lowcore_unmap(cpu); nmi_free_mcesa(&lc->mcesad); stack_free(async_stack); stack_free(mcck_stack); @@ -320,12 +315,9 @@ static void pcpu_delegate(struct pcpu *pcpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { - struct lowcore *lc, *abs_lc; - unsigned int source_cpu; - unsigned long flags; + struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; + unsigned int source_cpu = stap(); - lc = lowcore_ptr[pcpu - pcpu_devices]; - source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) { call_on_stack(2, stack, void, __pcpu_delegate, @@ -340,12 +332,10 @@ static void pcpu_delegate(struct pcpu *pcpu, lc->restart_data = (unsigned long)data; lc->restart_source = source_cpu; } else { - abs_lc = get_abs_lowcore(&flags); - abs_lc->restart_stack = stack; - abs_lc->restart_fn = (unsigned long)func; - abs_lc->restart_data = (unsigned long)data; - abs_lc->restart_source = source_cpu; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(restart_stack, stack); + put_abs_lowcore(restart_fn, (unsigned long)func); + put_abs_lowcore(restart_data, (unsigned long)data); + put_abs_lowcore(restart_source, source_cpu); } __bpon(); asm volatile( @@ -591,8 +581,6 @@ static DEFINE_SPINLOCK(ctl_lock); void smp_ctl_set_clear_bit(int cr, int bit, bool set) { struct ec_creg_mask_parms parms = { .cr = cr, }; - struct lowcore *abs_lc; - 
unsigned long flags; u64 ctlreg; if (set) { @@ -603,11 +591,9 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) parms.andval = ~(1UL << bit); } spin_lock(&ctl_lock); - abs_lc = get_abs_lowcore(&flags); - ctlreg = abs_lc->cregs_save_area[cr]; + get_abs_lowcore(ctlreg, cregs_save_area[cr]); ctlreg = (ctlreg & parms.andval) | parms.orval; - abs_lc->cregs_save_area[cr] = ctlreg; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(cregs_save_area[cr], ctlreg); spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } @@ -1295,8 +1281,6 @@ static int __init smp_reinit_ipl_cpu(void) __ctl_clear_bit(0, 28); /* disable lowcore protection */ S390_lowcore.mcesad = mcesad; __ctl_load(cr0, 0, 0); - if (abs_lowcore_map(0, lc)) - panic("Couldn't remap absolute lowcore"); lowcore_ptr[0] = lc; local_mcck_enable(); local_irq_restore(flags); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7b6873ac99d1..6a0ac00d5a42 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index b8451ddbb3d6..d6d84e02f35a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -15,7 +15,6 @@ #include #include #include -#include #include static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) @@ -149,20 +148,46 @@ int memcpy_real(void *dest, unsigned long src, size_t count) } /* - * Find CPU that owns swapped prefix page + * Copy memory in absolute mode (kernel to kernel) */ -static int get_swapped_owner(phys_addr_t addr) +void memcpy_absolute(void *dest, void *src, size_t count) +{ + unsigned long cr0, flags, prefix; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + prefix = store_prefix(); + if (prefix) { + local_mcck_disable(); + set_prefix(0); + memcpy(dest, src, count); + set_prefix(prefix); + local_mcck_enable(); + } else { + memcpy(dest, src, count); + } + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + +/* + * Check if physical address is within prefix or zero page + */ +static int is_swapped(phys_addr_t addr) { phys_addr_t lc; int cpu; + if (addr < sizeof(struct lowcore)) + return 1; for_each_online_cpu(cpu) { lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; - return cpu; + return 1; } - return -1; + return 0; } /* @@ -175,35 +200,17 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) { void *ptr = phys_to_virt(addr); void *bounce = ptr; - struct lowcore *abs_lc; - unsigned long flags; unsigned long size; - int this_cpu, cpu; cpus_read_lock(); - this_cpu = get_cpu(); - if (addr >= sizeof(struct lowcore)) { - cpu = get_swapped_owner(addr); - if (cpu < 0) - goto out; - } - bounce = (void *)__get_free_page(GFP_ATOMIC); - if (!bounce) - goto out; - size = PAGE_SIZE - (addr & ~PAGE_MASK); - if (addr < sizeof(struct lowcore)) { - abs_lc = get_abs_lowcore(&flags); - ptr = (void *)abs_lc + addr; - memcpy(bounce, ptr, size); - put_abs_lowcore(abs_lc, flags); - } else if (cpu == this_cpu) { - ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); - memcpy(bounce, ptr, size); - } else { - memcpy(bounce, ptr, size); + preempt_disable(); + if (is_swapped(addr)) { + size = PAGE_SIZE - (addr & ~PAGE_MASK); + bounce = (void *) __get_free_page(GFP_ATOMIC); + if (bounce) + memcpy_absolute(bounce, ptr, size); } -out: - put_cpu(); + preempt_enable(); cpus_read_unlock(); return bounce; 
} diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 203ba2bfea59..c2583f921ca8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -560,91 +560,6 @@ int vmem_add_mapping(unsigned long start, unsigned long size) return ret; } -/* - * Allocate new or return existing page-table entry, but do not map it - * to any physical address. If missing, allocate segment- and region- - * table entries along. Meeting a large segment- or region-table entry - * while traversing is an error, since the function is expected to be - * called against virtual regions reserverd for 4KB mappings only. - */ -static pte_t *vmem_get_alloc_pte(unsigned long addr) -{ - pte_t *ptep = NULL; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) { - p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4d) - goto out; - pgd_populate(&init_mm, pgd, p4d); - } - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pud) - goto out; - p4d_populate(&init_mm, p4d, pud); - } - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pmd) - goto out; - pud_populate(&init_mm, pud, pmd); - } else if (WARN_ON_ONCE(pud_large(*pud))) { - goto out; - } - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - pte = vmem_pte_alloc(); - if (!pte) - goto out; - pmd_populate(&init_mm, pmd, pte); - } else if (WARN_ON_ONCE(pmd_large(*pmd))) { - goto out; - } - ptep = pte_offset_kernel(pmd, addr); -out: - return ptep; -} - -int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) -{ - pte_t *ptep, pte; - int rc = 0; - - if (!IS_ALIGNED(addr, PAGE_SIZE)) - return -EINVAL; - mutex_lock(&vmem_mutex); - ptep = vmem_get_alloc_pte(addr); - if (!ptep) { - rc = -ENOMEM; - goto out; - } - __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); - pte = mk_pte_phys(phys, prot); - set_pte(ptep, pte); -out: - mutex_unlock(&vmem_mutex); - return rc; -} - -void vmem_unmap_4k_page(unsigned long addr) -{ - pte_t *ptep; - - mutex_lock(&vmem_mutex); - ptep = virt_to_kpte(addr); - __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); - pte_clear(&init_mm, addr, ptep); - mutex_unlock(&vmem_mutex); -} - /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug -- cgit v1.2.3
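For illustration of the autoload mechanism the uvdevice patch switches to: a minimal sketch of an out-of-tree module gated on an s390 CPU feature. It uses only interfaces visible in the patches above (S390_CPU_FEATURE_UV, module_cpu_feature_match()); the demo_* names and the printed message are hypothetical, not part of the series, and this assumes a kernel of this vintage (v5.19-era).

	#include <linux/module.h>
	#include <linux/cpufeature.h>	/* module_cpu_feature_match() */
	#include <asm/cpufeature.h>	/* S390_CPU_FEATURE_* */

	static int __init demo_init(void)
	{
		/* Reached only if S390_CPU_FEATURE_UV (facility 158) matched. */
		pr_info("demo: ultravisor facility is available\n");
		return 0;
	}

	static void __exit demo_exit(void)
	{
	}

	/*
	 * module_cpu_feature_match() registers demo_init() behind a
	 * cpu_have_feature() check and emits a CPU-feature module alias,
	 * so udev auto-loads the module when the feature is present -
	 * the same pattern uvio_dev_init() is converted to above.
	 */
	module_cpu_feature_match(S390_CPU_FEATURE_UV, demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");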
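The multi-segment iterator conversion above repeatedly uses one pattern: a single copy routine targets an iov_iter and returns the number of bytes copied, while kernel-buffer callers wrap their buffer in a one-element kvec. A minimal self-contained sketch of that pattern follows; the demo_* names are illustrative, and WRITE is the iterator direction constant these kernels use (later renamed ITER_DEST).

	#include <linux/uio.h>
	#include <linux/errno.h>

	/* Copy @count bytes from @src into @iter; returns bytes copied. */
	static size_t demo_copy_iter(struct iov_iter *iter, const void *src, size_t count)
	{
		/* copy_to_iter() handles user (iovec) and kernel (kvec) targets alike. */
		return copy_to_iter(src, count, iter);
	}

	/* Kernel-buffer wrapper: wrap @dst in a one-element kvec-backed iterator. */
	static int demo_copy_kernel(void *dst, const void *src, size_t count)
	{
		struct iov_iter iter;
		struct kvec kvec;

		kvec.iov_base = dst;
		kvec.iov_len = count;
		iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
		if (demo_copy_iter(&iter, src, count) < count)
			return -EFAULT;	/* a short copy maps back to an error code */
		return 0;
	}

This is exactly the shape of copy_oldmem_kernel() and memcpy_hsa_kernel() after the conversion: the iterator variant carries all the copying logic, and the error-code interface survives only as a thin wrapper.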