diff options
107 files changed, 1268 insertions, 1993 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a2a12be04c9..a60cc523d810 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -53,8 +53,7 @@ config ARCH_SUPPORTS_UPROBES config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0x18000000000000 if KASAN_S390_4_LEVEL_PAGING - default 0x30000000000 + default 0x18000000000000 config S390 def_bool y @@ -191,7 +190,6 @@ config S390 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI - select SET_FS select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK @@ -714,7 +712,7 @@ if PCI config PCI_NR_FUNCTIONS int "Maximum number of PCI functions (1-4096)" range 1 4096 - default "128" + default "512" help This allows you to specify the maximum number of PCI functions which this kernel will support. diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index ab48b694ade8..6bfaceebbbc0 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -5,3 +5,11 @@ config TRACE_IRQFLAGS_SUPPORT config EARLY_PRINTK def_bool y + +config DEBUG_USER_ASCE + bool "Debug User ASCE" + help + Check on exit to user space that address space control + elements are setup correctly. + + If unsure, say N. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ba94b03c8b2f..8db267d2a543 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -25,7 +25,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 2ea603f70c3b..8b50967f5804 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -2,20 +2,32 @@ #ifndef BOOT_BOOT_H #define BOOT_BOOT_H +#include <linux/types.h> + +#define BOOT_STACK_OFFSET 0x8000 + +#ifndef __ASSEMBLY__ + +#include <linux/compiler.h> + void startup_kernel(void); -void detect_memory(void); +unsigned long detect_memory(void); +bool is_ipl_block_dump(void); void store_ipl_parmblock(void); void setup_boot_command_line(void); void parse_boot_command_line(void); -void setup_memory_end(void); void verify_facilities(void); void print_missing_facilities(void); void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); +void __printf(1, 2) decompressor_printk(const char *fmt, ...); -extern int kaslr_enabled; extern const char kernel_version[]; +extern unsigned long memory_limit; +extern int vmalloc_size_set; +extern int kaslr_enabled; unsigned long read_ipl_report(unsigned long safe_offset); +#endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 765a08f1bd77..01d93832cf4a 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only vmlinux vmlinux.lds +vmlinux.syms diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index b235ed95a3d8..de18dab518bb 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,21 +10,39 @@ GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n -obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += info.bin $(obj-y) +targets += info.bin syms.bin vmlinux.syms $(obj-all) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) +OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) -LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE $(call if_changed,ld) +LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE + $(call if_changed,ld) + +quiet_cmd_dumpsyms = DUMPSYMS $< +define cmd_dumpsyms + $(NM) -n -S --format=bsd "$<" | $(PERL) -ne '/(\w+)\s+(\w+)\s+[tT]\s+(\w+)/ and printf "%x %x %s\0",hex $$1,hex $$2,$$3' > "$@" +endef + +$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE + $(call if_changed,dumpsyms) + +OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms +$(obj)/syms.o: $(obj)/syms.bin FORCE + $(call if_changed,objcopy) + OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load $(obj)/info.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index c15eb7114d83..41f0ad97a4db 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -2,8 +2,10 @@ #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H #define BOOT_COMPRESSED_DECOMPRESSOR_H +#include <linux/stddef.h> + #ifdef CONFIG_KERNEL_UNCOMPRESSED -static inline void *decompress_kernel(void) {} +static inline void *decompress_kernel(void) { return NULL; } #else void *decompress_kernel(void); #endif diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 9427e2cd0c15..27a09c1c78f6 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -27,6 +27,7 @@ SECTIONS *(.rodata.*) _erodata = . ; } + NOTES .data : { _data = . ; *(.data) @@ -82,6 +83,14 @@ SECTIONS *(.vmlinux.info) } + .decompressor.syms : { + . += 1; /* make sure we have \0 before the first entry */ + . = ALIGN(2); + _decompressor_syms_start = .; + *(.decompressor.syms) + _decompressor_syms_end = .; + } + #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; #else diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 1a2c2b1ed964..dacb7813f982 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -28,6 +28,7 @@ #include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> +#include "boot.h" #define ARCH_OFFSET 4 @@ -62,8 +63,12 @@ __HEAD .org __LC_RST_NEW_PSW # 0x1a0 .quad 0,iplstart + .org __LC_EXT_NEW_PSW # 0x1b0 + .quad 0x0002000180000000,0x1b0 # disabled wait .org __LC_PGM_NEW_PSW # 0x1d0 .quad 0x0000000180000000,startup_pgm_check_handler + .org __LC_IO_NEW_PSW # 0x1f0 + .quad 0x0002000180000000,0x1f0 # disabled wait .org 0x200 @@ -275,8 +280,8 @@ iplstart: # or linload or SALIPL # .org 0x10000 -ENTRY(startup) - j .Lep_startup_normal +SYM_CODE_START(startup) + j startup_normal .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of @@ -290,9 +295,9 @@ ENTRY(startup) # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode # .org 0x10010 -ENTRY(startup_kdump) - j .Lep_startup_kdump -.Lep_startup_normal: + j startup_kdump +SYM_CODE_END(startup) +SYM_CODE_START_LOCAL(startup_normal) mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero lhi %r1,2 # mode 2 = esame (dump) @@ -303,6 +308,9 @@ ENTRY(startup_kdump) sam64 # switch to 64 bit addressing mode basr %r13,0 # get base .LPG0: + mvc __LC_EXT_NEW_PSW(16),.Lext_new_psw-.LPG0(%r13) + mvc __LC_PGM_NEW_PSW(16),.Lpgm_new_psw-.LPG0(%r13) + mvc __LC_IO_NEW_PSW(16),.Lio_new_psw-.LPG0(%r13) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 @@ -315,12 +323,18 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) brasl %r14,verify_facilities brasl %r14,startup_kernel +SYM_CODE_END(startup_normal) .Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD + .long BOOT_STACK_OFFSET + BOOT_STACK_SIZE - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff - +.Lext_new_psw: + .quad 0x0002000180000000,0x1b0 # disabled wait +.Lpgm_new_psw: + .quad 0x0000000180000000,startup_pgm_check_handler +.Lio_new_psw: + .quad 0x0002000180000000,0x1f0 # disabled wait .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table @@ -359,7 +373,7 @@ ENTRY(startup_kdump) # It simply saves general/control registers and psw in # the save area and does disabled wait with a faulty address. # -ENTRY(startup_pgm_check_handler) +SYM_CODE_START_LOCAL(startup_pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA_SYNC la %r8,4095 stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) @@ -378,9 +392,9 @@ ENTRY(startup_pgm_check_handler) la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait +SYM_CODE_END(startup_pgm_check_handler) .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD -ENDPROC(startup_pgm_check_handler) # # params at 10400 (setup.h) diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S index 174d6959bf5b..f015469e7db9 100644 --- a/arch/s390/boot/head_kdump.S +++ b/arch/s390/boot/head_kdump.S @@ -19,8 +19,7 @@ # Note: This code has to be position independent # -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) lhi %r1,2 # mode 2 = esame (dump) sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode sam64 # Switch to 64 bit addressing @@ -87,14 +86,15 @@ startup_kdump_relocated: basr %r13,0 0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +SYM_CODE_END(startup_kdump) .align 8 .Lrestart_psw: .quad 0x0000000080000000,0x0000000000000000 + startup #else -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) larl %r13,startup_kdump_crash lpswe 0(%r13) +SYM_CODE_END(startup_kdump) .align 8 startup_kdump_crash: .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index f94b91d72620..d372a45fe10e 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -17,10 +17,10 @@ int __bootdata_preserved(ipl_block_valid); unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; -unsigned long __bootdata(memory_end); -int __bootdata(memory_end_set); int __bootdata(noexec_disabled); +unsigned long memory_limit; +int vmalloc_size_set; int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) @@ -57,6 +57,17 @@ void store_ipl_parmblock(void) ipl_block_valid = 1; } +bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + return false; +} + static size_t scpdata_length(const u8 *buf, size_t count) { while (count) { @@ -237,13 +248,13 @@ void parse_boot_command_line(void) while (*args) { args = next_arg(args, ¶m, &val); - if (!strcmp(param, "mem") && val) { - memory_end = round_down(memparse(val, NULL), PAGE_SIZE); - memory_end_set = 1; - } + if (!strcmp(param, "mem") && val) + memory_limit = round_down(memparse(val, NULL), PAGE_SIZE); - if (!strcmp(param, "vmalloc") && val) + if (!strcmp(param, "vmalloc") && val) { vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + vmalloc_size_set = 1; + } if (!strcmp(param, "dfltcc") && val) { if (!strcmp(val, "off")) @@ -279,27 +290,3 @@ void parse_boot_command_line(void) #endif } } - -static inline bool is_ipl_block_dump(void) -{ - if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) - return true; - if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && - ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) - return true; - return false; -} - -void setup_memory_end(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { - kaslr_enabled = 0; - } else if (ipl_block_valid && is_ipl_block_dump()) { - kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&memory_end) && memory_end) - memory_end_set = 1; - } -#endif -} diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index d844a5ef9089..0dd48fbdbaa4 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -7,6 +7,7 @@ #include <asm/cpacf.h> #include <asm/timex.h> #include <asm/sclp.h> +#include <asm/kasan.h> #include "compressed/decompressor.h" #include "boot.h" @@ -176,8 +177,14 @@ unsigned long get_random_base(unsigned long safe_addr) unsigned long kasan_needs; int i; - if (memory_end_set) - memory_limit = min(memory_limit, memory_end); + memory_limit = min(memory_limit, ident_map_size); + + /* + * Avoid putting kernel in the end of physical memory + * which kasan will use for shadow memory and early pgtable + * mapping allocations. + */ + memory_limit -= kasan_estimate_memory_needs(memory_limit); if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { if (safe_addr < INITRD_START + INITRD_SIZE) @@ -185,28 +192,6 @@ unsigned long get_random_base(unsigned long safe_addr) } safe_addr = ALIGN(safe_addr, THREAD_SIZE); - if ((IS_ENABLED(CONFIG_KASAN))) { - /* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the whole memory + shadow memory - * region (1 + 1/8). To keep page tables estimates simple take - * the double of combined ptes size. - */ - memory_limit = get_mem_detect_end(); - if (memory_end_set && memory_limit > memory_end) - memory_limit = memory_end; - - /* for shadow memory */ - kasan_needs = memory_limit / 8; - /* for paging structures */ - kasan_needs += (memory_limit + kasan_needs) / PAGE_SIZE / - _PAGE_ENTRIES * _PAGE_TABLE_SIZE * 2; - memory_limit -= kasan_needs; - } - kernel_size = vmlinux.image_size + vmlinux.bss_size; if (safe_addr + kernel_size > memory_limit) return 0; diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 62e7c13ce85c..40168e59abd3 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -8,7 +8,6 @@ #include "compressed/decompressor.h" #include "boot.h" -unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); /* up to 256 storage elements, 1020 subincrements each */ @@ -149,27 +148,29 @@ static void search_mem_end(void) add_mem_detect_block(0, (offset + 1) << 20); } -void detect_memory(void) +unsigned long detect_memory(void) { + unsigned long max_physmem_end; + sclp_early_get_memsize(&max_physmem_end); if (!sclp_early_read_storage_info()) { mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - return; + return max_physmem_end; } if (!diag260()) { mem_detect.info_source = MEM_DETECT_DIAG260; - return; + return max_physmem_end; } if (max_physmem_end) { add_mem_detect_block(0, max_physmem_end); mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - return; + return max_physmem_end; } search_mem_end(); mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - max_physmem_end = get_mem_detect_end(); + return get_mem_detect_end(); } diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index a3c9862bcede..3a46abed2549 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -1,99 +1,181 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> #include <asm/lowcore.h> #include <asm/setup.h> #include <asm/sclp.h> +#include <asm/uv.h> +#include <stdarg.h> #include "boot.h" const char hex_asc[] = "0123456789abcdef"; -#define add_val_as_hex(dst, val) \ - __add_val_as_hex(dst, (const unsigned char *)&val, sizeof(val)) +static char *as_hex(char *dst, unsigned long val, int pad) +{ + char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + + for (*p-- = 0; p >= dst; val >>= 4) + *p-- = hex_asc[val & 0x0f]; + return end; +} -static char *__add_val_as_hex(char *dst, const unsigned char *src, size_t count) +static char *symstart(char *p) { - while (count--) - dst = hex_byte_pack(dst, *src++); - return dst; + while (*p) + p--; + return p + 1; } -static char *add_str(char *dst, char *src) +extern char _decompressor_syms_start[], _decompressor_syms_end[]; +static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) { - strcpy(dst, src); - return dst + strlen(dst); + /* symbol entries are in a form "10000 c4 startup\0" */ + char *a = _decompressor_syms_start; + char *b = _decompressor_syms_end; + unsigned long start; + unsigned long size; + char *pivot; + char *endp; + + while (a < b) { + pivot = symstart(a + (b - a) / 2); + start = simple_strtoull(pivot, &endp, 16); + size = simple_strtoull(endp + 1, &endp, 16); + if (ip < start) { + b = pivot; + continue; + } + if (ip > start + size) { + a = pivot + strlen(pivot) + 1; + continue; + } + *off = ip - start; + *len = size; + return endp + 1; + } + return NULL; } -void print_pgm_check_info(void) +static noinline char *strsym(void *ip) { - struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); - unsigned short ilc = S390_lowcore.pgm_ilc >> 1; - char buf[256]; - int row, col; + static char buf[64]; + unsigned short off; + unsigned short len; char *p; - add_str(buf, "Linux version "); - strlcat(buf, kernel_version, sizeof(buf) - 1); - strlcat(buf, "\n", sizeof(buf)); - sclp_early_printk(buf); + p = findsym((unsigned long)ip, &off, &len); + if (p) { + strncpy(buf, p, sizeof(buf)); + /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ + p = buf + strnlen(buf, sizeof(buf) - 15); + strcpy(p, "+0x"); + p = as_hex(p + 3, off, 0); + strcpy(p, "/0x"); + as_hex(p + 3, len, 0); + } else { + as_hex(buf, (unsigned long)ip, 16); + } + return buf; +} - p = add_str(buf, "Kernel fault: interruption code "); - p = add_val_as_hex(buf + strlen(buf), S390_lowcore.pgm_code); - p = add_str(p, " ilc:"); - *p++ = hex_asc_lo(ilc); - add_str(p, "\n"); - sclp_early_printk(buf); +void decompressor_printk(const char *fmt, ...) +{ + char buf[1024] = { 0 }; + char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ + unsigned long pad; + char *p = buf; + va_list args; - if (kaslr_enabled) { - p = add_str(buf, "Kernel random base: "); - p = add_val_as_hex(p, __kaslr_offset); - add_str(p, "\n"); - sclp_early_printk(buf); + va_start(args, fmt); + for (; p < end && *fmt; fmt++) { + if (*fmt != '%') { + *p++ = *fmt; + continue; + } + pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + switch (*fmt) { + case 's': + p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + break; + case 'p': + if (*++fmt != 'S') + goto out; + p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + break; + case 'l': + if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned long), pad); + break; + case 'x': + if (end - p <= max(sizeof(int) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned int), pad); + break; + default: + goto out; + } } - - p = add_str(buf, "PSW : "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask); - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.addr); - add_str(p, "\n"); +out: + va_end(args); sclp_early_printk(buf); +} - p = add_str(buf, " R:"); - *p++ = hex_asc_lo(psw->per); - p = add_str(p, " T:"); - *p++ = hex_asc_lo(psw->dat); - p = add_str(p, " IO:"); - *p++ = hex_asc_lo(psw->io); - p = add_str(p, " EX:"); - *p++ = hex_asc_lo(psw->ext); - p = add_str(p, " Key:"); - *p++ = hex_asc_lo(psw->key); - p = add_str(p, " M:"); - *p++ = hex_asc_lo(psw->mcheck); - p = add_str(p, " W:"); - *p++ = hex_asc_lo(psw->wait); - p = add_str(p, " P:"); - *p++ = hex_asc_lo(psw->pstate); - p = add_str(p, " AS:"); - *p++ = hex_asc_lo(psw->as); - p = add_str(p, " CC:"); - *p++ = hex_asc_lo(psw->cc); - p = add_str(p, " PM:"); - *p++ = hex_asc_lo(psw->pm); - p = add_str(p, " RI:"); - *p++ = hex_asc_lo(psw->ri); - p = add_str(p, " EA:"); - *p++ = hex_asc_lo(psw->eaba); - add_str(p, "\n"); - sclp_early_printk(buf); +static noinline void print_stacktrace(void) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, BOOT_STACK_OFFSET, + BOOT_STACK_OFFSET + BOOT_STACK_SIZE }; + unsigned long sp = S390_lowcore.gpregs_save_area[15]; + bool first = true; - for (row = 0; row < 4; row++) { - p = add_str(buf, row == 0 ? "GPRS:" : " "); - for (col = 0; col < 4; col++) { - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.gpregs_save_area[row * 4 + col]); - } - add_str(p, "\n"); - sclp_early_printk(buf); + decompressor_printk("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : + " sp:%016lx [<%016lx>] %pS\n", + sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; } } + +void print_pgm_check_info(void) +{ + unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; + struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); + + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", + S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); + if (kaslr_enabled) + decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); + decompressor_printk("PSW : %016lx %016lx (%pS)\n", + S390_lowcore.psw_save_area.mask, + S390_lowcore.psw_save_area.addr, + (void *)S390_lowcore.psw_save_area.addr); + decompressor_printk( + " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, + psw->eaba); + decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n", + gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(); + decompressor_printk("Last Breaking-Event-Address:\n"); + decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.breaking_event_addr, + (void *)S390_lowcore.breaking_event_addr); +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cc96b04cc0ba..05f8eefa3dcf 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/string.h> #include <linux/elf.h> +#include <asm/boot_data.h> #include <asm/sections.h> +#include <asm/cpu_mf.h> #include <asm/setup.h> #include <asm/kexec.h> #include <asm/sclp.h> @@ -13,6 +15,7 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(ident_map_size); /* * Some code and data needs to stay below 2 GB, even when the kernel would be @@ -58,6 +61,14 @@ void error(char *x) disabled_wait(); } +static void setup_lpp(void) +{ + S390_lowcore.current_pid = 0; + S390_lowcore.lpp = LPP_MAGIC; + if (test_facility(40)) + lpp(&S390_lowcore.lpp); +} + #ifdef CONFIG_KERNEL_UNCOMPRESSED unsigned long mem_safe_offset(void) { @@ -119,6 +130,46 @@ static void handle_relocs(unsigned long offset) } /* + * Merge information from several sources into a single ident_map_size value. + * "ident_map_size" represents the upper limit of physical memory we may ever + * reach. It might not be all online memory, but also include standby (offline) + * memory. "ident_map_size" could be lower then actual standby or even online + * memory present, due to limiting factors. We should never go above this limit. + * It is the size of our identity mapping. + * + * Consider the following factors: + * 1. max_physmem_end - end of physical memory online or standby. + * Always <= end of the last online memory block (get_mem_detect_end()). + * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the + * kernel is able to support. + * 3. "mem=" kernel command line option which limits physical memory usage. + * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as + * crash kernel. + * 5. "hsa" size which is a memory limit when the kernel is executed during + * zfcp/nvme dump. + */ +static void setup_ident_map_size(unsigned long max_physmem_end) +{ + unsigned long hsa_size; + + ident_map_size = max_physmem_end; + if (memory_limit) + ident_map_size = min(ident_map_size, memory_limit); + ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS); + +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + kaslr_enabled = 0; + ident_map_size = min(ident_map_size, OLDMEM_SIZE); + } else if (ipl_block_valid && is_ipl_block_dump()) { + kaslr_enabled = 0; + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + ident_map_size = min(ident_map_size, hsa_size); + } +#endif +} + +/* * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. */ static void clear_bss_section(void) @@ -126,12 +177,27 @@ static void clear_bss_section(void) memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); } +/* + * Set vmalloc area size to an 8th of (potential) physical memory + * size, unless size has been set by kernel command line parameter. + */ +static void setup_vmalloc_size(void) +{ + unsigned long size; + + if (vmalloc_size_set) + return; + size = round_up(ident_map_size / 8, _SEGMENT_SIZE); + vmalloc_size = max(size, vmalloc_size); +} + void startup_kernel(void) { unsigned long random_lma; unsigned long safe_addr; void *img; + setup_lpp(); store_ipl_parmblock(); safe_addr = mem_safe_offset(); safe_addr = read_ipl_report(safe_addr); @@ -140,8 +206,8 @@ void startup_kernel(void) sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); - setup_memory_end(); - detect_memory(); + setup_ident_map_size(detect_memory()); + setup_vmalloc_size(); random_lma = __kaslr_offset = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index fe6f529ac82c..c52113a238b1 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -826,6 +826,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index dd95cdbd22ce..7b947728d57e 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,7 +2,7 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017, 2018 + * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger * * The s390_arch_random_generate() function may be called from random.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/static_key.h> #include <linux/workqueue.h> +#include <linux/moduleparam.h> #include <asm/cpacf.h> DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); @@ -99,6 +100,113 @@ static void arch_rng_refill_buffer(struct work_struct *unused) queue_delayed_work(system_long_wq, &arch_rng_work, delay); } +/* + * Here follows the implementation of s390_arch_get_random_long(). + * + * The random longs to be pulled by arch_get_random_long() are + * prepared in an 4K buffer which is filled from the NIST 800-90 + * compliant s390 drbg. By default the random long buffer is refilled + * 256 times before the drbg itself needs a reseed. The reseed of the + * drbg is done with 32 bytes fetched from the high quality (but slow) + * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 + * bits of entropy are spread over 256 * 4KB = 1MB serving 131072 + * arch_get_random_long() invocations before reseeded. + * + * How often the 4K random long buffer is refilled with the drbg + * before the drbg is reseeded can be adjusted. There is a module + * parameter 's390_arch_rnd_long_drbg_reseed' accessible via + * /sys/module/arch_random/parameters/rndlong_drbg_reseed + * or as kernel command line parameter + * arch_random.rndlong_drbg_reseed=<value> + * This parameter tells how often the drbg fills the 4K buffer before + * it is re-seeded by fresh entropy from the trng. + * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64 + * KB with 32 bytes of fresh entropy pulled from the trng. So a value + * of 16 would result in 256 bits entropy per 64 KB. + * A value of 256 results in 1MB of drbg output before a reseed of the + * drbg is done. So this would spread the 256 bits of entropy among 1MB. + * Setting this parameter to 0 forces the reseed to take place every + * time the 4K buffer is depleted, so the entropy rises to 256 bits + * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With + * setting this parameter to negative values all this effort is + * disabled, arch_get_random long() returns false and thus indicating + * that the arch_get_random_long() feature is disabled at all. + */ + +static unsigned long rndlong_buf[512]; +static DEFINE_SPINLOCK(rndlong_lock); +static int rndlong_buf_index; + +static int rndlong_drbg_reseed = 256; +module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600); +MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed"); + +static inline void refill_rndlong_buf(void) +{ + static u8 prng_ws[240]; + static int drbg_counter; + + if (--drbg_counter < 0) { + /* need to re-seed the drbg */ + u8 seed[32]; + + /* fetch seed from trng */ + cpacf_trng(NULL, 0, seed, sizeof(seed)); + /* seed drbg */ + memset(prng_ws, 0, sizeof(prng_ws)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_ws, NULL, 0, seed, sizeof(seed)); + /* re-init counter for drbg */ + drbg_counter = rndlong_drbg_reseed; + } + + /* fill the arch_get_random_long buffer from drbg */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws, + (u8 *) rndlong_buf, sizeof(rndlong_buf), + NULL, 0); +} + +bool s390_arch_get_random_long(unsigned long *v) +{ + bool rc = false; + unsigned long flags; + + /* arch_get_random_long() disabled ? */ + if (rndlong_drbg_reseed < 0) + return false; + + /* try to lock the random long lock */ + if (!spin_trylock_irqsave(&rndlong_lock, flags)) + return false; + + if (--rndlong_buf_index >= 0) { + /* deliver next long value from the buffer */ + *v = rndlong_buf[rndlong_buf_index]; + rc = true; + goto out; + } + + /* buffer is depleted and needs refill */ + if (in_interrupt()) { + /* delay refill in interrupt context to next caller */ + rndlong_buf_index = 0; + goto out; + } + + /* refill random long buffer */ + refill_rndlong_buf(); + rndlong_buf_index = ARRAY_SIZE(rndlong_buf); + + /* and provide one random long */ + *v = rndlong_buf[--rndlong_buf_index]; + rc = true; + +out: + spin_unlock_irqrestore(&rndlong_lock, flags); + return rc; +} +EXPORT_SYMBOL(s390_arch_get_random_long); + static int __init s390_arch_random_init(void) { /* all the needed PRNO subfunctions available ? */ diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 5057773f82e9..b2f219ec379c 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -674,20 +674,6 @@ static const struct file_operations prng_tdes_fops = { .llseek = noop_llseek, }; -static struct miscdevice prng_sha512_dev = { - .name = "prandom", - .minor = MISC_DYNAMIC_MINOR, - .mode = 0644, - .fops = &prng_sha512_fops, -}; -static struct miscdevice prng_tdes_dev = { - .name = "prandom", - .minor = MISC_DYNAMIC_MINOR, - .mode = 0644, - .fops = &prng_tdes_fops, -}; - - /* chunksize attribute (ro) */ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, @@ -801,18 +787,30 @@ static struct attribute *prng_sha512_dev_attrs[] = { &dev_attr_strength.attr, NULL }; +ATTRIBUTE_GROUPS(prng_sha512_dev); + static struct attribute *prng_tdes_dev_attrs[] = { &dev_attr_chunksize.attr, &dev_attr_byte_counter.attr, &dev_attr_mode.attr, NULL }; +ATTRIBUTE_GROUPS(prng_tdes_dev); -static struct attribute_group prng_sha512_dev_attr_group = { - .attrs = prng_sha512_dev_attrs +static struct miscdevice prng_sha512_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, + .fops = &prng_sha512_fops, + .groups = prng_sha512_dev_groups, }; -static struct attribute_group prng_tdes_dev_attr_group = { - .attrs = prng_tdes_dev_attrs + +static struct miscdevice prng_tdes_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, + .fops = &prng_tdes_fops, + .groups = prng_tdes_dev_groups, }; @@ -867,13 +865,6 @@ static int __init prng_init(void) prng_sha512_deinstantiate(); goto out; } - ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj, - &prng_sha512_dev_attr_group); - if (ret) { - misc_deregister(&prng_sha512_dev); - prng_sha512_deinstantiate(); - goto out; - } } else { @@ -898,14 +889,6 @@ static int __init prng_init(void) prng_tdes_deinstantiate(); goto out; } - ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj, - &prng_tdes_dev_attr_group); - if (ret) { - misc_deregister(&prng_tdes_dev); - prng_tdes_deinstantiate(); - goto out; - } - } out: @@ -916,13 +899,9 @@ out: static void __exit prng_exit(void) { if (prng_mode == PRNG_MODE_SHA512) { - sysfs_remove_group(&prng_sha512_dev.this_device->kobj, - &prng_sha512_dev_attr_group); misc_deregister(&prng_sha512_dev); prng_sha512_deinstantiate(); } else { - sysfs_remove_group(&prng_tdes_dev.this_device->kobj, - &prng_tdes_dev_attr_group); misc_deregister(&prng_tdes_dev); prng_tdes_deinstantiate(); } diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index de61ce562052..5dc712fde3c7 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017 + * Copyright IBM Corp. 2017, 2020 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -19,10 +19,13 @@ DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; +bool s390_arch_get_random_long(unsigned long *v); bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); static inline bool __must_check arch_get_random_long(unsigned long *v) { + if (static_branch_likely(&s390_arch_random_available)) + return s390_arch_get_random_long(v); return false; } diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index c0be5fe1ddba..e3e2ab0acf83 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -115,7 +115,7 @@ enum uc_todo { }; /** - * struct ccw driver - device driver for channel attached devices + * struct ccw_driver - device driver for channel attached devices * @ids: ids supported by this driver * @probe: function called on probe * @remove: function called on remove @@ -124,11 +124,6 @@ enum uc_todo { * @notify: notify driver of device state changes * @path_event: notify driver of channel path events * @shutdown: called at device shutdown - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure * @int_class: interruption class to use for accounting interrupts @@ -142,11 +137,6 @@ struct ccw_driver { int (*notify) (struct ccw_device *, int); void (*path_event) (struct ccw_device *, int *); void (*shutdown) (struct ccw_device *); - int (*prepare) (struct ccw_device *); - void (*complete) (struct ccw_device *); - int (*freeze)(struct ccw_device *); - int (*thaw) (struct ccw_device *); - int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; enum interruption_class int_class; diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 5c58756d6476..23dceb8d0453 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -329,7 +329,7 @@ struct ccw_dev_id { }; /** - * ccw_device_id_is_equal() - compare two ccw_dev_ids + * ccw_dev_id_is_equal() - compare two ccw_dev_ids * @dev_id1: a ccw_dev_id * @dev_id2: another ccw_dev_id * Returns: diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 898323fd93d2..4a08379cd1eb 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -13,6 +13,7 @@ #ifndef _S390_DELAY_H #define _S390_DELAY_H +void udelay_enable(void); void __ndelay(unsigned long long nsecs); void __udelay(unsigned long long usecs); void udelay_simple(unsigned long long usecs); diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 68d362f8d6c1..695c61989f97 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -2,16 +2,9 @@ #ifndef _ASM_S390_FTRACE_H #define _ASM_S390_FTRACE_H +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 - -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) #define MCOUNT_INSN_SIZE 6 -#else -#define MCOUNT_INSN_SIZE 24 -#define MCOUNT_RETURN_FIXUP 18 -#endif - -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #ifndef __ASSEMBLY__ @@ -22,7 +15,6 @@ #define ftrace_return_address(n) __builtin_return_address(n) #endif -void _mcount(void); void ftrace_caller(void); extern char ftrace_graph_caller_end; @@ -30,12 +22,20 @@ extern unsigned long ftrace_plt; struct dyn_arch_ftrace { }; -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR 0 #define FTRACE_ADDR ((unsigned long)ftrace_caller) #define KPROBE_ON_FTRACE_NOP 0 #define KPROBE_ON_FTRACE_CALL 1 +struct module; +struct dyn_ftrace; +/* + * Either -mhotpatch or -mnop-mcount is used - no explicit init is required + */ +static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { return 0; } +#define ftrace_init_nop ftrace_init_nop + static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; @@ -49,28 +49,17 @@ struct ftrace_insn { static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) /* brcl 0,0 */ insn->opc = 0xc004; insn->disp = 0; -#else - /* jg .+24 */ - insn->opc = 0xc0f4; - insn->disp = MCOUNT_INSN_SIZE / 2; -#endif #endif } static inline int is_ftrace_nop(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) if (insn->disp == 0) return 1; -#else - if (insn->disp == MCOUNT_INSN_SIZE / 2) - return 1; -#endif #endif return 0; } diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 26f9144562c9..c22debfcebf1 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -26,9 +26,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, newval, ret; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); switch (op) { case FUTEX_OP_SET: __futex_atomic_op("lr %2,%5\n", @@ -53,7 +51,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, default: ret = -ENOSYS; } - disable_sacf_uaccess(old_fs); if (!ret) *oval = oldval; @@ -64,10 +61,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - mm_segment_t old_fs; int ret; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" @@ -77,7 +72,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory"); - disable_sacf_uaccess(old_fs); *uval = oldval; return ret; } diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index e9bf486de136..76f351bd6645 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,28 +2,51 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H +#include <asm/pgtable.h> + #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 -#ifdef CONFIG_KASAN_S390_4_LEVEL_PAGING #define KASAN_SHADOW_SIZE \ (_AC(1, UL) << (_REGION1_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#else -#define KASAN_SHADOW_SIZE \ - (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#endif #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) extern void kasan_early_init(void); -extern void kasan_copy_shadow(pgd_t *dst); +extern void kasan_copy_shadow_mapping(void); extern void kasan_free_early_identity(void); extern unsigned long kasan_vmax; + +/* + * Estimate kasan memory requirements, which it will reserve + * at the very end of available physical memory. To estimate + * that, we take into account that kasan would require + * 1/8 of available physical memory (for shadow memory) + + * creating page tables for the whole memory + shadow memory + * region (1 + 1/8). To keep page tables estimates simple take + * the double of combined ptes size. + * + * physmem parameter has to be already adjusted if not entire physical memory + * would be used (e.g. due to effect of "mem=" option). + */ +static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) +{ + unsigned long kasan_needs; + unsigned long pages; + /* for shadow memory */ + kasan_needs = round_up(physmem / 8, PAGE_SIZE); + /* for paging structures */ + pages = DIV_ROUND_UP(physmem + kasan_needs, PAGE_SIZE); + kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; + + return kasan_needs; +} #else static inline void kasan_early_init(void) { } -static inline void kasan_copy_shadow(pgd_t *dst) { } +static inline void kasan_copy_shadow_mapping(void) { } static inline void kasan_free_early_identity(void) { } +static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; } #endif #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 612ed3c6d581..69ce9191eaf1 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -116,7 +116,7 @@ struct lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0380 */ __u64 user_asce; /* 0x0388 */ - __u64 vdso_asce; /* 0x0390 */ + __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ /* * The lpp and current_pid fields form a @@ -134,7 +134,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u32 fpu_flags; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u64 vdso_per_cpu_data; /* 0x03c0 */ + __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ __u64 machine_flags; /* 0x03c8 */ __u64 gmap; /* 0x03d0 */ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c9f3d8a52756..5dc49c467319 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -71,39 +71,18 @@ static inline int init_new_context(struct task_struct *tsk, #define destroy_context(mm) do { } while (0) -static inline void set_user_asce(struct mm_struct *mm) -{ - S390_lowcore.user_asce = mm->context.asce; - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); -} - -static inline void clear_user_asce(void) -{ - S390_lowcore.user_asce = S390_lowcore.kernel_asce; - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); -} - -mm_segment_t enable_sacf_uaccess(void); -void disable_sacf_uaccess(mm_segment_t old_fs); - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce; + if (next == &init_mm) + S390_lowcore.user_asce = s390_invalid_asce; + else + S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear previous user-ASCE from CR1 and CR7 */ - if (!test_cpu_flag(CIF_ASCE_PRIMARY)) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (test_cpu_flag(CIF_ASCE_SECONDARY)) { - __ctl_load(S390_lowcore.vdso_asce, 7, 7); - clear_cpu_flag(CIF_ASCE_SECONDARY); - } + /* Clear previous user-ASCE from CR7 */ + __ctl_load(s390_invalid_asce, 7, 7); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -122,7 +101,7 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } - set_fs(current->thread.mm_segment); + __ctl_load(S390_lowcore.user_asce, 7, 7); } #define enter_lazy_tlb(mm,tsk) do { } while (0) @@ -133,7 +112,7 @@ static inline void activate_mm(struct mm_struct *prev, { switch_mm(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - set_user_asce(next); + __ctl_load(S390_lowcore.user_asce, 7, 7); } #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b5dbae78969b..794746a32806 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -23,6 +23,7 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +extern unsigned long s390_invalid_asce; enum { PG_DIRECT_MAP_4K = 0, @@ -79,15 +80,15 @@ extern unsigned long zero_page_mask; /* * The vmalloc and module area will always be on the topmost area of the - * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. - * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where - * modules will reside. That makes sure that inter module branches always - * happen without trampolines and in addition the placement within a 2GB frame - * is branch prediction unit friendly. + * kernel mapping. 512GB are reserved for vmalloc by default. + * At the top of the vmalloc area a 2GB area is reserved where modules + * will reside. That makes sure that inter module branches always + * happen without trampolines and in addition the placement within a + * 2GB frame is branch prediction unit friendly. */ extern unsigned long VMALLOC_START; extern unsigned long VMALLOC_END; -#define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN) +#define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) extern struct page *vmemmap; extern unsigned long vmemmap_size; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 962da04234af..6b7269f51f83 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,8 +14,6 @@ #include <linux/bits.h> -#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */ -#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_FPU 3 /* restore FPU registers */ #define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ @@ -23,8 +21,6 @@ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY) -#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_FPU BIT(CIF_FPU) #define _CIF_IGNORE_IRQ BIT(CIF_IGNORE_IRQ) @@ -102,8 +98,6 @@ extern void __bpon(void); #define HAVE_ARCH_PICK_MMAP_LAYOUT -typedef unsigned int mm_segment_t; - /* * Thread structure */ @@ -116,7 +110,6 @@ struct thread_struct { unsigned long hardirq_timer; /* task cputime in hardirq context */ unsigned long softirq_timer; /* task cputime in softirq context */ unsigned long sys_call_table; /* system call table address */ - mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_int_code; /* int code of last gmap fault */ @@ -318,14 +311,10 @@ static __always_inline void __noreturn disabled_wait(void) } /* - * Basic Machine Check/Program Check Handler. + * Basic Program Check Handler. */ - extern void s390_base_pgm_handler(void); -extern void s390_base_ext_handler(void); - extern void (*s390_base_pgm_handler_fn)(void); -extern void (*s390_base_ext_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 16b3e4396312..73ca7f7cac33 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -87,6 +87,7 @@ struct pt_regs unsigned int int_parm; unsigned long int_parm_long; unsigned long flags; + unsigned long cr1; }; /* diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index a7bdd128d85b..5763769a39b6 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -12,7 +12,12 @@ #include <asm/cpu.h> #define SCLP_CHP_INFO_MASK_SIZE 32 -#define SCLP_MAX_CORES 256 +#define EARLY_SCCB_SIZE PAGE_SIZE +#define SCLP_MAX_CORES 512 +/* 144 + 16 * SCLP_MAX_CORES + 2 * (SCLP_MAX_CORES - 1) */ +#define EXT_SCCB_READ_SCP (3 * PAGE_SIZE) +/* 24 + 16 * SCLP_MAX_CORES */ +#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) struct sclp_chp_info { u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index bdb242a1544e..3e388fa208d4 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -16,8 +16,6 @@ #define EARLY_SCCB_OFFSET 0x11000 #define HEAD_END 0x12000 -#define EARLY_SCCB_SIZE PAGE_SIZE - /* * Machine features detected in early.c */ @@ -88,10 +86,8 @@ extern unsigned int zlib_dfltcc_support; #define ZLIB_DFLTCC_FULL_DEBUG 4 extern int noexec_disabled; -extern int memory_end_set; -extern unsigned long memory_end; +extern unsigned long ident_map_size; extern unsigned long vmalloc_size; -extern unsigned long max_physmem_end; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 13a04fcf7762..ce788f3e534d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -18,7 +18,7 @@ #else #define THREAD_SIZE_ORDER 2 #endif -#define BOOT_STACK_ORDER 2 +#define BOOT_STACK_SIZE (PAGE_SIZE << 2) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 289aaff4d365..c8e244ecdfde 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -49,6 +49,13 @@ static inline void set_clock_comparator(__u64 time) asm volatile("sckc %0" : : "Q" (time)); } +static inline void set_tod_programmable_field(u16 val) +{ + register unsigned long reg0 asm("0") = val; + + asm volatile("sckpf" : : "d" (reg0)); +} + void clock_comparator_work(void); void __init time_early_init(void); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c868e7ee49b3..c6707885e7c2 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,23 +18,7 @@ #include <asm/extable.h> #include <asm/facility.h> -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define KERNEL_DS (0) -#define KERNEL_DS_SACF (1) -#define USER_DS (2) -#define USER_DS_SACF (3) - -#define get_fs() (current->thread.mm_segment) -#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) - -void set_fs(mm_segment_t fs); +void debug_user_asce(void); static inline int __range_ok(unsigned long addr, unsigned long size) { @@ -88,7 +72,7 @@ int __get_user_bad(void) __attribute__((noreturn)); static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x010000UL; + unsigned long spec = 0x810000UL; int rc; switch (size) { @@ -121,7 +105,7 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x01UL; + unsigned long spec = 0x81UL; int rc; switch (size) { diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 29b44a930e71..f65590889054 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,32 +12,9 @@ #ifndef __ASSEMBLY__ -/* - * Note about the vdso_data and vdso_per_cpu_data structures: - * - * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the - * structure is supposed to be known only to the function in the vdso - * itself and may change without notice. - */ - -struct vdso_per_cpu_data { - /* - * Note: node_id and cpu_nr must be at adjacent memory locations. - * VDSO userspace must read both values with a single instruction. - */ - union { - __u64 getcpu_val; - struct { - __u32 node_id; - __u32 cpu_nr; - }; - }; -}; - extern struct vdso_data *vdso_data; -int vdso_alloc_per_cpu(struct lowcore *lowcore); -void vdso_free_per_cpu(struct lowcore *lowcore); +void vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 483051e10db3..79724d861dc9 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -13,7 +13,6 @@ #include <linux/purgatory.h> #include <linux/pgtable.h> #include <asm/idle.h> -#include <asm/vdso.h> #include <asm/gmap.h> #include <asm/nmi.h> #include <asm/stacktrace.h> @@ -48,6 +47,7 @@ int main(void) OFFSET(__PT_INT_PARM, pt_regs, int_parm); OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); OFFSET(__PT_FLAGS, pt_regs, flags); + OFFSET(__PT_CR1, pt_regs, cr1); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); /* stack_frame offsets */ @@ -59,8 +59,6 @@ int main(void) OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]); BLANK(); - OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); - BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); @@ -138,12 +136,11 @@ int main(void) OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce); - OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); - OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data); OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index b79e0fd571f8..d255c69c1779 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -11,32 +11,10 @@ #include <asm/asm-offsets.h> #include <asm/nospec-insn.h> #include <asm/ptrace.h> -#include <asm/sigp.h> GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 -ENTRY(s390_base_ext_handler) - stmg %r0,%r15,__LC_SAVE_AREA_ASYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_ext_handler_fn - lg %r9,0(%r1) - ltgr %r9,%r9 - jz 1f - BASR_EX %r14,%r9 -1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC - ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit - lpswe __LC_EXT_OLD_PSW -ENDPROC(s390_base_ext_handler) - - .section .bss - .align 8 - .globl s390_base_ext_handler_fn -s390_base_ext_handler_fn: - .quad 0 - .previous - ENTRY(s390_base_pgm_handler) stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 705844f73934..cc89763a4d3c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -169,12 +169,10 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; + psw.addr = (unsigned long)s390_base_pgm_handler; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; if (IS_ENABLED(CONFIG_KASAN)) psw.mask |= PSW_MASK_DAT; - psw.addr = (unsigned long) s390_base_ext_handler; - S390_lowcore.external_new_psw = psw; - psw.addr = (unsigned long) s390_base_pgm_handler; S390_lowcore.program_new_psw = psw; s390_base_pgm_handler_fn = early_pgm_check_handler; S390_lowcore.preempt_count = INIT_PREEMPT_COUNT; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 92beb1444644..8bb9ebb71c4b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -55,7 +55,7 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU) +_CIF_WORK = (_CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) _LPP_OFFSET = __LC_LPP @@ -90,6 +90,12 @@ _LPP_OFFSET = __LC_LPP #endif .endm + .macro DEBUG_USER_ASCE +#ifdef CONFIG_DEBUG_USER_ASCE + brasl %r14,debug_user_asce +#endif + .endm + .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 @@ -110,9 +116,9 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer + .macro SWITCH_ASYNC savearea,timer,clock tmhh %r8,0x0001 # interrupting from user ? - jnz 2f + jnz 4f #if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 larl %r13,.Lsie_gmap @@ -125,10 +131,26 @@ _LPP_OFFSET = __LC_LPP #endif 0: larl %r13,.Lpsw_idle_exit cgr %r13,%r9 - jne 1f + jne 3f - mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz 2f # no SMT, skip mt_cycles calculation + .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) + larl %r3,mt_cycles + ag %r3,__LC_PERCPU_OFFSET + la %r4,__SF_EMPTY+16(%r15) +1: lg %r0,0(%r3) + slg %r0,0(%r4) + alg %r0,64(%r4) + stg %r0,0(%r3) + la %r3,8(%r3) + la %r4,8(%r4) + brct %r1,1b + +2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock + mvc __TIMER_IDLE_EXIT(8,%r2), \timer # account system time going idle ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT @@ -146,17 +168,17 @@ _LPP_OFFSET = __LC_LPP mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) nihh %r8,0xfcfd # clear wait state and irq bits -1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? +3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 3f + jnz 5f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 4f -2: UPDATE_VTIME %r14,%r15,\timer + j 6f +4: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -3: lg %r15,__LC_ASYNC_STACK # load async stack -4: la %r11,STACK_FRAME_OVERHEAD(%r15) +5: lg %r15,__LC_ASYNC_STACK # load async stack +6: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -327,7 +349,7 @@ ENTRY(sie64a) BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There @@ -380,6 +402,7 @@ ENTRY(system_call) lg %r12,__LC_CURRENT lghi %r14,_PIF_SYSCALL .Lsysc_per: + lctlg %c1,%c1,__LC_KERNEL_ASCE lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs @@ -427,11 +450,9 @@ ENTRY(system_call) jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work - TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU) - jnz .Lsysc_work + DEBUG_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP -.Lsysc_restore: - DISABLE_INTS TSTMSK __LC_CPU_FLAGS, _CIF_FPU jz .Lsysc_skip_fpu brasl %r14,load_fpu_regs @@ -469,8 +490,6 @@ ENTRY(system_call) jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume - TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) - jnz .Lsysc_asce j .Lsysc_return # @@ -481,26 +500,6 @@ ENTRY(system_call) jg schedule # -# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce -# -.Lsysc_asce: - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY - lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY - jz .Lsysc_return -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES - tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? - jnz .Lsysc_set_fs_fixup - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - j .Lsysc_return -.Lsysc_set_fs_fixup: -#endif - larl %r14,.Lsysc_return - jg set_fs_fixup - - -# # _TIF_SIGPENDING is set, call do_signal # .Lsysc_sigpending: @@ -636,8 +635,11 @@ ENTRY(pgm_check_handler) 0: lg %r12,__LC_CURRENT lghi %r11,0 lmg %r8,%r9,__LC_PGM_OLD_PSW - tmhh %r8,0x0001 # test problem state bit - jnz 3f # -> fault in user space + tmhh %r8,0x0001 # coming from user space? + jno .Lpgm_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE + j 3f +.Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 @@ -648,7 +650,7 @@ ENTRY(pgm_check_handler) jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif @@ -709,10 +711,20 @@ ENTRY(pgm_check_handler) .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lsysc_restore + jno .Lpgm_restore TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL jo .Lsysc_do_syscall j .Lsysc_tif +.Lpgm_restore: + DISABLE_INTS + TSTMSK __LC_CPU_FLAGS, _CIF_FPU + jz .Lpgm_skip_fpu + brasl %r14,load_fpu_regs +.Lpgm_skip_fpu: + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE # # PER event in supervisor state, must be kprobes @@ -745,7 +757,7 @@ ENTRY(io_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -759,6 +771,10 @@ ENTRY(io_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # coming from user space? + jno .Lio_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE +.Lio_skip_asce: mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ @@ -790,6 +806,8 @@ ENTRY(io_int_handler) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel + DEBUG_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER .Lio_exit_kernel: @@ -855,30 +873,9 @@ ENTRY(io_int_handler) jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs - TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) - jnz .Lio_asce j .Lio_return # -# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce -# -.Lio_asce: - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY - lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY - jz .Lio_return -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES - tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? - jnz .Lio_set_fs_fixup - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - j .Lio_return -.Lio_set_fs_fixup: -#endif - larl %r14,.Lio_return - jg set_fs_fixup - -# # CIF_FPU is set, restore floating-point controls and floating-point registers. # .Lio_vxrs: @@ -945,7 +942,7 @@ ENTRY(ext_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -959,6 +956,10 @@ ENTRY(ext_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # coming from user space? + jno .Lext_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE +.Lext_skip_asce: lghi %r1,__LC_EXT_PARAMS2 mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS @@ -1167,7 +1168,7 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) @@ -1183,6 +1184,9 @@ ENTRY(mcck_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) + la %r14,4095 + mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) + lctlg %c1,%c1,__LC_KERNEL_ASCE xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -1198,6 +1202,7 @@ ENTRY(mcck_int_handler) brasl %r14,s390_handle_mcck TRACE_IRQS_ON .Lmcck_return: + lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? @@ -1274,7 +1279,7 @@ ENDPROC(stack_overflow) 1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE larl %r9,sie_exit # skip forward to sie_exit BR_EX %r14,%r11 diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index d2ca3fe51f8e..a16c33b32ab0 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); void gs_load_bc_cb(struct pt_regs *regs); -void set_fs_fixup(void); unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index b388e87a08bf..ebc1284a618b 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -22,56 +22,26 @@ #include "entry.h" /* - * The mcount code looks like this: - * stg %r14,8(%r15) # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. Only the first instruction will be patched - * by ftrace_make_call / ftrace_make_nop. - * The enabled ftrace code block looks like this: + * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) + * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags + * (since gcc 9 / clang 10) is used. + * In both cases the original and also the disabled function prologue contains + * only a single six byte instruction and looks like this: + * > brcl 0,0 # offset 0 + * To enable ftrace the code gets patched like above and afterwards looks + * like this: * > brasl %r0,ftrace_caller # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 + * + * The instruction will be patched by ftrace_make_call / ftrace_make_nop. * The ftrace function gets called with a non-standard C function call ABI * where r0 contains the return address. It is also expected that the called * function only clobbers r0 and r1, but restores r2-r15. * For module code we can't directly jump to ftrace caller, but need a * trampoline (ftrace_plt), which clobbers also r1. - * The return point of the ftrace function has offset 24, so execution - * continues behind the mcount block. - * The disabled ftrace code block looks like this: - * > jg .+24 # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * The jg instruction branches to offset 24 to skip as many instructions - * as possible. - * In case we use gcc's hotpatch feature the original and also the disabled - * function prologue contains only a single six byte instruction and looks - * like this: - * > brcl 0,0 # offset 0 - * To enable ftrace the code gets patched like above and afterwards looks - * like this: - * > brasl %r0,ftrace_caller # offset 0 */ unsigned long ftrace_plt; -static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) -{ -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) - /* brcl 0,0 */ - insn->opc = 0xc004; - insn->disp = 0; -#else - /* stg r14,8(r15) */ - insn->opc = 0xe3e0; - insn->disp = 0xf0080024; -#endif -} - int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -85,15 +55,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (addr == MCOUNT_ADDR) { - /* Initial code replacement */ - ftrace_generate_orig_insn(&orig); - ftrace_generate_nop_insn(&new); - } else { - /* Replace ftrace call with a nop. */ - ftrace_generate_call_insn(&orig, rec->ip); - ftrace_generate_nop_insn(&new); - } + /* Replace ftrace call with a nop. */ + ftrace_generate_call_insn(&orig, rec->ip); + ftrace_generate_nop_insn(&new); + /* Verify that the to be replaced code matches what we expect. */ if (memcmp(&orig, &old, sizeof(old))) return -EINVAL; diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 8b88dbbda7df..0c253886da78 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? - jz 0f - xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid - mvi __LC_LPP,0x80 # and set LPP_MAGIC - .insn s,0xb2800000,__LC_LPP # load program parameter -0: larl %r1,tod_clock_base + larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base # diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7458dcfd6464..faf64c2f90f5 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -33,11 +33,6 @@ ENDPROC(ftrace_stub) #define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD #endif -ENTRY(_mcount) - BR_EX %r14 -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) - ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller @@ -46,9 +41,6 @@ ENTRY(ftrace_caller) ipm %r14 # don't put any instructions sllg %r14,%r14,16 # clobbering CC before this point lgr %r1,%r15 -#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) - aghi %r0,MCOUNT_RETURN_FIXUP -#endif # allocate stack frame for ftrace_caller to contain traced function aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ec801d3bbb37..bc3ca54edfb4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -94,7 +94,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); @@ -208,16 +207,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) ret = PAGE_ALIGN(mm->brk + brk_rnd()); return (ret > mm->brk) ? ret : mm->brk; } - -void set_fs_fixup(void) -{ - struct pt_regs *regs = current_pt_regs(); - static bool warned; - - set_fs(USER_DS); - if (warned) - return; - WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code); - show_registers(regs); - warned = true; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4d843e64496f..1f16a03be995 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include <linux/memory.h> #include <linux/compat.h> #include <linux/start_kernel.h> +#include <linux/hugetlb.h> #include <asm/boot_data.h> #include <asm/ipl.h> @@ -94,10 +95,8 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; int __bootdata(noexec_disabled); -int __bootdata(memory_end_set); -unsigned long __bootdata(memory_end); +unsigned long __bootdata(ident_map_size); unsigned long __bootdata(vmalloc_size); -unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); @@ -336,6 +335,7 @@ int __init arch_early_irq_init(void) if (!stack) panic("Couldn't allocate async stack"); S390_lowcore.async_stack = stack + STACK_INIT_OFFSET; + udelay_enable(); return 0; } @@ -556,24 +556,25 @@ static void __init setup_resources(void) #endif } -static void __init setup_memory_end(void) +static void __init setup_ident_map_size(void) { unsigned long vmax, tmp; /* Choose kernel address space layout: 3 or 4 levels. */ - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = ident_map_size / PAGE_SIZE; tmp = tmp * (sizeof(struct page) + PAGE_SIZE); if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) vmax = _REGION2_SIZE; /* 3-level kernel page table */ else vmax = _REGION1_SIZE; /* 4-level kernel page table */ + /* module area is at the end of the kernel address space. */ + MODULES_END = vmax; if (is_prot_virt_host()) - adjust_to_uv_max(&vmax); + adjust_to_uv_max(&MODULES_END); #ifdef CONFIG_KASAN - vmax = kasan_vmax; + vmax = _REGION1_SIZE; + MODULES_END = kasan_vmax; #endif - /* module area is at the end of the kernel address space. */ - MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; VMALLOC_START = VMALLOC_END - vmalloc_size; @@ -587,22 +588,22 @@ static void __init setup_memory_end(void) tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); vmemmap = (struct page *) tmp; - /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); + /* Take care that ident_map_size <= vmemmap */ + ident_map_size = min(ident_map_size, (unsigned long)vmemmap); #ifdef CONFIG_KASAN - memory_end = min(memory_end, KASAN_SHADOW_START); + ident_map_size = min(ident_map_size, KASAN_SHADOW_START); #endif - vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); + vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); #ifdef CONFIG_KASAN /* move vmemmap above kasan shadow only if stands in a way */ if (KASAN_SHADOW_END > (unsigned long)vmemmap && (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif - max_pfn = max_low_pfn = PFN_DOWN(memory_end); - memblock_remove(memory_end, ULONG_MAX); + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); + memblock_remove(ident_map_size, ULONG_MAX); - pr_notice("The maximum memory size is %luMB\n", memory_end >> 20); + pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } #ifdef CONFIG_CRASH_DUMP @@ -632,12 +633,11 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that the area behind memory_end is protected + * Make sure that the area above identity mapping is protected */ -static void __init reserve_memory_end(void) +static void __init reserve_above_ident_map(void) { - if (memory_end_set) - memblock_reserve(memory_end, ULONG_MAX); + memblock_reserve(ident_map_size, ULONG_MAX); } /* @@ -674,7 +674,7 @@ static void __init reserve_crashkernel(void) phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, &crash_base); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); @@ -1128,7 +1128,7 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ - reserve_memory_end(); + reserve_above_ident_map(); reserve_oldmem(); reserve_kernel(); reserve_initrd(); @@ -1143,10 +1143,12 @@ void __init setup_arch(char **cmdline_p) remove_oldmem(); setup_uv(); - setup_memory_end(); + setup_ident_map_size(); setup_memory(); - dma_contiguous_reserve(memory_end); + dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); + if (MACHINE_HAS_EDAT2) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); check_initrd(); reserve_crashkernel(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 390d97daa2b3..27c763014114 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,7 +47,6 @@ #include <asm/vtimer.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include <asm/vdso.h> #include <asm/debug.h> #include <asm/os_info.h> #include <asm/sigp.h> @@ -55,6 +54,7 @@ #include <asm/nmi.h> #include <asm/stacktrace.h> #include <asm/topology.h> +#include <asm/vdso.h> #include "entry.h" enum { @@ -217,14 +217,10 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; - if (vdso_alloc_per_cpu(lc)) - goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; -out_mcesa: - nmi_free_per_cpu(lc); out_async: stack_free(async_stack); out: @@ -245,7 +241,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; - vdso_free_per_cpu(pcpu->lowcore); nmi_free_per_cpu(pcpu->lowcore); stack_free(async_stack); if (pcpu == &pcpu_devices[0]) @@ -265,13 +260,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; - lc->user_asce = S390_lowcore.kernel_asce; + lc->user_asce = s390_invalid_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); lc->cregs_save_area[1] = lc->kernel_asce; - lc->cregs_save_area[7] = lc->vdso_asce; + lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -859,13 +854,12 @@ static void smp_init_secondary(void) S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); - set_cpu_flag(CIF_ASCE_PRIMARY); - set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); + vdso_getcpu_init(); pfault_init(); notify_cpu_starting(cpu); if (topology_cpu_dedicated(cpu)) @@ -896,24 +890,12 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu; - int base, i, rc; + struct pcpu *pcpu = pcpu_devices + cpu; + int rc; - pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - base = smp_get_base_cpu(cpu); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (base + i < nr_cpu_ids) - if (cpu_online(base + i)) - break; - } - /* - * If this is the first CPU of the core to get online - * do an initial CPU reset. - */ - if (i > smp_cpu_mtid && - pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 0ac30ee2c633..c59cb44fbb7d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -927,41 +927,25 @@ static ssize_t online_store(struct device *dev, */ static DEVICE_ATTR_RW(online); -static struct device_attribute *stp_attributes[] = { - &dev_attr_ctn_id, - &dev_attr_ctn_type, - &dev_attr_dst_offset, - &dev_attr_leap_seconds, - &dev_attr_online, - &dev_attr_leap_seconds_scheduled, - &dev_attr_stratum, - &dev_attr_time_offset, - &dev_attr_time_zone_offset, - &dev_attr_timing_mode, - &dev_attr_timing_state, +static struct attribute *stp_dev_attrs[] = { + &dev_attr_ctn_id.attr, + &dev_attr_ctn_type.attr, + &dev_attr_dst_offset.attr, + &dev_attr_leap_seconds.attr, + &dev_attr_online.attr, + &dev_attr_leap_seconds_scheduled.attr, + &dev_attr_stratum.attr, + &dev_attr_time_offset.attr, + &dev_attr_time_zone_offset.attr, + &dev_attr_timing_mode.attr, + &dev_attr_timing_state.attr, NULL }; +ATTRIBUTE_GROUPS(stp_dev); static int __init stp_init_sysfs(void) { - struct device_attribute **attr; - int rc; - - rc = subsys_system_register(&stp_subsys, NULL); - if (rc) - goto out; - for (attr = stp_attributes; *attr; attr++) { - rc = device_create_file(stp_subsys.dev_root, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= stp_attributes; attr--) - device_remove_file(stp_subsys.dev_root, *attr); - bus_unregister(&stp_subsys); -out: - return rc; + return subsys_system_register(&stp_subsys, stp_dev_groups); } device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f9da5b149141..aef2edff9959 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -29,6 +29,7 @@ #include <asm/sections.h> #include <asm/vdso.h> #include <asm/facility.h> +#include <asm/timex.h> extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; @@ -99,60 +100,10 @@ static union { u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; -/* - * Allocate/free per cpu vdso data. - */ -#define SEGMENT_ORDER 2 -int vdso_alloc_per_cpu(struct lowcore *lowcore) +void vdso_getcpu_init(void) { - unsigned long segment_table, page_table, page_frame; - struct vdso_per_cpu_data *vd; - - segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); - page_table = get_zeroed_page(GFP_KERNEL); - page_frame = get_zeroed_page(GFP_KERNEL); - if (!segment_table || !page_table || !page_frame) - goto out; - arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); - arch_set_page_dat(virt_to_page(page_table), 0); - - /* Initialize per-cpu vdso data page */ - vd = (struct vdso_per_cpu_data *) page_frame; - vd->cpu_nr = lowcore->cpu_nr; - vd->node_id = cpu_to_node(vd->cpu_nr); - - /* Set up page table for the vdso address space */ - memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); - memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); - - *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; - - lowcore->vdso_asce = segment_table + - _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; - lowcore->vdso_per_cpu_data = page_frame; - - return 0; - -out: - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); - return -ENOMEM; -} - -void vdso_free_per_cpu(struct lowcore *lowcore) -{ - unsigned long segment_table, page_table, page_frame; - - segment_table = lowcore->vdso_asce & PAGE_MASK; - page_table = *(unsigned long *) segment_table; - page_frame = *(unsigned long *) page_table; - - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); + set_tod_programmable_field(smp_processor_id()); } /* @@ -225,6 +176,7 @@ static int __init vdso_init(void) { int i; + vdso_getcpu_init(); /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; @@ -240,8 +192,6 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; - if (vdso_alloc_per_cpu(&S390_lowcore)) - BUG(); get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 13cc5a3f9abf..a6e0fb6b91d6 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -6,8 +6,9 @@ ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT include $(srctree)/lib/vdso/Makefile -obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o -obj-cvdso64 = vdso64_generic.o +obj-vdso64 = vdso_user_wrapper.o note.o +obj-cvdso64 = vdso64_generic.o getcpu.o +CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) # Build rules diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S deleted file mode 100644 index 3c04f7328500..000000000000 --- a/arch/s390/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of getcpu() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/dwarf.h> - - .text - .align 4 - .globl __kernel_getcpu - .type __kernel_getcpu,@function -__kernel_getcpu: - CFI_STARTPROC - sacf 256 - lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) - sacf 0 - ltgr %r2,%r2 - jz 2f - st %r5,0(%r2) -2: ltgr %r3,%r3 - jz 3f - st %r4,0(%r3) -3: lghi %r2,0 - br %r14 - CFI_ENDPROC - .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c new file mode 100644 index 000000000000..5b2bc7494d5b --- /dev/null +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright IBM Corp. 2020 */ + +#include <linux/compiler.h> +#include <linux/getcpu.h> +#include <asm/timex.h> +#include "vdso.h" + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + __u16 todval[8]; + + /* CPU number is stored in the programmable field of the TOD clock */ + get_tod_clock_ext((char *)todval); + if (cpu) + *cpu = todval[7]; + /* NUMA node is always zero */ + if (node) + *node = 0; + return 0; +} diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h new file mode 100644 index 000000000000..34c7a2312f9d --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H +#define __ARCH_S390_KERNEL_VDSO64_VDSO_H + +#include <vdso/datapage.h> + +struct getcpu_cache; + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); + +#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 7ddb116b5e2e..7bde3909290f 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -136,7 +136,6 @@ VERSION __kernel_clock_gettime; __kernel_clock_getres; __kernel_getcpu; - local: *; }; } diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c index a8cef7e4d137..a9aa75643c08 100644 --- a/arch/s390/kernel/vdso64/vdso64_generic.c +++ b/arch/s390/kernel/vdso64/vdso64_generic.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../../../lib/vdso/gettimeofday.c" +#include "vdso.h" int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index a775d7e52872..f773505c7e63 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -36,3 +36,4 @@ __kernel_\func: vdso_func gettimeofday vdso_func clock_getres vdso_func clock_gettime +vdso_func getcpu diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 177ccfbda40a..4c0e19145cc6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -10,7 +10,8 @@ * Put .bss..swapper_pg_dir as the first thing in .bss. This will * make sure it has 16k alignment. */ -#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \ + *(.bss..invalid_pg_dir) /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 8c0c68e7770e..68d61f2835df 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -13,11 +13,19 @@ #include <linux/export.h> #include <linux/irqflags.h> #include <linux/interrupt.h> +#include <linux/jump_label.h> #include <linux/irq.h> #include <asm/vtimer.h> #include <asm/div64.h> #include <asm/idle.h> +static DEFINE_STATIC_KEY_FALSE(udelay_ready); + +void __init udelay_enable(void) +{ + static_branch_enable(&udelay_ready); +} + void __delay(unsigned long loops) { /* @@ -76,6 +84,11 @@ void __udelay(unsigned long long usecs) { unsigned long flags; + if (!static_branch_likely(&udelay_ready)) { + udelay_simple(usecs); + return; + } + preempt_disable(); local_irq_save(flags); if (in_irq()) { diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 0267405ab7c6..e8f642446fed 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -16,6 +16,22 @@ #include <asm/mmu_context.h> #include <asm/facility.h> +#ifdef CONFIG_DEBUG_USER_ASCE +void debug_user_asce(void) +{ + unsigned long cr1, cr7; + + __ctl_store(cr1, 1, 1); + __ctl_store(cr7, 7, 7); + if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) + return; + panic("incorrect ASCE on kernel exit\n" + "cr1: %016lx cr7: %016lx\n" + "kernel: %016llx user: %016llx\n", + cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); +} +#endif /*CONFIG_DEBUG_USER_ASCE */ + #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); @@ -40,71 +56,10 @@ static inline int copy_with_mvcos(void) } #endif -void set_fs(mm_segment_t fs) -{ - current->thread.mm_segment = fs; - if (fs == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } else { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (fs & 1) { - if (fs == USER_DS_SACF) - __ctl_load(S390_lowcore.user_asce, 7, 7); - else - __ctl_load(S390_lowcore.kernel_asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); - } -} -EXPORT_SYMBOL(set_fs); - -mm_segment_t enable_sacf_uaccess(void) -{ - mm_segment_t old_fs; - unsigned long asce, cr; - unsigned long flags; - - old_fs = current->thread.mm_segment; - if (old_fs & 1) - return old_fs; - /* protect against a concurrent page table upgrade */ - local_irq_save(flags); - current->thread.mm_segment |= 1; - asce = S390_lowcore.kernel_asce; - if (likely(old_fs == USER_DS)) { - __ctl_store(cr, 1, 1); - if (cr != S390_lowcore.kernel_asce) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - asce = S390_lowcore.user_asce; - } - __ctl_store(cr, 7, 7); - if (cr != asce) { - __ctl_load(asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); - } - local_irq_restore(flags); - return old_fs; -} -EXPORT_SYMBOL(enable_sacf_uaccess); - -void disable_sacf_uaccess(mm_segment_t old_fs) -{ - current->thread.mm_segment = old_fs; - if (old_fs == USER_DS && test_facility(27)) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } -} -EXPORT_SYMBOL(disable_sacf_uaccess); - static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { - register unsigned long reg0 asm("0") = 0x01UL; + register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -135,9 +90,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long size) { unsigned long tmp1, tmp2; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -164,7 +117,6 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -179,7 +131,7 @@ EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; + register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -210,9 +162,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long size) { unsigned long tmp1, tmp2; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -239,7 +189,6 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -254,7 +203,7 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010001UL; + register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -277,10 +226,8 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, unsigned long size) { - mm_segment_t old_fs; unsigned long tmp1; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -304,7 +251,6 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -318,7 +264,7 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; + register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -346,10 +292,8 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size static inline unsigned long clear_user_xc(void __user *to, unsigned long size) { - mm_segment_t old_fs; unsigned long tmp1, tmp2; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -378,7 +322,6 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -414,15 +357,9 @@ static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long __strnlen_user(const char __user *src, unsigned long size) { - mm_segment_t old_fs; - unsigned long len; - if (unlikely(!size)) return 0; - old_fs = enable_sacf_uaccess(); - len = strnlen_user_srst(src, size); - disable_sacf_uaccess(old_fs); - return len; + return strnlen_user_srst(src, size); } EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8f9ff7e7187d..e40a30647d99 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -255,7 +255,7 @@ static int pt_dump_init(void) */ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end; + address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 996884dcc9fd..b8210103de14 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -53,7 +53,6 @@ enum fault_type { KERNEL_FAULT, USER_FAULT, - VDSO_FAULT, GMAP_FAULT, }; @@ -77,22 +76,16 @@ static enum fault_type get_fault_type(struct pt_regs *regs) trans_exc_code = regs->int_parm_long & 3; if (likely(trans_exc_code == 0)) { /* primary space exception */ - if (IS_ENABLED(CONFIG_PGSTE) && - test_pt_regs_flag(regs, PIF_GUEST_FAULT)) - return GMAP_FAULT; - if (current->thread.mm_segment == USER_DS) + if (user_mode(regs)) return USER_FAULT; - return KERNEL_FAULT; - } - if (trans_exc_code == 2) { - /* secondary space exception */ - if (current->thread.mm_segment & 1) { - if (current->thread.mm_segment == USER_DS_SACF) - return USER_FAULT; + if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; - } - return VDSO_FAULT; + if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) + return GMAP_FAULT; + return KERNEL_FAULT; } + if (trans_exc_code == 2) + return USER_FAULT; if (trans_exc_code == 1) { /* access register mode, not used in the kernel */ return USER_FAULT; @@ -188,10 +181,6 @@ static void dump_fault_info(struct pt_regs *regs) asce = S390_lowcore.user_asce; pr_cont("user "); break; - case VDSO_FAULT: - asce = S390_lowcore.vdso_asce; - pr_cont("vdso "); - break; case GMAP_FAULT: asce = ((struct gmap *) S390_lowcore.gmap)->asce; pr_cont("gmap "); @@ -414,9 +403,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) switch (type) { case KERNEL_FAULT: goto out; - case VDSO_FAULT: - fault = VM_FAULT_BADMAP; - goto out; case USER_FAULT: case GMAP_FAULT: if (faulthandler_disabled() || !mm) @@ -834,7 +820,6 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); break; - case VDSO_FAULT: case GMAP_FAULT: default: do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 77767850d0d0..73a163065b95 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -49,6 +49,9 @@ #include <linux/virtio_config.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); +static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); + +unsigned long s390_invalid_asce; unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -92,6 +95,9 @@ void __init paging_init(void) unsigned long pgd_type, asce_bits; psw_t psw; + s390_invalid_asce = (unsigned long)invalid_pg_dir; + s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); init_mm.pgd = swapper_pg_dir; if (VMALLOC_END > _REGION2_SIZE) { asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -102,14 +108,14 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; - S390_lowcore.user_asce = S390_lowcore.kernel_asce; + S390_lowcore.user_asce = s390_invalid_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); - kasan_copy_shadow(init_mm.pgd); + kasan_copy_shadow_mapping(); /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.user_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); psw.mask = __extract_psw(); psw_bits(psw).dat = 1; diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 5646b39c728a..db4d303aaaa9 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -87,7 +87,7 @@ enum populate_mode { POPULATE_ZERO_SHADOW, POPULATE_SHALLOW }; -static void __init kasan_early_vmemmap_populate(unsigned long address, +static void __init kasan_early_pgtable_populate(unsigned long address, unsigned long end, enum populate_mode mode) { @@ -123,8 +123,7 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgd_populate(&init_mm, pg_dir, p4_dir); } - if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && - mode == POPULATE_SHALLOW) { + if (mode == POPULATE_SHALLOW) { address = (address + P4D_SIZE) & P4D_MASK; continue; } @@ -143,12 +142,6 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, p4d_populate(&init_mm, p4_dir, pu_dir); } - if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && - mode == POPULATE_SHALLOW) { - address = (address + PUD_SIZE) & PUD_MASK; - continue; - } - pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -281,7 +274,6 @@ void __init kasan_early_init(void) unsigned long shadow_alloc_size; unsigned long vmax_unlimited; unsigned long initrd_end; - unsigned long asce_type; unsigned long memsize; unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); pte_t pte_z; @@ -297,32 +289,26 @@ void __init kasan_early_init(void) memsize = get_mem_detect_end(); if (!memsize) kasan_early_panic("cannot detect physical memory size\n"); - /* respect mem= cmdline parameter */ - if (memory_end_set && memsize > memory_end) - memsize = memory_end; - if (IS_ENABLED(CONFIG_CRASH_DUMP) && OLDMEM_BASE) - memsize = min(memsize, OLDMEM_SIZE); - memsize = min(memsize, KASAN_SHADOW_START); - - if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) { - /* 4 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, - _REGION2_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; - if (has_uv_sec_stor_limit()) - kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); - asce_type = _ASCE_TYPE_REGION2; - } else { - /* 3 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, - _REGION3_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE; - asce_type = _ASCE_TYPE_REGION3; - } + /* + * Kasan currently supports standby memory but only if it follows + * online memory (default allocation), i.e. no memory holes. + * - memsize represents end of online memory + * - ident_map_size represents online + standby and memory limits + * accounted. + * Kasan maps "memsize" right away. + * [0, memsize] - as identity mapping + * [__sha(0), __sha(memsize)] - shadow memory for identity mapping + * The rest [memsize, ident_map_size] if memsize < ident_map_size + * could be mapped/unmapped dynamically later during memory hotplug. + */ + memsize = min(memsize, ident_map_size); + + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); + crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY); + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; + if (has_uv_sec_stor_limit()) + kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); /* init kasan zero shadow */ crst_table_init((unsigned long *)kasan_early_shadow_p4d, @@ -388,27 +374,25 @@ void __init kasan_early_init(void) * +-----------------+ +- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ - kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); + kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = kasan_vmax - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN; /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_vmemmap_populate(__sha(untracked_mem_end), - __sha(kasan_vmax), POPULATE_SHALLOW); + kasan_early_pgtable_populate(__sha(untracked_mem_end), __sha(kasan_vmax), + POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ - kasan_early_vmemmap_populate(__sha(max_physmem_end), - __sha(untracked_mem_end), + kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); - kasan_early_vmemmap_populate(__sha(kasan_vmax), - __sha(vmax_unlimited), + kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited), POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; /* populate identity mapping */ - kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); - kasan_set_pgd(early_pg_dir, asce_type); + kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE); + kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2); kasan_enable_dat(); /* enable kasan */ init_task.kasan_depth = 0; @@ -416,7 +400,7 @@ void __init kasan_early_init(void) sclp_early_printk("KernelAddressSanitizer initialized\n"); } -void __init kasan_copy_shadow(pgd_t *pg_dir) +void __init kasan_copy_shadow_mapping(void) { /* * At this point we are still running on early pages setup early_pg_dir, @@ -428,24 +412,13 @@ void __init kasan_copy_shadow(pgd_t *pg_dir) pgd_t *pg_dir_dst; p4d_t *p4_dir_src; p4d_t *p4_dir_dst; - pud_t *pu_dir_src; - pud_t *pu_dir_dst; pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START); - pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START); + pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START); p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START); p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START); - if (!p4d_folded(*p4_dir_src)) { - /* 4 level paging */ - memcpy(p4_dir_dst, p4_dir_src, - (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); - return; - } - /* 3 level paging */ - pu_dir_src = pud_offset(p4_dir_src, KASAN_SHADOW_START); - pu_dir_dst = pud_offset(p4_dir_dst, KASAN_SHADOW_START); - memcpy(pu_dir_dst, pu_dir_src, - (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t)); + memcpy(p4_dir_dst, p4_dir_src, + (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); } void __init kasan_free_early_identity(void) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 11d2c8395e2a..4e87c819ddea 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -70,19 +70,10 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - /* we must change all active ASCEs to avoid the creation of new TLBs */ + /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { S390_lowcore.user_asce = mm->context.asce; - if (current->thread.mm_segment == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - /* Mark user-ASCE present in CR1 */ - clear_cpu_flag(CIF_ASCE_PRIMARY); - } - if (current->thread.mm_segment == USER_DS_SACF) { - __ctl_load(S390_lowcore.user_asce, 7, 7); - /* enable_sacf_uaccess does all or nothing */ - WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY)); - } + __ctl_load(S390_lowcore.user_asce, 7, 7); } __tlb_flush_local(); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b239f2ba93b0..01f3a5f58e64 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -76,20 +76,20 @@ static void vmem_pte_free(unsigned long *table) /* * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges - * from unused_pmd_start to next PMD_SIZE boundary. + * from unused_sub_pmd_start to next PMD_SIZE boundary. */ -static unsigned long unused_pmd_start; +static unsigned long unused_sub_pmd_start; -static void vmemmap_flush_unused_pmd(void) +static void vmemmap_flush_unused_sub_pmd(void) { - if (!unused_pmd_start) + if (!unused_sub_pmd_start) return; - memset(__va(unused_pmd_start), PAGE_UNUSED, - ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start); - unused_pmd_start = 0; + memset(__va(unused_sub_pmd_start), PAGE_UNUSED, + ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start); + unused_sub_pmd_start = 0; } -static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end) { /* * As we expect to add in the same granularity as we remove, it's @@ -106,24 +106,24 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) * We only optimize if the new used range directly follows the * previously unused range (esp., when populating consecutive sections). */ - if (unused_pmd_start == start) { - unused_pmd_start = end; - if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE))) - unused_pmd_start = 0; + if (unused_sub_pmd_start == start) { + unused_sub_pmd_start = end; + if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE))) + unused_sub_pmd_start = 0; return; } - vmemmap_flush_unused_pmd(); - __vmemmap_use_sub_pmd(start, end); + vmemmap_flush_unused_sub_pmd(); + vmemmap_mark_sub_pmd_used(start, end); } static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); - vmemmap_flush_unused_pmd(); + vmemmap_flush_unused_sub_pmd(); /* Could be our memmap page is filled with PAGE_UNUSED already ... */ - __vmemmap_use_sub_pmd(start, end); + vmemmap_mark_sub_pmd_used(start, end); /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) @@ -134,7 +134,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) * unused range in the populated PMD. */ if (!IS_ALIGNED(end, PMD_SIZE)) - unused_pmd_start = end; + unused_sub_pmd_start = end; } /* Returns true if the PMD is completely unused and can be freed. */ @@ -142,7 +142,7 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); - vmemmap_flush_unused_pmd(); + vmemmap_flush_unused_sub_pmd(); memset(__va(start), PAGE_UNUSED, end - start); return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); } @@ -223,7 +223,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!add) { if (pmd_none(*pmd)) continue; - if (pmd_large(*pmd) && !add) { + if (pmd_large(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 570016ae8bcd..41df8fcfddde 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -851,8 +851,10 @@ static int __init pci_base_init(void) if (!s390_pci_probe) return 0; - if (!test_facility(69) || !test_facility(71)) + if (!test_facility(69) || !test_facility(71)) { + pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n"); return 0; + } if (test_facility(153) && !s390_pci_no_mio) { static_branch_enable(&have_mio); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 401cf670a243..de3bdbed8881 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -93,12 +93,10 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, { int size, rc = 0; u8 status = 0; - mm_segment_t old_fs; if (!src) return -EINVAL; - old_fs = enable_sacf_uaccess(); while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, (u64 __force) src, n, @@ -113,39 +111,20 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, dst += size; n -= size; } - disable_sacf_uaccess(old_fs); if (rc) zpci_err_mmio(rc, status, (__force u64) dst); return rc; } -static long get_pfn(unsigned long user_addr, unsigned long access, - unsigned long *pfn) -{ - struct vm_area_struct *vma; - long ret; - - mmap_read_lock(current->mm); - ret = -EINVAL; - vma = find_vma(current->mm, user_addr); - if (!vma) - goto out; - ret = -EACCES; - if (!(vma->vm_flags & access)) - goto out; - ret = follow_pfn(vma, user_addr, pfn); -out: - mmap_read_unlock(current->mm); - return ret; -} - SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, const void __user *, user_buffer, size_t, length) { u8 local_buf[64]; void __iomem *io_addr; void *buf; - unsigned long pfn; + struct vm_area_struct *vma; + pte_t *ptep; + spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -158,7 +137,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, * We only support write access to MIO capable devices if we are on * a MIO enabled system. Otherwise we would have to check for every * address if it is a special ZPCI_ADDR and would have to do - * a get_pfn() which we don't need for MIO capable devices. Currently + * a pfn lookup which we don't need for MIO capable devices. Currently * ISM devices are the only devices without MIO support and there is no * known need for accessing these from userspace. */ @@ -176,21 +155,37 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, } else buf = local_buf; - ret = get_pfn(mmio_addr, VM_WRITE, &pfn); + ret = -EFAULT; + if (copy_from_user(buf, user_buffer, length)) + goto out_free; + + mmap_read_lock(current->mm); + ret = -EINVAL; + vma = find_vma(current->mm, mmio_addr); + if (!vma) + goto out_unlock_mmap; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out_unlock_mmap; + ret = -EACCES; + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + + ret = follow_pte_pmd(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); if (ret) - goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | + goto out_unlock_mmap; + + io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); - ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) - goto out; - - if (copy_from_user(buf, user_buffer, length)) - goto out; + goto out_unlock_pt; ret = zpci_memcpy_toio(io_addr, buf, length); -out: +out_unlock_pt: + pte_unmap_unlock(ptep, ptl); +out_unlock_mmap: + mmap_read_unlock(current->mm); +out_free: if (buf != local_buf) kfree(buf); return ret; @@ -248,9 +243,7 @@ static inline int __memcpy_fromio_inuser(void __user *dst, { int size, rc = 0; u8 status; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, (u64 __force) dst, n, @@ -262,7 +255,6 @@ static inline int __memcpy_fromio_inuser(void __user *dst, dst += size; n -= size; } - disable_sacf_uaccess(old_fs); if (rc) zpci_err_mmio(rc, status, (__force u64) dst); return rc; @@ -274,7 +266,9 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, u8 local_buf[64]; void __iomem *io_addr; void *buf; - unsigned long pfn; + struct vm_area_struct *vma; + pte_t *ptep; + spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -287,7 +281,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, * We only support read access to MIO capable devices if we are on * a MIO enabled system. Otherwise we would have to check for every * address if it is a special ZPCI_ADDR and would have to do - * a get_pfn() which we don't need for MIO capable devices. Currently + * a pfn lookup which we don't need for MIO capable devices. Currently * ISM devices are the only devices without MIO support and there is no * known need for accessing these from userspace. */ @@ -306,22 +300,38 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, buf = local_buf; } - ret = get_pfn(mmio_addr, VM_READ, &pfn); + mmap_read_lock(current->mm); + ret = -EINVAL; + vma = find_vma(current->mm, mmio_addr); + if (!vma) + goto out_unlock_mmap; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out_unlock_mmap; + ret = -EACCES; + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + + ret = follow_pte_pmd(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); if (ret) - goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + goto out_unlock_mmap; + + io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | + (mmio_addr & ~PAGE_MASK)); if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) { ret = -EFAULT; - goto out; + goto out_unlock_pt; } ret = zpci_memcpy_fromio(buf, io_addr, length); - if (ret) - goto out; - if (copy_to_user(user_buffer, buf, length)) + +out_unlock_pt: + pte_unmap_unlock(ptep, ptl); +out_unlock_mmap: + mmap_read_unlock(current->mm); + + if (!ret && copy_to_user(user_buffer, buf, length)) ret = -EFAULT; -out: if (buf != local_buf) kfree(buf); return ret; diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 5a10ce34b95d..3d1c31e0cf3d 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -62,14 +62,15 @@ jh 10b .endm -.macro START_NEXT_KERNEL base +.macro START_NEXT_KERNEL base subcode lg %r4,kernel_entry-\base(%r13) lg %r5,load_psw_mask-\base(%r13) ogr %r4,%r5 stg %r4,0(%r0) xgr %r0,%r0 - diag %r0,%r0,0x308 + lghi %r1,\subcode + diag %r0,%r1,0x308 .endm .text @@ -123,7 +124,7 @@ ENTRY(purgatory_start) je .start_crash_kernel /* start normal kernel */ - START_NEXT_KERNEL .base_crash + START_NEXT_KERNEL .base_crash 0 .return_old_kernel: lmg %r6,%r15,gprregs-.base_crash(%r13) @@ -227,7 +228,7 @@ ENTRY(purgatory_start) MEMCPY %r9,%r10,%r11 /* start crash kernel */ - START_NEXT_KERNEL .base_dst + START_NEXT_KERNEL .base_dst 1 load_psw_mask: diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c index 413cacbb08e2..7c673afd7241 100644 --- a/drivers/char/hw_random/s390-trng.c +++ b/drivers/char/hw_random/s390-trng.c @@ -192,14 +192,15 @@ static int trng_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) /* * hwrng register struct - * The trng is suppost to have 100% entropy, and thus - * we register with a very high quality value. + * The trng is supposed to have 100% entropy, and thus we register with a very + * high quality value. If we ever have a better driver in the future, we should + * change this value again when we merge this driver. */ static struct hwrng trng_hwrng_dev = { .name = "s390-trng", .data_read = trng_hwrng_data_read, .read = trng_hwrng_read, - .quality = 999, + .quality = 1024, }; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 217a7b84abdf..fd568248fd26 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -75,7 +75,6 @@ static int dasd_flush_block_queue(struct dasd_block *); static void dasd_device_tasklet(unsigned long); static void dasd_block_tasklet(unsigned long); static void do_kick_device(struct work_struct *); -static void do_restore_device(struct work_struct *); static void do_reload_device(struct work_struct *); static void do_requeue_requests(struct work_struct *); static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); @@ -138,7 +137,6 @@ struct dasd_device *dasd_alloc_device(void) INIT_LIST_HEAD(&device->ccw_queue); timer_setup(&device->timer, dasd_device_timeout, 0); INIT_WORK(&device->kick_work, do_kick_device); - INIT_WORK(&device->restore_device, do_restore_device); INIT_WORK(&device->reload_device, do_reload_device); INIT_WORK(&device->requeue_requests, do_requeue_requests); device->state = DASD_STATE_NEW; @@ -621,26 +619,6 @@ void dasd_reload_device(struct dasd_device *device) EXPORT_SYMBOL(dasd_reload_device); /* - * dasd_restore_device will schedule a call do do_restore_device to the kernel - * event daemon. - */ -static void do_restore_device(struct work_struct *work) -{ - struct dasd_device *device = container_of(work, struct dasd_device, - restore_device); - device->cdev->drv->restore(device->cdev); - dasd_put_device(device); -} - -void dasd_restore_device(struct dasd_device *device) -{ - dasd_get_device(device); - /* queue call to dasd_restore_device to the kernel event daemon. */ - if (!schedule_work(&device->restore_device)) - dasd_put_device(device); -} - -/* * Set the target state for a device and starts the state change. */ void dasd_set_target_state(struct dasd_device *device, int target) @@ -1514,7 +1492,6 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) "start_IO: -EIO device gone, retry"); break; case -EINVAL: - /* most likely caused in power management context */ DBF_DEV_EVENT(DBF_WARNING, device, "%s", "start_IO: -EINVAL device currently " "not accessible"); @@ -2048,7 +2025,7 @@ static void __dasd_device_check_expire(struct dasd_device *device) static int __dasd_device_is_unusable(struct dasd_device *device, struct dasd_ccw_req *cqr) { - int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM | DASD_STOPPED_NOSPC); + int mask = ~(DASD_STOPPED_DC_WAIT | DASD_STOPPED_NOSPC); if (test_bit(DASD_FLAG_OFFLINE, &device->flags) && !test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { @@ -2112,8 +2089,7 @@ static void __dasd_device_check_path_events(struct dasd_device *device) if (!dasd_path_get_tbvpm(device)) return; - if (device->stopped & - ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM)) + if (device->stopped & ~(DASD_STOPPED_DC_WAIT)) return; rc = device->discipline->verify_path(device, dasd_path_get_tbvpm(device)); @@ -3794,11 +3770,6 @@ int dasd_generic_path_operational(struct dasd_device *device) "operational\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "path operational"); dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT); - if (device->stopped & DASD_UNRESUMED_PM) { - dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM); - dasd_restore_device(device); - return 1; - } dasd_schedule_device_bh(device); if (device->block) { dasd_schedule_block_bh(device->block); @@ -4058,66 +4029,6 @@ void dasd_schedule_requeue(struct dasd_device *device) } EXPORT_SYMBOL(dasd_schedule_requeue); -int dasd_generic_pm_freeze(struct ccw_device *cdev) -{ - struct dasd_device *device = dasd_device_from_cdev(cdev); - - if (IS_ERR(device)) - return PTR_ERR(device); - - /* mark device as suspended */ - set_bit(DASD_FLAG_SUSPENDED, &device->flags); - - if (device->discipline->freeze) - device->discipline->freeze(device); - - /* disallow new I/O */ - dasd_device_set_stop_bits(device, DASD_STOPPED_PM); - - return dasd_generic_requeue_all_requests(device); -} -EXPORT_SYMBOL_GPL(dasd_generic_pm_freeze); - -int dasd_generic_restore_device(struct ccw_device *cdev) -{ - struct dasd_device *device = dasd_device_from_cdev(cdev); - int rc = 0; - - if (IS_ERR(device)) - return PTR_ERR(device); - - /* allow new IO again */ - dasd_device_remove_stop_bits(device, - (DASD_STOPPED_PM | DASD_UNRESUMED_PM)); - - dasd_schedule_device_bh(device); - - /* - * call discipline restore function - * if device is stopped do nothing e.g. for disconnected devices - */ - if (device->discipline->restore && !(device->stopped)) - rc = device->discipline->restore(device); - if (rc || device->stopped) - /* - * if the resume failed for the DASD we put it in - * an UNRESUMED stop state - */ - device->stopped |= DASD_UNRESUMED_PM; - - if (device->block) { - dasd_schedule_block_bh(device->block); - if (device->block->request_queue) - blk_mq_run_hw_queues(device->block->request_queue, - true); - } - - clear_bit(DASD_FLAG_SUSPENDED, &device->flags); - dasd_put_device(device); - return 0; -} -EXPORT_SYMBOL_GPL(dasd_generic_restore_device); - static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, int rdc_buffer_size, int magic) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index ad44d22e8859..758ee4153ac1 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -5716,95 +5716,6 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, } } -static int dasd_eckd_pm_freeze(struct dasd_device *device) -{ - /* - * the device should be disconnected from our LCU structure - * on restore we will reconnect it and reread LCU specific - * information like PAV support that might have changed - */ - dasd_alias_remove_device(device); - dasd_alias_disconnect_device_from_lcu(device); - - return 0; -} - -static int dasd_eckd_restore_device(struct dasd_device *device) -{ - struct dasd_eckd_private *private = device->private; - struct dasd_eckd_characteristics temp_rdc_data; - int rc; - struct dasd_uid temp_uid; - unsigned long flags; - unsigned long cqr_flags = 0; - - /* Read Configuration Data */ - rc = dasd_eckd_read_conf(device); - if (rc) { - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, - "Read configuration data failed, rc=%d", rc); - goto out_err; - } - - dasd_eckd_get_uid(device, &temp_uid); - /* Generate device unique id */ - rc = dasd_eckd_generate_uid(device); - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - if (memcmp(&private->uid, &temp_uid, sizeof(struct dasd_uid)) != 0) - dev_err(&device->cdev->dev, "The UID of the DASD has " - "changed\n"); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - if (rc) - goto out_err; - - /* register lcu with alias handling, enable PAV if this is a new lcu */ - rc = dasd_alias_make_device_known_to_lcu(device); - if (rc) - goto out_err; - - set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr_flags); - dasd_eckd_validate_server(device, cqr_flags); - - /* RE-Read Configuration Data */ - rc = dasd_eckd_read_conf(device); - if (rc) { - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, - "Read configuration data failed, rc=%d", rc); - goto out_err2; - } - - /* Read Feature Codes */ - dasd_eckd_read_features(device); - - /* Read Volume Information */ - dasd_eckd_read_vol_info(device); - - /* Read Extent Pool Information */ - dasd_eckd_read_ext_pool_info(device); - - /* Read Device Characteristics */ - rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, - &temp_rdc_data, 64); - if (rc) { - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, - "Read device characteristic failed, rc=%d", rc); - goto out_err2; - } - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - memcpy(&private->rdc_data, &temp_rdc_data, sizeof(temp_rdc_data)); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - - /* add device to alias management */ - dasd_alias_add_device(device); - - return 0; - -out_err2: - dasd_alias_disconnect_device_from_lcu(device); -out_err: - return -1; -} - static int dasd_eckd_reload_device(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; @@ -6668,9 +6579,6 @@ static struct ccw_driver dasd_eckd_driver = { .notify = dasd_generic_notify, .path_event = dasd_generic_path_event, .shutdown = dasd_generic_shutdown, - .freeze = dasd_generic_pm_freeze, - .thaw = dasd_generic_restore_device, - .restore = dasd_generic_restore_device, .uc_handler = dasd_generic_uc_handler, .int_class = IRQIO_DAS, }; @@ -6702,8 +6610,6 @@ static struct dasd_discipline dasd_eckd_discipline = { .dump_sense_dbf = dasd_eckd_dump_sense_dbf, .fill_info = dasd_eckd_fill_info, .ioctl = dasd_eckd_ioctl, - .freeze = dasd_eckd_pm_freeze, - .restore = dasd_eckd_restore_device, .reload = dasd_eckd_reload_device, .get_uid = dasd_eckd_get_uid, .kick_validate = dasd_eckd_kick_validate_server, diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 1a44e321b54e..c027344ee225 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -79,9 +79,6 @@ static struct ccw_driver dasd_fba_driver = { .set_online = dasd_fba_set_online, .notify = dasd_generic_notify, .path_event = dasd_generic_path_event, - .freeze = dasd_generic_pm_freeze, - .thaw = dasd_generic_restore_device, - .restore = dasd_generic_restore_device, .int_class = IRQIO_DAS, }; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index fa552f9f1666..7a34161ea5c6 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -355,10 +355,6 @@ struct dasd_discipline { int (*fill_info) (struct dasd_device *, struct dasd_information2_t *); int (*ioctl) (struct dasd_block *, unsigned int, void __user *); - /* suspend/resume functions */ - int (*freeze) (struct dasd_device *); - int (*restore) (struct dasd_device *); - /* reload device after state change */ int (*reload) (struct dasd_device *); @@ -520,7 +516,6 @@ struct dasd_device { atomic_t tasklet_scheduled; struct tasklet_struct tasklet; struct work_struct kick_work; - struct work_struct restore_device; struct work_struct reload_device; struct work_struct kick_validate; struct work_struct suc_work; @@ -592,8 +587,6 @@ struct dasd_queue { #define DASD_STOPPED_PENDING 4 /* long busy */ #define DASD_STOPPED_DC_WAIT 8 /* disconnected, wait */ #define DASD_STOPPED_SU 16 /* summary unit check handling */ -#define DASD_STOPPED_PM 32 /* pm state transition */ -#define DASD_UNRESUMED_PM 64 /* pm resume failed state */ #define DASD_STOPPED_NOSPC 128 /* no space left */ /* per device flags */ @@ -753,7 +746,6 @@ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved); void dasd_enable_device(struct dasd_device *); void dasd_set_target_state(struct dasd_device *, int); void dasd_kick_device(struct dasd_device *); -void dasd_restore_device(struct dasd_device *); void dasd_reload_device(struct dasd_device *); void dasd_schedule_requeue(struct dasd_device *); @@ -785,8 +777,6 @@ int dasd_generic_path_operational(struct dasd_device *); void dasd_generic_shutdown(struct ccw_device *); void dasd_generic_handle_state_change(struct dasd_device *); -int dasd_generic_pm_freeze(struct ccw_device *); -int dasd_generic_restore_device(struct ccw_device *); enum uc_todo dasd_generic_uc_handler(struct ccw_device *, struct irb *); void dasd_generic_path_event(struct ccw_device *, int *); int dasd_generic_verify_path(struct dasd_device *, __u8); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index d8acabbb1ed3..1354c42d95aa 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -289,16 +289,14 @@ static void raw3215_timeout(struct timer_list *t) spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw->flags &= ~RAW3215_TIMER_RUNS; - if (!tty_port_suspended(&raw->port)) { - raw3215_mk_write_req(raw); - raw3215_start_io(raw); - if ((raw->queued_read || raw->queued_write) && - !(raw->flags & RAW3215_WORKING) && - !(raw->flags & RAW3215_TIMER_RUNS)) { - raw->timer.expires = RAW3215_TIMEOUT + jiffies; - add_timer(&raw->timer); - raw->flags |= RAW3215_TIMER_RUNS; - } + raw3215_mk_write_req(raw); + raw3215_start_io(raw); + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; } spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } @@ -311,7 +309,7 @@ static void raw3215_timeout(struct timer_list *t) */ static inline void raw3215_try_io(struct raw3215_info *raw) { - if (!tty_port_initialized(&raw->port) || tty_port_suspended(&raw->port)) + if (!tty_port_initialized(&raw->port)) return; if (raw->queued_read != NULL) raw3215_start_io(raw); @@ -464,26 +462,6 @@ put_tty: } /* - * Drop the oldest line from the output buffer. - */ -static void raw3215_drop_line(struct raw3215_info *raw) -{ - int ix; - char ch; - - BUG_ON(raw->written != 0); - ix = (raw->head - raw->count) & (RAW3215_BUFFER_SIZE - 1); - while (raw->count > 0) { - ch = raw->buffer[ix]; - ix = (ix + 1) & (RAW3215_BUFFER_SIZE - 1); - raw->count--; - if (ch == 0x15) - break; - } - raw->head = ix; -} - -/* * Wait until length bytes are available int the output buffer. * Has to be called with the s390irq lock held. Can be called * disabled. @@ -491,13 +469,6 @@ static void raw3215_drop_line(struct raw3215_info *raw) static void raw3215_make_room(struct raw3215_info *raw, unsigned int length) { while (RAW3215_BUFFER_SIZE - raw->count < length) { - /* While console is frozen for suspend we have no other - * choice but to drop message from the buffer to make - * room for even more messages. */ - if (tty_port_suspended(&raw->port)) { - raw3215_drop_line(raw); - continue; - } /* there might be a request pending */ raw->flags |= RAW3215_FLUSHING; raw3215_mk_write_req(raw); @@ -763,36 +734,6 @@ static int raw3215_set_offline (struct ccw_device *cdev) return 0; } -static int raw3215_pm_stop(struct ccw_device *cdev) -{ - struct raw3215_info *raw; - unsigned long flags; - - /* Empty the output buffer, then prevent new I/O. */ - raw = dev_get_drvdata(&cdev->dev); - spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); - raw3215_make_room(raw, RAW3215_BUFFER_SIZE); - tty_port_set_suspended(&raw->port, 1); - spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); - return 0; -} - -static int raw3215_pm_start(struct ccw_device *cdev) -{ - struct raw3215_info *raw; - unsigned long flags; - - /* Allow I/O again and flush output buffer. */ - raw = dev_get_drvdata(&cdev->dev); - spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); - tty_port_set_suspended(&raw->port, 0); - raw->flags |= RAW3215_FLUSHING; - raw3215_try_io(raw); - raw->flags &= ~RAW3215_FLUSHING; - spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); - return 0; -} - static struct ccw_device_id raw3215_id[] = { { CCW_DEVICE(0x3215, 0) }, { /* end of list */ }, @@ -808,9 +749,6 @@ static struct ccw_driver raw3215_ccw_driver = { .remove = &raw3215_remove, .set_online = &raw3215_set_online, .set_offline = &raw3215_set_offline, - .freeze = &raw3215_pm_stop, - .thaw = &raw3215_pm_start, - .restore = &raw3215_pm_start, .int_class = IRQIO_C15, }; @@ -858,11 +796,6 @@ static void con3215_flush(void) unsigned long flags; raw = raw3215[0]; /* console 3215 is the first one */ - if (tty_port_suspended(&raw->port)) - /* The console is still frozen for suspend. */ - if (ccw_device_force_console(raw->cdev)) - /* Forcing didn't work, no panic message .. */ - return; spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); raw3215_make_room(raw, RAW3215_BUFFER_SIZE); spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index e17364e13d2f..e21962c0fd94 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -544,7 +544,6 @@ con3270_flush(void) cp = condev; if (!cp->view.dev) return; - raw3270_pm_unfreeze(&cp->view); raw3270_activate_view(&cp->view); spin_lock_irqsave(&cp->view.lock, flags); con3270_wait_write(cp); diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 63a41b168761..646ec796bb83 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -67,7 +67,6 @@ struct raw3270 { #define RAW3270_FLAGS_14BITADDR 0 /* 14-bit buffer addresses */ #define RAW3270_FLAGS_BUSY 1 /* Device busy, leave it alone */ #define RAW3270_FLAGS_CONSOLE 2 /* Device is the console. */ -#define RAW3270_FLAGS_FROZEN 3 /* set if 3270 is frozen for suspend */ /* Semaphore to protect global data of raw3270 (devices, views, etc). */ static DEFINE_MUTEX(raw3270_mutex); @@ -260,8 +259,7 @@ raw3270_view_active(struct raw3270_view *view) { struct raw3270 *rp = view->dev; - return rp && rp->view == view && - !test_bit(RAW3270_FLAGS_FROZEN, &rp->flags); + return rp && rp->view == view; } int @@ -273,8 +271,7 @@ raw3270_start(struct raw3270_view *view, struct raw3270_request *rq) spin_lock_irqsave(get_ccwdev_lock(view->dev->cdev), flags); rp = view->dev; - if (!rp || rp->view != view || - test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) + if (!rp || rp->view != view) rc = -EACCES; else if (!raw3270_state_ready(rp)) rc = -EBUSY; @@ -291,8 +288,7 @@ raw3270_start_locked(struct raw3270_view *view, struct raw3270_request *rq) int rc; rp = view->dev; - if (!rp || rp->view != view || - test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) + if (!rp || rp->view != view) rc = -EACCES; else if (!raw3270_state_ready(rp)) rc = -EBUSY; @@ -629,8 +625,7 @@ raw3270_reset(struct raw3270_view *view) int rc; rp = view->dev; - if (!rp || rp->view != view || - test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) + if (!rp || rp->view != view) rc = -EACCES; else if (!raw3270_state_ready(rp)) rc = -EBUSY; @@ -854,8 +849,6 @@ raw3270_activate_view(struct raw3270_view *view) rc = 0; else if (!raw3270_state_ready(rp)) rc = -EBUSY; - else if (test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) - rc = -EACCES; else { oldview = NULL; if (rp->view && rp->view->fn->deactivate) { @@ -903,8 +896,7 @@ raw3270_deactivate_view(struct raw3270_view *view) list_del_init(&view->list); list_add_tail(&view->list, &rp->view_list); /* Try to activate another view. */ - if (raw3270_state_ready(rp) && - !test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) { + if (raw3270_state_ready(rp)) { list_for_each_entry(view, &rp->view_list, list) { rp->view = view; if (view->fn->activate(view) == 0) @@ -999,8 +991,7 @@ raw3270_del_view(struct raw3270_view *view) rp->view = NULL; } list_del_init(&view->list); - if (!rp->view && raw3270_state_ready(rp) && - !test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) { + if (!rp->view && raw3270_state_ready(rp)) { /* Try to activate another view. */ list_for_each_entry(nv, &rp->view_list, list) { if (nv->fn->activate(nv) == 0) { @@ -1215,60 +1206,6 @@ raw3270_set_offline (struct ccw_device *cdev) return 0; } -static int raw3270_pm_stop(struct ccw_device *cdev) -{ - struct raw3270 *rp; - struct raw3270_view *view; - unsigned long flags; - - rp = dev_get_drvdata(&cdev->dev); - if (!rp) - return 0; - spin_lock_irqsave(get_ccwdev_lock(rp->cdev), flags); - if (rp->view && rp->view->fn->deactivate) - rp->view->fn->deactivate(rp->view); - if (!test_bit(RAW3270_FLAGS_CONSOLE, &rp->flags)) { - /* - * Release tty and fullscreen for all non-console - * devices. - */ - list_for_each_entry(view, &rp->view_list, list) { - if (view->fn->release) - view->fn->release(view); - } - } - set_bit(RAW3270_FLAGS_FROZEN, &rp->flags); - spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); - return 0; -} - -static int raw3270_pm_start(struct ccw_device *cdev) -{ - struct raw3270 *rp; - unsigned long flags; - - rp = dev_get_drvdata(&cdev->dev); - if (!rp) - return 0; - spin_lock_irqsave(get_ccwdev_lock(rp->cdev), flags); - clear_bit(RAW3270_FLAGS_FROZEN, &rp->flags); - if (rp->view && rp->view->fn->activate) - rp->view->fn->activate(rp->view); - spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); - return 0; -} - -void raw3270_pm_unfreeze(struct raw3270_view *view) -{ -#ifdef CONFIG_TN3270_CONSOLE - struct raw3270 *rp; - - rp = view->dev; - if (rp && test_bit(RAW3270_FLAGS_FROZEN, &rp->flags)) - ccw_device_force_console(rp->cdev); -#endif -} - static struct ccw_device_id raw3270_id[] = { { CCW_DEVICE(0x3270, 0) }, { CCW_DEVICE(0x3271, 0) }, @@ -1294,9 +1231,6 @@ static struct ccw_driver raw3270_ccw_driver = { .remove = &raw3270_remove, .set_online = &raw3270_set_online, .set_offline = &raw3270_set_offline, - .freeze = &raw3270_pm_stop, - .thaw = &raw3270_pm_start, - .restore = &raw3270_pm_start, .int_class = IRQIO_C70, }; diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 8d979e0ee605..c6645167cd2b 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -199,7 +199,6 @@ struct raw3270_notifier { int raw3270_register_notifier(struct raw3270_notifier *); void raw3270_unregister_notifier(struct raw3270_notifier *); -void raw3270_pm_unfreeze(struct raw3270_view *); /* * Little memory allocator for string objects. diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 69d9cde9ff5a..6de919944a39 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -156,7 +156,11 @@ struct read_cpu_info_sccb { u16 offset_configured; u16 nr_standby; u16 offset_standby; - u8 reserved[4096 - 16]; + /* + * Without ext sccb, struct size is PAGE_SIZE. + * With ext sccb, struct size is EXT_SCCB_READ_CPU. + */ + u8 reserved[]; } __attribute__((packed, aligned(PAGE_SIZE))); struct read_info_sccb { @@ -199,7 +203,7 @@ struct read_info_sccb { u8 byte_134; /* 134 */ u8 cpudirq; /* 135 */ u16 cbl; /* 136-137 */ - u8 _pad_138[4096 - 138]; /* 138-4095 */ + u8 _pad_138[EXT_SCCB_READ_SCP - 138]; } __packed __aligned(PAGE_SIZE); struct read_storage_sccb { @@ -328,7 +332,7 @@ unsigned int sclp_early_con_check_vt220(struct init_sccb *sccb); int sclp_early_set_event_mask(struct init_sccb *sccb, sccb_mask_t receive_mask, sccb_mask_t send_mask); -int sclp_early_get_info(struct read_info_sccb *info); +struct read_info_sccb * __init sclp_early_get_info(void); /* useful inlines */ diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index f6e97f0830f6..d41bc144c183 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -27,6 +27,7 @@ #include <asm/page.h> #include <asm/sclp.h> #include <asm/numa.h> +#include <asm/facility.h> #include "sclp.h" @@ -87,14 +88,17 @@ out: int _sclp_get_core_info(struct sclp_core_info *info) { int rc; + int length = test_facility(140) ? EXT_SCCB_READ_CPU : PAGE_SIZE; struct read_cpu_info_sccb *sccb; if (!SCLP_HAS_CPU_INFO) return -EOPNOTSUPP; - sccb = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + + sccb = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA | __GFP_ZERO, get_order(length)); if (!sccb) return -ENOMEM; - sccb->header.length = sizeof(*sccb); + sccb->header.length = length; + sccb->header.control_mask[2] = 0x80; rc = sclp_sync_request_timeout(SCLP_CMDW_READ_CPU_INFO, sccb, SCLP_QUEUE_INTERVAL); if (rc) @@ -107,7 +111,7 @@ int _sclp_get_core_info(struct sclp_core_info *info) } sclp_fill_core_info(info, sccb); out: - free_page((unsigned long) sccb); + free_pages((unsigned long) sccb, get_order(length)); return rc; } @@ -397,10 +401,10 @@ static void __init add_memory_merged(u16 rn) goto skip_add; if (start + size > VMEM_MAX_PHYS) size = VMEM_MAX_PHYS - start; - if (memory_end_set && (start >= memory_end)) + if (start >= ident_map_size) goto skip_add; - if (memory_end_set && (start + size > memory_end)) - size = memory_end - start; + if (start + size > ident_map_size) + size = ident_map_size - start; block_size = memory_block_size_bytes(); align_to_block_size(&start, &size, block_size); if (!size) diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index cc5e84b80c69..2f3515fa242a 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -9,9 +9,12 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/errno.h> +#include <linux/memblock.h> #include <asm/ctl_reg.h> #include <asm/sclp.h> #include <asm/ipl.h> +#include <asm/setup.h> +#include <asm/facility.h> #include "sclp_sdias.h" #include "sclp.h" @@ -20,12 +23,14 @@ static struct sclp_ipl_info sclp_ipl_info; struct sclp_info sclp; EXPORT_SYMBOL(sclp); -static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) +static void __init sclp_early_facilities_detect(void) { struct sclp_core_entry *cpue; + struct read_info_sccb *sccb; u16 boot_cpu_address, cpu; - if (sclp_early_get_info(sccb)) + sccb = sclp_early_get_info(); + if (!sccb) return; sclp.facilities = sccb->facilities; @@ -107,29 +112,34 @@ void __init sclp_early_get_ipl_info(struct sclp_ipl_info *info) *info = sclp_ipl_info; } -static struct sclp_core_info sclp_early_core_info __initdata; -static int sclp_early_core_info_valid __initdata; - -static void __init sclp_early_init_core_info(struct read_cpu_info_sccb *sccb) -{ - if (!SCLP_HAS_CPU_INFO) - return; - memset(sccb, 0, sizeof(*sccb)); - sccb->header.length = sizeof(*sccb); - if (sclp_early_cmd(SCLP_CMDW_READ_CPU_INFO, sccb)) - return; - if (sccb->header.response_code != 0x0010) - return; - sclp_fill_core_info(&sclp_early_core_info, sccb); - sclp_early_core_info_valid = 1; -} - int __init sclp_early_get_core_info(struct sclp_core_info *info) { - if (!sclp_early_core_info_valid) - return -EIO; - *info = sclp_early_core_info; - return 0; + struct read_cpu_info_sccb *sccb; + int length = test_facility(140) ? EXT_SCCB_READ_CPU : PAGE_SIZE; + int rc = 0; + + if (!SCLP_HAS_CPU_INFO) + return -EOPNOTSUPP; + + sccb = memblock_alloc_low(length, PAGE_SIZE); + if (!sccb) + return -ENOMEM; + + memset(sccb, 0, length); + sccb->header.length = length; + sccb->header.control_mask[2] = 0x80; + if (sclp_early_cmd(SCLP_CMDW_READ_CPU_INFO, sccb)) { + rc = -EIO; + goto out; + } + if (sccb->header.response_code != 0x0010) { + rc = -EIO; + goto out; + } + sclp_fill_core_info(info, sccb); +out: + memblock_free_early((unsigned long)sccb, length); + return rc; } static void __init sclp_early_console_detect(struct init_sccb *sccb) @@ -148,8 +158,7 @@ void __init sclp_early_detect(void) { void *sccb = sclp_early_sccb; - sclp_early_facilities_detect(sccb); - sclp_early_init_core_info(sccb); + sclp_early_facilities_detect(); /* * Turn off SCLP event notifications. Also save remote masks in the diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index a960afa974bf..ec9f8ad5341c 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -11,6 +11,7 @@ #include <asm/irq.h> #include <asm/sections.h> #include <asm/mem_detect.h> +#include <asm/facility.h> #include "sclp.h" #include "sclp_rw.h" @@ -237,13 +238,14 @@ void sclp_early_printk(const char *str) int __init sclp_early_read_info(void) { int i; + int length = test_facility(140) ? EXT_SCCB_READ_SCP : PAGE_SIZE; struct read_info_sccb *sccb = &sclp_info_sccb; sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED, SCLP_CMDW_READ_SCP_INFO}; for (i = 0; i < ARRAY_SIZE(commands); i++) { - memset(sccb, 0, sizeof(*sccb)); - sccb->header.length = sizeof(*sccb); + memset(sccb, 0, length); + sccb->header.length = length; sccb->header.function_code = 0x80; sccb->header.control_mask[2] = 0x80; if (sclp_early_cmd(commands[i], sccb)) @@ -258,13 +260,12 @@ int __init sclp_early_read_info(void) return -EIO; } -int __init sclp_early_get_info(struct read_info_sccb *info) +struct read_info_sccb * __init sclp_early_get_info(void) { if (!sclp_info_sccb_valid) - return -EIO; + return NULL; - *info = sclp_info_sccb; - return 0; + return &sclp_info_sccb; } int __init sclp_early_get_memsize(unsigned long *mem) diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h index e2c60475dfa8..4e5d5efa978f 100644 --- a/drivers/s390/char/tape.h +++ b/drivers/s390/char/tape.h @@ -264,7 +264,6 @@ extern void tape_state_set(struct tape_device *, enum tape_state); extern int tape_generic_online(struct tape_device *, struct tape_discipline *); extern int tape_generic_offline(struct ccw_device *); -extern int tape_generic_pm_suspend(struct ccw_device *); /* Externals from tape_devmap.c */ extern int tape_generic_probe(struct ccw_device *); diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 6d73ee3f827a..7ada994d4592 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1191,7 +1191,6 @@ static struct ccw_driver tape_34xx_driver = { .remove = tape_generic_remove, .set_online = tape_34xx_online, .set_offline = tape_generic_offline, - .freeze = tape_generic_pm_suspend, .int_class = IRQIO_TAP, }; diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 4554cdf4d6bd..ecf8c5006a0e 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1651,7 +1651,6 @@ static struct ccw_driver tape_3590_driver = { .remove = tape_generic_remove, .set_offline = tape_generic_offline, .set_online = tape_3590_online, - .freeze = tape_generic_pm_suspend, .int_class = IRQIO_TAP, }; diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 380e6a67719c..a6d2a4792185 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -428,55 +428,6 @@ tape_cleanup_device(struct tape_device *device) } /* - * Suspend device. - * - * Called by the common I/O layer if the drive should be suspended on user - * request. We refuse to suspend if the device is loaded or in use for the - * following reason: - * While the Linux guest is suspended, it might be logged off which causes - * devices to be detached. Tape devices are automatically rewound and unloaded - * during DETACH processing (unless the tape device was attached with the - * NOASSIGN or MULTIUSER option). After rewind/unload, there is no way to - * resume the original state of the tape device, since we would need to - * manually re-load the cartridge which was active at suspend time. - */ -int tape_generic_pm_suspend(struct ccw_device *cdev) -{ - struct tape_device *device; - - device = dev_get_drvdata(&cdev->dev); - if (!device) { - return -ENODEV; - } - - DBF_LH(3, "(%08x): tape_generic_pm_suspend(%p)\n", - device->cdev_id, device); - - if (device->medium_state != MS_UNLOADED) { - pr_err("A cartridge is loaded in tape device %s, " - "refusing to suspend\n", dev_name(&cdev->dev)); - return -EBUSY; - } - - spin_lock_irq(get_ccwdev_lock(device->cdev)); - switch (device->tape_state) { - case TS_INIT: - case TS_NOT_OPER: - case TS_UNUSED: - spin_unlock_irq(get_ccwdev_lock(device->cdev)); - break; - default: - pr_err("Tape device %s is busy, refusing to " - "suspend\n", dev_name(&cdev->dev)); - spin_unlock_irq(get_ccwdev_lock(device->cdev)); - return -EBUSY; - } - - DBF_LH(3, "(%08x): Drive suspended.\n", device->cdev_id); - return 0; -} - -/* * Set device offline. * * Called by the common I/O layer if the drive should set offline on user @@ -1360,7 +1311,6 @@ EXPORT_SYMBOL(tape_generic_remove); EXPORT_SYMBOL(tape_generic_probe); EXPORT_SYMBOL(tape_generic_online); EXPORT_SYMBOL(tape_generic_offline); -EXPORT_SYMBOL(tape_generic_pm_suspend); EXPORT_SYMBOL(tape_put_device); EXPORT_SYMBOL(tape_get_device); EXPORT_SYMBOL(tape_state_verbose); diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index cbde65ab2170..1bbf27b98cf6 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -62,7 +62,6 @@ static int ur_probe(struct ccw_device *cdev); static void ur_remove(struct ccw_device *cdev); static int ur_set_online(struct ccw_device *cdev); static int ur_set_offline(struct ccw_device *cdev); -static int ur_pm_suspend(struct ccw_device *cdev); static struct ccw_driver ur_driver = { .driver = { @@ -74,7 +73,6 @@ static struct ccw_driver ur_driver = { .remove = ur_remove, .set_online = ur_set_online, .set_offline = ur_set_offline, - .freeze = ur_pm_suspend, .int_class = IRQIO_VMR, }; @@ -165,28 +163,6 @@ static void urdev_put(struct urdev *urd) } /* - * State and contents of ur devices can be changed by class D users issuing - * CP commands such as PURGE or TRANSFER, while the Linux guest is suspended. - * Also the Linux guest might be logged off, which causes all active spool - * files to be closed. - * So we cannot guarantee that spool files are still the same when the Linux - * guest is resumed. In order to avoid unpredictable results at resume time - * we simply refuse to suspend if a ur device node is open. - */ -static int ur_pm_suspend(struct ccw_device *cdev) -{ - struct urdev *urd = dev_get_drvdata(&cdev->dev); - - TRACE("ur_pm_suspend: cdev=%p\n", cdev); - if (urd->open_flag) { - pr_err("Unit record device %s is busy, %s refusing to " - "suspend.\n", dev_name(&cdev->dev), ur_banner); - return -EBUSY; - } - return 0; -} - -/* * Low-level functions to do I/O to a ur device. * alloc_chan_prog * free_chan_prog diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 8f080d3fd380..c42405c620b5 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -120,31 +120,6 @@ static void chsc_subchannel_shutdown(struct subchannel *sch) cio_disable_subchannel(sch); } -static int chsc_subchannel_prepare(struct subchannel *sch) -{ - int cc; - struct schib schib; - /* - * Don't allow suspend while the subchannel is not idle - * since we don't have a way to clear the subchannel and - * cannot disable it with a request running. - */ - cc = stsch(sch->schid, &schib); - if (!cc && scsw_stctl(&schib.scsw)) - return -EAGAIN; - return 0; -} - -static int chsc_subchannel_freeze(struct subchannel *sch) -{ - return cio_disable_subchannel(sch); -} - -static int chsc_subchannel_restore(struct subchannel *sch) -{ - return cio_enable_subchannel(sch, (u32)(unsigned long)sch); -} - static struct css_device_id chsc_subchannel_ids[] = { { .match_flags = 0x1, .type =SUBCHANNEL_TYPE_CHSC, }, { /* end of list */ }, @@ -161,10 +136,6 @@ static struct css_driver chsc_subchannel_driver = { .probe = chsc_subchannel_probe, .remove = chsc_subchannel_remove, .shutdown = chsc_subchannel_shutdown, - .prepare = chsc_subchannel_prepare, - .freeze = chsc_subchannel_freeze, - .thaw = chsc_subchannel_restore, - .restore = chsc_subchannel_restore, }; static int __init chsc_init_dbfs(void) diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 72dd2471ec1e..b7b590646d58 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1109,11 +1109,6 @@ static ssize_t cmb_enable_store(struct device *dev, } DEVICE_ATTR_RW(cmb_enable); -int ccw_set_cmf(struct ccw_device *cdev, int enable) -{ - return cmbops->set(cdev, enable ? 2 : 0); -} - /** * enable_cmf() - switch on the channel measurement for a specific device * @cdev: The ccw device to be enabled diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index cca1a7c4bb33..94c6470de635 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -18,7 +18,6 @@ #include <linux/errno.h> #include <linux/list.h> #include <linux/reboot.h> -#include <linux/suspend.h> #include <linux/proc_fs.h> #include <linux/genalloc.h> #include <linux/dma-mapping.h> @@ -1044,59 +1043,6 @@ static struct notifier_block css_reboot_notifier = { .notifier_call = css_reboot_event, }; -/* - * Since the css devices are neither on a bus nor have a class - * nor have a special device type, we cannot stop/restart channel - * path measurements via the normal suspend/resume callbacks, but have - * to use notifiers. - */ -static int css_power_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct channel_subsystem *css; - int ret; - - switch (event) { - case PM_HIBERNATION_PREPARE: - case PM_SUSPEND_PREPARE: - ret = NOTIFY_DONE; - for_each_css(css) { - mutex_lock(&css->mutex); - if (!css->cm_enabled) { - mutex_unlock(&css->mutex); - continue; - } - ret = __chsc_do_secm(css, 0); - ret = notifier_from_errno(ret); - mutex_unlock(&css->mutex); - } - break; - case PM_POST_HIBERNATION: - case PM_POST_SUSPEND: - ret = NOTIFY_DONE; - for_each_css(css) { - mutex_lock(&css->mutex); - if (!css->cm_enabled) { - mutex_unlock(&css->mutex); - continue; - } - ret = __chsc_do_secm(css, 1); - ret = notifier_from_errno(ret); - mutex_unlock(&css->mutex); - } - /* search for subchannels, which appeared during hibernation */ - css_schedule_reprobe(); - break; - default: - ret = NOTIFY_DONE; - } - return ret; - -} -static struct notifier_block css_power_notifier = { - .notifier_call = css_power_event, -}; - #define CIO_DMA_GFP (GFP_KERNEL | __GFP_ZERO) static struct gen_pool *cio_dma_pool; @@ -1242,12 +1188,9 @@ static int __init css_bus_init(void) ret = register_reboot_notifier(&css_reboot_notifier); if (ret) goto out_unregister; - ret = register_pm_notifier(&css_power_notifier); - if (ret) - goto out_unregister_rn; ret = cio_dma_pool_init(); if (ret) - goto out_unregister_pmn; + goto out_unregister_rn; airq_init(); css_init_done = 1; @@ -1255,8 +1198,6 @@ static int __init css_bus_init(void) isc_register(IO_SCH_ISC); return 0; -out_unregister_pmn: - unregister_pm_notifier(&css_power_notifier); out_unregister_rn: unregister_reboot_notifier(&css_reboot_notifier); out_unregister: @@ -1456,74 +1397,6 @@ static int css_uevent(struct device *dev, struct kobj_uevent_env *env) return ret; } -static int css_pm_prepare(struct device *dev) -{ - struct subchannel *sch = to_subchannel(dev); - struct css_driver *drv; - - if (mutex_is_locked(&sch->reg_mutex)) - return -EAGAIN; - if (!sch->dev.driver) - return 0; - drv = to_cssdriver(sch->dev.driver); - /* Notify drivers that they may not register children. */ - return drv->prepare ? drv->prepare(sch) : 0; -} - -static void css_pm_complete(struct device *dev) -{ - struct subchannel *sch = to_subchannel(dev); - struct css_driver *drv; - - if (!sch->dev.driver) - return; - drv = to_cssdriver(sch->dev.driver); - if (drv->complete) - drv->complete(sch); -} - -static int css_pm_freeze(struct device *dev) -{ - struct subchannel *sch = to_subchannel(dev); - struct css_driver *drv; - - if (!sch->dev.driver) - return 0; - drv = to_cssdriver(sch->dev.driver); - return drv->freeze ? drv->freeze(sch) : 0; -} - -static int css_pm_thaw(struct device *dev) -{ - struct subchannel *sch = to_subchannel(dev); - struct css_driver *drv; - - if (!sch->dev.driver) - return 0; - drv = to_cssdriver(sch->dev.driver); - return drv->thaw ? drv->thaw(sch) : 0; -} - -static int css_pm_restore(struct device *dev) -{ - struct subchannel *sch = to_subchannel(dev); - struct css_driver *drv; - - css_update_ssd_info(sch); - if (!sch->dev.driver) - return 0; - drv = to_cssdriver(sch->dev.driver); - return drv->restore ? drv->restore(sch) : 0; -} - -static const struct dev_pm_ops css_pm_ops = { - .prepare = css_pm_prepare, - .complete = css_pm_complete, - .freeze = css_pm_freeze, - .thaw = css_pm_thaw, - .restore = css_pm_restore, -}; - static struct bus_type css_bus_type = { .name = "css", .match = css_bus_match, @@ -1531,7 +1404,6 @@ static struct bus_type css_bus_type = { .remove = css_remove, .shutdown = css_shutdown, .uevent = css_uevent, - .pm = &css_pm_ops, }; /** diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 3f322ea0f498..2eddfc47f687 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -72,11 +72,6 @@ struct chp_link; * @probe: function called on probe * @remove: function called on remove * @shutdown: called at device shutdown - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @settle: wait for asynchronous work to finish */ struct css_driver { @@ -88,11 +83,6 @@ struct css_driver { int (*probe)(struct subchannel *); int (*remove)(struct subchannel *); void (*shutdown)(struct subchannel *); - int (*prepare) (struct subchannel *); - void (*complete) (struct subchannel *); - int (*freeze)(struct subchannel *); - int (*thaw) (struct subchannel *); - int (*restore)(struct subchannel *); int (*settle)(void); }; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index b29fe8d50baf..e0005a4fc978 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -149,19 +149,6 @@ static struct css_device_id io_subchannel_ids[] = { { /* end of list */ }, }; -static int io_subchannel_prepare(struct subchannel *sch) -{ - struct ccw_device *cdev; - /* - * Don't allow suspend while a ccw device registration - * is still outstanding. - */ - cdev = sch_get_cdev(sch); - if (cdev && !device_is_registered(&cdev->dev)) - return -EAGAIN; - return 0; -} - static int io_subchannel_settle(void) { int ret; @@ -186,7 +173,6 @@ static struct css_driver io_subchannel_driver = { .probe = io_subchannel_probe, .remove = io_subchannel_remove, .shutdown = io_subchannel_shutdown, - .prepare = io_subchannel_prepare, .settle = io_subchannel_settle, }; @@ -1422,7 +1408,7 @@ static enum io_sch_action sch_get_action(struct subchannel *sch) } if (device_is_disconnected(cdev)) return IO_SCH_REPROBE; - if (cdev->online && !cdev->private->flags.resuming) + if (cdev->online) return IO_SCH_VERIFY; if (cdev->private->state == DEV_STATE_NOT_OPER) return IO_SCH_UNREG_ATTACH; @@ -1514,11 +1500,6 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) break; case IO_SCH_UNREG_ATTACH: spin_lock_irqsave(sch->lock, flags); - if (cdev->private->flags.resuming) { - /* Device will be handled later. */ - rc = 0; - goto out_unlock; - } sch_set_cdev(sch, NULL); spin_unlock_irqrestore(sch->lock, flags); /* Unregister ccw device. */ @@ -1531,7 +1512,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) switch (action) { case IO_SCH_ORPH_UNREG: case IO_SCH_UNREG: - if (!cdev || !cdev->private->flags.resuming) + if (!cdev) css_sch_device_unregister(sch); break; case IO_SCH_ORPH_ATTACH: @@ -1664,10 +1645,10 @@ void __init ccw_device_destroy_console(struct ccw_device *cdev) struct io_subchannel_private *io_priv = to_io_private(sch); set_io_private(sch, NULL); - put_device(&sch->dev); - put_device(&cdev->dev); dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area), io_priv->dma_area, io_priv->dma_area_dma); + put_device(&sch->dev); + put_device(&cdev->dev); kfree(io_priv); } @@ -1690,14 +1671,6 @@ void ccw_device_wait_idle(struct ccw_device *cdev) udelay_simple(100); } } - -static int ccw_device_pm_restore(struct device *dev); - -int ccw_device_force_console(struct ccw_device *cdev) -{ - return ccw_device_pm_restore(&cdev->dev); -} -EXPORT_SYMBOL_GPL(ccw_device_force_console); #endif /** @@ -1798,235 +1771,6 @@ static void ccw_device_shutdown(struct device *dev) __disable_cmf(cdev); } -static int ccw_device_pm_prepare(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - - if (work_pending(&cdev->private->todo_work)) - return -EAGAIN; - /* Fail while device is being set online/offline. */ - if (atomic_read(&cdev->private->onoff)) - return -EAGAIN; - - if (cdev->online && cdev->drv && cdev->drv->prepare) - return cdev->drv->prepare(cdev); - - return 0; -} - -static void ccw_device_pm_complete(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - - if (cdev->online && cdev->drv && cdev->drv->complete) - cdev->drv->complete(cdev); -} - -static int ccw_device_pm_freeze(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - struct subchannel *sch = to_subchannel(cdev->dev.parent); - int ret, cm_enabled; - - /* Fail suspend while device is in transistional state. */ - if (!dev_fsm_final_state(cdev)) - return -EAGAIN; - if (!cdev->online) - return 0; - if (cdev->drv && cdev->drv->freeze) { - ret = cdev->drv->freeze(cdev); - if (ret) - return ret; - } - - spin_lock_irq(sch->lock); - cm_enabled = cdev->private->cmb != NULL; - spin_unlock_irq(sch->lock); - if (cm_enabled) { - /* Don't have the css write on memory. */ - ret = ccw_set_cmf(cdev, 0); - if (ret) - return ret; - } - /* From here on, disallow device driver I/O. */ - spin_lock_irq(sch->lock); - ret = cio_disable_subchannel(sch); - spin_unlock_irq(sch->lock); - - return ret; -} - -static int ccw_device_pm_thaw(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - struct subchannel *sch = to_subchannel(cdev->dev.parent); - int ret, cm_enabled; - - if (!cdev->online) - return 0; - - spin_lock_irq(sch->lock); - /* Allow device driver I/O again. */ - ret = cio_enable_subchannel(sch, (u32)(addr_t)sch); - cm_enabled = cdev->private->cmb != NULL; - spin_unlock_irq(sch->lock); - if (ret) - return ret; - - if (cm_enabled) { - ret = ccw_set_cmf(cdev, 1); - if (ret) - return ret; - } - - if (cdev->drv && cdev->drv->thaw) - ret = cdev->drv->thaw(cdev); - - return ret; -} - -static void __ccw_device_pm_restore(struct ccw_device *cdev) -{ - struct subchannel *sch = to_subchannel(cdev->dev.parent); - - spin_lock_irq(sch->lock); - if (cio_is_console(sch->schid)) { - cio_enable_subchannel(sch, (u32)(addr_t)sch); - goto out_unlock; - } - /* - * While we were sleeping, devices may have gone or become - * available again. Kick re-detection. - */ - cdev->private->flags.resuming = 1; - cdev->private->path_new_mask = LPM_ANYPATH; - css_sched_sch_todo(sch, SCH_TODO_EVAL); - spin_unlock_irq(sch->lock); - css_wait_for_slow_path(); - - /* cdev may have been moved to a different subchannel. */ - sch = to_subchannel(cdev->dev.parent); - spin_lock_irq(sch->lock); - if (cdev->private->state != DEV_STATE_ONLINE && - cdev->private->state != DEV_STATE_OFFLINE) - goto out_unlock; - - ccw_device_recognition(cdev); - spin_unlock_irq(sch->lock); - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev) || - cdev->private->state == DEV_STATE_DISCONNECTED); - spin_lock_irq(sch->lock); - -out_unlock: - cdev->private->flags.resuming = 0; - spin_unlock_irq(sch->lock); -} - -static int resume_handle_boxed(struct ccw_device *cdev) -{ - cdev->private->state = DEV_STATE_BOXED; - if (ccw_device_notify(cdev, CIO_BOXED) == NOTIFY_OK) - return 0; - ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - return -ENODEV; -} - -static int resume_handle_disc(struct ccw_device *cdev) -{ - cdev->private->state = DEV_STATE_DISCONNECTED; - if (ccw_device_notify(cdev, CIO_GONE) == NOTIFY_OK) - return 0; - ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - return -ENODEV; -} - -static int ccw_device_pm_restore(struct device *dev) -{ - struct ccw_device *cdev = to_ccwdev(dev); - struct subchannel *sch; - int ret = 0; - - __ccw_device_pm_restore(cdev); - sch = to_subchannel(cdev->dev.parent); - spin_lock_irq(sch->lock); - if (cio_is_console(sch->schid)) - goto out_restore; - - /* check recognition results */ - switch (cdev->private->state) { - case DEV_STATE_OFFLINE: - case DEV_STATE_ONLINE: - cdev->private->flags.donotify = 0; - break; - case DEV_STATE_BOXED: - ret = resume_handle_boxed(cdev); - if (ret) - goto out_unlock; - goto out_restore; - default: - ret = resume_handle_disc(cdev); - if (ret) - goto out_unlock; - goto out_restore; - } - /* check if the device type has changed */ - if (!ccw_device_test_sense_data(cdev)) { - ccw_device_update_sense_data(cdev); - ccw_device_sched_todo(cdev, CDEV_TODO_REBIND); - ret = -ENODEV; - goto out_unlock; - } - if (!cdev->online) - goto out_unlock; - - if (ccw_device_online(cdev)) { - ret = resume_handle_disc(cdev); - if (ret) - goto out_unlock; - goto out_restore; - } - spin_unlock_irq(sch->lock); - wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); - spin_lock_irq(sch->lock); - - if (ccw_device_notify(cdev, CIO_OPER) == NOTIFY_BAD) { - ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - ret = -ENODEV; - goto out_unlock; - } - - /* reenable cmf, if needed */ - if (cdev->private->cmb) { - spin_unlock_irq(sch->lock); - ret = ccw_set_cmf(cdev, 1); - spin_lock_irq(sch->lock); - if (ret) { - CIO_MSG_EVENT(2, "resume: cdev 0.%x.%04x: cmf failed " - "(rc=%d)\n", cdev->private->dev_id.ssid, - cdev->private->dev_id.devno, ret); - ret = 0; - } - } - -out_restore: - spin_unlock_irq(sch->lock); - if (cdev->online && cdev->drv && cdev->drv->restore) - ret = cdev->drv->restore(cdev); - return ret; - -out_unlock: - spin_unlock_irq(sch->lock); - return ret; -} - -static const struct dev_pm_ops ccw_pm_ops = { - .prepare = ccw_device_pm_prepare, - .complete = ccw_device_pm_complete, - .freeze = ccw_device_pm_freeze, - .thaw = ccw_device_pm_thaw, - .restore = ccw_device_pm_restore, -}; - static struct bus_type ccw_bus_type = { .name = "ccw", .match = ccw_bus_match, @@ -2034,7 +1778,6 @@ static struct bus_type ccw_bus_type = { .probe = ccw_device_probe, .remove = ccw_device_remove, .shutdown = ccw_device_shutdown, - .pm = &ccw_pm_ops, }; /** diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 853b6a8ca095..24b2fce69590 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -143,6 +143,5 @@ void retry_set_schib(struct ccw_device *cdev); void cmf_retry_copy_block(struct ccw_device *); int cmf_reenable(struct ccw_device *); void cmf_reactivate(void); -int ccw_set_cmf(struct ccw_device *cdev, int enable); extern struct device_attribute dev_attr_cmb_enable; #endif diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 8fc267324ebb..6420b197bb05 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -224,12 +224,6 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) wake_up(&cdev->private->wait_q); return; } - if (cdev->private->flags.resuming) { - cdev->private->state = state; - cdev->private->flags.recog_done = 1; - wake_up(&cdev->private->wait_q); - return; - } switch (state) { case DEV_STATE_NOT_OPER: break; diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index 53468ae64b99..c8964e0a23e7 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -306,16 +306,6 @@ static void eadm_subchannel_shutdown(struct subchannel *sch) eadm_quiesce(sch); } -static int eadm_subchannel_freeze(struct subchannel *sch) -{ - return cio_disable_subchannel(sch); -} - -static int eadm_subchannel_restore(struct subchannel *sch) -{ - return cio_enable_subchannel(sch, (u32)(unsigned long)sch); -} - /** * eadm_subchannel_sch_event - process subchannel event * @sch: subchannel @@ -369,9 +359,6 @@ static struct css_driver eadm_subchannel_driver = { .remove = eadm_subchannel_remove, .shutdown = eadm_subchannel_shutdown, .sch_event = eadm_subchannel_sch_event, - .freeze = eadm_subchannel_freeze, - .thaw = eadm_subchannel_restore, - .restore = eadm_subchannel_restore, }; static int __init eadm_sch_init(void) diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index c03b4a19974e..85a11c1836e5 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -160,7 +160,6 @@ struct ccw_device_private { unsigned int donotify:1; /* call notify function */ unsigned int recog_done:1; /* dev. recog. complete */ unsigned int fake_irb:2; /* deliver faked irb */ - unsigned int resuming:1; /* recognition while resume */ unsigned int pgroup:1; /* pathgroup is set up */ unsigned int mpath:1; /* multipathing is set up */ unsigned int pgid_unknown:1;/* unknown pgid state */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index ef738b42a092..2758d05a802d 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright IBM Corp. 2006, 2012 + * Copyright IBM Corp. 2006, 2020 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> * Ralph Wuerthner <rwuerthn@de.ibm.com> * Felix Beck <felix.beck@de.ibm.com> * Holger Dengler <hd@linux.vnet.ibm.com> + * Harald Freudenberger <freude@linux.ibm.com> * * Adjunct processor bus. */ @@ -73,6 +74,12 @@ EXPORT_SYMBOL(ap_perms); DEFINE_MUTEX(ap_perms_mutex); EXPORT_SYMBOL(ap_perms_mutex); +/* # of bus scans since init */ +static atomic64_t ap_scan_bus_count; + +/* completion for initial APQN bindings complete */ +static DECLARE_COMPLETION(ap_init_apqn_bindings_complete); + static struct ap_config_info *ap_qci_info; /* @@ -577,23 +584,125 @@ static int ap_bus_match(struct device *dev, struct device_driver *drv) */ static int ap_uevent(struct device *dev, struct kobj_uevent_env *env) { + int rc; struct ap_device *ap_dev = to_ap_dev(dev); - int retval = 0; - if (!ap_dev) - return -ENODEV; + /* Uevents from ap bus core don't need extensions to the env */ + if (dev == ap_root_device) + return 0; /* Set up DEV_TYPE environment variable. */ - retval = add_uevent_var(env, "DEV_TYPE=%04X", ap_dev->device_type); - if (retval) - return retval; + rc = add_uevent_var(env, "DEV_TYPE=%04X", ap_dev->device_type); + if (rc) + return rc; /* Add MODALIAS= */ - retval = add_uevent_var(env, "MODALIAS=ap:t%02X", ap_dev->device_type); + rc = add_uevent_var(env, "MODALIAS=ap:t%02X", ap_dev->device_type); + if (rc) + return rc; + + return 0; +} + +static void ap_send_init_scan_done_uevent(void) +{ + char *envp[] = { "INITSCAN=done", NULL }; - return retval; + kobject_uevent_env(&ap_root_device->kobj, KOBJ_CHANGE, envp); } +static void ap_send_bindings_complete_uevent(void) +{ + char *envp[] = { "BINDINGS=complete", NULL }; + + kobject_uevent_env(&ap_root_device->kobj, KOBJ_CHANGE, envp); +} + +/* + * calc # of bound APQNs + */ + +struct __ap_calc_ctrs { + unsigned int apqns; + unsigned int bound; +}; + +static int __ap_calc_helper(struct device *dev, void *arg) +{ + struct __ap_calc_ctrs *pctrs = (struct __ap_calc_ctrs *) arg; + + if (is_queue_dev(dev)) { + pctrs->apqns++; + if ((to_ap_dev(dev))->drv) + pctrs->bound++; + } + + return 0; +} + +static void ap_calc_bound_apqns(unsigned int *apqns, unsigned int *bound) +{ + struct __ap_calc_ctrs ctrs; + + memset(&ctrs, 0, sizeof(ctrs)); + bus_for_each_dev(&ap_bus_type, NULL, (void *) &ctrs, __ap_calc_helper); + + *apqns = ctrs.apqns; + *bound = ctrs.bound; +} + +/* + * After initial ap bus scan do check if all existing APQNs are + * bound to device drivers. + */ +static void ap_check_bindings_complete(void) +{ + unsigned int apqns, bound; + + if (atomic64_read(&ap_scan_bus_count) >= 1) { + ap_calc_bound_apqns(&apqns, &bound); + if (bound == apqns) { + if (!completion_done(&ap_init_apqn_bindings_complete)) { + complete_all(&ap_init_apqn_bindings_complete); + AP_DBF(DBF_INFO, "%s complete\n", __func__); + } + ap_send_bindings_complete_uevent(); + } + } +} + +/* + * Interface to wait for the AP bus to have done one initial ap bus + * scan and all detected APQNs have been bound to device drivers. + * If these both conditions are not fulfilled, this function blocks + * on a condition with wait_for_completion_interruptible_timeout(). + * If these both conditions are fulfilled (before the timeout hits) + * the return value is 0. If the timeout (in jiffies) hits instead + * -ETIME is returned. On failures negative return values are + * returned to the caller. + */ +int ap_wait_init_apqn_bindings_complete(unsigned long timeout) +{ + long l; + + if (completion_done(&ap_init_apqn_bindings_complete)) + return 0; + + if (timeout) + l = wait_for_completion_interruptible_timeout( + &ap_init_apqn_bindings_complete, timeout); + else + l = wait_for_completion_interruptible( + &ap_init_apqn_bindings_complete); + if (l < 0) + return l == -ERESTARTSYS ? -EINTR : l; + else if (l == 0 && timeout) + return -ETIME; + + return 0; +} +EXPORT_SYMBOL(ap_wait_init_apqn_bindings_complete); + static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data) { if (is_queue_dev(dev) && @@ -602,12 +711,6 @@ static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data) return 0; } -static struct bus_type ap_bus_type = { - .name = "ap", - .match = &ap_bus_match, - .uevent = &ap_uevent, -}; - static int __ap_revise_reserved(struct device *dev, void *dummy) { int rc, card, queue, devres, drvres; @@ -719,7 +822,8 @@ static int ap_device_probe(struct device *dev) hash_del(&to_ap_queue(dev)->hnode); spin_unlock_bh(&ap_queues_lock); ap_dev->drv = NULL; - } + } else + ap_check_bindings_complete(); out: if (rc) @@ -749,6 +853,7 @@ static int ap_device_remove(struct device *dev) if (is_queue_dev(dev)) hash_del(&to_ap_queue(dev)->hnode); spin_unlock_bh(&ap_queues_lock); + ap_dev->drv = NULL; put_device(dev); @@ -1166,21 +1271,55 @@ static ssize_t aqmask_store(struct bus_type *bus, const char *buf, static BUS_ATTR_RW(aqmask); -static struct bus_attribute *const ap_bus_attrs[] = { - &bus_attr_ap_domain, - &bus_attr_ap_control_domain_mask, - &bus_attr_ap_usage_domain_mask, - &bus_attr_ap_adapter_mask, - &bus_attr_config_time, - &bus_attr_poll_thread, - &bus_attr_ap_interrupts, - &bus_attr_poll_timeout, - &bus_attr_ap_max_domain_id, - &bus_attr_ap_max_adapter_id, - &bus_attr_apmask, - &bus_attr_aqmask, +static ssize_t scans_show(struct bus_type *bus, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%llu\n", + atomic64_read(&ap_scan_bus_count)); +} + +static BUS_ATTR_RO(scans); + +static ssize_t bindings_show(struct bus_type *bus, char *buf) +{ + int rc; + unsigned int apqns, n; + + ap_calc_bound_apqns(&apqns, &n); + if (atomic64_read(&ap_scan_bus_count) >= 1 && n == apqns) + rc = scnprintf(buf, PAGE_SIZE, "%u/%u (complete)\n", n, apqns); + else + rc = scnprintf(buf, PAGE_SIZE, "%u/%u\n", n, apqns); + + return rc; +} + +static BUS_ATTR_RO(bindings); + +static struct attribute *ap_bus_attrs[] = { + &bus_attr_ap_domain.attr, + &bus_attr_ap_control_domain_mask.attr, + &bus_attr_ap_usage_domain_mask.attr, + &bus_attr_ap_adapter_mask.attr, + &bus_attr_config_time.attr, + &bus_attr_poll_thread.attr, + &bus_attr_ap_interrupts.attr, + &bus_attr_poll_timeout.attr, + &bus_attr_ap_max_domain_id.attr, + &bus_attr_ap_max_adapter_id.attr, + &bus_attr_apmask.attr, + &bus_attr_aqmask.attr, + &bus_attr_scans.attr, + &bus_attr_bindings.attr, NULL, }; +ATTRIBUTE_GROUPS(ap_bus); + +static struct bus_type ap_bus_type = { + .name = "ap", + .bus_groups = ap_bus_groups, + .match = &ap_bus_match, + .uevent = &ap_uevent, +}; /** * ap_select_domain(): Select an AP domain if possible and we haven't @@ -1608,6 +1747,12 @@ static void ap_scan_bus(struct work_struct *unused) ap_domain_index); } + if (atomic64_inc_return(&ap_scan_bus_count) == 1) { + AP_DBF(DBF_DEBUG, "%s init scan complete\n", __func__); + ap_send_init_scan_done_uevent(); + ap_check_bindings_complete(); + } + mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); } @@ -1655,7 +1800,7 @@ static void __init ap_perms_init(void) */ static int __init ap_module_init(void) { - int rc, i; + int rc; rc = ap_debug_init(); if (rc) @@ -1694,17 +1839,13 @@ static int __init ap_module_init(void) rc = bus_register(&ap_bus_type); if (rc) goto out; - for (i = 0; ap_bus_attrs[i]; i++) { - rc = bus_create_file(&ap_bus_type, ap_bus_attrs[i]); - if (rc) - goto out_bus; - } /* Create /sys/devices/ap. */ ap_root_device = root_device_register("ap"); rc = PTR_ERR_OR_ZERO(ap_root_device); if (rc) goto out_bus; + ap_root_device->bus = &ap_bus_type; /* Setup the AP bus rescan timer. */ timer_setup(&ap_config_timer, ap_config_timeout, 0); @@ -1733,8 +1874,6 @@ out_work: hrtimer_cancel(&ap_poll_timer); root_device_unregister(ap_root_device); out_bus: - while (i--) - bus_remove_file(&ap_bus_type, ap_bus_attrs[i]); bus_unregister(&ap_bus_type); out: if (ap_using_interrupts()) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 5029b80132aa..472efd3a755c 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -350,4 +350,16 @@ int ap_parse_mask_str(const char *str, unsigned long *bitmap, int bits, struct mutex *lock); +/* + * Interface to wait for the AP bus to have done one initial ap bus + * scan and all detected APQNs have been bound to device drivers. + * If these both conditions are not fulfilled, this function blocks + * on a condition with wait_for_completion_killable_timeout(). + * If these both conditions are fulfilled (before the timeout hits) + * the return value is 0. If the timeout (in jiffies) hits instead + * -ETIME is returned. On failures negative return values are + * returned to the caller. + */ +int ap_wait_init_apqn_bindings_complete(unsigned long timeout); + #endif /* _AP_BUS_H_ */ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index dd84995049b9..cf23ce1b1146 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -150,6 +150,8 @@ static int pkey_skey2pkey(const u8 *key, struct pkey_protkey *pkey) u16 cardnr, domain; struct keytoken_header *hdr = (struct keytoken_header *)key; + zcrypt_wait_api_operational(); + /* * The cca_xxx2protkey call may fail when a card has been * addressed where the master key was changed after last fetch @@ -197,6 +199,8 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen, u16 card, dom; u32 nr_apqns, *apqns = NULL; + zcrypt_wait_api_operational(); + /* build a list of apqns suitable for ep11 keys with cpacf support */ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, ZCRYPT_CEX7, EP11_API_V, NULL); @@ -230,6 +234,8 @@ static int pkey_ep11key2pkey(const u8 *key, struct pkey_protkey *pkey) u32 nr_apqns, *apqns = NULL; struct ep11keyblob *kb = (struct ep11keyblob *) key; + zcrypt_wait_api_operational(); + /* build a list of apqns suitable for this key */ rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, ZCRYPT_CEX7, EP11_API_V, kb->wkvp); @@ -436,6 +442,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen, if (rc == 0) break; /* PCKMO failed, so try the CCA secure key way */ + zcrypt_wait_api_operational(); rc = cca_clr2seckey(0xFFFF, 0xFFFF, t->keytype, ckey.clrkey, tmpbuf); if (rc == 0) @@ -625,6 +632,8 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns, return -EINVAL; } + zcrypt_wait_api_operational(); + /* simple try all apqns from the list */ for (i = 0, rc = -ENODEV; i < nr_apqns; i++) { card = apqns[i].card; @@ -801,6 +810,8 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, return -EINVAL; } + zcrypt_wait_api_operational(); + /* simple try all apqns from the list */ for (i = 0, rc = -ENODEV; i < nr_apqns; i++) { card = apqns[i].card; @@ -838,6 +849,8 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, if (keylen < sizeof(struct keytoken_header) || flags == 0) return -EINVAL; + zcrypt_wait_api_operational(); + if (hdr->type == TOKTYPE_NON_CCA && (hdr->version == TOKVER_EP11_AES_WITH_HEADER || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) @@ -941,6 +954,8 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, int rc; u32 _nr_apqns, *_apqns = NULL; + zcrypt_wait_api_operational(); + if (ktype == PKEY_TYPE_CCA_DATA || ktype == PKEY_TYPE_CCA_CIPHER) { u64 cur_mkvp = 0, old_mkvp = 0; int minhwtype = ZCRYPT_CEX3C; diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index f60f9fb25214..10206e4498d0 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1992,6 +1992,72 @@ void zcrypt_rng_device_remove(void) mutex_unlock(&zcrypt_rng_mutex); } +/* + * Wait until the zcrypt api is operational. + * The AP bus scan and the binding of ap devices to device drivers is + * an asynchronous job. This function waits until these initial jobs + * are done and so the zcrypt api should be ready to serve crypto + * requests - if there are resources available. The function uses an + * internal timeout of 60s. The very first caller will either wait for + * ap bus bindings complete or the timeout happens. This state will be + * remembered for further callers which will only be blocked until a + * decision is made (timeout or bindings complete). + * On timeout -ETIME is returned, on success the return value is 0. + */ +int zcrypt_wait_api_operational(void) +{ + static DEFINE_MUTEX(zcrypt_wait_api_lock); + static int zcrypt_wait_api_state; + int rc; + + rc = mutex_lock_interruptible(&zcrypt_wait_api_lock); + if (rc) + return rc; + + switch (zcrypt_wait_api_state) { + case 0: + /* initial state, invoke wait for the ap bus complete */ + rc = ap_wait_init_apqn_bindings_complete( + msecs_to_jiffies(60 * 1000)); + switch (rc) { + case 0: + /* ap bus bindings are complete */ + zcrypt_wait_api_state = 1; + break; + case -EINTR: + /* interrupted, go back to caller */ + break; + case -ETIME: + /* timeout */ + ZCRYPT_DBF(DBF_WARN, + "%s ap_wait_init_apqn_bindings_complete() returned with ETIME\n", + __func__); + zcrypt_wait_api_state = -ETIME; + break; + default: + /* other failure */ + ZCRYPT_DBF(DBF_DEBUG, + "%s ap_wait_init_apqn_bindings_complete() failure rc=%d\n", + __func__, rc); + break; + } + break; + case 1: + /* a previous caller already found ap bus bindings complete */ + rc = 0; + break; + default: + /* a previous caller had timeout or other failure */ + rc = zcrypt_wait_api_state; + break; + } + + mutex_unlock(&zcrypt_wait_api_lock); + + return rc; +} +EXPORT_SYMBOL(zcrypt_wait_api_operational); + int __init zcrypt_debug_init(void) { zcrypt_dbf_info = debug_register("zcrypt", 1, 1, diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 51c0b8bdef50..16219efb2f61 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -162,6 +162,8 @@ void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus); int zcrypt_device_status_ext(int card, int queue, struct zcrypt_device_status_ext *devstatus); +int zcrypt_wait_api_operational(void); + static inline unsigned long z_copy_from_user(bool userspace, void *to, const void __user *from, diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index d9fd0a41da64..bdf2cc1ea713 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -194,23 +194,21 @@ static int zfcp_ccw_set_online(struct ccw_device *cdev) } /** - * zfcp_ccw_offline_sync - shut down adapter and wait for it to finish + * zfcp_ccw_set_offline - set_offline function of zfcp driver * @cdev: pointer to belonging ccw device - * @set: Status flags to set. - * @tag: s390dbf trace record tag * * This function gets called by the common i/o layer and sets an adapter * into state offline. */ -static int zfcp_ccw_offline_sync(struct ccw_device *cdev, int set, char *tag) +static int zfcp_ccw_set_offline(struct ccw_device *cdev) { struct zfcp_adapter *adapter = zfcp_ccw_adapter_by_cdev(cdev); if (!adapter) return 0; - zfcp_erp_set_adapter_status(adapter, set); - zfcp_erp_adapter_shutdown(adapter, 0, tag); + zfcp_erp_set_adapter_status(adapter, 0); + zfcp_erp_adapter_shutdown(adapter, 0, "ccsoff1"); zfcp_erp_wait(adapter); zfcp_ccw_adapter_put(adapter); @@ -218,18 +216,6 @@ static int zfcp_ccw_offline_sync(struct ccw_device *cdev, int set, char *tag) } /** - * zfcp_ccw_set_offline - set_offline function of zfcp driver - * @cdev: pointer to belonging ccw device - * - * This function gets called by the common i/o layer and sets an adapter - * into state offline. - */ -static int zfcp_ccw_set_offline(struct ccw_device *cdev) -{ - return zfcp_ccw_offline_sync(cdev, 0, "ccsoff1"); -} - -/** * zfcp_ccw_notify - ccw notify function * @cdev: pointer to belonging ccw device * @event: indicates if adapter was detached or attached @@ -246,11 +232,6 @@ static int zfcp_ccw_notify(struct ccw_device *cdev, int event) switch (event) { case CIO_GONE: - if (atomic_read(&adapter->status) & - ZFCP_STATUS_ADAPTER_SUSPENDED) { /* notification ignore */ - zfcp_dbf_hba_basic("ccnigo1", adapter); - break; - } dev_warn(&cdev->dev, "The FCP device has been detached\n"); zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti1"); break; @@ -260,11 +241,6 @@ static int zfcp_ccw_notify(struct ccw_device *cdev, int event) zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti2"); break; case CIO_OPER: - if (atomic_read(&adapter->status) & - ZFCP_STATUS_ADAPTER_SUSPENDED) { /* notification ignore */ - zfcp_dbf_hba_basic("ccniop1", adapter); - break; - } dev_info(&cdev->dev, "The FCP device is operational again\n"); zfcp_erp_set_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING); @@ -300,28 +276,6 @@ static void zfcp_ccw_shutdown(struct ccw_device *cdev) zfcp_ccw_adapter_put(adapter); } -static int zfcp_ccw_suspend(struct ccw_device *cdev) -{ - zfcp_ccw_offline_sync(cdev, ZFCP_STATUS_ADAPTER_SUSPENDED, "ccsusp1"); - return 0; -} - -static int zfcp_ccw_thaw(struct ccw_device *cdev) -{ - /* trace records for thaw and final shutdown during suspend - can only be found in system dump until the end of suspend - but not after resume because it's based on the memory image - right after the very first suspend (freeze) callback */ - zfcp_ccw_activate(cdev, 0, "ccthaw1"); - return 0; -} - -static int zfcp_ccw_resume(struct ccw_device *cdev) -{ - zfcp_ccw_activate(cdev, ZFCP_STATUS_ADAPTER_SUSPENDED, "ccresu1"); - return 0; -} - struct ccw_driver zfcp_ccw_driver = { .driver = { .owner = THIS_MODULE, @@ -334,7 +288,4 @@ struct ccw_driver zfcp_ccw_driver = { .set_offline = zfcp_ccw_set_offline, .notify = zfcp_ccw_notify, .shutdown = zfcp_ccw_shutdown, - .freeze = zfcp_ccw_suspend, - .thaw = zfcp_ccw_thaw, - .restore = zfcp_ccw_resume, }; diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 673e42defb91..ca473b368905 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -263,31 +263,6 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount, spin_unlock_irqrestore(&dbf->pay_lock, flags); } -/** - * zfcp_dbf_hba_basic - trace event for basic adapter events - * @tag: identifier for event - * @adapter: pointer to struct zfcp_adapter - */ -void zfcp_dbf_hba_basic(char *tag, struct zfcp_adapter *adapter) -{ - struct zfcp_dbf *dbf = adapter->dbf; - struct zfcp_dbf_hba *rec = &dbf->hba_buf; - static int const level = 1; - unsigned long flags; - - if (unlikely(!debug_level_enabled(dbf->hba, level))) - return; - - spin_lock_irqsave(&dbf->hba_lock, flags); - memset(rec, 0, sizeof(*rec)); - - memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN); - rec->id = ZFCP_DBF_HBA_BASIC; - - debug_event(dbf->hba, level, rec, sizeof(*rec)); - spin_unlock_irqrestore(&dbf->hba_lock, flags); -} - static void zfcp_dbf_set_common(struct zfcp_dbf_rec *rec, struct zfcp_adapter *adapter, struct zfcp_port *port, diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index da8a5ceb615c..5069b555c6c1 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -70,7 +70,6 @@ #define ZFCP_STATUS_ADAPTER_SIOSL_ISSUED 0x00000004 #define ZFCP_STATUS_ADAPTER_XCONFIG_OK 0x00000008 #define ZFCP_STATUS_ADAPTER_HOST_CON_INIT 0x00000010 -#define ZFCP_STATUS_ADAPTER_SUSPENDED 0x00000040 #define ZFCP_STATUS_ADAPTER_ERP_PENDING 0x00000100 #define ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED 0x00000200 #define ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED 0x00000400 diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 3ef5d74331c3..fdac6350c579 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -49,7 +49,6 @@ extern void zfcp_dbf_hba_fsf_fces(char *tag, const struct zfcp_fsf_req *req, u32 fc_security_new); extern void zfcp_dbf_hba_bit_err(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_def_err(struct zfcp_adapter *, u64, u16, void **); -extern void zfcp_dbf_hba_basic(char *, struct zfcp_adapter *); extern void zfcp_dbf_san_req(char *, struct zfcp_fsf_req *, u32); extern void zfcp_dbf_san_res(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_san_in_els(char *, struct zfcp_fsf_req *); diff --git a/init/Kconfig b/init/Kconfig index 0872a5a2e759..b77c60f8b963 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -114,7 +114,7 @@ config INIT_ENV_ARG_LIMIT config COMPILE_TEST bool "Compile also drivers which will not load" - depends on !UML + depends on !UML && !S390 default n help Some drivers can be compiled on a different platform than they are diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 542a9c18398e..8fb097057fec 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -136,15 +136,6 @@ config KASAN_STACK default 1 if KASAN_STACK_ENABLE || CC_IS_GCC default 0 -config KASAN_S390_4_LEVEL_PAGING - bool "KASan: use 4-level paging" - depends on S390 - help - Compiling the kernel with KASan disables automatic 3-level vs - 4-level paging selection. 3-level paging is used by default (up - to 3TB of RAM with KASan enabled). This options allows to force - 4-level paging instead. - config KASAN_SW_TAGS_IDENTIFY bool "Enable memory corruption identification" depends on KASAN_SW_TAGS diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 3f77a5d695c1..56c801502b9a 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -254,9 +254,6 @@ if ($arch eq "x86_64") { if ($cc =~ /-DCC_USING_HOTPATCH/) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; - } else { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; - $mcount_adjust = -14; } $alignment = 8; $type = ".quad"; |