From b064904c509decf9e038f29f903a2304851a913b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 11:16:51 +0200 Subject: s390/checksum: provide csum_ipv6_magic() This implementation needs only ~30% of the time to calculate the checksum compared to the generic variant. In addition the compiler also generates only ~30% of the instructions compared to the generic variant (on z14, compiled with march=z196). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 6d01c96aeb5c..f4b42db5d007 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -13,6 +13,7 @@ #define _S390_CHECKSUM_H #include +#include /* * computes the checksum of a memory block at buff, length len, @@ -115,6 +116,25 @@ static inline __sum16 ip_compute_csum(const void *buff, int len) return csum_fold(csum_partial(buff, len, 0)); } -#endif /* _S390_CHECKSUM_H */ - +#define _HAVE_ARCH_IPV6_CSUM +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + __u64 sum = (__force __u64)csum; + + sum += (__force __u32)saddr->s6_addr32[0]; + sum += (__force __u32)saddr->s6_addr32[1]; + sum += (__force __u32)saddr->s6_addr32[2]; + sum += (__force __u32)saddr->s6_addr32[3]; + sum += (__force __u32)daddr->s6_addr32[0]; + sum += (__force __u32)daddr->s6_addr32[1]; + sum += (__force __u32)daddr->s6_addr32[2]; + sum += (__force __u32)daddr->s6_addr32[3]; + sum += len; + sum += proto; + sum += (sum >> 32) | (sum << 32); + return csum_fold((__force __wsum)(sum >> 32)); +} +#endif /* _S390_CHECKSUM_H */ -- cgit v1.2.3 From bb4644b14accb05663847277002e3efa9fa3cd3b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 15:30:30 +0200 Subject: s390/checksum: rewrite csum_tcpudp_nofold() Rewrite csum_tcpudp_nofold() so that the generated code will not contain branches. The old implementation was also optimized for machines which came with "add logical with carry" instructions, however the compiler doesn't generate them anymore. This is most likely because those instructions are slower. However with the old code the compiler generates a lot of branches, which isn't too helpful usually. Therefore rewrite the code. In a tight loop this doesn't make any difference since the branch prediction unit does its job. 
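For illustration only (not part of the patch): the same branchless carry folding can be sketched in plain userspace C. The helper name fold64 and the sample words below are made up; the folding steps mirror the new csum_ipv6_magic()/csum_tcpudp_nofold() code above together with csum_fold().

	#include <stdint.h>
	#include <stdio.h>

	static uint16_t fold64(uint64_t sum)
	{
		uint32_t csum;

		/* swap the 32-bit halves and add: all carries, including the
		 * end-around carry, land in the upper half of the result */
		sum += (sum >> 32) | (sum << 32);
		csum = (uint32_t)(sum >> 32);
		/* same trick again to fold 32 bits down to 16 */
		csum += (csum >> 16) | (csum << 16);
		return (uint16_t)~(csum >> 16);
	}

	int main(void)
	{
		uint32_t words[] = { 0xc0a80001, 0xc0a80002, 0x00110014 };
		uint64_t sum = 0;
		unsigned int i;

		for (i = 0; i < sizeof(words) / sizeof(words[0]); i++)
			sum += words[i];
		printf("folded checksum: 0x%04x\n", fold64(sum));
		return 0;
	}
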
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index f4b42db5d007..961c25c5124b 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -73,25 +73,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * computes the checksum of the TCP/UDP pseudo-header * returns a 32-bit checksum */ -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { - __u32 csum = (__force __u32)sum; + __u64 csum = (__force __u64)sum; csum += (__force __u32)saddr; - if (csum < (__force __u32)saddr) - csum++; - csum += (__force __u32)daddr; - if (csum < (__force __u32)daddr) - csum++; - - csum += len + proto; - if (csum < len + proto) - csum++; - - return (__force __wsum)csum; + csum += len; + csum += proto; + csum += (csum >> 32) | (csum << 32); + return (__force __wsum)(csum >> 32); } /* -- cgit v1.2.3 From 614b4f5d0fa3f622cfcc899491d8a3e6af3d4dc5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 16:36:26 +0200 Subject: s390/checksum: make ip_fast_csum() faster Convert ip_fast_csum() so it doesn't call csum_partial(), but instead open code the checksum calculation. The problem with csum_partial() is that it makes use of the cksm instruction, which has high startup costs and therefore is only very fast if used on larger memory regions. IPv4 headers however are small in size (5-16 32-bit words). The open coded variant calculates the checksum in ~30% of the time compared to the old variant (z14, march=z196). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 961c25c5124b..8bc6bed4715b 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -66,7 +66,18 @@ static inline __sum16 csum_fold(__wsum sum) */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - return csum_fold(csum_partial(iph, ihl*4, 0)); + __u64 csum = 0; + __u32 *ptr = (u32 *)iph; + + csum += *ptr++; + csum += *ptr++; + csum += *ptr++; + csum += *ptr++; + ihl -= 4; + while (ihl--) + csum += *ptr++; + csum += (csum >> 32) | (csum << 32); + return csum_fold((__force __wsum)(csum >> 32)); } /* -- cgit v1.2.3 From 612ad0785dd5161dc311b10bd26038553a378386 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 16:37:33 +0200 Subject: s390/checksum: have consistent calculations Use "|" instead of "+" within csum_fold() for consistency reasons, like in the rest of the file. 
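The two forms are interchangeable here because the shifted operands occupy disjoint bit positions: for a 32-bit csum, (csum << 16) has only its upper half set and (csum >> 16) only its lower half, so OR and ADD yield the same value. A quick standalone check, illustrative only and not part of the patch:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t samples[] = { 0x0, 0x1, 0xffff, 0x12345678, 0xffffffff };
		unsigned int i;

		for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
			uint32_t v = samples[i];

			/* no bit is set in both operands, so | equals + */
			assert(((v >> 16) | (v << 16)) == ((v >> 16) + (v << 16)));
		}
		return 0;
	}
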
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 8bc6bed4715b..de97ae691060 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -54,7 +54,7 @@ static inline __sum16 csum_fold(__wsum sum) { u32 csum = (__force u32) sum; - csum += (csum >> 16) + (csum << 16); + csum += (csum >> 16) | (csum << 16); csum >>= 16; return (__force __sum16) ~csum; } -- cgit v1.2.3 From 98ad45fb58c14ebef6da27f91905e5b8fcff8686 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 16:41:27 +0200 Subject: s390/checksum: coding style changes Add some coding style changes which hopefully make the code look a bit less odd. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 50 ++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index de97ae691060..c401a5fd3ad2 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -16,19 +16,18 @@ #include /* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic + * Returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic. * - * this function must be called with even lengths, except - * for the last fragment, which may be odd + * This function must be called with even lengths, except + * for the last fragment, which may be odd. * - * it's best to have buff aligned on a 32-bit boundary + * It's best to have buff aligned on a 32-bit boundary. */ -static inline __wsum -csum_partial(const void *buff, int len, __wsum sum) +static inline __wsum csum_partial(const void *buff, int len, __wsum sum) { register unsigned long reg2 asm("2") = (unsigned long) buff; register unsigned long reg3 asm("3") = (unsigned long) len; @@ -40,15 +39,15 @@ csum_partial(const void *buff, int len, __wsum sum) return sum; } -static inline __wsum -csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) { - memcpy(dst,src,len); + memcpy(dst, src, len); return csum_partial(dst, len, sum); } /* - * Fold a partial checksum without adding pseudo headers + * Fold a partial checksum without adding pseudo headers. */ static inline __sum16 csum_fold(__wsum sum) { @@ -60,9 +59,8 @@ static inline __sum16 csum_fold(__wsum sum) } /* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. - * + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksums on 4 octet boundaries. */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { @@ -81,8 +79,8 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } /* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 32-bit checksum + * Computes the checksum of the TCP/UDP pseudo-header. + * Returns a 32-bit checksum. 
*/ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) @@ -98,22 +96,18 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, } /* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented + * Computes the checksum of the TCP/UDP pseudo-header. + * Returns a 16-bit checksum, already complemented. */ - -static inline __sum16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } /* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c + * Used for miscellaneous IP-like checksums, mainly icmp. */ - static inline __sum16 ip_compute_csum(const void *buff, int len) { return csum_fold(csum_partial(buff, len, 0)); -- cgit v1.2.3 From 4bff8cb5450287e246d365b719148b2d9364c292 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 28 Apr 2020 09:52:23 +0200 Subject: s390: convert to GENERIC_VDSO Convert s390 to generic vDSO. There are a few special things on s390: - vDSO can be called without a stack frame - glibc did this in the past. So we need to allocate a stackframe on our own. - The former assembly code used stcke to get the TOD clock and applied time steering to it. We need to do the same in the new code. This is done in the architecture specific __arch_get_hw_counter function. The steering information is stored in an architecure specific area in the vDSO data. - CPUCLOCK_VIRT is now handled with a syscall fallback, which might be slower/less accurate than the old implementation. The getcpu() function stays as an assembly function because there is no generic implementation and the code is just a few lines. Performance number from my system do 100 mio gettimeofday() calls: Plain syscall: 8.6s Generic VDSO: 1.3s old ASM VDSO: 1s So it's a bit slower but still much faster than syscalls. 
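The measurement quoted above can be approximated with a small userspace loop. A rough sketch, assuming an ordinary glibc build where gettimeofday() is served by the vDSO; the original test harness is not part of the patch:

	#include <stdio.h>
	#include <sys/time.h>
	#include <time.h>

	int main(void)
	{
		struct timespec start, end;
		struct timeval tv;
		double elapsed;
		long i;

		clock_gettime(CLOCK_MONOTONIC, &start);
		for (i = 0; i < 100000000L; i++)
			gettimeofday(&tv, NULL);
		clock_gettime(CLOCK_MONOTONIC, &end);

		elapsed = (end.tv_sec - start.tv_sec) +
			  (end.tv_nsec - start.tv_nsec) / 1e9;
		printf("100000000 gettimeofday() calls: %.1fs\n", elapsed);
		return 0;
	}
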
Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 3 + arch/s390/include/asm/clocksource.h | 7 ++ arch/s390/include/asm/vdso.h | 25 +---- arch/s390/include/asm/vdso/clocksource.h | 8 ++ arch/s390/include/asm/vdso/data.h | 13 +++ arch/s390/include/asm/vdso/gettimeofday.h | 71 ++++++++++++ arch/s390/include/asm/vdso/processor.h | 7 ++ arch/s390/include/asm/vdso/vdso.h | 0 arch/s390/include/asm/vdso/vsyscall.h | 26 +++++ arch/s390/kernel/asm-offsets.c | 20 ---- arch/s390/kernel/entry.S | 6 - arch/s390/kernel/setup.c | 1 - arch/s390/kernel/time.c | 66 ++--------- arch/s390/kernel/vdso.c | 29 +---- arch/s390/kernel/vdso64/Makefile | 19 +++- arch/s390/kernel/vdso64/clock_getres.S | 50 --------- arch/s390/kernel/vdso64/clock_gettime.S | 163 ---------------------------- arch/s390/kernel/vdso64/gettimeofday.S | 71 ------------ arch/s390/kernel/vdso64/vdso64_generic.c | 18 +++ arch/s390/kernel/vdso64/vdso_user_wrapper.S | 38 +++++++ 20 files changed, 221 insertions(+), 420 deletions(-) create mode 100644 arch/s390/include/asm/clocksource.h create mode 100644 arch/s390/include/asm/vdso/clocksource.h create mode 100644 arch/s390/include/asm/vdso/data.h create mode 100644 arch/s390/include/asm/vdso/gettimeofday.h create mode 100644 arch/s390/include/asm/vdso/processor.h create mode 100644 arch/s390/include/asm/vdso/vdso.h create mode 100644 arch/s390/include/asm/vdso/vsyscall.h delete mode 100644 arch/s390/kernel/vdso64/clock_getres.S delete mode 100644 arch/s390/kernel/vdso64/clock_gettime.S delete mode 100644 arch/s390/kernel/vdso64/gettimeofday.S create mode 100644 arch/s390/kernel/vdso64/vdso64_generic.c create mode 100644 arch/s390/kernel/vdso64/vdso_user_wrapper.S (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3d86e12e8e3c..8a6121f93709 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -73,6 +73,7 @@ config S390 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_VDSO_DATA select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH @@ -118,6 +119,7 @@ config S390 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT + select GENERIC_GETTIMEOFDAY select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HAVE_ALIGNED_STRUCT_PAGE if SLUB @@ -149,6 +151,7 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS + select HAVE_GENERIC_VDSO select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/s390/include/asm/clocksource.h b/arch/s390/include/asm/clocksource.h new file mode 100644 index 000000000000..03434369fce4 --- /dev/null +++ b/arch/s390/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* s390-specific clocksource additions */ + +#ifndef _ASM_S390_CLOCKSOURCE_H +#define _ASM_S390_CLOCKSOURCE_H + +#endif /* _ASM_S390_CLOCKSOURCE_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 0cd085cdeb4f..82f86b3c394b 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -2,6 +2,8 @@ #ifndef __S390_VDSO_H__ #define __S390_VDSO_H__ +#include + /* Default link addresses for the vDSOs */ #define VDSO32_LBASE 0 #define VDSO64_LBASE 0 @@ -18,30 +20,7 @@ * itself and may change without notice. 
*/ -struct vdso_data { - __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */ - __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ - __u64 xtime_clock_sec; /* Kernel time 0x10 */ - __u64 xtime_clock_nsec; /* 0x18 */ - __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */ - __u64 xtime_coarse_nsec; /* 0x28 */ - __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */ - __u64 wtom_clock_nsec; /* 0x38 */ - __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */ - __u64 wtom_coarse_nsec; /* 0x48 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */ - __u32 tz_dsttime; /* Type of dst correction 0x54 */ - __u32 ectg_available; /* ECTG instruction present 0x58 */ - __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */ - __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ - __u32 ts_dir; /* TOD steering direction 0x64 */ - __u64 ts_end; /* TOD steering end 0x68 */ - __u32 hrtimer_res; /* hrtimer resolution 0x70 */ -}; - struct vdso_per_cpu_data { - __u64 ectg_timer_base; - __u64 ectg_user_time; /* * Note: node_id and cpu_nr must be at adjacent memory locations. * VDSO userspace must read both values with a single instruction. diff --git a/arch/s390/include/asm/vdso/clocksource.h b/arch/s390/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..a93eda0ce7bb --- /dev/null +++ b/arch/s390/include/asm/vdso/clocksource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TOD + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h new file mode 100644 index 000000000000..7b3cdb4a5f48 --- /dev/null +++ b/arch/s390/include/asm/vdso/data.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __S390_ASM_VDSO_DATA_H +#define __S390_ASM_VDSO_DATA_H + +#include +#include + +struct arch_vdso_data { + __u64 tod_steering_delta; + __u64 tod_steering_end; +}; + +#endif /* __S390_ASM_VDSO_DATA_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..bf123065ad3b --- /dev/null +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_VDSO_GETTIMEOFDAY_H +#define ASM_VDSO_GETTIMEOFDAY_H + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +#include +#include +#include +#include + +#define vdso_calc_delta __arch_vdso_calc_delta +static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return (cycles - last) * mult; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +{ + const struct vdso_data *vdso = __arch_get_vdso_data(); + u64 adj, now; + + now = get_tod_clock(); + adj = vdso->arch_data.tod_steering_end - now; + if (unlikely((s64) adj > 0)) + now += (vdso->arch_data.tod_steering_delta < 0) ? 
(adj >> 15) : -(adj >> 15); + return now; +} + +static __always_inline +long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts) +{ + register unsigned long r1 __asm__("r1") = __NR_clock_gettime; + register unsigned long r2 __asm__("r2") = (unsigned long)clkid; + register void *r3 __asm__("r3") = ts; + + asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); + return r2; +} + +static __always_inline +long gettimeofday_fallback(register struct __kernel_old_timeval *tv, + register struct timezone *tz) +{ + register unsigned long r1 __asm__("r1") = __NR_gettimeofday; + register unsigned long r2 __asm__("r2") = (unsigned long)tv; + register void *r3 __asm__("r3") = tz; + + asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); + return r2; +} + +static __always_inline +long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) +{ + register unsigned long r1 __asm__("r1") = __NR_clock_getres; + register unsigned long r2 __asm__("r2") = (unsigned long)clkid; + register void *r3 __asm__("r3") = ts; + + asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); + return r2; +} + +#endif diff --git a/arch/s390/include/asm/vdso/processor.h b/arch/s390/include/asm/vdso/processor.h new file mode 100644 index 000000000000..cfcc3e117c4c --- /dev/null +++ b/arch/s390/include/asm/vdso/processor.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#define cpu_relax() barrier() + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..6c67c08cefdd --- /dev/null +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
+ */ + +static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __s390_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5d8cc1864566..ece58f2217cb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -59,26 +59,6 @@ int main(void) OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]); BLANK(); - /* timeval/timezone offsets for use by vdso */ - OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count); - OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp); - OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec); - OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec); - OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec); - OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec); - OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec); - OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec); - OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec); - OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest); - OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available); - OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult); - OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); - OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); - OFFSET(__VDSO_TS_END, vdso_data, ts_end); - OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); - OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); - OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); /* constants used by the vdso */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 23edf196d3dc..86235919c2d1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -435,10 +435,8 @@ ENTRY(system_call) jz .Lsysc_skip_fpu brasl %r14,load_fpu_regs .Lsysc_skip_fpu: - lg %r14,__LC_VDSO_PER_CPU mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE @@ -797,13 +795,11 @@ ENTRY(io_int_handler) TRACE_IRQS_ON 0: #endif - lg %r14,__LC_VDSO_PER_CPU mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE @@ -1213,14 +1209,12 @@ ENTRY(mcck_int_handler) brasl %r14,s390_handle_mcck TRACE_IRQS_ON .Lmcck_return: - lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? 
jno 0f BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) b __LC_RETURN_MCCK_LPSWE diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e600f6953d7c..dfa45027cb47 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -402,7 +402,6 @@ static void __init setup_lowcore_dat_off(void) memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); - vdso_alloc_boot_cpu(lc); lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 513e59d08a55..bc806e1547d6 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -84,7 +87,7 @@ void __init time_early_init(void) /* Initialize TOD steering parameters */ tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; - vdso_data->ts_end = tod_steering_end; + vdso_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -257,6 +260,7 @@ static struct clocksource clocksource_tod = { .mult = 1000, .shift = 12, .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .vdso_clock_mode = VDSO_CLOCKMODE_TOD, }; struct clocksource * __init clocksource_default_clock(void) @@ -264,56 +268,6 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timekeeper *tk) -{ - u64 nsecps; - - if (tk->tkr_mono.clock != &clocksource_tod) - return; - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_wmb(); - vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->wtom_clock_sec = - tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift; - while (vdso_data->wtom_clock_nsec >= nsecps) { - vdso_data->wtom_clock_nsec -= nsecps; - vdso_data->wtom_clock_sec++; - } - - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = - (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - vdso_data->wtom_coarse_sec = - vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_coarse_nsec = - vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec; - while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) { - vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC; - vdso_data->wtom_coarse_sec++; - } - - vdso_data->tk_mult = tk->tkr_mono.mult; - vdso_data->tk_shift = tk->tkr_mono.shift; - vdso_data->hrtimer_res = hrtimer_resolution; - smp_wmb(); - ++vdso_data->tb_update_count; -} - -extern struct timezone sys_tz; - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} - /* * Initialize the TOD clock and the CPU timer of * the boot cpu. @@ -431,7 +385,6 @@ static void clock_sync_global(unsigned long long delta) /* Epoch overflow */ tod_clock_base[0]++; /* Adjust TOD steering parameters. 
*/ - vdso_data->tb_update_count++; now = get_tod_clock(); adj = tod_steering_end - now; if (unlikely((s64) adj >= 0)) @@ -443,9 +396,8 @@ static void clock_sync_global(unsigned long long delta) panic("TOD clock sync offset %lli is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1; - vdso_data->ts_end = tod_steering_end; - vdso_data->tb_update_count++; + vdso_data->arch_data.tod_steering_end = tod_steering_end; + /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) lpar_offset = qto.tod_epoch_difference; @@ -586,7 +538,7 @@ void stp_queue_work(void) static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; - unsigned long long clock_delta; + unsigned long long clock_delta, flags; static int first; int rc; @@ -599,6 +551,7 @@ static int stp_sync_clock(void *data) if (stp_info.todoff[0] || stp_info.todoff[1] || stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { + flags = vdso_update_begin(); rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { @@ -609,6 +562,7 @@ static int stp_sync_clock(void *data) if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } + vdso_update_end(flags); } sync->in_sync = rc ? -EAGAIN : 1; xchg(&first, 0); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index c4baefaa6e34..f9da5b149141 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -96,35 +98,12 @@ static union { struct vdso_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; - -/* - * Setup vdso data page. - */ -static void __init vdso_init_data(struct vdso_data *vd) -{ - vd->ectg_available = test_facility(31); -} - +struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; /* * Allocate/free per cpu vdso data. */ #define SEGMENT_ORDER 2 -/* - * The initial vdso_data structure for the boot CPU. Eventually - * it is replaced with a properly allocated structure in vdso_init. - * This is necessary because a valid S390_lowcore.vdso_per_cpu_data - * pointer is required to be able to return from an interrupt or - * program check. See the exit paths in entry.S. 
- */ -struct vdso_data boot_vdso_data __initdata; - -void __init vdso_alloc_boot_cpu(struct lowcore *lowcore) -{ - lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data; -} - int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; @@ -246,8 +225,6 @@ static int __init vdso_init(void) { int i; - vdso_init_data(vdso_data); - /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 4a66a1cb919b..f1c0570780d1 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,14 +1,20 @@ # SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now +# List of files in the vdso KCOV_INSTRUMENT := n +ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE +ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT -obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o +include $(srctree)/lib/vdso/Makefile +obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o +obj-cvdso64 = vdso64_generic.o +CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) # Build rules -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) KBUILD_AFLAGS += -DBUILD_VDSO KBUILD_CFLAGS += -DBUILD_VDSO @@ -37,7 +43,7 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE $(call if_changed,ld) # strip rule for the .so file @@ -49,9 +55,14 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) +$(obj-cvdso64): %.o: %.c FORCE + $(call if_changed_dep,vdso64cc) + # actual build commands quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso64cc = VDSO64C $@ + cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $< # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S deleted file mode 100644 index 0c79caa32b59..000000000000 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_getres() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 
2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_clock_getres - .type __kernel_clock_getres,@function -__kernel_clock_getres: - CFI_STARTPROC - larl %r1,3f - lg %r0,0(%r1) - cghi %r2,__CLOCK_REALTIME_COARSE - je 0f - cghi %r2,__CLOCK_MONOTONIC_COARSE - je 0f - larl %r1,_vdso_data - llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) - cghi %r2,__CLOCK_REALTIME - je 0f - cghi %r2,__CLOCK_MONOTONIC - je 0f - cghi %r2,__CLOCK_THREAD_CPUTIME_ID - je 0f - cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ - jne 2f - larl %r5,_vdso_data - icm %r0,15,__LC_ECTG_OK(%r5) - jz 2f -0: ltgr %r3,%r3 - jz 1f /* res == NULL */ - xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ - stg %r0,8(%r3) /* store tp->tv_usec */ -1: lghi %r2,0 - br %r14 -2: lghi %r1,__NR_clock_getres /* fallback to svc */ - svc 0 - br %r14 - CFI_ENDPROC -3: .quad __CLOCK_COARSE_RES - .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S deleted file mode 100644 index 9d2ee79b90f2..000000000000 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ /dev/null @@ -1,163 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_gettime() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_clock_gettime - .type __kernel_clock_gettime,@function -__kernel_clock_gettime: - CFI_STARTPROC - aghi %r15,-16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - larl %r5,_vdso_data - cghi %r2,__CLOCK_REALTIME_COARSE - je 4f - cghi %r2,__CLOCK_REALTIME - je 5f - cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ - je 9f - cghi %r2,__CLOCK_MONOTONIC_COARSE - je 3f - cghi %r2,__CLOCK_MONOTONIC - jne 12f - - /* CLOCK_MONOTONIC */ -0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 0b - stcke 0(%r15) /* Store TOD clock */ - lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - lg %r0,__VDSO_WTOM_SEC(%r5) - lg %r1,1(%r15) - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ - alg %r1,__VDSO_WTOM_NSEC(%r5) - srlg %r1,%r1,0(%r2) /* >> tk->shift */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 0b - larl %r5,13f -1: clg %r1,0(%r5) - jl 2f - slg %r1,0(%r5) - aghi %r0,1 - j 1b -2: stg %r0,0(%r3) /* store tp->tv_sec */ - stg %r1,8(%r3) /* store tp->tv_nsec */ - lghi %r2,0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* CLOCK_MONOTONIC_COARSE */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 3b - lg %r0,__VDSO_WTOM_CRS_SEC(%r5) - lg %r1,__VDSO_WTOM_CRS_NSEC(%r5) - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 3b - j 2b - - /* CLOCK_REALTIME_COARSE */ -4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? 
loop */ - jnz 4b - lg %r0,__VDSO_XTIME_CRS_SEC(%r5) - lg %r1,__VDSO_XTIME_CRS_NSEC(%r5) - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 4b - j 7f - - /* CLOCK_REALTIME */ -5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 5b - stcke 0(%r15) /* Store TOD clock */ - lg %r1,1(%r15) - lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */ - slgr %r0,%r1 /* now - ts_steering_end */ - ltgr %r0,%r0 /* past end of steering ? */ - jm 17f - srlg %r0,%r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 18f - lcgr %r0,%r0 /* negative TOD offset */ -18: algr %r1,%r0 /* add steering offset */ -17: lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ - srlg %r1,%r1,0(%r2) /* >> tk->shift */ - lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 5b - larl %r5,13f -6: clg %r1,0(%r5) - jl 7f - slg %r1,0(%r5) - aghi %r0,1 - j 6b -7: stg %r0,0(%r3) /* store tp->tv_sec */ - stg %r1,8(%r3) /* store tp->tv_nsec */ - lghi %r2,0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* CPUCLOCK_VIRT for this thread */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -9: lghi %r4,0 - icm %r0,15,__VDSO_ECTG_OK(%r5) - jz 12f - sacf 256 /* Magic ectg instruction */ - .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 - sacf 0 - algr %r1,%r0 /* r1 = cputime as TOD value */ - mghi %r1,1000 /* convert to nanoseconds */ - srlg %r1,%r1,12 /* r1 = cputime in nanosec */ - lgr %r4,%r1 - larl %r5,13f - srlg %r1,%r1,9 /* divide by 1000000000 */ - mlg %r0,8(%r5) - srlg %r0,%r0,11 /* r0 = tv_sec */ - stg %r0,0(%r3) - msg %r0,0(%r5) /* calculate tv_nsec */ - slgr %r4,%r0 /* r4 = tv_nsec */ - stg %r4,8(%r3) - lghi %r2,0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* Fallback to system call */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -12: lghi %r1,__NR_clock_gettime - svc 0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC - -13: .quad 1000000000 -14: .quad 19342813113834067 - .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S deleted file mode 100644 index aebe10dc7c99..000000000000 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_gettimeofday - .type __kernel_gettimeofday,@function -__kernel_gettimeofday: - CFI_STARTPROC - aghi %r15,-16 - CFI_ADJUST_CFA_OFFSET 16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - larl %r5,_vdso_data -0: ltgr %r3,%r3 /* check if tz is NULL */ - je 1f - mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) -1: ltgr %r2,%r2 /* check if tv is NULL */ - je 4f - lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? 
loop */ - jnz 0b - stcke 0(%r15) /* Store TOD clock */ - lg %r1,1(%r15) - lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */ - slgr %r0,%r1 /* now - ts_steering_end */ - ltgr %r0,%r0 /* past end of steering ? */ - jm 6f - srlg %r0,%r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 7f - lcgr %r0,%r0 /* negative TOD offset */ -7: algr %r1,%r0 /* add steering offset */ -6: sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ - lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 0b - lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srlg %r1,%r1,0(%r5) /* >> tk->shift */ - larl %r5,5f -2: clg %r1,0(%r5) - jl 3f - slg %r1,0(%r5) - aghi %r0,1 - j 2b -3: stg %r0,0(%r2) /* store tv->tv_sec */ - slgr %r0,%r0 /* tv_nsec -> tv_usec */ - ml %r0,8(%r5) - srlg %r0,%r0,6 - stg %r0,8(%r2) /* store tv->tv_usec */ -4: lghi %r2,0 - aghi %r15,16 - CFI_ADJUST_CFA_OFFSET -16 - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC -5: .quad 1000000000 - .long 274877907 - .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c new file mode 100644 index 000000000000..a8cef7e4d137 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_generic.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/vdso/gettimeofday.c" + +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_getres(clock, ts); +} diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S new file mode 100644 index 000000000000..a775d7e52872 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include + +#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8) + +/* + * Older glibc version called vdso without allocating a stackframe. This wrapper + * is just used to allocate a stackframe. See + * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e + * for details. + */ +.macro vdso_func func + .globl __kernel_\func + .type __kernel_\func,@function + .align 8 +__kernel_\func: + CFI_STARTPROC + aghi %r15,-WRAPPER_FRAME_SIZE + CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE) + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD + stg %r14,STACK_FRAME_OVERHEAD(%r15) + brasl %r14,__s390_vdso_\func + lg %r14,STACK_FRAME_OVERHEAD(%r15) + aghi %r15,WRAPPER_FRAME_SIZE + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD + CFI_RESTORE 15 + br %r14 + CFI_ENDPROC + .size __kernel_\func,.-__kernel_\func +.endm + +vdso_func gettimeofday +vdso_func clock_getres +vdso_func clock_gettime -- cgit v1.2.3 From 4bf3ec384edf0bf893ec7bd62ccebb635b02efd9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 7 Sep 2020 17:45:37 +0200 Subject: s390: disable branch profiling for vdso When branch profiling is enabled, if () gets annotated with code to instrument the hit/miss ratio. This doesn't work for VDSO as we can't access kernel code. Add -DDISABLE_BRANCH_PROFILING to fix this. 
Reported-by: Thomas Richter Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index f1c0570780d1..3d3303283181 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -17,7 +17,7 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) KBUILD_AFLAGS_64 += -m64 -s -- cgit v1.2.3 From 4d4a3caaf36246520d61c17a9fd86ce3893f6595 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 19 Feb 2020 11:15:30 +0100 Subject: s390/qdio: clean up QDR setup __qdio_allocate_fill_qdr() is meant to set up one specific queue descriptor in the QDR. But for this simple task, it gets passed a bunch of global structs and offsets - and then navigates through the structs to find its actual operands. Clean up all the complicated pointer chasing & index calculation, and just pass a descriptor and its associated queue struct. While at it also add some virt_to_phys() translations, to clarify that addresses in the QDR are meant to be absolute. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 8 ++++---- drivers/s390/cio/qdio_setup.c | 33 +++++++++++++-------------------- 2 files changed, 17 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index e69dbf438f99..60012fc11bac 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -26,9 +26,9 @@ /** * struct qdesfmt0 - queue descriptor, format 0 - * @sliba: storage list information block address - * @sla: storage list address - * @slsba: storage list state block address + * @sliba: absolute address of storage list information block + * @sla: absolute address of storage list + * @slsba: absolute address of storage list state block * @akey: access key for SLIB * @bkey: access key for SL * @ckey: access key for SBALs @@ -56,7 +56,7 @@ struct qdesfmt0 { * @oqdcnt: output queue descriptor count * @iqdsz: input queue descriptor size * @oqdsz: output queue descriptor size - * @qiba: queue information block address + * @qiba: absolute address of queue information block * @qkey: queue information block key * @qdf0: queue descriptions */ diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 2c5cc6ec668e..42e1c0949309 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -403,28 +403,22 @@ void qdio_free_async_data(struct qdio_irq *irq_ptr) } } -static void __qdio_allocate_fill_qdr(struct qdio_irq *irq_ptr, - struct qdio_q **irq_ptr_qs, - int i, int nr) +static void qdio_fill_qdr_desc(struct qdesfmt0 *desc, struct qdio_q *queue) { - irq_ptr->qdr->qdf0[i + nr].sliba = - (unsigned long)irq_ptr_qs[i]->slib; - - irq_ptr->qdr->qdf0[i + nr].sla = - (unsigned long)irq_ptr_qs[i]->sl; - - irq_ptr->qdr->qdf0[i + nr].slsba = - (unsigned long)&irq_ptr_qs[i]->slsb.val[0]; - - irq_ptr->qdr->qdf0[i + nr].akey = PAGE_DEFAULT_KEY >> 4; - irq_ptr->qdr->qdf0[i + nr].bkey = PAGE_DEFAULT_KEY >> 4; - irq_ptr->qdr->qdf0[i + nr].ckey = PAGE_DEFAULT_KEY >> 4; - irq_ptr->qdr->qdf0[i + nr].dkey = PAGE_DEFAULT_KEY >> 4; + desc->sliba = 
virt_to_phys(queue->slib); + desc->sla = virt_to_phys(queue->sl); + desc->slsba = virt_to_phys(&queue->slsb); + + desc->akey = PAGE_DEFAULT_KEY >> 4; + desc->bkey = PAGE_DEFAULT_KEY >> 4; + desc->ckey = PAGE_DEFAULT_KEY >> 4; + desc->dkey = PAGE_DEFAULT_KEY >> 4; } static void setup_qdr(struct qdio_irq *irq_ptr, struct qdio_initialize *qdio_init) { + struct qdesfmt0 *desc = &irq_ptr->qdr->qdf0[0]; int i; irq_ptr->qdr->qfmt = qdio_init->q_format; @@ -433,15 +427,14 @@ static void setup_qdr(struct qdio_irq *irq_ptr, irq_ptr->qdr->oqdcnt = qdio_init->no_output_qs; irq_ptr->qdr->iqdsz = sizeof(struct qdesfmt0) / 4; /* size in words */ irq_ptr->qdr->oqdsz = sizeof(struct qdesfmt0) / 4; - irq_ptr->qdr->qiba = (unsigned long)&irq_ptr->qib; + irq_ptr->qdr->qiba = virt_to_phys(&irq_ptr->qib); irq_ptr->qdr->qkey = PAGE_DEFAULT_KEY >> 4; for (i = 0; i < qdio_init->no_input_qs; i++) - __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->input_qs, i, 0); + qdio_fill_qdr_desc(desc++, irq_ptr->input_qs[i]); for (i = 0; i < qdio_init->no_output_qs; i++) - __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->output_qs, i, - qdio_init->no_input_qs); + qdio_fill_qdr_desc(desc++, irq_ptr->output_qs[i]); } static void setup_qib(struct qdio_irq *irq_ptr, -- cgit v1.2.3 From b02002cc4c0f8a2340d07690f58cae0c04ba2325 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 13 Jul 2020 14:12:49 +0200 Subject: s390/pci: Implement ioremap_wc/prot() with MIO With our current support for the new MIO PCI instructions, write combining/write back MMIO memory can be obtained via the pci_iomap_wc() and pci_iomap_wc_range() functions. This is achieved by using the write back address for a specific bar as provided in clp_store_query_pci_fn() These functions are however not widely used and instead drivers often rely on ioremap_wc() and ioremap_prot(), which on other platforms enable write combining using a PTE flag set through the pgrprot value. While we do not have a write combining flag in the low order flag bits of the PTE like x86_64 does, with MIO support, there is a write back bit in the physical address (bit 1 on z15) and thus also the PTE. Which bit is used to toggle write back and whether it is available at all, is however not fixed in the architecture. Instead we get this information from the CLP Store Logical Processor Characteristics for PCI command. When the write back bit is not provided we fall back to the existing behavior. 
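As a hedged illustration of the driver-side effect (not taken from this patch; the device, BAR number and helper name are placeholders): with the change below, an ordinary ioremap_wc() of a PCI BAR picks up the write-back bit through pgprot_writecombine(), and falls back to the existing behavior when no such bit is reported.

	#include <linux/pci.h>
	#include <linux/io.h>

	/* Map BAR 2 of a device write-combined; purely illustrative. */
	static void __iomem *example_map_bar2_wc(struct pci_dev *pdev)
	{
		return ioremap_wc(pci_resource_start(pdev, 2),
				  pci_resource_len(pdev, 2));
	}
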
Signed-off-by: Niklas Schnelle Reviewed-by: Pierre Morel Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- .../features/vm/ioremap_prot/arch-support.txt | 2 +- arch/s390/include/asm/clp.h | 3 ++ arch/s390/include/asm/io.h | 8 +++++ arch/s390/include/asm/pci.h | 1 + arch/s390/include/asm/pci_clp.h | 19 ++++++++++++ arch/s390/include/asm/pgtable.h | 9 +++++- arch/s390/include/asm/setup.h | 3 ++ arch/s390/kernel/setup.c | 6 ++++ arch/s390/mm/pgtable.c | 20 ++++++++++++ arch/s390/pci/pci.c | 30 ++++++++++++++++-- arch/s390/pci/pci_clp.c | 36 +++++++++++++++++++++- 11 files changed, 132 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index 1cb7406cd858..b5fb37c28cc6 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -24,7 +24,7 @@ | parisc: | TODO | | powerpc: | ok | | riscv: | TODO | - | s390: | TODO | + | s390: | ok | | sh: | ok | | sparc: | TODO | | um: | TODO | diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h index 3925b0f085b7..10919eeb7533 100644 --- a/arch/s390/include/asm/clp.h +++ b/arch/s390/include/asm/clp.h @@ -5,6 +5,9 @@ /* CLP common request & response block size */ #define CLP_BLK_SIZE PAGE_SIZE +/* Call Logical Processor - Command Code */ +#define CLP_SLPC 0x0001 + #define CLP_LPS_BASE 0 #define CLP_LPS_PCI 2 diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index da014e4f8113..28664ee0abc1 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -12,6 +12,7 @@ #include #include +#include #include #define xlate_dev_mem_ptr xlate_dev_mem_ptr @@ -26,7 +27,10 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot); void __iomem *ioremap(phys_addr_t addr, size_t size); +void __iomem *ioremap_wc(phys_addr_t addr, size_t size); +void __iomem *ioremap_wt(phys_addr_t addr, size_t size); void iounmap(volatile void __iomem *addr); static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) @@ -52,6 +56,10 @@ static inline void ioport_unmap(void __iomem *p) #define pci_iomap_wc pci_iomap_wc #define pci_iomap_wc_range pci_iomap_wc_range +#define ioremap ioremap +#define ioremap_wt ioremap_wt +#define ioremap_wc ioremap_wc + #define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count) #define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count) #define memset_io(dst, val, count) zpci_memset_io(dst, val, count) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 99b92c3e46b0..8015b9301533 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -208,6 +208,7 @@ int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); /* CLP */ +int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); int clp_rescan_pci_devices_simple(u32 *fid); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index eb51272dd2cc..1f4b666e85ee 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -7,6 +7,7 @@ /* * Call Logical Processor - Command Codes */ +#define CLP_SLPC 0x0001 #define CLP_LIST_PCI 0x0002 #define CLP_QUERY_PCI_FN 0x0003 #define CLP_QUERY_PCI_FNGRP 0x0004 @@ -51,6 +52,19 @@ struct clp_fh_list_entry { 
extern bool zpci_unique_uid; +struct clp_rsp_slpc_pci { + struct clp_rsp_hdr hdr; + u32 reserved2[4]; + u32 lpif[8]; + u32 reserved3[4]; + u32 vwb : 1; + u32 : 1; + u32 mio_wb : 6; + u32 : 24; + u32 reserved5[3]; + u32 lpic[8]; +} __packed; + /* List PCI functions request */ struct clp_req_list_pci { struct clp_req_hdr hdr; @@ -172,6 +186,11 @@ struct clp_rsp_set_pci { } __packed; /* Combined request/response block structures used by clp insn */ +struct clp_req_rsp_slpc_pci { + struct clp_req_slpc request; + struct clp_rsp_slpc_pci response; +} __packed; + struct clp_req_rsp_list_pci { struct clp_req_list_pci request; struct clp_rsp_list_pci response; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7eb01a5459cd..e9244b9fb504 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1186,6 +1186,12 @@ void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); +#define pgprot_writecombine pgprot_writecombine +pgprot_t pgprot_writecombine(pgprot_t prot); + +#define pgprot_writethrough pgprot_writethrough +pgprot_t pgprot_writethrough(pgprot_t prot); + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following @@ -1209,7 +1215,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; - pte_val(__pte) = physpage + pgprot_val(pgprot); + + pte_val(__pte) = physpage | pgprot_val(pgprot); if (!MACHINE_HAS_NX) pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 534f212753d6..7b104f156e34 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -94,6 +94,9 @@ extern unsigned long vmalloc_size; extern unsigned long max_physmem_end; extern unsigned long __swsusp_reset_dma; +/* The Write Back bit position in the physaddr is given by the SLPC PCI */ +extern unsigned long mio_wb_bit_mask; + #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index dfa45027cb47..efd12221ecb4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -127,6 +127,12 @@ unsigned long MODULES_END; struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); +/* + * The Write Back bit position in the physaddr is given by the SLPC PCI. + * Leaving the mask zero always uses write through which is safe + */ +unsigned long mio_wb_bit_mask __ro_after_init; + /* * This is set up by the setup-routine at boot-time * for S390 need to find out, what we have to setup diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0d25f743b270..18205f851c24 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,6 +24,26 @@ #include #include +pgprot_t pgprot_writecombine(pgprot_t prot) +{ + /* + * mio_wb_bit_mask may be set on a different CPU, but it is only set + * once at init and only read afterwards. 
+ */ + return __pgprot(pgprot_val(prot) | mio_wb_bit_mask); +} +EXPORT_SYMBOL_GPL(pgprot_writecombine); + +pgprot_t pgprot_writethrough(pgprot_t prot) +{ + /* + * mio_wb_bit_mask may be set on a different CPU, but it is only set + * once at init and only read afterwards. + */ + return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); +} +EXPORT_SYMBOL_GPL(pgprot_writethrough); + static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 4b62d6b55024..fdbb99c4569d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -226,7 +226,7 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) zpci_memcpy_toio(to, from, count); } -void __iomem *ioremap(phys_addr_t addr, size_t size) +static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) { unsigned long offset, vaddr; struct vm_struct *area; @@ -247,14 +247,37 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) return NULL; vaddr = (unsigned long) area->addr; - if (ioremap_page_range(vaddr, vaddr + size, addr, PAGE_KERNEL)) { + if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { free_vm_area(area); return NULL; } return (void __iomem *) ((unsigned long) area->addr + offset); } + +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +{ + return __ioremap(addr, size, __pgprot(prot)); +} +EXPORT_SYMBOL(ioremap_prot); + +void __iomem *ioremap(phys_addr_t addr, size_t size) +{ + return __ioremap(addr, size, PAGE_KERNEL); +} EXPORT_SYMBOL(ioremap); +void __iomem *ioremap_wc(phys_addr_t addr, size_t size) +{ + return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iomem *ioremap_wt(phys_addr_t addr, size_t size) +{ + return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL)); +} +EXPORT_SYMBOL(ioremap_wt); + void iounmap(volatile void __iomem *addr) { if (static_branch_likely(&have_mio)) @@ -784,6 +807,9 @@ static int zpci_mem_init(void) if (!zpci_iomap_bitmap) goto error_iomap_bitmap; + if (static_branch_likely(&have_mio)) + clp_setup_writeback_mio(); + return 0; error_iomap_bitmap: kfree(zpci_iomap_start); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 7e735f41a0a6..51807945ca00 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -292,6 +292,40 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) return rc; } +int clp_setup_writeback_mio(void) +{ + struct clp_req_rsp_slpc_pci *rrb; + u8 wb_bit_pos; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_SLPC; + rrb->response.hdr.len = sizeof(rrb->response); + + rc = clp_req(rrb, CLP_LPS_PCI); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + if (rrb->response.vwb) { + wb_bit_pos = rrb->response.mio_wb; + set_bit_inv(wb_bit_pos, &mio_wb_bit_mask); + zpci_dbg(3, "wb bit: %d\n", wb_bit_pos); + } else { + zpci_dbg(3, "wb bit: n.a.\n"); + } + + } else { + zpci_err("SLPC PCI:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } + clp_free_block(rrb); + return rc; +} + int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) { int rc; @@ -495,7 +529,7 @@ static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb) } } -static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +static int clp_pci_slpc(struct clp_req *req, struct 
clp_req_rsp_slpc_pci *lpcb) { unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); -- cgit v1.2.3 From 180a4c42e541cabe478c3f6e6c986a6b61744407 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 24 Aug 2020 17:32:52 +0300 Subject: s390/qdio: always use dev_name() for device name in QIB Passing a custom name from the device driver is nice - but in practice it's only zfcp who has been using this. So we might as well hard-code a naming scheme in the qdio layer, so that qeth also benefits from it. Signed-off-by: Julian Wiedmann Reviewed-by: Steffen Maier Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 2 -- drivers/s390/cio/qdio_main.c | 1 - drivers/s390/cio/qdio_setup.c | 5 ++++- drivers/s390/scsi/zfcp_qdio.c | 2 -- 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 60012fc11bac..19e84c95d1e7 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -327,7 +327,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * struct qdio_initialize - qdio initialization data * @q_format: queue format * @qdr_ac: feature flags to set - * @adapter_name: name for the adapter * @qib_param_field_format: format for qib_parm_field * @qib_param_field: pointer to 128 bytes or NULL, if no param field * @qib_rflags: rflags to set @@ -347,7 +346,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, struct qdio_initialize { unsigned char q_format; unsigned char qdr_ac; - unsigned char adapter_name[8]; unsigned int qib_param_field_format; unsigned char *qib_param_field; unsigned char qib_rflags; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 0ed8c680cae4..f9a31c7819ae 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1219,7 +1219,6 @@ static void qdio_trace_init_data(struct qdio_irq *irq, struct qdio_initialize *data) { DBF_DEV_EVENT(DBF_ERR, irq, "qfmt:%1u", data->q_format); - DBF_DEV_HEX(irq, data->adapter_name, 8, DBF_ERR); DBF_DEV_EVENT(DBF_ERR, irq, "qpff%4x", data->qib_param_field_format); DBF_DEV_HEX(irq, &data->qib_param_field, sizeof(void *), DBF_ERR); DBF_DEV_HEX(irq, &data->input_slib_elements, sizeof(void *), DBF_ERR); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 42e1c0949309..a5b2e16b7aa8 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -9,6 +9,8 @@ #include #include #include + +#include #include #include "cio.h" @@ -452,7 +454,8 @@ static void setup_qib(struct qdio_irq *irq_ptr, if (init_data->no_output_qs) irq_ptr->qib.osliba = (unsigned long)(irq_ptr->output_qs[0]->slib); - memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8); + memcpy(irq_ptr->qib.ebcnam, dev_name(&irq_ptr->cdev->dev), 8); + ASCEBC(irq_ptr->qib.ebcnam, 8); } int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index e78d65bd46b1..a8a514074084 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -380,8 +380,6 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) &qdio->adapter->status); init_data.q_format = QDIO_ZFCP_QFMT; - memcpy(init_data.adapter_name, dev_name(&cdev->dev), 8); - ASCEBC(init_data.adapter_name, 8); init_data.qib_rflags = QIB_RFLAGS_ENABLE_DATA_DIV; if (enable_multibuffer) init_data.qdr_ac |= QDR_AC_MULTI_BUFFER_ENABLE; -- cgit v1.2.3 From 9d719d39aab41d3b8b6f259574dc29a27f60e66c Mon 
Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Sep 2020 17:41:27 +0200 Subject: s390/mm,ptdump: convert to generic page table dumper Make use of generic ptdump infrastructure. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 12 -- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 2 +- arch/s390/mm/Makefile | 2 +- arch/s390/mm/dump_pagetables.c | 233 ++++++++------------------------------ 6 files changed, 51 insertions(+), 201 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8a6121f93709..85bf121211d1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,6 +120,7 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HAVE_ALIGNED_STRUCT_PAGE if SLUB diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 761fe2b0b2f6..ab48b694ade8 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -3,17 +3,5 @@ config TRACE_IRQFLAGS_SUPPORT def_bool y -config S390_PTDUMP - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select DEBUG_FS - help - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - If in doubt, say "N" - config EARLY_PRINTK def_bool y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0cf9a82326a8..f79eafb597cb 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -774,6 +774,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y +CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y CONFIG_DEBUG_OBJECTS_FREE=y @@ -819,7 +820,6 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_HIST_TRIGGERS=y -CONFIG_S390_PTDUMP=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 5df9759e8ff6..9593cc8a9efd 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -758,6 +758,7 @@ CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y +CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y CONFIG_TEST_LOCKUP=m @@ -772,7 +773,6 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_HIST_TRIGGERS=y -CONFIG_S390_PTDUMP=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 3175413186b9..8ab9daeeace3 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -8,7 +8,7 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index c2ac9b8ae612..93a29e2f13d4 100644 --- a/arch/s390/mm/dump_pagetables.c +++ 
b/arch/s390/mm/dump_pagetables.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include -#include #include #include #include @@ -42,10 +42,11 @@ static struct addr_marker address_markers[] = { }; struct pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; int level; unsigned int current_prot; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; }; @@ -63,215 +64,75 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); } -static void note_page(struct seq_file *m, struct pg_state *st, - unsigned int new_prot, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { - static const char units[] = "KMGTPE"; int width = sizeof(unsigned long) * 2; + static const char units[] = "KMGTPE"; const char *unit = units; - unsigned int prot, cur; unsigned long delta; + struct pg_state *st; + struct seq_file *m; + unsigned int prot; - /* - * If we have a "break" in the series, we need to flush the state - * that we have now. "break" is either changing perms, levels or - * address space marker. - */ - prot = new_prot; - cur = st->current_prot; - - if (!st->level) { - /* First entry */ - st->current_prot = new_prot; - st->level = level; - st->marker = address_markers; + st = container_of(pt_st, struct pg_state, ptdump); + m = st->seq; + prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC); + if (level == 4 && (val & _PAGE_INVALID)) + prot = _PAGE_INVALID; + /* For pmd_none() & friends val gets passed as zero. */ + if (level != 4 && !val) + prot = _PAGE_INVALID; + /* Final flush from generic code. */ + if (level == -1) + addr = max_addr; + if (st->level == -1) { seq_printf(m, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || level != st->level || - st->current_address >= st->marker[1].start_address) { - /* Print the actual finished series */ + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { seq_printf(m, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); - delta = (st->current_address - st->start_address) >> 10; + width, addr); + delta = (addr - st->start_address) >> 10; while (!(delta & 0x3ff) && unit[1]) { delta >>= 10; unit++; } seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - while (st->current_address >= st->marker[1].start_address) { + while (addr >= st->marker[1].start_address) { st->marker++; seq_printf(m, "---[ %s ]---\n", st->marker->name); } - st->start_address = st->current_address; - st->current_prot = new_prot; + st->start_address = addr; + st->current_prot = prot; st->level = level; } } -#ifdef CONFIG_KASAN -static void note_kasan_early_shadow_page(struct seq_file *m, - struct pg_state *st) -{ - unsigned int prot; - - prot = pte_val(*kasan_early_shadow_pte) & - (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC); - note_page(m, st, prot, 4); -} -#endif - -/* - * The actual page table walker functions. In order to keep the - * implementation of print_prot() short, we only check and pass - * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, - * segment or page table entry is invalid or read-only. - * After all it's just a hint that the current level being walked - * contains an invalid or read-only entry. 
- */ -static void walk_pte_level(struct seq_file *m, struct pg_state *st, - pmd_t *pmd, unsigned long addr) -{ - unsigned int prot; - pte_t *pte; - int i; - - for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { - st->current_address = addr; - pte = pte_offset_kernel(pmd, addr); - prot = pte_val(*pte) & - (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC); - note_page(m, st, prot, 4); - addr += PAGE_SIZE; - } -} - -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, - pud_t *pud, unsigned long addr) -{ - unsigned int prot; - pmd_t *pmd; - int i; - -#ifdef CONFIG_KASAN - if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_early_shadow_pmd)) { - note_kasan_early_shadow_page(m, st); - return; - } -#endif - - pmd = pmd_offset(pud, addr); - for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++, pmd++) { - st->current_address = addr; - if (!pmd_none(*pmd)) { - if (pmd_large(*pmd)) { - prot = pmd_val(*pmd) & - (_SEGMENT_ENTRY_PROTECT | - _SEGMENT_ENTRY_NOEXEC); - note_page(m, st, prot, 3); - } else - walk_pte_level(m, st, pmd, addr); - } else - note_page(m, st, _PAGE_INVALID, 3); - addr += PMD_SIZE; - } -} - -static void walk_pud_level(struct seq_file *m, struct pg_state *st, - p4d_t *p4d, unsigned long addr) -{ - unsigned int prot; - pud_t *pud; - int i; - -#ifdef CONFIG_KASAN - if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_early_shadow_pud)) { - note_kasan_early_shadow_page(m, st); - return; - } -#endif - - pud = pud_offset(p4d, addr); - for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++, pud++) { - st->current_address = addr; - if (!pud_none(*pud)) - if (pud_large(*pud)) { - prot = pud_val(*pud) & - (_REGION_ENTRY_PROTECT | - _REGION_ENTRY_NOEXEC); - note_page(m, st, prot, 2); - } else - walk_pmd_level(m, st, pud, addr); - else - note_page(m, st, _PAGE_INVALID, 2); - addr += PUD_SIZE; - } -} - -static void walk_p4d_level(struct seq_file *m, struct pg_state *st, - pgd_t *pgd, unsigned long addr) -{ - p4d_t *p4d; - int i; - -#ifdef CONFIG_KASAN - if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_early_shadow_p4d)) { - note_kasan_early_shadow_page(m, st); - return; - } -#endif - - p4d = p4d_offset(pgd, addr); - for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++, p4d++) { - st->current_address = addr; - if (!p4d_none(*p4d)) - walk_pud_level(m, st, p4d, addr); - else - note_page(m, st, _PAGE_INVALID, 2); - addr += P4D_SIZE; - } -} - -static void walk_pgd_level(struct seq_file *m) -{ - unsigned long addr = 0; - struct pg_state st; - pgd_t *pgd; - int i; - - memset(&st, 0, sizeof(st)); - for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { - st.current_address = addr; - pgd = pgd_offset_k(addr); - if (!pgd_none(*pgd)) - walk_p4d_level(m, &st, pgd, addr); - else - note_page(m, &st, _PAGE_INVALID, 1); - addr += PGDIR_SIZE; - cond_resched(); - } - /* Flush out the last page */ - st.current_address = max_addr; - note_page(m, &st, 0, 0); -} - static int ptdump_show(struct seq_file *m, void *v) { - walk_pgd_level(m); + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {.start = 0, .end = max_addr}, + {.start = 0, .end = 0}, + } + }, + .seq = m, + .level = -1, + .current_prot = 0, + .start_address = 0, + .marker = address_markers, + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); return 0; } - -static int ptdump_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = 
single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); static int pt_dump_init(void) { -- cgit v1.2.3 From 36c2733c439caa424fe2b7dded870913dcb868ac Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Sep 2020 17:11:36 +0200 Subject: s390/mm,ptdump: hold memory hotplug lock while walking for kernel page table dump This is the s390 variant of commit bf2b59f60ee1 ("arm64/mm: Hold memory hotplug lock while walking for kernel page table dump"). Right now this doesn't fix any real bug; however, as soon as kvm patches which make use of memory remove get merged, we might end up dereferencing/accessing freed page tables. Therefore fix this potential bug already now. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 93a29e2f13d4..3c5e4055a3d2 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -129,7 +129,9 @@ static int ptdump_show(struct seq_file *m, void *v) .marker = address_markers, }; + get_online_mems(); ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + put_online_mems(); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); -- cgit v1.2.3 From da1694ad9e8d13484c8b4ecaabde0bd7b958442a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Sep 2020 17:20:05 +0200 Subject: s390/mm,ptdump: hold cpa mutex while walking for kernel page table dump Currently this only prevents outdated information from being provided to user space. A concurrent split of huge/large pages does modify the kernel page tables; however, either the huge/large mapping is reported or the split area is being walked. This also only "fixes" a potential future bug, since split pages could also be merged again if page permissions are the same for larger memory areas.
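The resulting lock nesting, once this and the previous patch are applied, can be sketched as follows (illustrative only, the real hunks are in the diffs below): the memory hotplug lock is taken outermost, the cpa mutex inside it, and both are held only for the duration of the walk.

	static int ptdump_show_sketch(struct seq_file *m, void *v)
	{
		struct pg_state st = {
			.seq	= m,
			.level	= -1,
			.marker	= address_markers,
			/* remaining fields as in the generic ptdump conversion */
		};

		get_online_mems();		/* keep kernel page tables from being freed */
		mutex_lock(&cpa_mutex);		/* keep huge/large pages from being split */
		ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
		mutex_unlock(&cpa_mutex);
		put_online_mems();
		return 0;
	}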
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/set_memory.h | 4 ++++ arch/s390/mm/dump_pagetables.c | 3 +++ arch/s390/mm/pageattr.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index c59a83536c70..a22a5a81811c 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -2,6 +2,10 @@ #ifndef _ASMS390_SET_MEMORY_H #define _ASMS390_SET_MEMORY_H +#include + +extern struct mutex cpa_mutex; + #define SET_MEMORY_RO 1UL #define SET_MEMORY_RW 2UL #define SET_MEMORY_NX 4UL diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 3c5e4055a3d2..09c7179cb17d 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -130,7 +131,9 @@ static int ptdump_show(struct seq_file *m, void *v) }; get_online_mems(); + mutex_lock(&cpa_mutex); ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + mutex_unlock(&cpa_mutex); put_online_mems(); return 0; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index c5c52ec2b46f..ed8e5b3575d5 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -278,7 +278,7 @@ static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, return rc; } -static DEFINE_MUTEX(cpa_mutex); +DEFINE_MUTEX(cpa_mutex); static int change_page_attr(unsigned long addr, unsigned long end, unsigned long flags) -- cgit v1.2.3 From abb95b7550f88bfb77081601f80662a259f2d143 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 17 Aug 2020 10:29:23 +0200 Subject: s390/pci: consolidate SR-IOV specific code Currently we have multiple #ifdef CONFIG_PCI_IOV blocks spread over different compilation units and headers, all dealing with SR-IOV specific behavior. This violates the style guide which discourages conditionally compiled code blocks and hinders maintainability by spreading SR-IOV functionality over many files. Let's move all of this into a conditionally compiled pci_iov.c file and a local header, and prefix SR-IOV specific functions with zpci_iov_*.
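The pattern boils down to keeping the #ifdef in a single local header that provides empty static inline stubs, so that call sites compile unconditionally and stay free of conditional blocks. A generic sketch with hypothetical names (not the actual prototypes from this patch):

	#ifdef CONFIG_FEATURE_FOO
	void foo_map_resources(struct pci_dev *pdev);
	#else /* CONFIG_FEATURE_FOO */
	static inline void foo_map_resources(struct pci_dev *pdev) {}
	#endif /* CONFIG_FEATURE_FOO */

	/* callers need no #ifdef: */
	static void bar_setup(struct pci_dev *pdev)
	{
		foo_map_resources(pdev);
	}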
Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/Makefile | 1 + arch/s390/pci/pci.c | 23 ++---------- arch/s390/pci/pci_bus.c | 66 +-------------------------------- arch/s390/pci/pci_bus.h | 12 ------ arch/s390/pci/pci_iov.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ arch/s390/pci/pci_iov.h | 30 +++++++++++++++ 6 files changed, 133 insertions(+), 96 deletions(-) create mode 100644 arch/s390/pci/pci_iov.c create mode 100644 arch/s390/pci/pci_iov.h (limited to 'arch') diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index b4e3c84772a1..bf557a1b789c 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ pci_bus.o +obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index fdbb99c4569d..e432318f6937 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -37,6 +37,7 @@ #include #include "pci_bus.h" +#include "pci_iov.h" /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); @@ -413,15 +414,6 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -#ifdef CONFIG_PCI_IOV -static struct resource iov_res = { - .name = "PCI IOV res", - .start = 0, - .end = -1, - .flags = IORESOURCE_MEM, -}; -#endif - static void zpci_map_resources(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); @@ -442,16 +434,7 @@ static void zpci_map_resources(struct pci_dev *pdev) pdev->resource[i].end = pdev->resource[i].start + len - 1; } -#ifdef CONFIG_PCI_IOV - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - int bar = i + PCI_IOV_RESOURCES; - - len = pci_resource_len(pdev, bar); - if (!len) - continue; - pdev->resource[bar].parent = &iov_res; - } -#endif + zpci_iov_map_resources(pdev); } static void zpci_unmap_resources(struct pci_dev *pdev) @@ -703,7 +686,7 @@ void zpci_remove_device(struct zpci_dev *zdev) pdev = pci_get_slot(zbus->bus, zdev->devfn); if (pdev) { if (pdev->is_virtfn) - return zpci_remove_virtfn(pdev, zdev->vfn); + return zpci_iov_remove_virtfn(pdev, zdev->vfn); pci_stop_and_remove_bus_device_locked(pdev); } } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 5967f3014156..0c0db7c3a404 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -24,6 +24,7 @@ #include #include "pci_bus.h" +#include "pci_iov.h" static LIST_HEAD(zbus_list); static DEFINE_SPINLOCK(zbus_list_lock); @@ -126,69 +127,6 @@ static struct zpci_bus *zpci_bus_alloc(int pchid) return zbus; } -#ifdef CONFIG_PCI_IOV -static int zpci_bus_link_virtfn(struct pci_dev *pdev, - struct pci_dev *virtfn, int vfid) -{ - int rc; - - rc = pci_iov_sysfs_link(pdev, virtfn, vfid); - if (rc) - return rc; - - virtfn->is_virtfn = 1; - virtfn->multifunction = 0; - virtfn->physfn = pci_dev_get(pdev); - - return 0; -} - -static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, - struct pci_dev *virtfn, int vfn) -{ - int i, cand_devfn; - struct zpci_dev *zdev; - struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; - - if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the - * instance, it must be on the same zbus. - * We can then identify the parent PF by checking what - * devfn the VF would have if it belonged to that PF using the PF's - * stride and offset. Only if this candidate devfn matches the - * actual devfn will we link both functions. 
- */ - for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { - zdev = zbus->function[i]; - if (zdev && zdev->is_physfn) { - pdev = pci_get_slot(zbus->bus, zdev->devfn); - if (!pdev) - continue; - cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } - /* balance pci_get_slot() */ - pci_dev_put(pdev); - } - } - return rc; -} -#else -static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, - struct pci_dev *virtfn, int vfn) -{ - return 0; -} -#endif - void pcibios_bus_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); @@ -198,7 +136,7 @@ void pcibios_bus_add_device(struct pci_dev *pdev) * perform PF/VF linking. */ if (zdev->vfn) - zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn); + zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn); } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 4972433df458..8d19723ed5c0 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -30,15 +30,3 @@ static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn]; } -#ifdef CONFIG_PCI_IOV -static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) -{ - - pci_lock_rescan_remove(); - /* Linux' vfid's start at 0 vfn at 1 */ - pci_iov_remove_virtfn(pdev->physfn, vfn - 1); - pci_unlock_rescan_remove(); -} -#else /* CONFIG_PCI_IOV */ -static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {} -#endif /* CONFIG_PCI_IOV */ diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c new file mode 100644 index 000000000000..35fca14ebb35 --- /dev/null +++ b/arch/s390/pci/pci_iov.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Niklas Schnelle + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include + +static struct resource iov_res = { + .name = "PCI IOV res", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + +void zpci_iov_map_resources(struct pci_dev *pdev) +{ + resource_size_t len; + int i; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + int bar = i + PCI_IOV_RESOURCES; + + len = pci_resource_len(pdev, bar); + if (!len) + continue; + pdev->resource[bar].parent = &iov_res; + } +} + +void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) +{ + pci_lock_rescan_remove(); + /* Linux' vfid's start at 0 vfn at 1 */ + pci_iov_remove_virtfn(pdev->physfn, vfn - 1); + pci_unlock_rescan_remove(); +} + +static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid) +{ + int rc; + + rc = pci_iov_sysfs_link(pdev, virtfn, vfid); + if (rc) + return rc; + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + virtfn->physfn = pci_dev_get(pdev); + + return 0; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + int i, cand_devfn; + struct zpci_dev *zdev; + struct pci_dev *pdev; + int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ + int rc = 0; + + if (!zbus->multifunction) + return 0; + + /* If the parent PF for the given VF is also configured in the + * instance, it must be on the same zbus. + * We can then identify the parent PF by checking what + * devfn the VF would have if it belonged to that PF using the PF's + * stride and offset. Only if this candidate devfn matches the + * actual devfn will we link both functions. 
+ */ + for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { + zdev = zbus->function[i]; + if (zdev && zdev->is_physfn) { + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (!pdev) + continue; + cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); + if (cand_devfn == virtfn->devfn) { + rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); + /* balance pci_get_slot() */ + pci_dev_put(pdev); + break; + } + /* balance pci_get_slot() */ + pci_dev_put(pdev); + } + } + return rc; +} diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h new file mode 100644 index 000000000000..b2c828003bad --- /dev/null +++ b/arch/s390/pci/pci_iov.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Niklas Schnelle + * + */ + +#ifndef __S390_PCI_IOV_H +#define __S390_PCI_IOV_H + +#ifdef CONFIG_PCI_IOV +void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn); + +void zpci_iov_map_resources(struct pci_dev *pdev); + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); + +#else /* CONFIG_PCI_IOV */ +static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} + +static inline void zpci_iov_map_resources(struct pci_dev *pdev) {} + +static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ +#endif /* __S390_PCI_IOV_h */ -- cgit v1.2.3 From 2bce60b5032fe3ca225a36b80f6df90938f0ead6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 21 Aug 2020 10:26:40 +0200 Subject: s390/pci: remove unused function zpci_rescan() the only caller of this was removed as part of the suspend/resume removal so no need to keep this function around. Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 2 -- arch/s390/pci/pci.c | 6 ------ 2 files changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 8015b9301533..140cb6bc8e1c 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -233,12 +233,10 @@ static inline bool zpci_use_mio(struct zpci_dev *zdev) /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); -void zpci_rescan(void); bool zpci_is_enabled(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} -static inline void zpci_rescan(void) {} #endif /* CONFIG_PCI */ #ifdef CONFIG_HOTPLUG_PCI_S390 diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e432318f6937..0ff48c51cd0e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -890,9 +890,3 @@ out: return rc; } subsys_initcall_sync(pci_base_init); - -void zpci_rescan(void) -{ - if (zpci_is_enabled()) - clp_rescan_pci_devices_simple(NULL); -} -- cgit v1.2.3 From 809fcfaf9238052f03e8002f6dad156a7fb6cd5e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 21 Aug 2020 11:01:54 +0200 Subject: s390/pci: remove clp_rescan_pci_devices() there is only one call site of clp_rescan_pci_devices() and all the function does is call zpci_remove_reserved_devices() followed by a duplicating clp_scan_pci_devices(). So inline the single call as a call to zpci_remove_reserved_devices() and clp_scan_pci_devices() and remove the function. 
Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_clp.c | 17 ----------------- arch/s390/pci/pci_event.c | 3 ++- 3 files changed, 2 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 140cb6bc8e1c..430f694163c1 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -210,7 +210,6 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_rescan_pci_devices(void); int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 51807945ca00..6aabfb131c1e 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -441,23 +441,6 @@ int clp_scan_pci_devices(void) return rc; } -int clp_rescan_pci_devices(void) -{ - struct clp_req_rsp_list_pci *rrb; - int rc; - - zpci_remove_reserved_devices(); - - rrb = clp_alloc_block(GFP_KERNEL); - if (!rrb) - return -ENOMEM; - - rc = clp_list_pci(rrb, NULL, __clp_add); - - clp_free_block(rrb); - return rc; -} - /* Rescan PCI functions and refresh function handles. If fid is non-NULL only * refresh the handle of the function matching @fid */ diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 9a3a291cad43..e67cc2f6e169 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -150,7 +150,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ - clp_rescan_pci_devices(); + zpci_remove_reserved_devices(); + clp_scan_pci_devices(); break; case 0x0308: /* Standby -> Reserved */ if (!zdev) -- cgit v1.2.3 From c3b2c9064e76fa3952c99982f11a98e0327f3dbe Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 21 Aug 2020 11:16:48 +0200 Subject: s390/pci: remove clp_rescan_pci_devices_simple() clp_rescan_pci_devices_simple() is neither simpler than clp_scan_pci_devices() nor does it really scan PCI devices: in particular, it will neither add newly discovered devices nor remove those which disappeared. Instead it only refreshes PCI function handles and also has just a single callsite left in the same translation unit, which in fact only refreshes one specific function handle identified by a FID. Clarify this by renaming the function and its helper to clp_refresh_fh() and __clp_refresh_fh() respectively, and make it take a fid directly, which saves us from dealing with the NULL case that updated all function handles but is no longer used. Furthermore, since the only callsite is in the same translation unit, make it static.
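The interface change itself follows a common refactoring: an optional pointer parameter whose NULL case has no remaining user is better passed by value. Sketched with simplified declarations (hypothetical, not the exact prototypes):

	/* before: fid == NULL meant "refresh all handles", which nobody used */
	int refresh_fh(u32 *fid);

	/* after: exactly one function handle, identified by value */
	static int refresh_fh(u32 fid);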
Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_clp.c | 44 +++++++++++++++++++++----------------------- 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 430f694163c1..178a24e0af5f 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -210,7 +210,6 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 6aabfb131c1e..5a34a1359dc5 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -244,6 +244,7 @@ error: return rc; } +static int clp_refresh_fh(u32 fid); /* * Enable/Disable a given PCI function and update its function handle if * necessary @@ -286,7 +287,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && rrb->response.fh == 0) { /* Function is already in desired state - update handle */ - rc = clp_rescan_pci_devices_simple(&fid); + rc = clp_refresh_fh(fid); } clp_free_block(rrb); return rc; @@ -408,24 +409,6 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) clp_add_pci_device(entry->fid, entry->fh, entry->config_state); } -static void __clp_update(struct clp_fh_list_entry *entry, void *data) -{ - struct zpci_dev *zdev; - u32 *fid = data; - - if (!entry->vendor_id) - return; - - if (fid && *fid != entry->fid) - return; - - zdev = get_zdev_by_fid(entry->fid); - if (!zdev) - return; - - zdev->fh = entry->fh; -} - int clp_scan_pci_devices(void) { struct clp_req_rsp_list_pci *rrb; @@ -441,10 +424,25 @@ int clp_scan_pci_devices(void) return rc; } -/* Rescan PCI functions and refresh function handles. If fid is non-NULL only - * refresh the handle of the function matching @fid +static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data) +{ + struct zpci_dev *zdev; + u32 fid = *((u32 *)data); + + if (!entry->vendor_id || fid != entry->fid) + return; + + zdev = get_zdev_by_fid(fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +/* + * Refresh the function handle of the function matching @fid */ -int clp_rescan_pci_devices_simple(u32 *fid) +static int clp_refresh_fh(u32 fid) { struct clp_req_rsp_list_pci *rrb; int rc; @@ -453,7 +451,7 @@ int clp_rescan_pci_devices_simple(u32 *fid) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, fid, __clp_update); + rc = clp_list_pci(rrb, &fid, __clp_refresh_fh); clp_free_block(rrb); return rc; -- cgit v1.2.3 From 6c6687a444cfa62548e080a52e6c2d5d41577a73 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Sep 2020 16:48:35 +0200 Subject: s390/kprobes: make insn pages read-only Make sure that kprobe insn pages are not writable anymore. Tested-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/kprobes.c | 57 +++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d2a71d872638..b34fa4eef742 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -7,6 +7,7 @@ * s390 port, used ppc64 as template. 
Mike Grundy */ +#include #include #include #include @@ -32,17 +33,33 @@ DEFINE_INSN_CACHE_OPS(s390_insn); static int insn_page_in_use; static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE); +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + __set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X); + return page; +} + +void free_insn_page(void *page) +{ + module_memfree(page); +} + static void *alloc_s390_insn_page(void) { if (xchg(&insn_page_in_use, 1) == 1) return NULL; - set_memory_x((unsigned long) &insn_page, 1); + __set_memory((unsigned long) &insn_page, 1, SET_MEMORY_RO | SET_MEMORY_X); return &insn_page; } static void free_s390_insn_page(void *page) { - set_memory_nx((unsigned long) page, 1); + __set_memory((unsigned long) page, 1, SET_MEMORY_RW | SET_MEMORY_NX); xchg(&insn_page_in_use, 0); } @@ -56,25 +73,29 @@ struct kprobe_insn_cache kprobe_s390_insn_slots = { static void copy_instruction(struct kprobe *p) { + kprobe_opcode_t insn[MAX_INSN_SIZE]; s64 disp, new_disp; u64 addr, new_addr; + unsigned int len; - memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8)); - p->opcode = p->ainsn.insn[0]; - if (!probe_is_insn_relative_long(p->ainsn.insn)) - return; - /* - * For pc-relative instructions in RIL-b or RIL-c format patch the - * RI2 displacement field. We have already made sure that the insn - * slot for the patched instruction is within the same 2GB area - * as the original instruction (either kernel image or module area). - * Therefore the new displacement will always fit. - */ - disp = *(s32 *)&p->ainsn.insn[1]; - addr = (u64)(unsigned long)p->addr; - new_addr = (u64)(unsigned long)p->ainsn.insn; - new_disp = ((addr + (disp * 2)) - new_addr) / 2; - *(s32 *)&p->ainsn.insn[1] = new_disp; + len = insn_length(*p->addr >> 8); + memcpy(&insn, p->addr, len); + p->opcode = insn[0]; + if (probe_is_insn_relative_long(&insn[0])) { + /* + * For pc-relative instructions in RIL-b or RIL-c format patch + * the RI2 displacement field. We have already made sure that + * the insn slot for the patched instruction is within the same + * 2GB area as the original instruction (either kernel image or + * module area). Therefore the new displacement will always fit. + */ + disp = *(s32 *)&insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&insn[1] = new_disp; + } + s390_kernel_write(p->ainsn.insn, &insn, len); } NOKPROBE_SYMBOL(copy_instruction); -- cgit v1.2.3 From 6bf9a639e76e1da8eb1ed29e037e900106e1dff4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 9 Sep 2020 11:14:52 +0200 Subject: s390/mm,ptdump: make page table dumping seq_file optional s390 version of ae5d1cf358a5 ("arm64: dump: Make the page table dumping seq_file optional"). Tested-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 09c7179cb17d..b7401a2f93f3 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -51,18 +51,34 @@ struct pg_state { const struct addr_marker *marker; }; +#define pt_dump_seq_printf(m, fmt, args...) 
\ +({ \ + struct seq_file *__m = (m); \ + \ + if (__m) \ + seq_printf(__m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + struct seq_file *__m = (m); \ + \ + if (__m) \ + seq_printf(__m, fmt); \ +}) + static void print_prot(struct seq_file *m, unsigned int pr, int level) { static const char * const level_name[] = { "ASCE", "PGD", "PUD", "PMD", "PTE" }; - seq_printf(m, "%s ", level_name[level]); + pt_dump_seq_printf(m, "%s ", level_name[level]); if (pr & _PAGE_INVALID) { - seq_printf(m, "I\n"); + pt_dump_seq_printf(m, "I\n"); return; } - seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW "); - seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); + pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW "); + pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); } static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) @@ -87,25 +103,25 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, if (level == -1) addr = max_addr; if (st->level == -1) { - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); st->start_address = addr; st->current_prot = prot; st->level = level; } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, addr); + pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, addr); delta = (addr - st->start_address) >> 10; while (!(delta & 0x3ff) && unit[1]) { delta >>= 10; unit++; } - seq_printf(m, "%9lu%c ", delta, *unit); + pt_dump_seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); while (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); } st->start_address = addr; st->current_prot = prot; -- cgit v1.2.3 From 08c8e685c7c9223f9c4ad6365e02bebd3f106480 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 9 Sep 2020 17:10:29 +0200 Subject: s390: add ARCH_HAS_DEBUG_WX support Checks the whole kernel address space for W+X mappings. Note that currently the first lowcore page unfortunately has to be mapped W+X. Therefore this not reported as an insecure mapping. For the very same reason the wording is also different to other architectures if the test passes: On s390 it is "no unexpected W+X pages found" instead of "no W+X pages found". 
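The condition that counts as a hit can be summarized as a small predicate (a hypothetical helper for illustration; the real logic is note_prot_wx() in the hunk below): a mapping is only flagged if it is valid, writable and executable at the same time, and the first lowcore page is skipped.

	static bool is_wx_sketch(unsigned int prot, unsigned long addr)
	{
		if (addr == PAGE_SIZE)			/* first lowcore page, known W+X */
			return false;
		return !(prot & _PAGE_INVALID) &&	/* mapped */
		       !(prot & _PAGE_PROTECT) &&	/* writable */
		       !(prot & _PAGE_NOEXEC);		/* executable */
	}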
Tested-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + arch/s390/include/asm/ptdump.h | 14 +++++++++ arch/s390/mm/Makefile | 2 +- arch/s390/mm/dump_pagetables.c | 64 ++++++++++++++++++++++++++++++++++++++- arch/s390/mm/init.c | 2 ++ 7 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 arch/s390/include/asm/ptdump.h (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 85bf121211d1..2052b39b8459 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,7 @@ config S390 def_bool y select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE + select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f79eafb597cb..901723e4ed63 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -774,6 +774,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y +CONFIG_DEBUG_WX=y CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 9593cc8a9efd..87da48c85130 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -758,6 +758,7 @@ CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h new file mode 100644 index 000000000000..f960b2896606 --- /dev/null +++ b/arch/s390/include/asm/ptdump.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_PTDUMP_H +#define _ASM_S390_PTDUMP_H + +void ptdump_check_wx(void); + +static inline void debug_checkwx(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_WX)) + ptdump_check_wx(); +} + +#endif /* _ASM_S390_PTDUMP_H */ diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 8ab9daeeace3..cd67e94c16aa 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -8,7 +8,7 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_DEBUGFS) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index b7401a2f93f3..4b27c1a533de 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,8 @@ struct pg_state { struct seq_file *seq; int level; unsigned int current_prot; + bool check_wx; + unsigned long wx_pages; unsigned long start_address; const struct addr_marker *marker; }; @@ -81,6 +84,26 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); } +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ +#ifdef CONFIG_DEBUG_WX + if (!st->check_wx) + return; + if (st->current_prot & _PAGE_INVALID) + return; + if (st->current_prot & _PAGE_PROTECT) + return; + if (st->current_prot & _PAGE_NOEXEC) + return; + /* The first lowcore page is currently still W+X. 
*/ + if (addr == PAGE_SIZE) + return; + WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n", + (void *)st->start_address); + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +#endif /* CONFIG_DEBUG_WX */ +} + static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { int width = sizeof(unsigned long) * 2; @@ -109,6 +132,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, st->level = level; } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { + note_prot_wx(st, addr); pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ", width, st->start_address, width, addr); @@ -129,6 +153,40 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +#ifdef CONFIG_DEBUG_WX +void ptdump_check_wx(void) +{ + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {.start = 0, .end = max_addr}, + {.start = 0, .end = 0}, + } + }, + .seq = NULL, + .level = -1, + .current_prot = 0, + .check_wx = true, + .wx_pages = 0, + .start_address = 0, + .marker = (struct addr_marker[]) { + { .start_address = 0, .name = NULL}, + { .start_address = -1, .name = NULL}, + }, + }; + + if (!MACHINE_HAS_NX) + return; + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + if (st.wx_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n"); +} +#endif /* CONFIG_DEBUG_WX */ + +#ifdef CONFIG_PTDUMP_DEBUGFS static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { @@ -142,6 +200,8 @@ static int ptdump_show(struct seq_file *m, void *v) .seq = m, .level = -1, .current_prot = 0, + .check_wx = false, + .wx_pages = 0, .start_address = 0, .marker = address_markers, }; @@ -154,6 +214,7 @@ static int ptdump_show(struct seq_file *m, void *v) return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); +#endif /* CONFIG_PTDUMP_DEBUGFS */ static int pt_dump_init(void) { @@ -167,7 +228,8 @@ static int pt_dump_init(void) address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMALLOC_NR].start_address = VMALLOC_START; - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); + if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); return 0; } device_initcall(pt_dump_init); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0d282081dc1f..d3ddb4361361 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ void mark_rodata_ro(void) set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); + debug_checkwx(); } int set_memory_encrypted(unsigned long addr, int numpages) -- cgit v1.2.3 From d411e3c6744925f08a2c8682559ce77090144fa2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 10 Sep 2020 22:25:13 +0200 Subject: s390/kasan: make shadow memory noexec ARCH_HAS_DEBUG_WX feature support brought attention to the fact that currently initial kasan shadow memory mapped without noexec flag. So fix that. Temporary initial identity mapping is still created without noexec, but it is replaced by properly set up paging later. 
Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/kasan_init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 99dd1c63a065..1a27a7143349 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -99,8 +99,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO); if (!has_nx) pgt_prot_zero &= ~_PAGE_NOEXEC; - pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); - sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC); + pgt_prot = pgprot_val(PAGE_KERNEL); + sgt_prot = pgprot_val(SEGMENT_KERNEL); + if (!has_nx || mode == POPULATE_ONE2ONE) { + pgt_prot &= ~_PAGE_NOEXEC; + sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; + } while (address < end) { pg_dir = pgd_offset_k(address); -- cgit v1.2.3 From e670e64af1d46571be3d9b1177024d9c8520087c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 12:51:59 +0200 Subject: s390/mm,ptdump: add couple of additional markers Signed-off-by: Vasily Gorbik [hca@linux.ibm.com: add more markers, rename some markers] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 1 + arch/s390/kernel/setup.c | 2 ++ arch/s390/mm/dump_pagetables.c | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e9244b9fb504..d87b83a77aff 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -89,6 +89,7 @@ extern unsigned long VMALLOC_START; extern unsigned long VMALLOC_END; #define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN) extern struct page *vmemmap; +extern unsigned long vmemmap_size; #define VMEM_MAX_PHYS ((unsigned long) vmemmap) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index efd12221ecb4..f04252cb6004 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -119,6 +119,7 @@ EXPORT_SYMBOL(VMALLOC_END); struct page *vmemmap; EXPORT_SYMBOL(vmemmap); +unsigned long vmemmap_size; unsigned long MODULES_VADDR; unsigned long MODULES_END; @@ -589,6 +590,7 @@ static void __init setup_memory_end(void) memory_end = min(memory_end, KASAN_SHADOW_START); vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif + vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); max_pfn = max_low_pfn = PFN_DOWN(memory_end); memblock_remove(memory_end, ULONG_MAX); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 4b27c1a533de..052223c92fb1 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -17,29 +17,41 @@ struct addr_marker { }; enum address_markers_idx { - IDENTITY_NR = 0, + IDENTITY_BEFORE_NR = 0, + IDENTITY_BEFORE_END_NR, KERNEL_START_NR, KERNEL_END_NR, + IDENTITY_AFTER_NR, + IDENTITY_AFTER_END_NR, #ifdef CONFIG_KASAN KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif VMEMMAP_NR, + VMEMMAP_END_NR, VMALLOC_NR, + VMALLOC_END_NR, MODULES_NR, + MODULES_END_NR, }; static struct addr_marker address_markers[] = { - [IDENTITY_NR] = {0, "Identity Mapping"}, + [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, + [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, + [IDENTITY_AFTER_NR] = {(unsigned 
long)_end, "Identity Mapping Start"}, + [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, #ifdef CONFIG_KASAN [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, #endif - [VMEMMAP_NR] = {0, "vmemmap Area"}, - [VMALLOC_NR] = {0, "vmalloc Area"}, - [MODULES_NR] = {0, "Modules Area"}, + [VMEMMAP_NR] = {0, "vmemmap Area Start"}, + [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, + [VMALLOC_NR] = {0, "vmalloc Area Start"}, + [VMALLOC_END_NR] = {0, "vmalloc Area End"}, + [MODULES_NR] = {0, "Modules Area Start"}, + [MODULES_END_NR] = {0, "Modules Area End"}, { -1, NULL } }; @@ -225,9 +237,13 @@ static int pt_dump_init(void) */ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); + address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end; address_markers[MODULES_NR].start_address = MODULES_VADDR; + address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; + address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; + address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); return 0; -- cgit v1.2.3 From 1a80b54d1ce1556bba6a8d8cd9384d6a9dcb641a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 7 Sep 2020 08:46:59 -0400 Subject: s390/uv: add destroy page call We don't need to export pages if we destroy the VM configuration afterwards anyway. Instead we can destroy the page which will zero it and then make it accessible to the host. Destroying is about twice as fast as the export. 
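A minimal usage sketch for the new call (the implementation and its no-op stub for kernels without protected virtualization support follow in the diff below; the warning message is only illustrative):

	unsigned long paddr = page_to_phys(page);

	if (uv_destroy_page(paddr))
		pr_warn("uv: could not destroy secure page at %lx\n", paddr);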
Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/kvm/20200907124700.10374-2-frankja@linux.ibm.com/ Signed-off-by: Janosch Frank Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uv.h | 7 +++++++ arch/s390/kernel/uv.c | 20 ++++++++++++++++++++ arch/s390/mm/gmap.c | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index cff4b4c99b75..0325fc0469b7 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -33,6 +33,7 @@ #define UVC_CMD_DESTROY_SEC_CPU 0x0121 #define UVC_CMD_CONV_TO_SEC_STOR 0x0200 #define UVC_CMD_CONV_FROM_SEC_STOR 0x0201 +#define UVC_CMD_DESTR_SEC_STOR 0x0202 #define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300 #define UVC_CMD_UNPACK_IMG 0x0301 #define UVC_CMD_VERIFY_IMG 0x0302 @@ -344,6 +345,7 @@ static inline int is_prot_virt_host(void) } int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int uv_destroy_page(unsigned long paddr); int uv_convert_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); @@ -354,6 +356,11 @@ void adjust_to_uv_max(unsigned long *vmax); static inline void setup_uv(void) {} static inline void adjust_to_uv_max(unsigned long *vmax) {} +static inline int uv_destroy_page(unsigned long paddr) +{ + return 0; +} + static inline int uv_convert_from_secure(unsigned long paddr) { return 0; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index c296e5c8dbf9..d3399b8a9b23 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -118,6 +118,26 @@ static int uv_pin_shared(unsigned long paddr) return 0; } +/* + * Requests the Ultravisor to destroy a guest page and make it + * accessible to the host. The destroy clears the page instead of + * exporting. + * + * @paddr: Absolute host address of page to be destroyed + */ +int uv_destroy_page(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_DESTR_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + /* * Requests the Ultravisor to encrypt a guest page and make it * accessible to the host for paging (export). diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 373542ca1113..cfb0017f33a7 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2679,7 +2679,7 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr, pte_t pte = READ_ONCE(*ptep); if (pte_present(pte)) - WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK)); + WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK)); return 0; } -- cgit v1.2.3 From 980d5f9ab36b6cfe473a8371a7e11bd168c9e630 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Wed, 2 Sep 2020 16:52:06 +0200 Subject: s390/boot: enable .bss section for compressed kernel - Support static uninitialized variables in compressed kernel. 
- Remove chkbss script - Get rid of workarounds for not having .bss section Signed-off-by: Alexander Egorenkov Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/Makefile | 4 ---- arch/s390/boot/compressed/Makefile | 4 ---- arch/s390/boot/compressed/decompressor.c | 1 - arch/s390/boot/compressed/vmlinux.lds.S | 22 +++++++++++++--------- arch/s390/boot/head.S | 6 ++++++ arch/s390/boot/ipl_parm.c | 4 ++-- arch/s390/boot/startup.c | 3 +++ arch/s390/kernel/setup.c | 2 +- arch/s390/scripts/Makefile.chkbss | 20 -------------------- 9 files changed, 25 insertions(+), 41 deletions(-) delete mode 100644 arch/s390/scripts/Makefile.chkbss (limited to 'arch') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 45b33b83de08..41a64b8dce25 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -73,7 +73,3 @@ $(obj)/startup.a: $(OBJECTS) FORCE install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" - -chkbss := $(obj-y) -chkbss-target := startup.a -include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index fa529c5b4486..b235ed95a3d8 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -62,7 +62,3 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed $(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE $(call if_changed,objcopy) - -chkbss := $(filter-out piggy.o info.o, $(obj-y)) -chkbss-target := vmlinux.bin -include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c index 368fd372c875..3061b11c4d27 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -16,7 +16,6 @@ * gzip declarations */ #define STATIC static -#define STATIC_RW_DATA static __section(.data) #undef memset #undef memcpy diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 44561b2c3712..9427e2cd0c15 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -58,6 +58,19 @@ SECTIONS BOOT_DATA BOOT_DATA_PRESERVED + /* + * This is the BSS section of the decompressor and not of the decompressed Linux kernel. + * It will consume place in the decompressor's image. + */ + . = ALIGN(8); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + _ebss = .; + } + /* * uncompressed image info used by the decompressor it should match * struct vmlinux_info. It comes from .vmlinux.info section of @@ -81,15 +94,6 @@ SECTIONS FILL(0xff); . = ALIGN(4096); } - . = ALIGN(256); - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - . 
= ALIGN(8); /* For convenience during zeroing */ - _ebss = .; - } _end = .; /* Sections to be discarded */ diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index dae10961d072..fd78755d996d 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -313,6 +313,12 @@ ENTRY(startup_kdump) spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) + // Clear decompressor's BSS section + larl %r2,_bss + slgr %r3,%r3 + larl %r4,_ebss + slgr %r4,%r2 + brasl %r14,memset brasl %r14,verify_facilities brasl %r14,startup_kernel diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 8e222a666025..ae230ebd6420 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -21,7 +21,7 @@ unsigned long __bootdata(memory_end); int __bootdata(memory_end_set); int __bootdata(noexec_disabled); -int kaslr_enabled __section(.data); +int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) { @@ -209,7 +209,7 @@ static void modify_fac_list(char *str) check_cleared_facilities(); } -static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); +static char command_line_buf[COMMAND_LINE_SIZE]; void parse_boot_command_line(void) { char *param, *val; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 3b3a11f95269..81835483169b 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -120,6 +120,9 @@ static void handle_relocs(unsigned long offset) } } +/* + * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. + */ static void clear_bss_section(void) { memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f04252cb6004..047793902ce6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -307,7 +307,7 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL_GPL(pm_power_off); -void *restart_stack __section(.data); +void *restart_stack; unsigned long stack_alloc(void) { diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss deleted file mode 100644 index f4f4c2c6dee9..000000000000 --- a/arch/s390/scripts/Makefile.chkbss +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -chkbss-target ?= built-in.a -$(obj)/$(chkbss-target): chkbss - -chkbss-files := $(addsuffix .chkbss, $(chkbss)) -clean-files += $(chkbss-files) - -PHONY += chkbss -chkbss: $(addprefix $(obj)/, $(chkbss-files)) - -quiet_cmd_chkbss = CHKBSS $< - cmd_chkbss = \ - if ! $(OBJSIZE) --common $< | $(AWK) 'END { if ($$3) exit 1 }'; then \ - echo "error: $< .bss section is not empty" >&2; exit 1; \ - fi; \ - touch $@; - -$(obj)/%.o.chkbss: $(obj)/%.o - $(call cmd,chkbss) -- cgit v1.2.3 From 48111b4838480d1357783f4231c351bb2ba2d27d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 15 Sep 2020 12:52:36 +0200 Subject: s390/mm,ptdump: add proper ifdefs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ifdefs instead of IS_ENABLED() to avoid compile error for !PTDUMP_DEBUGFS: arch/s390/mm/dump_pagetables.c: In function ‘pt_dump_init’: arch/s390/mm/dump_pagetables.c:248:64: error: ‘ptdump_fops’ undeclared (first use in this function); did you mean ‘pidfd_fops’? 
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); Reported-by: Julian Wiedmann Fixes: 08c8e685c7c9 ("s390: add ARCH_HAS_DEBUG_WX support") Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 052223c92fb1..0356ac6d7dad 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -244,8 +244,9 @@ static int pt_dump_init(void) address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; - if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); +#ifdef CONFIG_PTDUMP_DEBUGFS + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); +#endif /* CONFIG_PTDUMP_DEBUGFS */ return 0; } device_initcall(pt_dump_init); -- cgit v1.2.3 From 4904e1941ee334bf1f90e4017a37f7c8a52b685c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 15 Sep 2020 10:42:41 +0200 Subject: s390/pci: add missing pci_iov.h include this fixes a missing prototype compiler warning spotted by the kernel test robot. Fixes: abb95b7550f8 ("s390/pci: consolidate SR-IOV specific code") Reported-by: kernel test robot Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_iov.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index 35fca14ebb35..ead062bf2b41 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -13,6 +13,8 @@ #include #include +#include "pci_iov.h" + static struct resource iov_res = { .name = "PCI IOV res", .start = 0, -- cgit v1.2.3 From ee4b2ce6d1e3baa412148a7b0889f6643749c665 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 10 Sep 2020 22:51:17 +0200 Subject: s390/mm,ptdump: sort markers Kasan configuration options and size of physical memory present could affect kernel memory layout. In particular vmemmap, vmalloc and modules might come before kasan shadow or after it. To make ptdump correctly output markers in the right order markers have to be sorted. To preserve the original order of markers with the same start address avoid using sort() from lib/sort.c (which is not stable sorting algorithm) and sort markers in place. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 0356ac6d7dad..8f9ff7e7187d 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -228,6 +228,24 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); #endif /* CONFIG_PTDUMP_DEBUGFS */ +/* + * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple + * insertion sort to preserve the original order of markers with the same + * start address. 
+ */ +static void sort_address_markers(void) +{ + struct addr_marker tmp; + int i, j; + + for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { + tmp = address_markers[i]; + for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) + address_markers[j + 1] = address_markers[j]; + address_markers[j + 1] = tmp; + } +} + static int pt_dump_init(void) { /* @@ -244,6 +262,7 @@ static int pt_dump_init(void) address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; + sort_address_markers(); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); #endif /* CONFIG_PTDUMP_DEBUGFS */ -- cgit v1.2.3 From 8f78657c291f9e5ec26a2a9187938b374629d176 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 10 Sep 2020 22:54:58 +0200 Subject: s390/kasan: avoid unnecessary moving of vmemmap Currently vmemmap area is unconditionally moved beyond Kasan shadow memory. When Kasan is not enabled vmemmap area position is calculated in setup_memory_end() and depends on limiting factors like ultravisor secure storage limit. Try to follow the same logic with Kasan enabled as well and avoid unnecessary vmemmap area position changes unless it really intersects with Kasan shadow. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 047793902ce6..d63b08bacdf9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -586,11 +586,15 @@ static void __init setup_memory_end(void) /* Take care that memory_end is set and <= vmemmap */ memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); #ifdef CONFIG_KASAN - /* fit in kasan shadow memory region between 1:1 and vmemmap */ memory_end = min(memory_end, KASAN_SHADOW_START); - vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); +#ifdef CONFIG_KASAN + /* move vmemmap above kasan shadow only if stands in a way */ + if (KASAN_SHADOW_END > (unsigned long)vmemmap && + (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) + vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); +#endif max_pfn = max_low_pfn = PFN_DOWN(memory_end); memblock_remove(memory_end, ULONG_MAX); -- cgit v1.2.3 From 1d6671ae46e52e383bc6eea8d33bdd32ae61b323 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 11:38:21 +0200 Subject: s390/protvirt: parse prot_virt option in the decompressor To make early kernel address space layout definition possible parse prot_virt option in the decompressor and pass it to the uncompressed kernel. This enables kasan to take ultravisor secure storage limit into consideration and pre-define vmalloc position correctly. 
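As a rough sketch of the parsing pattern (illustrative only, not the actual hunk below), the decompressor's parse_boot_command_line() loop simply matches the "prot_virt" parameter and parses its value as a boolean:

	if (!strcmp(param, "prot_virt")) {
		bool enabled;

		/* accepts prot_virt=1/y/yes/on etc. via kstrtobool() */
		if (!kstrtobool(val, &enabled) && enabled)
			prot_virt_host = 1;
	}

The resulting prot_virt_host value is then handed over to the uncompressed kernel via __bootdata_preserved().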
Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 8 ++++++++ arch/s390/boot/uv.c | 3 +++ arch/s390/kernel/setup.c | 3 +-- arch/s390/kernel/uv.c | 40 ++++++++++++++++------------------------ 4 files changed, 28 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index ae230ebd6420..92ebc4a58fe2 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -254,6 +254,14 @@ void parse_boot_command_line(void) if (!strcmp(param, "nokaslr")) kaslr_enabled = 0; + +#if IS_ENABLED(CONFIG_KVM) + if (!strcmp(param, "prot_virt")) { + rc = kstrtobool(val, &enabled); + if (!rc && enabled) + prot_virt_host = 1; + } +#endif } } diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index f887a479cdc7..a15c033f53ca 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -7,6 +7,9 @@ #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); #endif +#if IS_ENABLED(CONFIG_KVM) +int __bootdata_preserved(prot_virt_host); +#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d63b08bacdf9..dd3fa7039cb0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1137,8 +1137,7 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); remove_oldmem(); - if (is_prot_virt_host()) - setup_uv(); + setup_uv(); setup_memory_end(); setup_memory(); dma_contiguous_reserve(memory_end); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index d3399b8a9b23..4233245737bd 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -26,33 +26,10 @@ int __bootdata_preserved(prot_virt_guest); struct uv_info __bootdata_preserved(uv_info); #if IS_ENABLED(CONFIG_KVM) -int prot_virt_host; +int __bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); EXPORT_SYMBOL(uv_info); -static int __init prot_virt_setup(char *val) -{ - bool enabled; - int rc; - - rc = kstrtobool(val, &enabled); - if (!rc && enabled) - prot_virt_host = 1; - - if (is_prot_virt_guest() && prot_virt_host) { - prot_virt_host = 0; - pr_warn("Protected virtualization not available in protected guests."); - } - - if (prot_virt_host && !test_facility(158)) { - prot_virt_host = 0; - pr_warn("Protected virtualization not supported by the hardware."); - } - - return rc; -} -early_param("prot_virt", prot_virt_setup); - static int __init uv_init(unsigned long stor_base, unsigned long stor_len) { struct uv_cb_init uvcb = { @@ -74,6 +51,21 @@ void __init setup_uv(void) { unsigned long uv_stor_base; + if (!is_prot_virt_host()) + return; + + if (is_prot_virt_guest()) { + prot_virt_host = 0; + pr_warn("Protected virtualization not available in protected guests."); + return; + } + + if (!test_facility(158)) { + prot_virt_host = 0; + pr_warn("Protected virtualization not supported by the hardware."); + return; + } + uv_stor_base = (unsigned long)memblock_alloc_try_nid( uv_info.uv_base_stor_len, SZ_1M, SZ_2G, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); -- cgit v1.2.3 From c2314cb2dd4140cb14b79a8139be34459777f421 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 11:40:21 +0200 Subject: s390/protvirt: support ultravisor without secure storage limit Avoid potential crash due to lack of secure storage limit. Check that max_sec_stor_addr is not 0 before adjusting vmalloc position. 
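The intended behaviour, as a small sketch (not part of the patch itself): treat a reported secure storage limit of 0 as "no limit known" and only clamp vmax when the ultravisor actually reported one.

	void adjust_to_uv_max(unsigned long *vmax)
	{
		/* 0 means the ultravisor did not report a limit */
		if (uv_info.max_sec_stor_addr)
			*vmax = min_t(unsigned long, *vmax,
				      uv_info.max_sec_stor_addr);
	}

With max_sec_stor_addr == 0 the address space layout now stays at its default instead of being clamped down to 0.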
Signed-off-by: Vasily Gorbik --- arch/s390/kernel/uv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 4233245737bd..1a166a1119c0 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -90,7 +90,8 @@ fail: void adjust_to_uv_max(unsigned long *vmax) { - *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); + if (uv_info.max_sec_stor_addr) + *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); } /* -- cgit v1.2.3 From c360c9a238d1754b1ee8f1c3368ef36794e1daab Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 11:44:47 +0200 Subject: s390/kasan: support protvirt with 4-level paging Currently the kernel crashes in Kasan instrumentation code if CONFIG_KASAN_S390_4_LEVEL_PAGING is used on a protected virtualization capable machine where the ultravisor imposes addressing limitations on the host and those limitations are lower than KASAN_SHADOW_OFFSET. The problem is that Kasan has to know in advance where the vmalloc/modules areas would be. With protected virtualization enabled the vmalloc/modules areas are moved down to the ultravisor secure storage limit while kasan still expects them at the very end of the 4-level paging address space. To fix that make Kasan recognize when protected virtualization is enabled and predefine vmalloc/modules area positions which are compliant with the ultravisor secure storage limit. Kasan shadow itself stays in place and might reside above that ultravisor secure storage limit. One slight difference compared to a kernel without Kasan enabled is that the vmalloc/modules area position is not reverted to the default if ultravisor initialization fails. It would still be below the ultravisor secure storage limit. Kernel layout with kasan, 4-level paging and protected virtualization enabled (ultravisor secure storage limit is at 0x0000800000000000): ---[ vmemmap Area Start ]--- 0x0000400000000000-0x0000400080000000 ---[ vmemmap Area End ]--- ---[ vmalloc Area Start ]--- 0x00007fe000000000-0x00007fff80000000 ---[ vmalloc Area End ]--- ---[ Modules Area Start ]--- 0x00007fff80000000-0x0000800000000000 ---[ Modules Area End ]--- ---[ Kasan Shadow Start ]--- 0x0018000000000000-0x001c000000000000 ---[ Kasan Shadow End ]--- 0x001c000000000000-0x0020000000000000 1P PGD I Kernel layout with kasan, 4-level paging and protected virtualization disabled/unsupported: ---[ vmemmap Area Start ]--- 0x0000400000000000-0x0000400060000000 ---[ vmemmap Area End ]--- ---[ Kasan Shadow Start ]--- 0x0018000000000000-0x001c000000000000 ---[ Kasan Shadow End ]--- ---[ vmalloc Area Start ]--- 0x001fffe000000000-0x001fffff80000000 ---[ vmalloc Area End ]--- ---[ Modules Area Start ]--- 0x001fffff80000000-0x0020000000000000 ---[ Modules Area End ]--- Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kasan.h | 1 + arch/s390/kernel/setup.c | 23 +++++++++-------------- arch/s390/kernel/uv.c | 3 +++ arch/s390/mm/kasan_init.c | 36 ++++++++++++++++++++++++++++++------ 4 files changed, 43 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 89d6886040c8..e9bf486de136 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -19,6 +19,7 @@ extern void kasan_early_init(void); extern void kasan_copy_shadow(pgd_t *dst); extern void kasan_free_early_identity(void); +extern unsigned long kasan_vmax; #else static inline void kasan_early_init(void) { } static inline void
kasan_copy_shadow(pgd_t *dst) { } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index dd3fa7039cb0..ae2f4d946048 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -552,22 +552,17 @@ static void __init setup_memory_end(void) unsigned long vmax, tmp; /* Choose kernel address space layout: 3 or 4 levels. */ - if (IS_ENABLED(CONFIG_KASAN)) { - vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) - ? _REGION1_SIZE - : _REGION2_SIZE; - } else { - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE); - if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) - vmax = _REGION2_SIZE; /* 3-level kernel page table */ - else - vmax = _REGION1_SIZE; /* 4-level kernel page table */ - } - + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE); + if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) + vmax = _REGION2_SIZE; /* 3-level kernel page table */ + else + vmax = _REGION1_SIZE; /* 4-level kernel page table */ if (is_prot_virt_host()) adjust_to_uv_max(&vmax); - +#ifdef CONFIG_KASAN + vmax = kasan_vmax; +#endif /* module area is at the end of the kernel address space. */ MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 1a166a1119c0..14bd9d58edc9 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -51,6 +51,9 @@ void __init setup_uv(void) { unsigned long uv_stor_base; + /* + * keep these conditions in line with kasan init code has_uv_sec_stor_limit() + */ if (!is_prot_virt_host()) return; diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 1a27a7143349..5646b39c728a 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -11,7 +11,9 @@ #include #include #include +#include +unsigned long kasan_vmax; static unsigned long segment_pos __initdata; static unsigned long segment_low __initdata; static unsigned long pgalloc_pos __initdata; @@ -256,14 +258,31 @@ static void __init kasan_early_detect_facilities(void) } } +static bool __init has_uv_sec_stor_limit(void) +{ + /* + * keep these conditions in line with setup_uv() + */ + if (!is_prot_virt_host()) + return false; + + if (is_prot_virt_guest()) + return false; + + if (!test_facility(158)) + return false; + + return !!uv_info.max_sec_stor_addr; +} + void __init kasan_early_init(void) { unsigned long untracked_mem_end; unsigned long shadow_alloc_size; + unsigned long vmax_unlimited; unsigned long initrd_end; unsigned long asce_type; unsigned long memsize; - unsigned long vmax; unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); pte_t pte_z; pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); @@ -291,7 +310,9 @@ void __init kasan_early_init(void) BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY); - untracked_mem_end = vmax = _REGION1_SIZE; + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; + if (has_uv_sec_stor_limit()) + kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); asce_type = _ASCE_TYPE_REGION2; } else { /* 3 level paging */ @@ -299,7 +320,7 @@ void __init kasan_early_init(void) BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); crst_table_init((unsigned long *)early_pg_dir, _REGION3_ENTRY_EMPTY); - untracked_mem_end = vmax = _REGION2_SIZE; + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE; asce_type = _ASCE_TYPE_REGION3; } @@ -369,17 +390,20 @@ void __init 
kasan_early_init(void) /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) - untracked_mem_end = vmax - MODULES_LEN; + untracked_mem_end = kasan_vmax - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_mem_end = vmax - vmalloc_size - MODULES_LEN; + untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN; /* shallowly populate kasan shadow for vmalloc and modules */ kasan_early_vmemmap_populate(__sha(untracked_mem_end), - __sha(vmax), POPULATE_SHALLOW); + __sha(kasan_vmax), POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ kasan_early_vmemmap_populate(__sha(max_physmem_end), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); + kasan_early_vmemmap_populate(__sha(kasan_vmax), + __sha(vmax_unlimited), + POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; /* populate identity mapping */ -- cgit v1.2.3 From fc3f61e1bcd5f88bfb7241bf509a3f63bab49ea8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Sep 2020 13:15:15 +0200 Subject: s390/dis: get rid of set_fs() usage Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dis.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index f304802ecf7b..a7eab7be4db0 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -482,31 +482,37 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) return (int) (ptr - buffer); } +static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len) +{ + if (user_mode(regs)) { + if (copy_from_user(dst, (char __user *)src, len)) + return -EFAULT; + } else { + if (copy_from_kernel_nofault(dst, src, len)) + return -EFAULT; + } + return 0; +} + void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; unsigned char code[64]; char buffer[128], *ptr; - mm_segment_t old_fs; unsigned long addr; int start, end, opsize, hops, i; /* Get a snapshot of the 64 bytes surrounding the fault address. */ - old_fs = get_fs(); - set_fs(user_mode(regs) ? USER_DS : KERNEL_DS); for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { addr = regs->psw.addr - 34 + start; - if (__copy_from_user(code + start - 2, - (char __user *) addr, 2)) + if (copy_from_regs(regs, code + start - 2, (void *)addr, 2)) break; } for (end = 32; end < 64; end += 2) { addr = regs->psw.addr + end - 32; - if (__copy_from_user(code + end, - (char __user *) addr, 2)) + if (copy_from_regs(regs, code + end, (void *)addr, 2)) break; } - set_fs(old_fs); /* Code snapshot useable ? 
*/ if ((regs->psw.addr & 1) || start >= end) { printk("%s Code: Bad PSW.\n", mode); -- cgit v1.2.3 From 110a6dbb2eca6b10bf60c61a51063d7fe1e55078 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Sep 2020 13:42:25 +0200 Subject: s390/uaccess: add HAVE_GET_KERNEL_NOFAULT support Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 111 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f09444d6aeab..23c85801cf04 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -278,4 +278,115 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo int copy_to_user_real(void __user *dest, void *src, unsigned long count); void *s390_kernel_write(void *dst, const void *src, size_t size); +#define HAVE_GET_KERNEL_NOFAULT + +int __noreturn __put_kernel_bad(void); + +#define __put_kernel_asm(val, to, insn) \ +({ \ + int __rc; \ + \ + asm volatile( \ + "0: " insn " %2,%1\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%3\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "+Q" (*(to)) \ + : "d" (val), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + u64 __x = (u64)(*((type *)(src))); \ + int __pk_err; \ + \ + switch (sizeof(type)) { \ + case 1: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ + break; \ + case 2: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ + break; \ + case 4: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ + break; \ + case 8: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + break; \ + default: \ + __pk_err = __put_kernel_bad(); \ + break; \ + } \ + if (unlikely(__pk_err)) \ + goto err_label; \ +} while (0) + +int __noreturn __get_kernel_bad(void); + +#define __get_kernel_asm(val, from, insn) \ +({ \ + int __rc; \ + \ + asm volatile( \ + "0: " insn " %1,%2\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%3\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "+d" (val) \ + : "Q" (*(from)), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __gk_err; \ + \ + switch (sizeof(type)) { \ + case 1: { \ + u8 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + case 2: { \ + u16 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + case 4: { \ + u32 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + case 8: { \ + u64 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + default: \ + __gk_err = __get_kernel_bad(); \ + break; \ + } \ + if (unlikely(__gk_err)) \ + goto err_label; \ +} while (0) + #endif /* __S390_UACCESS_H */ -- cgit v1.2.3 From 61f2e77489182b9b0e2fabe875e151fd46d286a1 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 16 Sep 2020 10:50:29 +0800 Subject: s390/diag: convert to use DEFINE_SEQ_ATTRIBUTE macro Use DEFINE_SEQ_ATTRIBUTE macro to simplify the code. 
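For reference, a sketch of what the generic seq_file helper generates (from memory of include/linux/seq_file.h, not part of this patch): DEFINE_SEQ_ATTRIBUTE(show_diag_stat) expands to roughly the open function and file_operations that are deleted below, keyed off the show_diag_stat_sops name, which is why the conversion is a pure deletion:

	static int show_diag_stat_open(struct inode *inode, struct file *file)
	{
		int ret = seq_open(file, &show_diag_stat_sops);

		if (!ret && inode->i_private) {
			struct seq_file *m = file->private_data;

			m->private = inode->i_private;
		}
		return ret;
	}

	static const struct file_operations show_diag_stat_fops = {
		.owner   = THIS_MODULE,
		.open    = show_diag_stat_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release,
	};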
Signed-off-by: Liu Shixin Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/diag.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index ccba63aaeb47..b8b0cd7b008f 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -104,18 +104,7 @@ static const struct seq_operations show_diag_stat_sops = { .show = show_diag_stat, }; -static int show_diag_stat_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &show_diag_stat_sops); -} - -static const struct file_operations show_diag_stat_fops = { - .open = show_diag_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - +DEFINE_SEQ_ATTRIBUTE(show_diag_stat); static int __init show_diag_stat_init(void) { -- cgit v1.2.3 From bcf1650c9b826602ad860f4465a3b66be611508a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 16 Sep 2020 12:02:49 +0200 Subject: s390/boot: avoid unnecessary zeroing of .bss section The .bss section is a part of the decompressor's image now, the linker fills it with zeros already. No need to do it with memset additionally. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/boot/head.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index fd78755d996d..dae10961d072 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -313,12 +313,6 @@ ENTRY(startup_kdump) spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) - // Clear decompressor's BSS section - larl %r2,_bss - slgr %r3,%r3 - larl %r4,_ebss - slgr %r4,%r2 - brasl %r14,memset brasl %r14,verify_facilities brasl %r14,startup_kernel -- cgit v1.2.3 From 5596c4c106baf3c915724dc0ae3ed293b4d1af55 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 18 Sep 2020 19:04:36 +0200 Subject: s390/sclp: remove unused sclp_early_printk_forced This reverts commit 55a5542a5462 ("s390/hibernate: fix error handling when suspend cpu != resume cpu"). It added sclp_early_printk_force() which is no longer used since commit 394216275c7d ("s390: remove broken hibernate / power management support"). No hibernate - no problem.
Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/sclp.h | 3 +-- arch/s390/kernel/early_printk.c | 2 +- drivers/s390/char/sclp_early_core.c | 11 +++-------- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c563f8368b19..90f34c7e2752 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -114,8 +114,7 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void sclp_early_printk_force(const char *s); -void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); +void __sclp_early_printk(const char *s, unsigned int len); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 6f24d83bc5dc..d9d53f44008a 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len, 0); + __sclp_early_printk(s, len); } static struct console sclp_early_console = { diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 958621326ece..a960afa974bf 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -214,11 +214,11 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220) * Output one or more lines of text on the SCLP console (VT220 and / * or line-mode). */ -void __sclp_early_printk(const char *str, unsigned int len, unsigned int force) +void __sclp_early_printk(const char *str, unsigned int len) { int have_linemode, have_vt220; - if (!force && sclp_init_state != sclp_init_state_uninitialized) + if (sclp_init_state != sclp_init_state_uninitialized) return; if (sclp_early_setup(0, &have_linemode, &have_vt220) != 0) return; @@ -231,12 +231,7 @@ void __sclp_early_printk(const char *str, unsigned int len, unsigned int force) void sclp_early_printk(const char *str) { - __sclp_early_printk(str, strlen(str), 0); -} - -void sclp_early_printk_force(const char *str) -{ - __sclp_early_printk(str, strlen(str), 1); + __sclp_early_printk(str, strlen(str)); } int __init sclp_early_read_info(void) -- cgit v1.2.3 From fa6999e326fe7851ecbd572b8cb9be8e930ebf41 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 21 Sep 2020 10:45:55 +0200 Subject: s390/pkey: support CCA and EP11 secure ECC private keys This patch extends the pkey kernel module to support CCA and EP11 secure ECC (private) keys as source for deriving ECC protected (private) keys. There is yet another new ioctl to support this: PKEY_KBLOB2PROTK3 can handle all the old keys plus CCA and EP11 secure ECC keys. For details see ioctl description in pkey.h. The CPACF unit currently only supports a subset of 5 different ECC curves (P-256, P-384, P-521, ED25519, ED448) and so only keys of this curve type can be transformed into protected keys. However, the pkey and the cca/ep11 low level functions do not check this but simple pass-through the key blob to the firmware onto the crypto cards. So most likely the failure will be a response carrying an error code resulting in user space errno value EIO instead of EINVAL. Deriving a protected key from an EP11 ECC secure key requires a CEX7 in EP11 mode. 
Deriving a protected key from an CCA ECC secure key requires a CEX7 in CCA mode. Together with this new ioctl the ioctls for querying lists of apqns (PKEY_APQNS4K and PKEY_APQNS4KT) have been extended to support EP11 and CCA ECC secure key type and key blobs. Together with this ioctl there comes a new struct ep11kblob_header which is to be prepended onto the EP11 key blob. See details in pkey.h for the fields in there. The older EP11 AES key blob with some info stored in the (unused) session field is also supported with this new ioctl. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/pkey.h | 77 +++++++-- drivers/s390/crypto/pkey_api.c | 244 ++++++++++++++++++++++++++-- drivers/s390/crypto/zcrypt_ccamisc.c | 193 ++++++++++++++++++++++ drivers/s390/crypto/zcrypt_ccamisc.h | 44 ++++- drivers/s390/crypto/zcrypt_ep11misc.c | 292 ++++++++++++++++++++++++++++------ drivers/s390/crypto/zcrypt_ep11misc.h | 63 ++++++-- 6 files changed, 818 insertions(+), 95 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index d27d7d329263..7349e96d28a0 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -35,12 +35,16 @@ #define PKEY_KEYTYPE_AES_128 1 #define PKEY_KEYTYPE_AES_192 2 #define PKEY_KEYTYPE_AES_256 3 +#define PKEY_KEYTYPE_ECC 4 /* the newer ioctls use a pkey_key_type enum for type information */ enum pkey_key_type { PKEY_TYPE_CCA_DATA = (__u32) 1, PKEY_TYPE_CCA_CIPHER = (__u32) 2, PKEY_TYPE_EP11 = (__u32) 3, + PKEY_TYPE_CCA_ECC = (__u32) 0x1f, + PKEY_TYPE_EP11_AES = (__u32) 6, + PKEY_TYPE_EP11_ECC = (__u32) 7, }; /* the newer ioctls use a pkey_key_size enum for key size information */ @@ -88,6 +92,20 @@ struct pkey_clrkey { __u8 clrkey[MAXCLRKEYSIZE]; /* 16, 24, or 32 byte clear key value */ }; +/* + * EP11 key blobs of type PKEY_TYPE_EP11_AES and PKEY_TYPE_EP11_ECC + * are ep11 blobs prepended by this header: + */ +struct ep11kblob_header { + __u8 type; /* always 0x00 */ + __u8 hver; /* header version, currently needs to be 0x00 */ + __u16 len; /* total length in bytes (including this header) */ + __u8 version; /* PKEY_TYPE_EP11_AES or PKEY_TYPE_EP11_ECC */ + __u8 res0; /* unused */ + __u16 bitlen; /* clear key bit len, 0 for unknown */ + __u8 res1[8]; /* unused */ +} __packed; + /* * Generate CCA AES secure key. */ @@ -304,7 +322,7 @@ struct pkey_verifykey2 { #define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2) /* - * Transform a key blob (of any type) into a protected key, version 2. + * Transform a key blob into a protected key, version 2. * There needs to be a list of apqns given with at least one entry in there. * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain * is not supported. The implementation walks through the list of apqns and @@ -313,6 +331,8 @@ struct pkey_verifykey2 { * list is tried until success (return 0) or the end of the list is reached * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to * generate a list of apqns based on the key. + * Deriving ECC protected keys from ECC secure keys is not supported with + * this ioctl, use PKEY_KBLOB2PROTK3 for this purpose. */ struct pkey_kblob2pkey2 { __u8 __user *key; /* in: pointer to key blob */ @@ -326,17 +346,17 @@ struct pkey_kblob2pkey2 { /* * Build a list of APQNs based on a key blob given. 
* Is able to find out which type of secure key is given (CCA AES secure - * key, CCA AES cipher key or EP11 AES key) and tries to find all matching - * crypto cards based on the MKVP and maybe other criterias (like CCA AES - * cipher keys need a CEX5C or higher, EP11 keys with BLOB_PKEY_EXTRACTABLE - * need a CEX7 and EP11 api version 4). The list of APQNs is further filtered - * by the key's mkvp which needs to match to either the current mkvp (CCA and - * EP11) or the alternate mkvp (old mkvp, CCA adapters only) of the apqns. The - * flags argument may be used to limit the matching apqns. If the - * PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of each apqn is - * compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it - * is assumed to return apqns where either the current or the alternate mkvp - * matches. At least one of the matching flags needs to be given. + * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private + * key) and tries to find all matching crypto cards based on the MKVP and maybe + * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys + * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of + * APQNs is further filtered by the key's mkvp which needs to match to either + * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters + * only) of the apqns. The flags argument may be used to limit the matching + * apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of + * each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both + * are given, it is assumed to return apqns where either the current or the + * alternate mkvp matches. At least one of the matching flags needs to be given. * The flags argument for EP11 keys has no further action and is currently * ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only * the wkvp from the key to match against the apqn's wkvp. @@ -365,9 +385,10 @@ struct pkey_apqns4key { * restrict the list by given master key verification patterns. * For different key types there may be different ways to match the * master key verification patterns. For CCA keys (CCA data key and CCA - * cipher key) the first 8 bytes of cur_mkvp refer to the current mkvp value - * of the apqn and the first 8 bytes of the alt_mkvp refer to the old mkvp. - * The flags argument controls if the apqns current and/or alternate mkvp + * cipher key) the first 8 bytes of cur_mkvp refer to the current AES mkvp value + * of the apqn and the first 8 bytes of the alt_mkvp refer to the old AES mkvp. + * For CCA ECC keys it is similar but the match is against the APKA current/old + * mkvp. The flags argument controls if the apqns current and/or alternate mkvp * should match. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current * mkvp of each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. * If both are given, it is assumed to return apqns where either the @@ -397,4 +418,30 @@ struct pkey_apqns4keytype { }; #define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype) +/* + * Transform a key blob into a protected key, version 3. + * The difference to version 2 of this ioctl is that the protected key + * buffer is now explicitly and not within a struct pkey_protkey any more. + * So this ioctl is also able to handle EP11 and CCA ECC secure keys and + * provide ECC protected keys. + * There needs to be a list of apqns given with at least one entry in there. 
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain + * is not supported. The implementation walks through the list of apqns and + * tries to send the request to each apqn without any further checking (like + * card type or online state). If the apqn fails, simple the next one in the + * list is tried until success (return 0) or the end of the list is reached + * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to + * generate a list of apqns based on the key. + */ +struct pkey_kblob2pkey3 { + __u8 __user *key; /* in: pointer to key blob */ + __u32 keylen; /* in: key blob size */ + struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */ + __u32 apqn_entries; /* in: # of apqn target list entries */ + __u32 pkeytype; /* out: prot key type (enum pkey_key_type) */ + __u32 pkeylen; /* in/out: size of pkey buffer/actual len of pkey */ + __u8 __user *pkey; /* in: pkey blob buffer space ptr */ +}; +#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3) + #endif /* _UAPI_PKEY_H */ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index e48c13acc5da..99cb60ea663d 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -31,8 +31,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("s390 protected key interface"); -#define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */ -#define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */ +#define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */ +#define PROTKEYBLOBBUFSIZE 256 /* protected key buffer size used internal */ +#define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */ /* mask of available pckmo subfunctions, fetched once at module init */ static cpacf_mask_t pckmo_functions; @@ -237,8 +238,9 @@ static int pkey_ep11key2pkey(const u8 *key, struct pkey_protkey *pkey) for (rc = -ENODEV, i = 0; i < nr_apqns; i++) { card = apqns[i] >> 16; dom = apqns[i] & 0xFFFF; - rc = ep11_key2protkey(card, dom, key, kb->head.len, - pkey->protkey, &pkey->len, &pkey->type); + pkey->len = sizeof(pkey->protkey); + rc = ep11_kblob2protkey(card, dom, key, kb->head.len, + pkey->protkey, &pkey->len, &pkey->type); if (rc == 0) break; } @@ -449,15 +451,21 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen, break; } case TOKVER_EP11_AES: { - if (keylen < MINEP11AESKEYBLOBSIZE) - goto out; /* check ep11 key for exportable as protected key */ - rc = ep11_check_aeskeyblob(debug_info, 3, key, 0, 1); + rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1); if (rc) goto out; rc = pkey_ep11key2pkey(key, protkey); break; } + case TOKVER_EP11_AES_WITH_HEADER: + /* check ep11 key with header for exportable as protected key */ + rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1); + if (rc) + goto out; + rc = pkey_ep11key2pkey(key + sizeof(struct ep11kblob_header), + protkey); + break; default: DEBUG_ERR("%s unknown/unsupported non-CCA token version %d\n", __func__, hdr->version); @@ -719,7 +727,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, && hdr->version == TOKVER_EP11_AES) { struct ep11keyblob *kb = (struct ep11keyblob *)key; - rc = ep11_check_aeskeyblob(debug_info, 3, key, 0, 1); + rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1); if (rc) goto out; if (ktype) @@ -780,7 +788,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, if (hdr->version == TOKVER_EP11_AES) { if (keylen < sizeof(struct 
ep11keyblob)) return -EINVAL; - if (ep11_check_aeskeyblob(debug_info, 3, key, 0, 1)) + if (ep11_check_aes_key(debug_info, 3, key, keylen, 1)) return -EINVAL; } else { return pkey_nonccatok2pkey(key, keylen, pkey); @@ -806,9 +814,10 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, else { /* EP11 AES secure key blob */ struct ep11keyblob *kb = (struct ep11keyblob *) key; - rc = ep11_key2protkey(card, dom, key, kb->head.len, - pkey->protkey, &pkey->len, - &pkey->type); + pkey->len = sizeof(pkey->protkey); + rc = ep11_kblob2protkey(card, dom, key, kb->head.len, + pkey->protkey, &pkey->len, + &pkey->type); } if (rc == 0) break; @@ -827,7 +836,27 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, if (keylen < sizeof(struct keytoken_header) || flags == 0) return -EINVAL; - if (hdr->type == TOKTYPE_NON_CCA && hdr->version == TOKVER_EP11_AES) { + if (hdr->type == TOKTYPE_NON_CCA + && (hdr->version == TOKVER_EP11_AES_WITH_HEADER + || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) { + int minhwtype = 0, api = 0; + struct ep11keyblob *kb = (struct ep11keyblob *) + (key + sizeof(struct ep11kblob_header)); + + if (flags != PKEY_FLAGS_MATCH_CUR_MKVP) + return -EINVAL; + if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) { + minhwtype = ZCRYPT_CEX7; + api = EP11_API_V; + } + rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, + minhwtype, api, kb->wkvp); + if (rc) + goto out; + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(key)) { int minhwtype = 0, api = 0; struct ep11keyblob *kb = (struct ep11keyblob *) key; @@ -869,6 +898,24 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, cur_mkvp, old_mkvp, 1); if (rc) goto out; + } else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) { + u64 cur_mkvp = 0, old_mkvp = 0; + struct eccprivkeytoken *t = (struct eccprivkeytoken *)key; + + if (t->secid == 0x20) { + if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) + cur_mkvp = t->mkvp; + if (flags & PKEY_FLAGS_MATCH_ALT_MKVP) + old_mkvp = t->mkvp; + } else { + /* unknown cca internal 2 token type */ + return -EINVAL; + } + rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, + ZCRYPT_CEX7, APKA_MK_SET, + cur_mkvp, old_mkvp, 1); + if (rc) + goto out; } else return -EINVAL; @@ -907,7 +954,22 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, cur_mkvp, old_mkvp, 1); if (rc) goto out; - } else if (ktype == PKEY_TYPE_EP11) { + } else if (ktype == PKEY_TYPE_CCA_ECC) { + u64 cur_mkvp = 0, old_mkvp = 0; + + if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) + cur_mkvp = *((u64 *) cur_mkvp); + if (flags & PKEY_FLAGS_MATCH_ALT_MKVP) + old_mkvp = *((u64 *) alt_mkvp); + rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, + ZCRYPT_CEX7, APKA_MK_SET, + cur_mkvp, old_mkvp, 1); + if (rc) + goto out; + + } else if (ktype == PKEY_TYPE_EP11 || + ktype == PKEY_TYPE_EP11_AES || + ktype == PKEY_TYPE_EP11_ECC) { u8 *wkvp = NULL; if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) @@ -933,6 +995,111 @@ out: return rc; } +static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns, + const u8 *key, size_t keylen, u32 *protkeytype, + u8 *protkey, u32 *protkeylen) +{ + int i, card, dom, rc; + struct keytoken_header *hdr = (struct keytoken_header *)key; + + /* check for at least one apqn given */ + if (!apqns || !nr_apqns) + return -EINVAL; + + if (keylen < sizeof(struct keytoken_header)) + return -EINVAL; + + if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == 
TOKVER_EP11_AES_WITH_HEADER + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) { + /* EP11 AES key blob with header */ + if (ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_ECC_WITH_HEADER + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) { + /* EP11 ECC key blob with header */ + if (ep11_check_ecc_key_with_hdr(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(key)) { + /* EP11 AES key blob with header in session field */ + if (ep11_check_aes_key(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_CCA_INTERNAL) { + if (hdr->version == TOKVER_CCA_AES) { + /* CCA AES data key */ + if (keylen != sizeof(struct secaeskeytoken)) + return -EINVAL; + if (cca_check_secaeskeytoken(debug_info, 3, key, 0)) + return -EINVAL; + } else if (hdr->version == TOKVER_CCA_VLSC) { + /* CCA AES cipher key */ + if (keylen < hdr->len || keylen > MAXCCAVLSCTOKENSIZE) + return -EINVAL; + if (cca_check_secaescipherkey(debug_info, 3, key, 0, 1)) + return -EINVAL; + } else { + DEBUG_ERR("%s unknown CCA internal token version %d\n", + __func__, hdr->version); + return -EINVAL; + } + } else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) { + /* CCA ECC (private) key */ + if (keylen < sizeof(struct eccprivkeytoken)) + return -EINVAL; + if (cca_check_sececckeytoken(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_NON_CCA) { + struct pkey_protkey pkey; + + rc = pkey_nonccatok2pkey(key, keylen, &pkey); + if (rc) + return rc; + memcpy(protkey, pkey.protkey, pkey.len); + *protkeylen = pkey.len; + *protkeytype = pkey.type; + return 0; + } else { + DEBUG_ERR("%s unknown/unsupported blob type %d\n", + __func__, hdr->type); + return -EINVAL; + } + + /* simple try all apqns from the list */ + for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) { + card = apqns[i].card; + dom = apqns[i].domain; + if (hdr->type == TOKTYPE_NON_CCA + && (hdr->version == TOKVER_EP11_AES_WITH_HEADER + || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) + rc = ep11_kblob2protkey(card, dom, key, hdr->len, + protkey, protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(key)) + rc = ep11_kblob2protkey(card, dom, key, hdr->len, + protkey, protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_CCA_INTERNAL && + hdr->version == TOKVER_CCA_AES) + rc = cca_sec2protkey(card, dom, key, protkey, + protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_CCA_INTERNAL && + hdr->version == TOKVER_CCA_VLSC) + rc = cca_cipher2protkey(card, dom, key, protkey, + protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) + rc = cca_ecc2protkey(card, dom, key, protkey, + protkeylen, protkeytype); + else + return -EINVAL; + } + + return rc; +} + /* * File io functions */ @@ -1333,6 +1500,55 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, kfree(apqns); break; } + case PKEY_KBLOB2PROTK3: { + struct pkey_kblob2pkey3 __user *utp = (void __user *) arg; + struct pkey_kblob2pkey3 ktp; + struct pkey_apqn *apqns = NULL; + u32 protkeylen = PROTKEYBLOBBUFSIZE; + u8 *kkey, *protkey; + + if (copy_from_user(&ktp, utp, sizeof(ktp))) + return -EFAULT; + apqns = _copy_apqns_from_user(ktp.apqns, ktp.apqn_entries); + if (IS_ERR(apqns)) + 
return PTR_ERR(apqns); + kkey = _copy_key_from_user(ktp.key, ktp.keylen); + if (IS_ERR(kkey)) { + kfree(apqns); + return PTR_ERR(kkey); + } + protkey = kmalloc(protkeylen, GFP_KERNEL); + if (!protkey) { + kfree(apqns); + kfree(kkey); + return -ENOMEM; + } + rc = pkey_keyblob2pkey3(apqns, ktp.apqn_entries, kkey, + ktp.keylen, &ktp.pkeytype, + protkey, &protkeylen); + DEBUG_DBG("%s pkey_keyblob2pkey3()=%d\n", __func__, rc); + kfree(apqns); + kfree(kkey); + if (rc) { + kfree(protkey); + break; + } + if (ktp.pkey && ktp.pkeylen) { + if (protkeylen > ktp.pkeylen) { + kfree(protkey); + return -EINVAL; + } + if (copy_to_user(ktp.pkey, protkey, protkeylen)) { + kfree(protkey); + return -EFAULT; + } + } + kfree(protkey); + ktp.pkeylen = protkeylen; + if (copy_to_user(utp, &ktp, sizeof(ktp))) + return -EFAULT; + break; + } default: /* unknown/unsupported ioctl cmd */ return -ENOTTY; diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index e969188a1ec4..b9d293ae228c 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -172,6 +172,49 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl, } EXPORT_SYMBOL(cca_check_secaescipherkey); +/* + * Simple check if the token is a valid CCA secure ECC private + * key token. Returns 0 on success or errno value on failure. + */ +int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl, + const u8 *token, size_t keysize, + int checkcpacfexport) +{ + struct eccprivkeytoken *t = (struct eccprivkeytoken *) token; + +#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + + if (t->type != TOKTYPE_CCA_INTERNAL_PKA) { + if (dbg) + DBF("%s token check failed, type 0x%02x != 0x%02x\n", + __func__, (int) t->type, TOKTYPE_CCA_INTERNAL_PKA); + return -EINVAL; + } + if (t->len > keysize) { + if (dbg) + DBF("%s token check failed, len %d > keysize %zu\n", + __func__, (int) t->len, keysize); + return -EINVAL; + } + if (t->secid != 0x20) { + if (dbg) + DBF("%s token check failed, secid 0x%02x != 0x20\n", + __func__, (int) t->secid); + return -EINVAL; + } + if (checkcpacfexport && !(t->kutc & 0x01)) { + if (dbg) + DBF("%s token check failed, XPRTCPAC bit is 0\n", + __func__); + return -EINVAL; + } + +#undef DBF + + return 0; +} +EXPORT_SYMBOL(cca_check_sececckeytoken); + /* * Allocate consecutive memory for request CPRB, request param * block, reply CPRB and reply param block and fill in values @@ -1297,6 +1340,156 @@ out: } EXPORT_SYMBOL(cca_cipher2protkey); +/* + * Derive protected key from CCA ECC secure private key. 
+ */ +int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key, + u8 *protkey, u32 *protkeylen, u32 *protkeytype) +{ + int rc; + u8 *mem, *ptr; + struct CPRBX *preqcblk, *prepcblk; + struct ica_xcRB xcrb; + struct aureqparm { + u8 subfunc_code[2]; + u16 rule_array_len; + u8 rule_array[8]; + struct { + u16 len; + u16 tk_blob_len; + u16 tk_blob_tag; + u8 tk_blob[66]; + } vud; + struct { + u16 len; + u16 cca_key_token_len; + u16 cca_key_token_flags; + u8 cca_key_token[0]; + } kb; + } __packed * preqparm; + struct aurepparm { + u8 subfunc_code[2]; + u16 rule_array_len; + struct { + u16 len; + u16 sublen; + u16 tag; + struct cpacfkeyblock { + u8 version; /* version of this struct */ + u8 flags[2]; + u8 algo; + u8 form; + u8 pad1[3]; + u16 keylen; + u8 key[0]; /* the key (keylen bytes) */ + u16 keyattrlen; + u8 keyattr[32]; + u8 pad2[1]; + u8 vptype; + u8 vp[32]; /* verification pattern */ + } ckb; + } vud; + struct { + u16 len; + } kb; + } __packed * prepparm; + int keylen = ((struct eccprivkeytoken *)key)->len; + + /* get already prepared memory for 2 cprbs with param block each */ + rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk); + if (rc) + return rc; + + /* fill request cprb struct */ + preqcblk->domain = domain; + + /* fill request cprb param block with AU request */ + preqparm = (struct aureqparm __force *) preqcblk->req_parmb; + memcpy(preqparm->subfunc_code, "AU", 2); + preqparm->rule_array_len = + sizeof(preqparm->rule_array_len) + + sizeof(preqparm->rule_array); + memcpy(preqparm->rule_array, "EXPT-SK ", 8); + /* vud, tk blob */ + preqparm->vud.len = sizeof(preqparm->vud); + preqparm->vud.tk_blob_len = sizeof(preqparm->vud.tk_blob) + + 2 * sizeof(uint16_t); + preqparm->vud.tk_blob_tag = 0x00C2; + /* kb, cca token */ + preqparm->kb.len = keylen + 3 * sizeof(uint16_t); + preqparm->kb.cca_key_token_len = keylen + 2 * sizeof(uint16_t); + memcpy(preqparm->kb.cca_key_token, key, keylen); + /* now fill length of param block into cprb */ + preqcblk->req_parml = sizeof(struct aureqparm) + keylen; + + /* fill xcrb struct */ + prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); + + /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ + rc = zcrypt_send_cprb(&xcrb); + if (rc) { + DEBUG_ERR( + "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", + __func__, (int) cardnr, (int) domain, rc); + goto out; + } + + /* check response returncode and reasoncode */ + if (prepcblk->ccp_rtcode != 0) { + DEBUG_ERR( + "%s unwrap secure key failure, card response %d/%d\n", + __func__, + (int) prepcblk->ccp_rtcode, + (int) prepcblk->ccp_rscode); + rc = -EIO; + goto out; + } + if (prepcblk->ccp_rscode != 0) { + DEBUG_WARN( + "%s unwrap secure key warning, card response %d/%d\n", + __func__, + (int) prepcblk->ccp_rtcode, + (int) prepcblk->ccp_rscode); + } + + /* process response cprb param block */ + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct aurepparm *) ptr; + + /* check the returned keyblock */ + if (prepparm->vud.ckb.version != 0x02) { + DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x != 0x02\n", + __func__, (int) prepparm->vud.ckb.version); + rc = -EIO; + goto out; + } + if (prepparm->vud.ckb.algo != 0x81) { + DEBUG_ERR( + "%s reply param keyblock algo mismatch 0x%02x != 0x81\n", + __func__, (int) prepparm->vud.ckb.algo); + rc = -EIO; + goto out; + } + + /* copy the translated protected key */ + if (prepparm->vud.ckb.keylen > *protkeylen) { + DEBUG_ERR("%s prot keylen mismatch %d > buffersize 
%u\n", + __func__, prepparm->vud.ckb.keylen, *protkeylen); + rc = -EIO; + goto out; + } + memcpy(protkey, prepparm->vud.ckb.key, prepparm->vud.ckb.keylen); + *protkeylen = prepparm->vud.ckb.keylen; + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_ECC; + +out: + free_cprbmem(mem, PARMBSIZE, 0); + return rc; +} +EXPORT_SYMBOL(cca_ecc2protkey); + /* * query cryptographic facility from CCA adapter */ diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h index 4d88a1d6af21..e7105443d5cb 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.h +++ b/drivers/s390/crypto/zcrypt_ccamisc.h @@ -14,8 +14,9 @@ #include /* Key token types */ -#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */ -#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal key token */ +#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */ +#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal sym key token */ +#define TOKTYPE_CCA_INTERNAL_PKA 0x1f /* CCA internal asym key token */ /* For TOKTYPE_NON_CCA: */ #define TOKVER_PROTECTED_KEY 0x01 /* Protected key token */ @@ -93,6 +94,31 @@ struct cipherkeytoken { u8 vdata[]; /* variable part data follows */ } __packed; +/* inside view of an CCA secure ECC private key */ +struct eccprivkeytoken { + u8 type; /* 0x1f for internal asym key token */ + u8 version; /* should be 0x00 */ + u16 len; /* total key token length in bytes */ + u8 res1[4]; + u8 secid; /* 0x20 for ECC priv key section marker */ + u8 secver; /* section version */ + u16 seclen; /* section length */ + u8 wtype; /* wrapping method, 0x00 clear, 0x01 AES */ + u8 htype; /* hash method, 0x02 for SHA-256 */ + u8 res2[2]; + u8 kutc; /* key usage and translation control */ + u8 ctype; /* curve type */ + u8 kfs; /* key format and security */ + u8 ksrc; /* key source */ + u16 pbitlen; /* length of prime p in bits */ + u16 ibmadlen; /* IBM associated data length in bytes */ + u64 mkvp; /* master key verification pattern */ + u8 opk[48]; /* encrypted object protection key data */ + u16 adatalen; /* associated data length in bytes */ + u16 fseclen; /* formated section length in bytes */ + u8 more_data[]; /* more data follows */ +} __packed; + /* Some defines for the CCA AES cipherkeytoken kmf1 field */ #define KMF1_XPRT_SYM 0x8000 #define KMF1_XPRT_UASY 0x4000 @@ -122,6 +148,14 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl, const u8 *token, int keybitsize, int checkcpacfexport); +/* + * Simple check if the token is a valid CCA secure ECC private + * key token. Returns 0 on success or errno value on failure. + */ +int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl, + const u8 *token, size_t keysize, + int checkcpacfexport); + /* * Generate (random) CCA AES DATA secure key. */ @@ -158,6 +192,12 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, int cca_clr2cipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, const u8 *clrkey, u8 *keybuf, size_t *keybufsize); +/* + * Derive proteced key from CCA ECC secure private key. 
+ */ +int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key, + u8 *protkey, u32 *protkeylen, u32 *protkeytype); + /* * Query cryptographic facility from CCA adapter */ diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 60b6bec21c32..9ce5a71da69b 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "ap_bus.h" #include "zcrypt_api.h" @@ -113,16 +114,156 @@ static void __exit card_cache_free(void) } /* - * Simple check if the key blob is a valid EP11 secure AES key. + * Simple check if the key blob is a valid EP11 AES key blob with header. */ -int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, - const u8 *key, int keybitsize, - int checkcpacfexport) +int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp) +{ + struct ep11kblob_header *hdr = (struct ep11kblob_header *) key; + struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr)); + +#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + + if (keylen < sizeof(*hdr) + sizeof(*kb)) { + DBF("%s key check failed, keylen %zu < %zu\n", + __func__, keylen, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (hdr->type != TOKTYPE_NON_CCA) { + if (dbg) + DBF("%s key check failed, type 0x%02x != 0x%02x\n", + __func__, (int) hdr->type, TOKTYPE_NON_CCA); + return -EINVAL; + } + if (hdr->hver != 0x00) { + if (dbg) + DBF("%s key check failed, header version 0x%02x != 0x00\n", + __func__, (int) hdr->hver); + return -EINVAL; + } + if (hdr->version != TOKVER_EP11_AES_WITH_HEADER) { + if (dbg) + DBF("%s key check failed, version 0x%02x != 0x%02x\n", + __func__, (int) hdr->version, TOKVER_EP11_AES_WITH_HEADER); + return -EINVAL; + } + if (hdr->len > keylen) { + if (dbg) + DBF("%s key check failed, header len %d keylen %zu mismatch\n", + __func__, (int) hdr->len, keylen); + return -EINVAL; + } + if (hdr->len < sizeof(*hdr) + sizeof(*kb)) { + if (dbg) + DBF("%s key check failed, header len %d < %zu\n", + __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (kb->version != EP11_STRUCT_MAGIC) { + if (dbg) + DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n", + __func__, (int) kb->version, EP11_STRUCT_MAGIC); + return -EINVAL; + } + if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { + if (dbg) + DBF("%s key check failed, PKEY_EXTRACTABLE is off\n", + __func__); + return -EINVAL; + } + +#undef DBF + + return 0; +} +EXPORT_SYMBOL(ep11_check_aes_key_with_hdr); + +/* + * Simple check if the key blob is a valid EP11 ECC key blob with header. + */ +int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp) +{ + struct ep11kblob_header *hdr = (struct ep11kblob_header *) key; + struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr)); + +#define DBF(...) 
debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + + if (keylen < sizeof(*hdr) + sizeof(*kb)) { + DBF("%s key check failed, keylen %zu < %zu\n", + __func__, keylen, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (hdr->type != TOKTYPE_NON_CCA) { + if (dbg) + DBF("%s key check failed, type 0x%02x != 0x%02x\n", + __func__, (int) hdr->type, TOKTYPE_NON_CCA); + return -EINVAL; + } + if (hdr->hver != 0x00) { + if (dbg) + DBF("%s key check failed, header version 0x%02x != 0x00\n", + __func__, (int) hdr->hver); + return -EINVAL; + } + if (hdr->version != TOKVER_EP11_ECC_WITH_HEADER) { + if (dbg) + DBF("%s key check failed, version 0x%02x != 0x%02x\n", + __func__, (int) hdr->version, TOKVER_EP11_ECC_WITH_HEADER); + return -EINVAL; + } + if (hdr->len > keylen) { + if (dbg) + DBF("%s key check failed, header len %d keylen %zu mismatch\n", + __func__, (int) hdr->len, keylen); + return -EINVAL; + } + if (hdr->len < sizeof(*hdr) + sizeof(*kb)) { + if (dbg) + DBF("%s key check failed, header len %d < %zu\n", + __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (kb->version != EP11_STRUCT_MAGIC) { + if (dbg) + DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n", + __func__, (int) kb->version, EP11_STRUCT_MAGIC); + return -EINVAL; + } + if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { + if (dbg) + DBF("%s key check failed, PKEY_EXTRACTABLE is off\n", + __func__); + return -EINVAL; + } + +#undef DBF + + return 0; +} +EXPORT_SYMBOL(ep11_check_ecc_key_with_hdr); + +/* + * Simple check if the key blob is a valid EP11 AES key blob with + * the header in the session field (old style EP11 AES key). + */ +int ep11_check_aes_key(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp) { struct ep11keyblob *kb = (struct ep11keyblob *) key; #define DBF(...) 
debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + if (keylen < sizeof(*kb)) { + DBF("%s key check failed, keylen %zu < %zu\n", + __func__, keylen, sizeof(*kb)); + return -EINVAL; + } + if (kb->head.type != TOKTYPE_NON_CCA) { if (dbg) DBF("%s key check failed, type 0x%02x != 0x%02x\n", @@ -135,39 +276,37 @@ int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, __func__, (int) kb->head.version, TOKVER_EP11_AES); return -EINVAL; } - if (kb->version != EP11_STRUCT_MAGIC) { + if (kb->head.len > keylen) { if (dbg) - DBF("%s key check failed, magic 0x%04x != 0x%04x\n", - __func__, (int) kb->version, EP11_STRUCT_MAGIC); + DBF("%s key check failed, header len %d keylen %zu mismatch\n", + __func__, (int) kb->head.len, keylen); return -EINVAL; } - switch (kb->head.keybitlen) { - case 128: - case 192: - case 256: - break; - default: + if (kb->head.len < sizeof(*kb)) { if (dbg) - DBF("%s key check failed, keybitlen %d invalid\n", - __func__, (int) kb->head.keybitlen); + DBF("%s key check failed, header len %d < %zu\n", + __func__, (int) kb->head.len, sizeof(*kb)); return -EINVAL; } - if (keybitsize > 0 && keybitsize != (int) kb->head.keybitlen) { - DBF("%s key check failed, keybitsize %d\n", - __func__, keybitsize); + + if (kb->version != EP11_STRUCT_MAGIC) { + if (dbg) + DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n", + __func__, (int) kb->version, EP11_STRUCT_MAGIC); return -EINVAL; } - if (checkcpacfexport && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { + if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { if (dbg) - DBF("%s key check failed, PKEY_EXTRACTABLE is 0\n", + DBF("%s key check failed, PKEY_EXTRACTABLE is off\n", __func__); return -EINVAL; } + #undef DBF return 0; } -EXPORT_SYMBOL(ep11_check_aeskeyblob); +EXPORT_SYMBOL(ep11_check_aes_key); /* * Allocate and prepare ep11 cprb plus additional payload. @@ -954,7 +1093,7 @@ static int ep11_wrapkey(u16 card, u16 domain, u8 data_tag; u8 data_lenfmt; u16 data_len; - u8 data[512]; + u8 data[1024]; } __packed * rep_pl; struct ep11_cprb *req = NULL, *rep = NULL; struct ep11_target_dev target; @@ -962,8 +1101,17 @@ static int ep11_wrapkey(u16 card, u16 domain, struct ep11keyblob *kb; size_t req_pl_size; int api, rc = -ENOMEM; + bool has_header = false; u8 *p; + /* maybe the session field holds a header with key info */ + kb = (struct ep11keyblob *) key; + if (kb->head.type == TOKTYPE_NON_CCA && + kb->head.version == TOKVER_EP11_AES) { + has_header = true; + keysize = kb->head.len < keysize ? kb->head.len : keysize; + } + /* request cprb and payload */ req_pl_size = sizeof(struct wk_req_pl) + (iv ? 
16 : 0) + ASN1TAGLEN(keysize) + 4; @@ -989,9 +1137,10 @@ static int ep11_wrapkey(u16 card, u16 domain, /* key blob */ p += asn1tag_write(p, 0x04, key, keysize); /* maybe the key argument needs the head data cleaned out */ - kb = (struct ep11keyblob *)(p - keysize); - if (kb->head.version == TOKVER_EP11_AES) + if (has_header) { + kb = (struct ep11keyblob *)(p - keysize); memset(&kb->head, 0, sizeof(kb->head)); + } /* empty kek tag */ *p++ = 0x04; *p++ = 0; @@ -1114,12 +1263,12 @@ out: } EXPORT_SYMBOL(ep11_clr2keyblob); -int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype) +int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen, + u8 *protkey, u32 *protkeylen, u32 *protkeytype) { int rc = -EIO; u8 *wkbuf = NULL; - size_t wkbuflen = 256; + size_t wkbuflen, keylen; struct wk_info { u16 version; u8 res1[16]; @@ -1129,8 +1278,33 @@ int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, u8 res2[8]; u8 pkey[0]; } __packed * wki; + const u8 *key; + struct ep11kblob_header *hdr; + + /* key with or without header ? */ + hdr = (struct ep11kblob_header *) keyblob; + if (hdr->type == TOKTYPE_NON_CCA + && (hdr->version == TOKVER_EP11_AES_WITH_HEADER + || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) + && is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) { + /* EP11 AES or ECC key with header */ + key = keyblob + sizeof(struct ep11kblob_header); + keylen = hdr->len - sizeof(struct ep11kblob_header); + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(keyblob)) { + /* EP11 AES key (old style) */ + key = keyblob; + keylen = hdr->len; + } else if (is_ep11_keyblob(keyblob)) { + /* raw EP11 key blob */ + key = keyblob; + keylen = keybloblen; + } else + return -EINVAL; /* alloc temp working buffer */ + wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1)); wkbuf = kmalloc(wkbuflen, GFP_ATOMIC); if (!wkbuf) return -ENOMEM; @@ -1147,46 +1321,68 @@ int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, wki = (struct wk_info *) wkbuf; /* check struct version and pkey type */ - if (wki->version != 1 || wki->pkeytype != 1) { + if (wki->version != 1 || wki->pkeytype < 1 || wki->pkeytype > 5) { DEBUG_ERR("%s wk info version %d or pkeytype %d mismatch.\n", __func__, (int) wki->version, (int) wki->pkeytype); rc = -EIO; goto out; } - /* copy the tanslated protected key */ - switch (wki->pkeysize) { - case 16+32: - /* AES 128 protected key */ - if (protkeytype) - *protkeytype = PKEY_KEYTYPE_AES_128; - break; - case 24+32: - /* AES 192 protected key */ - if (protkeytype) - *protkeytype = PKEY_KEYTYPE_AES_192; + /* check protected key type field */ + switch (wki->pkeytype) { + case 1: /* AES */ + switch (wki->pkeysize) { + case 16+32: + /* AES 128 protected key */ + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_AES_128; + break; + case 24+32: + /* AES 192 protected key */ + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_AES_192; + break; + case 32+32: + /* AES 256 protected key */ + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_AES_256; + break; + default: + DEBUG_ERR("%s unknown/unsupported AES pkeysize %d\n", + __func__, (int) wki->pkeysize); + rc = -EIO; + goto out; + } break; - case 32+32: - /* AES 256 protected key */ + case 3: /* EC-P */ + case 4: /* EC-ED */ + case 5: /* EC-BP */ if (protkeytype) - *protkeytype = PKEY_KEYTYPE_AES_256; + *protkeytype = PKEY_KEYTYPE_ECC; break; + case 2: /* TDES */ default: - DEBUG_ERR("%s 
unknown/unsupported pkeysize %d\n", - __func__, (int) wki->pkeysize); + DEBUG_ERR("%s unknown/unsupported key type %d\n", + __func__, (int) wki->pkeytype); rc = -EIO; goto out; } + + /* copy the tanslated protected key */ + if (wki->pkeysize > *protkeylen) { + DEBUG_ERR("%s wk info pkeysize %llu > protkeysize %u\n", + __func__, wki->pkeysize, *protkeylen); + rc = -EINVAL; + goto out; + } memcpy(protkey, wki->pkey, wki->pkeysize); - if (protkeylen) - *protkeylen = (u32) wki->pkeysize; - rc = 0; + *protkeylen = wki->pkeysize; out: kfree(wkbuf); return rc; } -EXPORT_SYMBOL(ep11_key2protkey); +EXPORT_SYMBOL(ep11_kblob2protkey); int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, int minhwtype, int minapi, const u8 *wkvp) diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index e3ed5ed1de86..1e02b197c003 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -12,22 +12,28 @@ #include #include -#define TOKVER_EP11_AES 0x03 /* EP11 AES key blob */ - #define EP11_API_V 4 /* highest known and supported EP11 API version */ - #define EP11_STRUCT_MAGIC 0x1234 -#define EP11_BLOB_PKEY_EXTRACTABLE 0x200000 +#define EP11_BLOB_PKEY_EXTRACTABLE 0x00200000 + +/* + * Internal used values for the version field of the key header. + * Should match to the enum pkey_key_type in pkey.h. + */ +#define TOKVER_EP11_AES 0x03 /* EP11 AES key blob (old style) */ +#define TOKVER_EP11_AES_WITH_HEADER 0x06 /* EP11 AES key blob with header */ +#define TOKVER_EP11_ECC_WITH_HEADER 0x07 /* EP11 ECC key blob with header */ /* inside view of an EP11 secure key blob */ struct ep11keyblob { union { u8 session[32]; + /* only used for PKEY_TYPE_EP11: */ struct { u8 type; /* 0x00 (TOKTYPE_NON_CCA) */ u8 res0; /* unused */ u16 len; /* total length in bytes of this blob */ - u8 version; /* 0x06 (TOKVER_EP11_AES) */ + u8 version; /* 0x03 (TOKVER_EP11_AES) */ u8 res1; /* unused */ u16 keybitlen; /* clear key bit len, 0 for unknown */ } head; @@ -41,16 +47,41 @@ struct ep11keyblob { u8 mac[32]; } __packed; +/* check ep11 key magic to find out if this is an ep11 key blob */ +static inline bool is_ep11_keyblob(const u8 *key) +{ + struct ep11keyblob *kb = (struct ep11keyblob *) key; + + return (kb->version == EP11_STRUCT_MAGIC); +} + +/* + * Simple check if the key blob is a valid EP11 AES key blob with header. + * If checkcpacfexport is enabled, the key is also checked for the + * attributes needed to export this key for CPACF use. + * Returns 0 on success or errno value on failure. + */ +int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp); + /* - * Simple check if the key blob is a valid EP11 secure AES key. - * If keybitsize is given, the bitsize of the key is also checked. + * Simple check if the key blob is a valid EP11 ECC key blob with header. * If checkcpacfexport is enabled, the key is also checked for the * attributes needed to export this key for CPACF use. * Returns 0 on success or errno value on failure. */ -int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, - const u8 *key, int keybitsize, - int checkcpacfexport); +int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp); + +/* + * Simple check if the key blob is a valid EP11 AES key blob with + * the header in the session field (old style EP11 AES key). 
+ * If checkcpacfexport is enabled, the key is also checked for the + * attributes needed to export this key for CPACF use. + * Returns 0 on success or errno value on failure. + */ +int ep11_check_aes_key(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp); /* EP11 card info struct */ struct ep11_card_info { @@ -91,12 +122,6 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, const u8 *clrkey, u8 *keybuf, size_t *keybufsize); -/* - * Derive proteced key from EP11 AES secure key blob. - */ -int ep11_key2protkey(u16 cardnr, u16 domain, const u8 *key, size_t keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype); - /* * Build a list of ep11 apqns meeting the following constrains: * - apqn is online and is in fact an EP11 apqn @@ -119,6 +144,12 @@ int ep11_key2protkey(u16 cardnr, u16 domain, const u8 *key, size_t keylen, int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, int minhwtype, int minapi, const u8 *wkvp); +/* + * Derive proteced key from EP11 key blob (AES and ECC keys). + */ +int ep11_kblob2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, + u8 *protkey, u32 *protkeylen, u32 *protkeytype); + void zcrypt_ep11misc_exit(void); #endif /* _ZCRYPT_EP11MISC_H_ */ -- cgit v1.2.3 From b3bd02495cb339124f13135d51940cf48d83e5cb Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 15 Sep 2020 08:53:50 +0200 Subject: s390/stp: add locking to sysfs functions The sysfs function might race with stp_work_fn. To prevent that, add the required locking. Another issue is that the sysfs functions are checking the stp_online flag, but this flag just holds the user setting whether STP is enabled. Add a flag to clock_sync_flag whether stp_info holds valid data and use that instead. Cc: stable@vger.kernel.org Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/time.c | 118 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index bc806e1547d6..bee380340bad 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -299,8 +299,9 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_STP 0 -#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* * The get_clock function for the physical clock. 
It will get the current @@ -535,6 +536,22 @@ void stp_queue_work(void) queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; @@ -557,8 +574,7 @@ static int stp_sync_clock(void *data) if (rc == 0) { sync->clock_delta = clock_delta; clock_sync_global(clock_delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } @@ -604,7 +620,7 @@ static void stp_work_fn(struct work_struct *work) if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; @@ -641,10 +657,14 @@ static ssize_t ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(ctn_id); @@ -653,9 +673,13 @@ static ssize_t ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(ctn_type); @@ -664,9 +688,13 @@ static ssize_t dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(dst_offset); @@ -675,9 +703,13 @@ static ssize_t leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(leap_seconds); @@ -686,9 +718,13 @@ static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(stratum); @@ -697,9 +733,13 @@ static ssize_t time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return 
-ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(time_offset); @@ -708,9 +748,13 @@ static ssize_t time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(time_zone_offset); @@ -719,9 +763,13 @@ static ssize_t timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(timing_mode); @@ -730,9 +778,13 @@ static ssize_t timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(timing_state); -- cgit v1.2.3 From bb7d066a091654b6d6c0b6061bda438bf36c6613 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 3 Aug 2020 08:50:38 +0200 Subject: s390/stp: use __packed Use __packed instead of __attribute__((packed)) Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h index f0ddefb06ec8..572f6f4286e2 100644 --- a/arch/s390/include/asm/stp.h +++ b/arch/s390/include/asm/stp.h @@ -6,6 +6,8 @@ #ifndef __S390_STP_H #define __S390_STP_H +#include + /* notifier for syncs */ extern struct atomic_notifier_head s390_epoch_delta_notifier; @@ -16,7 +18,7 @@ struct stp_irq_parm { unsigned int lac : 1; /* Link availability change */ unsigned int tcpc : 1; /* Time control parameter change */ unsigned int _pad2 : 15; -} __attribute__ ((packed)); +} __packed; #define STP_OP_SYNC 1 #define STP_OP_CTRL 3 @@ -42,7 +44,7 @@ struct stp_sstpi { unsigned int rsvd5; unsigned int todoff[4]; unsigned int rsvd6[48]; -} __attribute__ ((packed)); +} __packed; /* Functions needed by the machine check handler */ int stp_sync_check(void); -- cgit v1.2.3 From 4ab79ed223d2ff32ffe16cfd841e064199341c34 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 8 Sep 2020 10:14:00 +0200 Subject: s390/stp: use u32 instead of unsigned int In hardware-dependent headers using u32 is easier to read and less error-prone. 
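As an editorial aside (not part of the patch): the conversion below replaces "unsigned int" bitfields with fixed-width u32 bitfields and drops the names of the reserved fields. A hypothetical user-space mirror of the stp_irq_parm layout, using uint32_t in place of the kernel's u32, could look like the sketch below; the struct name and the main() harness are invented for illustration, and bitfield packing order is ABI-dependent, so only the in-kernel definition is authoritative for the hardware layout.

#include <stdint.h>
#include <stdio.h>

/* hypothetical mirror of stp_irq_parm: fixed-width bitfields,
 * unnamed fields for the reserved bits */
struct stp_irq_parm_demo {
	uint32_t      : 14;	/* reserved */
	uint32_t tsc  : 1;	/* Timing status change */
	uint32_t lac  : 1;	/* Link availability change */
	uint32_t tcpc : 1;	/* Time control parameter change */
	uint32_t      : 15;	/* reserved */
} __attribute__((packed));

int main(void)
{
	/* the bitfields still describe exactly one 32-bit word */
	printf("sizeof(struct stp_irq_parm_demo) = %zu\n",
	       sizeof(struct stp_irq_parm_demo));
	return 0;
}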
Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stp.h | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h index 572f6f4286e2..a5c0bc1e5b90 100644 --- a/arch/s390/include/asm/stp.h +++ b/arch/s390/include/asm/stp.h @@ -13,37 +13,37 @@ extern struct atomic_notifier_head s390_epoch_delta_notifier; /* STP interruption parameter */ struct stp_irq_parm { - unsigned int _pad0 : 14; - unsigned int tsc : 1; /* Timing status change */ - unsigned int lac : 1; /* Link availability change */ - unsigned int tcpc : 1; /* Time control parameter change */ - unsigned int _pad2 : 15; + u32 : 14; + u32 tsc : 1; /* Timing status change */ + u32 lac : 1; /* Link availability change */ + u32 tcpc : 1; /* Time control parameter change */ + u32 : 15; } __packed; #define STP_OP_SYNC 1 #define STP_OP_CTRL 3 struct stp_sstpi { - unsigned int rsvd0; - unsigned int rsvd1 : 8; - unsigned int stratum : 8; - unsigned int vbits : 16; - unsigned int leaps : 16; - unsigned int tmd : 4; - unsigned int ctn : 4; - unsigned int rsvd2 : 3; - unsigned int c : 1; - unsigned int tst : 4; - unsigned int tzo : 16; - unsigned int dsto : 16; - unsigned int ctrl : 16; - unsigned int rsvd3 : 16; - unsigned int tto; - unsigned int rsvd4; - unsigned int ctnid[3]; - unsigned int rsvd5; - unsigned int todoff[4]; - unsigned int rsvd6[48]; + u32 : 32; + u32 : 8; + u32 stratum : 8; + u32 vbits : 16; + u32 leaps : 16; + u32 tmd : 4; + u32 ctn : 4; + u32 : 3; + u32 c : 1; + u32 tst : 4; + u32 tzo : 16; + u32 dsto : 16; + u32 ctrl : 16; + u32 : 16; + u32 tto; + u32 : 32; + u32 ctnid[3]; + u32 : 32; + u32 todoff[4]; + u32 rsvd[48]; } __packed; /* Functions needed by the machine check handler */ -- cgit v1.2.3 From b2539aa0d7ff1e42c74a9dd8c73ec1c2771c9e5d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 12 Jun 2020 12:59:19 +0200 Subject: s390/stp: add support for leap seconds In the current implementation, leap seconds are only synchronized during the bootup process when the STP clock is synced. If the Leap second offset (LSO) changes the machine must be rebooted, which is not desired. This patch adds the required code to handle Leap second changes during runtime. If the Leap second changes, a Configuration change machine check is triggered. The STP code than schedules a Leap second insertion/deletion with do_adjtimex(). 
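An editorial note on the do_adjtimex() usage added below: the STA_INS/STA_DEL status bits it sets are the same ones user space can read (or, with CAP_SYS_TIME, set) through adjtimex(2). The following user-space sketch only illustrates the flag handling and is not part of the patch; the write path is left commented out because it would change system timekeeping.

#include <stdio.h>
#include <string.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx;

	memset(&tx, 0, sizeof(tx));	/* modes == 0: read-only query */
	if (adjtimex(&tx) < 0) {
		perror("adjtimex");
		return 1;
	}
	printf("leap second insertion pending: %s\n",
	       (tx.status & STA_INS) ? "yes" : "no");
	printf("leap second deletion pending:  %s\n",
	       (tx.status & STA_DEL) ? "yes" : "no");

	/* arming an insertion mirrors what stp_check_leap() does:
	 *	tx.modes = ADJ_STATUS;
	 *	tx.status |= STA_INS;
	 *	adjtimex(&tx);
	 */
	return 0;
}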
Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/cio.h | 1 + arch/s390/include/asm/stp.h | 46 +++++++++++++++++++++- arch/s390/kernel/time.c | 94 ++++++++++++++++++++++++++++++++++++++++----- drivers/s390/cio/chsc.c | 21 ++++++++++ 4 files changed, 152 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index b5bfb3123cb1..953a7316b30a 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -372,6 +372,7 @@ struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages); /* Function from drivers/s390/cio/chsc.c */ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); +int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); #endif diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h index a5c0bc1e5b90..ba07463897c1 100644 --- a/arch/s390/include/asm/stp.h +++ b/arch/s390/include/asm/stp.h @@ -25,7 +25,9 @@ struct stp_irq_parm { struct stp_sstpi { u32 : 32; - u32 : 8; + u32 tu : 1; + u32 lu : 1; + u32 : 6; u32 stratum : 8; u32 vbits : 16; u32 leaps : 16; @@ -46,6 +48,48 @@ struct stp_sstpi { u32 rsvd[48]; } __packed; +struct stp_tzib { + u32 tzan : 16; + u32 : 16; + u32 tzo : 16; + u32 dsto : 16; + u32 stn; + u32 dstn; + u64 dst_on_alg; + u64 dst_off_alg; +} __packed; + +struct stp_tcpib { + u32 atcode : 4; + u32 ntcode : 4; + u32 d : 1; + u32 : 23; + s32 tto; + struct stp_tzib atzib; + struct stp_tzib ntzib; + s32 adst_offset : 16; + s32 ndst_offset : 16; + u32 rsvd1; + u64 ntzib_update; + u64 ndsto_update; +} __packed; + +struct stp_lsoib { + u32 p : 1; + u32 : 31; + s32 also : 16; + s32 nlso : 16; + u64 nlsout; +} __packed; + +struct stp_stzi { + u32 rsvd0[3]; + u64 data_ts; + u32 rsvd1[22]; + struct stp_tcpib tcpib; + struct stp_lsoib lsoib; +} __packed; + /* Functions needed by the machine check handler */ int stp_sync_check(void); int stp_island_check(void); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index bee380340bad..49648d574b35 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -598,6 +598,81 @@ static int stp_sync_clock(void *data) return 0; } +static int stp_clear_leap(void) +{ + struct __kernel_timex txc; + int ret; + + memset(&txc, 0, sizeof(txc)); + + ret = do_adjtimex(&txc); + if (ret < 0) + return ret; + + txc.modes = ADJ_STATUS; + txc.status &= ~(STA_INS|STA_DEL); + return do_adjtimex(&txc); +} + +static void stp_check_leap(void) +{ + struct stp_stzi stzi; + struct stp_lsoib *lsoib = &stzi.lsoib; + struct __kernel_timex txc; + int64_t timediff; + int leapdiff, ret; + + if (!stp_info.lu || !check_sync_clock()) { + /* + * Either a scheduled leap second was removed by the operator, + * or STP is out of sync. In both cases, clear the leap second + * kernel flags. 
+ */ + if (stp_clear_leap() < 0) + pr_err("failed to clear leap second flags\n"); + return; + } + + if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) { + pr_err("stzi failed\n"); + return; + } + + timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC; + leapdiff = lsoib->nlso - lsoib->also; + + if (leapdiff != 1 && leapdiff != -1) { + pr_err("Cannot schedule %d leap seconds\n", leapdiff); + return; + } + + if (timediff < 0) { + if (stp_clear_leap() < 0) + pr_err("failed to clear leap second flags\n"); + } else if (timediff < 7200) { + memset(&txc, 0, sizeof(txc)); + ret = do_adjtimex(&txc); + if (ret < 0) + return; + + txc.modes = ADJ_STATUS; + if (leapdiff > 0) + txc.status |= STA_INS; + else + txc.status |= STA_DEL; + ret = do_adjtimex(&txc); + if (ret < 0) + pr_err("failed to set leap second flags\n"); + /* arm Timer to clear leap second flags */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); + } else { + /* The day the leap second is scheduled for hasn't been reached. Retry + * in one hour. + */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); + } +} + /* * STP work. Check for the STP state and take over the clock * synchronization if the STP clock source is usable. @@ -616,7 +691,7 @@ static void stp_work_fn(struct work_struct *work) goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL); if (rc) goto out_unlock; @@ -625,14 +700,13 @@ static void stp_work_fn(struct work_struct *work) goto out_unlock; /* Skip synchronization if the clock is already in sync. */ - if (check_sync_clock()) - goto out_unlock; - - memset(&stp_sync, 0, sizeof(stp_sync)); - cpus_read_lock(); - atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask); - cpus_read_unlock(); + if (!check_sync_clock()) { + memset(&stp_sync, 0, sizeof(stp_sync)); + cpus_read_lock(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask); + cpus_read_unlock(); + } if (!check_sync_clock()) /* @@ -640,6 +714,8 @@ static void stp_work_fn(struct work_struct *work) * Retry after a second. */ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); + else if (stp_info.lu) + stp_check_leap(); out_unlock: mutex_unlock(&stp_work_mutex); diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index c314e9495c1b..e612e570447a 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1260,6 +1260,27 @@ int chsc_sstpi(void *page, void *result, size_t size) return (rr->response.code == 0x0001) ? 0 : -EIO; } +int chsc_stzi(void *page, void *result, size_t size) +{ + struct { + struct chsc_header request; + unsigned int rsvd0[3]; + struct chsc_header response; + char data[]; + } *rr; + int rc; + + memset(page, 0, PAGE_SIZE); + rr = page; + rr->request.length = 0x0010; + rr->request.code = 0x003e; + rc = chsc(rr); + if (rc) + return -EIO; + memcpy(result, &rr->data, size); + return (rr->response.code == 0x0001) ? 
0 : -EIO; +} + int chsc_siosl(struct subchannel_id schid) { struct { -- cgit v1.2.3 From 4fb53dde770cc095a6e279ca9d9c72d49488cdb3 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 17 Jun 2020 10:58:47 +0200 Subject: s390/stp: add sysfs file to show scheduled leap seconds This patch introduces /sys/devices/system/stp/scheduled_leap_seconds, which will contain either 0,0 if no leap second is scheduled, or the UTC timestamp + leap second offset. Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/time.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 49648d574b35..da271f8ec8d6 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -790,6 +790,34 @@ static ssize_t leap_seconds_show(struct device *dev, static DEVICE_ATTR_RO(leap_seconds); +static ssize_t leap_seconds_scheduled_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stp_stzi stzi; + ssize_t ret; + + mutex_lock(&stp_work_mutex); + if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) { + mutex_unlock(&stp_work_mutex); + return -ENODATA; + } + + ret = chsc_stzi(stp_page, &stzi, sizeof(stzi)); + mutex_unlock(&stp_work_mutex); + if (ret < 0) + return ret; + + if (!stzi.lsoib.p) + return sprintf(buf, "0,0\n"); + + return sprintf(buf, "%llu,%d\n", + tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, + stzi.lsoib.nlso - stzi.lsoib.also); +} + +static DEVICE_ATTR_RO(leap_seconds_scheduled); + static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -906,6 +934,7 @@ static struct device_attribute *stp_attributes[] = { &dev_attr_dst_offset, &dev_attr_leap_seconds, &dev_attr_online, + &dev_attr_leap_seconds_scheduled, &dev_attr_stratum, &dev_attr_time_offset, &dev_attr_time_zone_offset, -- cgit v1.2.3 From ad5ceb33eee128346475f5efa672e6402ae15e51 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 21 Sep 2020 17:23:42 +0200 Subject: s390/stp: unify stp_work_mutex and clock_sync_mutex No need to have two mutexes, and while at it rename it to stp_mutex. Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/time.c | 53 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index da271f8ec8d6..0ac30ee2c633 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -296,7 +296,7 @@ void __init time_init(void) } static DEFINE_PER_CPU(atomic_t, clock_sync_word); -static DEFINE_MUTEX(clock_sync_mutex); +static DEFINE_MUTEX(stp_mutex); static unsigned long clock_sync_flags; #define CLOCK_SYNC_HAS_STP 0 @@ -445,7 +445,6 @@ static struct stp_sstpi stp_info; static void *stp_page; static void stp_work_fn(struct work_struct *work); -static DEFINE_MUTEX(stp_work_mutex); static DECLARE_WORK(stp_work, stp_work_fn); static struct timer_list stp_timer; @@ -683,7 +682,7 @@ static void stp_work_fn(struct work_struct *work) int rc; /* prevent multiple execution. 
*/ - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); @@ -718,7 +717,7 @@ static void stp_work_fn(struct work_struct *work) stp_check_leap(); out_unlock: - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); } /* @@ -735,11 +734,11 @@ static ssize_t ctn_id_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%016llx\n", *(unsigned long long *) stp_info.ctnid); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -751,10 +750,10 @@ static ssize_t ctn_type_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", stp_info.ctn); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -766,10 +765,10 @@ static ssize_t dst_offset_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x2000)) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -781,10 +780,10 @@ static ssize_t leap_seconds_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x8000)) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -797,14 +796,14 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev, struct stp_stzi stzi; ssize_t ret; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) { - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return -ENODATA; } ret = chsc_stzi(stp_page, &stzi, sizeof(stzi)); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); if (ret < 0) return ret; @@ -824,10 +823,10 @@ static ssize_t stratum_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -839,10 +838,10 @@ static ssize_t time_offset_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x0800)) ret = sprintf(buf, "%i\n", (int) stp_info.tto); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -854,10 +853,10 @@ static ssize_t time_zone_offset_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x4000)) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -869,10 +868,10 @@ static ssize_t timing_mode_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", stp_info.tmd); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -884,10 +883,10 @@ static ssize_t timing_state_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", stp_info.tst); - 
mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -911,14 +910,14 @@ static ssize_t online_store(struct device *dev, return -EINVAL; if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); + mutex_lock(&stp_mutex); stp_online = value; if (stp_online) set_bit(CLOCK_SYNC_STP, &clock_sync_flags); else clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); queue_work(time_sync_wq, &stp_work); - mutex_unlock(&clock_sync_mutex); + mutex_unlock(&stp_mutex); return count; } -- cgit v1.2.3 From 07a699bc43d1feb2abe3d47781e2db3d08554bcc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 23 Sep 2020 09:37:43 +0200 Subject: s390/kaslr: avoid mixing valid random value and an error code 0 is a valid random value. To avoid mixing it with error code 0 as an return code make get_random() take extra argument to output random value and return an error code. Reviewed-by: Philipp Rudo Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/boot/kaslr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index d4442163ffa9..c8549a0474e1 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -42,7 +42,7 @@ static int check_prng(void) return PRNG_MODE_TDES; } -static unsigned long get_random(unsigned long limit) +static int get_random(unsigned long limit, unsigned long *value) { struct prng_parm prng = { /* initial parameter block for tdes mode, copied from libica */ @@ -84,9 +84,10 @@ static unsigned long get_random(unsigned long limit) (u8 *) &random, sizeof(random)); break; default: - random = 0; + return -1; } - return random % limit; + *value = random % limit; + return 0; } unsigned long get_random_base(unsigned long safe_addr) @@ -143,8 +144,7 @@ unsigned long get_random_base(unsigned long safe_addr) return 0; } - base = get_random(block_sum); - if (base == 0) + if (get_random(block_sum, &base)) return 0; if (base < safe_addr) base = safe_addr; -- cgit v1.2.3 From 5c46f2768c4bbf8e0beebea9adac1320015816b7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 18 Sep 2020 16:02:45 +0200 Subject: s390/kaslr: correct and explain randomization base generation Currently there are several minor problems with randomization base generation code: 1. It might misbehave in low memory conditions. In particular there might be enough space for the kernel on [0, block_sum] but after if (base < safe_addr) base = safe_addr; it might not be enough anymore. 2. It does not correctly handle minimal address constraint. In condition if (base < safe_addr) base = safe_addr; a synthetic value is compared with an address. If we have a memory setup with memory holes due to offline memory regions, and safe_addr is close to the end of the first online memory block - we might position the kernel in invalid memory. 3. block_sum calculation logic contains off-by-one error. Let's say we have a memory block in which the kernel fits perfectly (end - start == kernel_size). In this case: if (end - start < kernel_size) continue; block_sum += end - start - kernel_size; block_sum is not increased, while it is a valid kernel position. So, address problems listed and explain algorithm used. Besides that restructuring the code makes it possible to extend kernel positioning algorithm further. Currently we pick position in between single [min, max] range (min = safe_addr, max = memory_limit). 
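The arithmetic behind count_valid_kernel_positions() can also be sketched outside the kernel; the following user-space model (an editorial illustration, not part of the patch, with invented memory ranges, kernel size and THREAD_SIZE) counts how many THREAD_SIZE-aligned base addresses fit the kernel image into a set of [min, max]-clamped online ranges, i.e. (end - start - kernel_size) / THREAD_SIZE + 1 per range:

#include <stdio.h>

#define THREAD_SIZE	0x10000UL	/* invented alignment step (64K) */

struct range { unsigned long start, end; };

static unsigned long count_positions(const struct range *r, int n,
				     unsigned long kernel_size,
				     unsigned long min, unsigned long max)
{
	unsigned long start, end, pos = 0;
	int i;

	for (i = 0; i < n; i++) {
		/* clamp each online range to the allowed [min, max] window */
		start = r[i].start < min ? min : r[i].start;
		end = r[i].end > max ? max : r[i].end;
		if (start >= end || end - start < kernel_size)
			continue;
		/* one position per THREAD_SIZE step that still fits the image */
		pos += (end - start - kernel_size) / THREAD_SIZE + 1;
	}
	return pos;
}

int main(void)
{
	struct range mem[] = { { 0x00000000, 0x20000000 },	/* invented */
			       { 0x40000000, 0x50000000 } };
	unsigned long kernel_size = 0x1000000;			/* invented 16M */

	printf("valid kernel positions: %lu\n",
	       count_positions(mem, 2, kernel_size, 0x100000, 0x50000000));
	return 0;
}

Drawing one random number in [1, positions] and walking the same loop again maps the pick back to a concrete base address, which is what position_to_address() does in the patch.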
In future we can do that for multiple ranges as well (by calling count_valid_kernel_positions for each range). Reviewed-by: Philipp Rudo Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/boot/kaslr.c | 130 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index c8549a0474e1..d844a5ef9089 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -90,14 +90,95 @@ static int get_random(unsigned long limit, unsigned long *value) return 0; } +/* + * To randomize kernel base address we have to consider several facts: + * 1. physical online memory might not be continuous and have holes. mem_detect + * info contains list of online memory ranges we should consider. + * 2. we have several memory regions which are occupied and we should not + * overlap and destroy them. Currently safe_addr tells us the border below + * which all those occupied regions are. We are safe to use anything above + * safe_addr. + * 3. the upper limit might apply as well, even if memory above that limit is + * online. Currently those limitations are: + * 3.1. Limit set by "mem=" kernel command line option + * 3.2. memory reserved at the end for kasan initialization. + * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size). + * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages + * (16 pages when the kernel is built with kasan enabled) + * Assumptions: + * 1. kernel size (including .bss size) and upper memory limit are page aligned. + * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE + * aligned (in practice memory configurations granularity on z/VM and LPAR + * is 1mb). + * + * To guarantee uniform distribution of kernel base address among all suitable + * addresses we generate random value just once. For that we need to build a + * continuous range in which every value would be suitable. We can build this + * range by simply counting all suitable addresses (let's call them positions) + * which would be valid as kernel base address. To count positions we iterate + * over online memory ranges. For each range which is big enough for the + * kernel image we count all suitable addresses we can put the kernel image at + * that is + * (end - start - kernel_size) / THREAD_SIZE + 1 + * Two functions count_valid_kernel_positions and position_to_address help + * to count positions in memory range given and then convert position back + * to address. 
+ */ +static unsigned long count_valid_kernel_positions(unsigned long kernel_size, + unsigned long _min, + unsigned long _max) +{ + unsigned long start, end, pos = 0; + int i; + + for_each_mem_detect_block(i, &start, &end) { + if (_min >= end) + continue; + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); + if (end - start < kernel_size) + continue; + pos += (end - start - kernel_size) / THREAD_SIZE + 1; + } + + return pos; +} + +static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size, + unsigned long _min, unsigned long _max) +{ + unsigned long start, end; + int i; + + for_each_mem_detect_block(i, &start, &end) { + if (_min >= end) + continue; + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); + if (end - start < kernel_size) + continue; + if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos) + return start + (pos - 1) * THREAD_SIZE; + pos -= (end - start - kernel_size) / THREAD_SIZE + 1; + } + + return 0; +} + unsigned long get_random_base(unsigned long safe_addr) { - unsigned long memory_limit = memory_end_set ? memory_end : 0; - unsigned long base, start, end, kernel_size; - unsigned long block_sum, offset; + unsigned long memory_limit = get_mem_detect_end(); + unsigned long base_pos, max_pos, kernel_size; unsigned long kasan_needs; int i; + if (memory_end_set) + memory_limit = min(memory_limit, memory_end); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { if (safe_addr < INITRD_START + INITRD_SIZE) safe_addr = INITRD_START + INITRD_SIZE; @@ -127,44 +208,17 @@ unsigned long get_random_base(unsigned long safe_addr) } kernel_size = vmlinux.image_size + vmlinux.bss_size; - block_sum = 0; - for_each_mem_detect_block(i, &start, &end) { - if (memory_limit) { - if (start >= memory_limit) - break; - if (end > memory_limit) - end = memory_limit; - } - if (end - start < kernel_size) - continue; - block_sum += end - start - kernel_size; - } - if (!block_sum) { + if (safe_addr + kernel_size > memory_limit) + return 0; + + max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit); + if (!max_pos) { sclp_early_printk("KASLR disabled: not enough memory\n"); return 0; } - if (get_random(block_sum, &base)) + /* we need a value in the range [1, base_pos] inclusive */ + if (get_random(max_pos, &base_pos)) return 0; - if (base < safe_addr) - base = safe_addr; - block_sum = offset = 0; - for_each_mem_detect_block(i, &start, &end) { - if (memory_limit) { - if (start >= memory_limit) - break; - if (end > memory_limit) - end = memory_limit; - } - if (end - start < kernel_size) - continue; - block_sum += end - start - kernel_size; - if (base <= block_sum) { - base = start + base - offset; - base = ALIGN_DOWN(base, THREAD_SIZE); - break; - } - offset = block_sum; - } - return base; + return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit); } -- cgit v1.2.3 From 1c7c83e8d2351ee3d736094115e447a5da8e5369 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 18:29:29 +0200 Subject: s390: remove unused _swsusp_reset_dma Since commit 394216275c7d ("s390: remove broken hibernate / power management support") _swsusp_reset_dma is unused and could be safely removed. 
Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 2 -- arch/s390/boot/text_dma.S | 17 ----------------- arch/s390/include/asm/setup.h | 1 - arch/s390/kernel/setup.c | 1 - 4 files changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 81835483169b..90842936545b 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -48,8 +48,6 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = { }; static struct diag210 _diag210_tmp_dma __section(.dma.data); struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; -void _swsusp_reset_dma(void); -unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma); void error(char *x) { diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index 9715715c4c28..f7c77cd518f2 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -96,23 +96,6 @@ ENTRY(_diag0c_dma) BR_EX_DMA_r14 ENDPROC(_diag0c_dma) -/* - * void _swsusp_reset_dma(void) - */ -ENTRY(_swsusp_reset_dma) - larl %r1,restart_entry - larl %r2,.Lrestart_diag308_psw - og %r1,0(%r2) - stg %r1,0(%r0) - lghi %r0,0 - diag %r0,%r0,0x308 -restart_entry: - lhi %r1,1 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - sam64 - BR_EX_DMA_r14 -ENDPROC(_swsusp_reset_dma) - /* * void _diag308_reset_dma(void) * diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 7b104f156e34..396db1638417 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -92,7 +92,6 @@ extern int memory_end_set; extern unsigned long memory_end; extern unsigned long vmalloc_size; extern unsigned long max_physmem_end; -extern unsigned long __swsusp_reset_dma; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ae2f4d946048..c1b78aae270b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -102,7 +102,6 @@ struct mem_detect_info __bootdata(mem_detect); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); -unsigned long __bootdata_preserved(__swsusp_reset_dma); unsigned long __bootdata_preserved(__stext_dma); unsigned long __bootdata_preserved(__etext_dma); unsigned long __bootdata_preserved(__sdma); -- cgit v1.2.3 From 2835c2ea95d50625108e47a459e1a47f6be836ce Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 19:07:04 +0200 Subject: s390/startup: avoid save_area_sync overflow Currently we overflow save_area_sync and write over save_area_async. Although this is not a real problem make startup_pgm_check_handler consistent with late pgm check handler and store [%r0,%r7] directly into gpregs_save_area. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/head.S | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index dae10961d072..1a2c2b1ed964 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -360,22 +360,23 @@ ENTRY(startup_kdump) # the save area and does disabled wait with a faulty address. 
# ENTRY(startup_pgm_check_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - la %r1,4095 - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r1) - mvc __LC_GPREGS_SAVE_AREA-4095(128,%r1),__LC_SAVE_AREA_SYNC - mvc __LC_PSW_SAVE_AREA-4095(16,%r1),__LC_PGM_OLD_PSW + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + la %r8,4095 + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) + stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC + mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW ni __LC_RETURN_PSW,0xfc # remove IO and EX bits ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit oi __LC_RETURN_PSW+1,0x2 # set wait state bit - larl %r2,.Lold_psw_disabled_wait - stg %r2,__LC_PGM_NEW_PSW+8 - l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r2) + larl %r9,.Lold_psw_disabled_wait + stg %r9,__LC_PGM_NEW_PSW+8 + l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9) brasl %r14,print_pgm_check_info .Lold_psw_disabled_wait: - la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + la %r8,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD -- cgit v1.2.3 From ad3e6948f90ac3d71f9c03ec92009d99dfb561e9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 28 Sep 2020 07:22:50 +0200 Subject: s390: remove cad commandline option remove the cad command line option as the instruction was never published and never used by userspace. Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Acked-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/kernel/early.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 078277231858..705844f73934 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -274,19 +274,6 @@ static int __init disable_vector_extension(char *str) } early_param("novx", disable_vector_extension); -static int __init cad_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && enabled && test_facility(128)) - /* Enable problem state CAD. */ - __ctl_set_bit(2, 3); - return rc; -} -early_param("cad", cad_setup); - char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; static void __init setup_boot_command_line(void) { -- cgit v1.2.3 From 54530ce6a184ed8c0accc3c50b659590ec445222 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 26 Sep 2020 00:08:54 +0200 Subject: s390/cio: remove unused channel_subsystem_reinit Added with commit 77e844b96440 ("s390/hibernate: add early resume function") unused since commit 394216275c7d ("s390: remove broken hibernate / power management support"). 
Reviewed-by: Vineeth Vijayan Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/cio.h | 1 - drivers/s390/cio/css.c | 14 -------------- 2 files changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 953a7316b30a..5c58756d6476 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -356,7 +356,6 @@ static inline u8 pathmask_to_pos(u8 mask) return 8 - ffs(mask); } -void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void *cio_dma_zalloc(size_t size); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index aca022239b33..c17d6c99f404 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1350,20 +1350,6 @@ static int __init channel_subsystem_init_sync(void) } subsys_initcall_sync(channel_subsystem_init_sync); -void channel_subsystem_reinit(void) -{ - struct channel_path *chp; - struct chp_id chpid; - - chsc_enable_facility(CHSC_SDA_OC_MSS); - chp_id_for_each(&chpid) { - chp = chpid_to_chp(chpid); - if (chp) - chp_update_desc(chp); - } - cmf_reactivate(); -} - #ifdef CONFIG_PROC_FS static ssize_t cio_settle_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) -- cgit v1.2.3 From 3731ac579519d6c24ad2c5ac91959586b1919b5c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 27 Sep 2020 21:34:55 +0200 Subject: s390/vdso: remove orphaned declarations Remove couple of declarations which are unused since commit 4bff8cb54502 ("s390: convert to GENERIC_VDSO"). Acked-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 82f86b3c394b..29b44a930e71 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -35,9 +35,7 @@ struct vdso_per_cpu_data { }; extern struct vdso_data *vdso_data; -extern struct vdso_data boot_vdso_data; -void vdso_alloc_boot_cpu(struct lowcore *lowcore); int vdso_alloc_per_cpu(struct lowcore *lowcore); void vdso_free_per_cpu(struct lowcore *lowcore); -- cgit v1.2.3 From 86cde618e718d0d286a565bcabfce5bdfc304685 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Sep 2020 22:42:30 +0200 Subject: s390/startup: correct "dfltcc" option parsing Currently if just "dfltcc" is passed as a kernel command line option "val" going to be NULL, this leads to reading at address 0 in strcmp(val, "off") Fix that by making sure "val" is not NULL. This does not affect option handling logic. 
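An editorial illustration of the failure mode described above (not part of the patch): in a typical key=value splitter a bare "dfltcc" token leaves val as NULL, so strcmp(val, "off") reads from address 0; the extra "&& val" check avoids that. The parser below is a toy stand-in, not the kernel's parse_boot_command_line().

#include <stdio.h>
#include <string.h>

static void handle_param(char *param)
{
	char *val = strchr(param, '=');

	if (val)
		*val++ = '\0';	/* "dfltcc=off" -> param "dfltcc", val "off" */

	/* without "&& val" a bare "dfltcc" token would pass NULL to strcmp() */
	if (!strcmp(param, "dfltcc") && val) {
		if (!strcmp(val, "off"))
			printf("dfltcc disabled\n");
		else if (!strcmp(val, "on"))
			printf("dfltcc enabled\n");
	}
}

int main(void)
{
	char with_val[] = "dfltcc=off", bare[] = "dfltcc";

	handle_param(with_val);
	handle_param(bare);	/* no output, and no NULL dereference */
	return 0;
}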
Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 92ebc4a58fe2..1add096e09a6 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -230,7 +230,7 @@ void parse_boot_command_line(void) if (!strcmp(param, "vmalloc") && val) vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); - if (!strcmp(param, "dfltcc")) { + if (!strcmp(param, "dfltcc") && val) { if (!strcmp(val, "off")) zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED; else if (!strcmp(val, "on")) -- cgit v1.2.3 From 3ca8b855b0112906caecab88f04a8786a2d40906 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 11 Aug 2019 20:23:56 +0200 Subject: s390/startup: add kaslr_offset to pgm check info print The startup pgm check handler is active from the very beginning of kernel code execution until the uncompressed kernel sets up s390_base_pgm_handler. It is useful not just for debugging the decompressor itself, but also for early code of the uncompressed kernel, in particular Kasan initialization. But since there is no stack trace or symbolic representation of the failing psw address, it is impossible to figure out the faulty code location without knowing the Kaslr kernel base. So let's add it to the startup pgm check info printed as well. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/pgm_check_info.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 83b5b7915c32..d3ab20ec517e 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "boot.h" @@ -42,6 +43,13 @@ void print_pgm_check_info(void) add_str(p, "\n"); sclp_early_printk(buf); + if (kaslr_enabled) { + p = add_str(buf, "Kernel random base: "); + p = add_val_as_hex(p, __kaslr_offset); + add_str(p, "\n"); + sclp_early_printk(buf); + } + p = add_str(buf, "PSW : "); p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask); p = add_str(p, " "); -- cgit v1.2.3 From 402e9228f7a6a90e4fad44b358350ae358f1bc3c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 27 Sep 2020 22:07:40 +0200 Subject: s390: remove orphaned function declarations arch/s390/pci/pci_bus.h: zpci_bus_init - only declaration left after commit 05bc1be6db4b ("s390/pci: create zPCI bus") arch/s390/include/asm/gmap.h: gmap_pte_notify - only declaration left after commit 4be130a08420 ("s390/mm: add shadow gmap support") arch/s390/include/asm/pgalloc.h: rcu_table_freelist_finish - only declaration left after commit 36409f6353fc ("[S390] use generic RCU page-table freeing code") arch/s390/include/asm/tlbflush.h: smp_ptlb_all - only declaration left after commit 5a79859ae0f3 ("s390: remove 31 bit support") arch/s390/include/asm/vtimer.h: init_cpu_vtimer - only declaration left after commit b5f87f15e200 ("s390/idle: consolidate idle functions and definitions") arch/s390/include/asm/pci.h: zpci_debug_info - only declaration left after commit 386aa051fb4b ("s390/pci: remove per device debug attribute") arch/s390/include/asm/vdso.h: vdso_alloc_boot_cpu - only declaration left after commit 4bff8cb54502 ("s390: convert to GENERIC_VDSO") arch/s390/include/asm/smp.h: smp_vcpu_scheduled - only declaration left after commit 67626fadd269 ("s390: enforce CONFIG_SMP") arch/s390/kernel/entry.h: restart_call_handler - only declaration left after
commit 8b646bd75908 ("[S390] rework smp code") arch/s390/kernel/entry.h: startup_init_nobss - only declaration left after commit 2e83e0eb85ca ("s390: clean .bss before running uncompressed kernel") arch/s390/kernel/entry.h: s390_early_resume - only declaration left after commit 394216275c7d ("s390: remove broken hibernate / power management support") drivers/s390/char/raw3270.h: raw3270_request_alloc_bootmem - only declaration left after commit 33403dcfcdfd ("[S390] 3270 console: convert from bootmem to slab") drivers/s390/cio/device.h: ccw_device_schedule_sch_unregister - only declaration left after commit 37de53bb5290 ("[S390] cio: introduce ccw device todos") drivers/s390/char/tape.h: tape_hotplug_event - has only declaration since recorded git history. drivers/s390/char/tape.h: tape_oper_handler - has only declaration since recorded git history. drivers/s390/char/tape.h: tape_noper_handler - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_check_locate - only declaration left after commit 161beff8f40d ("s390/tape: remove tape block leftovers") drivers/s390/char/tape_std.h: tape_std_default_handler - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_unexpect_uchk_handler - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_irq - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_has_failed - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_succeded - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_do_retry - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_read_opposite - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_HWBUG - has only declaration since recorded git history. 
Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/gmap.h | 2 -- arch/s390/include/asm/pci.h | 1 - arch/s390/include/asm/pgalloc.h | 2 -- arch/s390/include/asm/smp.h | 1 - arch/s390/include/asm/tlbflush.h | 2 -- arch/s390/include/asm/vtimer.h | 2 -- arch/s390/kernel/entry.h | 3 --- arch/s390/pci/pci_bus.h | 1 - drivers/s390/char/raw3270.h | 1 - drivers/s390/char/tape.h | 3 --- drivers/s390/char/tape_std.h | 12 ------------ drivers/s390/cio/device.h | 1 - 12 files changed, 31 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index a816fb4734b8..40264f60b0da 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -140,8 +140,6 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, - unsigned long bits); int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 178a24e0af5f..b5380a251df2 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -279,7 +279,6 @@ int zpci_debug_init(void); void zpci_debug_exit(void); void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); -void zpci_debug_info(struct zpci_dev *, struct seq_file *); /* Error reporting */ int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 74a352f8c0d1..d1297d6bbdcf 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -146,8 +146,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) -extern void rcu_table_freelist_finish(void); - void vmem_map_init(void); void *vmem_crst_alloc(unsigned long val); pte_t *vmem_pte_alloc(void); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7e155fb6c254..01e360004481 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -31,7 +31,6 @@ extern void smp_emergency_stop(void); extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); extern void smp_save_dump_cpus(void); -extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index acce6a08a1fa..6448bb5be10c 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -30,8 +30,6 @@ static inline void __tlb_flush_idte(unsigned long asce) : : "a" (opt), "a" (asce) : "cc"); } -void smp_ptlb_all(void); - /* * Flush all TLB entries on all CPUs. 
*/ diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h index 42f707d1c1e8..e601adaa6320 100644 --- a/arch/s390/include/asm/vtimer.h +++ b/arch/s390/include/asm/vtimer.h @@ -25,8 +25,6 @@ extern void add_virt_timer_periodic(struct vtimer_list *timer); extern int mod_virt_timer(struct vtimer_list *timer, u64 expires); extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires); extern int del_virt_timer(struct vtimer_list *timer); - -extern void init_cpu_vtimer(void); extern void vtime_init(void); #endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index faca269d5f27..412a54e6aa81 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -17,7 +17,6 @@ void ext_int_handler(void); void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); -void restart_call_handler(void); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); @@ -61,12 +60,10 @@ void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); void do_IRQ(struct pt_regs *regs, int irq); void do_restart(void); -void __init startup_init_nobss(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); -void s390_early_resume(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 8d19723ed5c0..f8dfac0b5b71 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -9,7 +9,6 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); void zpci_bus_device_unregister(struct zpci_dev *zdev); -int zpci_bus_init(void); void zpci_release_device(struct kref *kref); static inline void zpci_zdev_put(struct zpci_dev *zdev) diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 08f36e973b43..8d979e0ee605 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -110,7 +110,6 @@ struct raw3270_request { }; struct raw3270_request *raw3270_request_alloc(size_t size); -struct raw3270_request *raw3270_request_alloc_bootmem(size_t size); void raw3270_request_free(struct raw3270_request *); void raw3270_request_reset(struct raw3270_request *); void raw3270_request_set_cmd(struct raw3270_request *, u8 cmd); diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h index 8bec5f9ea92c..e2c60475dfa8 100644 --- a/drivers/s390/char/tape.h +++ b/drivers/s390/char/tape.h @@ -238,7 +238,6 @@ extern int tape_do_io(struct tape_device *, struct tape_request *); extern int tape_do_io_async(struct tape_device *, struct tape_request *); extern int tape_do_io_interruptible(struct tape_device *, struct tape_request *); extern int tape_cancel_io(struct tape_device *, struct tape_request *); -void tape_hotplug_event(struct tape_device *, int major, int action); static inline int tape_do_io_free(struct tape_device *device, struct tape_request *request) @@ -258,8 +257,6 @@ tape_do_io_async_free(struct tape_device *device, struct tape_request *request) tape_do_io_async(device, request); } -extern int tape_oper_handler(int irq, int status); -extern void tape_noper_handler(int irq, int status); extern int tape_open(struct tape_device *); extern int tape_release(struct tape_device *); extern int tape_mtop(struct tape_device *, int, int); diff --git 
a/drivers/s390/char/tape_std.h b/drivers/s390/char/tape_std.h index 53ec8e2870d4..dcc63ff587f9 100644 --- a/drivers/s390/char/tape_std.h +++ b/drivers/s390/char/tape_std.h @@ -101,7 +101,6 @@ struct tape_request *tape_std_read_block(struct tape_device *, size_t); void tape_std_read_backward(struct tape_device *device, struct tape_request *request); struct tape_request *tape_std_write_block(struct tape_device *, size_t); -void tape_std_check_locate(struct tape_device *, struct tape_request *); /* Some non-mtop commands. */ int tape_std_assign(struct tape_device *); @@ -131,19 +130,8 @@ int tape_std_mtunload(struct tape_device *, int); int tape_std_mtweof(struct tape_device *, int); /* Event handlers */ -void tape_std_default_handler(struct tape_device *); -void tape_std_unexpect_uchk_handler(struct tape_device *); -void tape_std_irq(struct tape_device *); void tape_std_process_eov(struct tape_device *); -// the error recovery stuff: -void tape_std_error_recovery(struct tape_device *); -void tape_std_error_recovery_has_failed(struct tape_device *,int error_id); -void tape_std_error_recovery_succeded(struct tape_device *); -void tape_std_error_recovery_do_retry(struct tape_device *); -void tape_std_error_recovery_read_opposite(struct tape_device *); -void tape_std_error_recovery_HWBUG(struct tape_device *, int condno); - /* S390 tape types */ enum s390_tape_type { tape_3480, diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index f5c427ec24b1..853b6a8ca095 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -96,7 +96,6 @@ int ccw_device_online(struct ccw_device *); int ccw_device_offline(struct ccw_device *); void ccw_device_update_sense_data(struct ccw_device *); int ccw_device_test_sense_data(struct ccw_device *); -void ccw_device_schedule_sch_unregister(struct ccw_device *); int ccw_purge_blacklisted(void); void ccw_device_sched_todo(struct ccw_device *cdev, enum cdev_todo todo); struct ccw_device *get_ccwdev_by_dev_id(struct ccw_dev_id *dev_id); -- cgit v1.2.3 From d70e38cb1deef3b2acee4cd36d33fef4c98abf28 Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Tue, 17 Mar 2020 09:23:41 -0400 Subject: s390: nvme dump support Add the nvme dump ipl type, associated data, and sysfs entries. This allows booting into a stand alone dump environment that resides on an nvme device. Signed-off-by: Jason J. 
Herne Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/ipl.c | 71 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 7d5cfdda5277..a72d195bf92d 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -66,6 +66,7 @@ enum ipl_type { IPL_TYPE_FCP_DUMP = 8, IPL_TYPE_NSS = 16, IPL_TYPE_NVME = 32, + IPL_TYPE_NVME_DUMP = 64, }; struct ipl_info diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 90a2a17239b0..c5f9d6f88d27 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -40,10 +40,12 @@ #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" #define IPL_NVME_STR "nvme" +#define IPL_NVME_DUMP_STR "nvme_dump" #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" #define DUMP_FCP_STR "fcp" +#define DUMP_NVME_STR "nvme" #define DUMP_NONE_STR "none" /* @@ -96,6 +98,8 @@ static char *ipl_type_str(enum ipl_type type) return IPL_NSS_STR; case IPL_TYPE_NVME: return IPL_NVME_STR; + case IPL_TYPE_NVME_DUMP: + return IPL_NVME_DUMP_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -106,6 +110,7 @@ enum dump_type { DUMP_TYPE_NONE = 1, DUMP_TYPE_CCW = 2, DUMP_TYPE_FCP = 4, + DUMP_TYPE_NVME = 8, }; static char *dump_type_str(enum dump_type type) @@ -117,6 +122,8 @@ static char *dump_type_str(enum dump_type type) return DUMP_CCW_STR; case DUMP_TYPE_FCP: return DUMP_FCP_STR; + case DUMP_TYPE_NVME: + return DUMP_NVME_STR; default: return NULL; } @@ -144,6 +151,7 @@ static struct ipl_parameter_block *reipl_block_actual; static int dump_capabilities = DUMP_TYPE_NONE; static enum dump_type dump_type = DUMP_TYPE_NONE; static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_nvme; static struct ipl_parameter_block *dump_block_ccw; static struct sclp_ipl_info sclp_ipl_info; @@ -266,7 +274,10 @@ static __init enum ipl_type get_ipl_type(void) else return IPL_TYPE_FCP; case IPL_PBT_NVME: - return IPL_TYPE_NVME; + if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return IPL_TYPE_NVME_DUMP; + else + return IPL_TYPE_NVME; } return IPL_TYPE_UNKNOWN; } @@ -324,6 +335,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: return sprintf(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; @@ -531,6 +543,7 @@ static int __init ipl_init(void) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); break; default: @@ -1109,6 +1122,7 @@ static void __reipl_run(void *unused) diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_FCP_DUMP: + case IPL_TYPE_NVME_DUMP: break; } disabled_wait(); @@ -1382,6 +1396,29 @@ static struct attribute_group dump_fcp_attr_group = { .attrs = dump_fcp_attrs, }; +/* NVME dump device attributes */ +DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", + dump_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n", + dump_block_nvme->nvme.br_lba); + +static struct attribute *dump_nvme_attrs[] = { + &sys_dump_nvme_fid_attr.attr, + 
&sys_dump_nvme_nsid_attr.attr, + &sys_dump_nvme_bootprog_attr.attr, + &sys_dump_nvme_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_nvme_attr_group = { + .name = IPL_NVME_STR, + .attrs = dump_nvme_attrs, +}; + /* CCW dump device attributes */ DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw); @@ -1423,6 +1460,8 @@ static ssize_t dump_type_store(struct kobject *kobj, rc = dump_set_type(DUMP_TYPE_CCW); else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) rc = dump_set_type(DUMP_TYPE_FCP); + else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_NVME); return (rc != 0) ? rc : len; } @@ -1450,6 +1489,9 @@ static void __dump_run(void *unused) case DUMP_TYPE_FCP: diag308_dump(dump_block_fcp); break; + case DUMP_TYPE_NVME: + diag308_dump(dump_block_nvme); + break; default: break; } @@ -1506,6 +1548,29 @@ static int __init dump_fcp_init(void) return 0; } +static int __init dump_nvme_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_nvme) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group); + if (rc) { + free_page((unsigned long)dump_block_nvme); + return rc; + } + dump_block_nvme->hdr.len = IPL_BP_NVME_LEN; + dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN; + dump_block_nvme->fcp.pbt = IPL_PBT_NVME; + dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_NVME; + return 0; +} + static int __init dump_init(void) { int rc; @@ -1522,6 +1587,9 @@ static int __init dump_init(void) if (rc) return rc; rc = dump_fcp_init(); + if (rc) + return rc; + rc = dump_nvme_init(); if (rc) return rc; dump_set_type(DUMP_TYPE_NONE); @@ -1956,6 +2024,7 @@ void __init setup_ipl(void) ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: ipl_info.data.nvme.fid = ipl_block.nvme.fid; ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; break; -- cgit v1.2.3 From d9f12e48d08ec08ace574050a838e001e442ee38 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Tue, 29 Sep 2020 20:23:17 +0200 Subject: s390/ipl: support NVMe IPL kernel parameters Enable extracting of extra kernel command-line parameters from the NVMe IPL block passed by the firmware to the kernel at boot. 
Signed-off-by: Alexander Egorenkov Reviewed-by: Vasily Gorbik Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 1add096e09a6..f26c34e6f1e6 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -70,30 +70,44 @@ static size_t scpdata_length(const u8 *buf, size_t count) static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, const struct ipl_parameter_block *ipb) { - size_t count; - size_t i; + const __u8 *scp_data; + __u32 scp_data_len; int has_lowercase; + size_t count = 0; + size_t i; + + switch (ipb->pb0_hdr.pbt) { + case IPL_PBT_FCP: + scp_data_len = ipb->fcp.scp_data_len; + scp_data = ipb->fcp.scp_data; + break; + case IPL_PBT_NVME: + scp_data_len = ipb->nvme.scp_data_len; + scp_data = ipb->nvme.scp_data; + break; + default: + goto out; + } - count = min(size - 1, scpdata_length(ipb->fcp.scp_data, - ipb->fcp.scp_data_len)); + count = min(size - 1, scpdata_length(scp_data, scp_data_len)); if (!count) goto out; has_lowercase = 0; for (i = 0; i < count; i++) { - if (!isascii(ipb->fcp.scp_data[i])) { + if (!isascii(scp_data[i])) { count = 0; goto out; } - if (!has_lowercase && islower(ipb->fcp.scp_data[i])) + if (!has_lowercase && islower(scp_data[i])) has_lowercase = 1; } if (has_lowercase) - memcpy(dest, ipb->fcp.scp_data, count); + memcpy(dest, scp_data, count); else for (i = 0; i < count; i++) - dest[i] = tolower(ipb->fcp.scp_data[i]); + dest[i] = tolower(scp_data[i]); out: dest[count] = '\0'; return count; @@ -115,6 +129,7 @@ static void append_ipl_block_parm(void) parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; case IPL_PBT_FCP: + case IPL_PBT_NVME: rc = ipl_block_get_ascii_scpdata( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; -- cgit v1.2.3 From bd37b36832f62bf42ab66da8744191d99252a6e3 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Tue, 29 Sep 2020 20:24:55 +0200 Subject: s390/nvme: support firmware-assisted dump to NVMe disks From the kernel perspective NVMe dump works exactly like zFCP dump. Therefore, adapt all places where code explicitly tests only for IPL of type FCP DUMP. And also set the memory end correctly in this case. 
Signed-off-by: Alexander Egorenkov Reviewed-by: Vasily Gorbik Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 15 ++++++++++++--- arch/s390/include/asm/ipl.h | 6 ++++++ arch/s390/kernel/crash_dump.c | 16 ++++++++-------- arch/s390/kernel/setup.c | 4 ++-- arch/s390/kernel/smp.c | 12 ++++++------ drivers/s390/char/sclp_sdias.c | 2 +- drivers/s390/char/zcore.c | 17 ++++++++++++----- 7 files changed, 47 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index f26c34e6f1e6..f94b91d72620 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -280,14 +280,23 @@ void parse_boot_command_line(void) } } +static inline bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + return false; +} + void setup_memory_end(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) { kaslr_enabled = 0; - } else if (ipl_block_valid && - ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) { + } else if (ipl_block_valid && is_ipl_block_dump()) { kaslr_enabled = 0; if (!sclp_early_get_hsa_size(&memory_end) && memory_end) memory_end_set = 1; diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a72d195bf92d..a9e2c7295b35 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -95,6 +95,12 @@ extern struct ipl_info ipl_info; extern void setup_ipl(void); extern void set_os_info_reipl_block(void); +static inline bool is_ipl_type_dump(void) +{ + return (ipl_info.type == IPL_TYPE_FCP_DUMP) || + (ipl_info.type == IPL_TYPE_NVME_DUMP); +} + struct ipl_report { struct ipl_parameter_block *ipib; struct list_head components; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c42ce348103c..205b2e2648aa 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -141,7 +141,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) while (count) { from = __pa(src); if (!OLDMEM_BASE && from < sclp.hsa_size) { - /* Copy from zfcpdump HSA area */ + /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_kernel(dst, from, len); if (rc) @@ -184,7 +184,7 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) while (count) { from = __pa(src); if (!OLDMEM_BASE && from < sclp.hsa_size) { - /* Copy from zfcpdump HSA area */ + /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_user(dst, from, len); if (rc) @@ -258,7 +258,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, } /* - * Remap "oldmem" for zfcpdump + * Remap "oldmem" for zfcp/nvme dump * * We only map available memory above HSA size. Memory below HSA size * is read on demand using the copy_oldmem_page() function. 
@@ -283,7 +283,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, } /* - * Remap "oldmem" for kdump or zfcpdump + * Remap "oldmem" for kdump or zfcp/nvme dump */ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) @@ -632,11 +632,11 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) u32 alloc_size; u64 hdr_off; - /* If we are not in kdump or zfcpdump mode return */ - if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) + /* If we are not in kdump or zfcp/nvme dump mode return */ + if (!OLDMEM_BASE && !is_ipl_type_dump()) return 0; - /* If we cannot get HSA size for zfcpdump return error */ - if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size) + /* If we cannot get HSA size for zfcp/nvme dump return error */ + if (is_ipl_type_dump() && !sclp.hsa_size) return -ENODEV; /* For kdump, exclude previous crashkernel memory */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c1b78aae270b..419a0604959f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -251,7 +251,7 @@ static void __init conmode_default(void) #ifdef CONFIG_CRASH_DUMP static void __init setup_zfcpdump(void) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return; if (OLDMEM_BASE) return; @@ -1175,7 +1175,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_init_branches(); - /* Setup zfcpdump support */ + /* Setup zfcp/nvme dump support */ setup_zfcpdump(); /* Add system specific data to the random pool */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 85700bd85f98..ebfe86d097f0 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -606,14 +606,14 @@ int smp_store_status(int cpu) /* * Collect CPU state of the previous, crashed system. * There are four cases: - * 1) standard zfcp dump - * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * 1) standard zfcp/nvme dump + * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The boot CPU state is located in * the absolute lowcore of the memory stored in the HSA. The zcore code * will copy the boot CPU state from the HSA. - * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) - * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory) + * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The firmware or the boot-loader * stored the registers of the boot CPU in the absolute lowcore in the @@ -660,7 +660,7 @@ void __init smp_save_dump_cpus(void) unsigned long page; bool is_boot_cpu; - if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) + if (!(OLDMEM_BASE || is_ipl_type_dump())) /* No previous system present, normal boot. */ return; /* Allocate a page as dumping area for the store status sigps */ @@ -686,7 +686,7 @@ void __init smp_save_dump_cpus(void) /* Get the vector registers */ smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page); /* - * For a zfcp dump OLDMEM_BASE == NULL and the registers + * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers * of the boot CPU are stored in the HSA. 
To retrieve * these registers an SCLP request is required which is * done by drivers/s390/char/zcore.c:init_cpu_info() diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index be8cad61b4cf..215d4b4a5ff5 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -257,7 +257,7 @@ static int __init sclp_sdias_init_async(void) int __init sclp_sdias_init(void) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return 0; sclp_sdias_sccb = (void *) __get_free_page(GFP_KERNEL | GFP_DMA); BUG_ON(!sclp_sdias_sccb); diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index d29f1b71618e..1515fdc3c1ab 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-1.0+ /* * zcore module to export memory content and register sets for creating system - * dumps on SCSI disks (zfcpdump). + * dumps on SCSI/NVMe disks (zfcp/nvme dump). * * For more information please refer to Documentation/s390/zfcpdump.rst * @@ -243,7 +243,7 @@ static int __init zcore_init(void) unsigned char arch; int rc; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return -ENODATA; if (OLDMEM_BASE) return -ENODATA; @@ -252,9 +252,16 @@ static int __init zcore_init(void) debug_register_view(zcore_dbf, &debug_sprintf_view); debug_set_level(zcore_dbf, 6); - TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno); - TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn); - TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun); + if (ipl_info.type == IPL_TYPE_FCP_DUMP) { + TRACE("type: fcp\n"); + TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno); + TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn); + TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun); + } else if (ipl_info.type == IPL_TYPE_NVME_DUMP) { + TRACE("type: nvme\n"); + TRACE("fid: %x\n", ipl_info.data.nvme.fid); + TRACE("nsid: %x\n", ipl_info.data.nvme.nsid); + } rc = sclp_sdias_init(); if (rc) -- cgit v1.2.3 From 5627b9224b00334e4c91122ebbb8536a9a575969 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 23 Jun 2020 19:10:08 +0200 Subject: s390/ipl: add support to control memory clearing for nvme re-IPL Re-IPL for nvme is currently done by using diag 308 with the "Load Clear" subcode, which means that all memory will be cleared. This can increase re-IPL duration considerably on very large machines. For list-directed IPL like nvme or fcp IPL, a "Load Normal" subcode was introduced with z14. The "Load Normal" diag 308 subcode allows to re-IPL without clearing memory. This patch adds a new "clear" sysfs attribute to /sys/firmware/reipl/nvme, which can be set to either "0" or "1" to disable or enable re-IPL with memory clearing. The default value is "0", which disables memory clearing. 
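A minimal usage sketch, assuming the attribute is wired up as described above:
echo 1 > /sys/firmware/reipl/nvme/clear   # next nvme re-IPL will clear memory
echo 0 > /sys/firmware/reipl/nvme/clear   # back to the default, no memory clearing
cat /sys/firmware/reipl/nvme/clear        # reads back "0" or "1"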
Signed-off-by: Gerald Schaefer Reviewed-by: Vasily Gorbik Tested-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index c5f9d6f88d27..98b3aca1de8e 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -156,6 +156,7 @@ static struct ipl_parameter_block *dump_block_ccw; static struct sclp_ipl_info sclp_ipl_info; +static bool reipl_nvme_clear; static bool reipl_fcp_clear; static bool reipl_ccw_clear; @@ -886,6 +887,24 @@ static struct attribute_group reipl_nvme_attr_group = { .bin_attrs = reipl_nvme_bin_attrs }; +static ssize_t reipl_nvme_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_nvme_clear); +} + +static ssize_t reipl_nvme_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (strtobool(buf, &reipl_nvme_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_nvme_clear_attr = + __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store); + /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); @@ -1112,7 +1131,10 @@ static void __reipl_run(void *unused) break; case IPL_TYPE_NVME: diag308(DIAG308_SET, reipl_block_nvme); - diag308(DIAG308_LOAD_CLEAR, NULL); + if (reipl_nvme_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); break; case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); @@ -1233,8 +1255,9 @@ static int __init reipl_fcp_init(void) &sys_reipl_fcp_clear_attr.attr); if (rc) goto out2; - } else + } else { reipl_fcp_clear = true; + } if (ipl_info.type == IPL_TYPE_FCP) { memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block)); @@ -1280,10 +1303,16 @@ static int __init reipl_nvme_init(void) } rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); - if (rc) { - kset_unregister(reipl_nvme_kset); - free_page((unsigned long) reipl_block_nvme); - return rc; + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_nvme_kset->kobj, + &sys_reipl_nvme_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_nvme_clear = true; } if (ipl_info.type == IPL_TYPE_NVME) { @@ -1304,6 +1333,13 @@ static int __init reipl_nvme_init(void) } reipl_capabilities |= IPL_TYPE_NVME; return 0; + +out2: + sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); +out1: + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; } static int __init reipl_type_init(void) -- cgit v1.2.3 From 21a66717079c3d74e8573cd48743728e4a244507 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 01:01:29 +0200 Subject: s390/kasan: make sure int handler always run with DAT on Since commit 998f5bbe3dbd ("s390/kasan: fix early pgm check handler execution") early pgm check handler is executed with DAT on if Kasan is enabled. Still there is a window between setup_lowcore_dat_off() and setup_lowcore_dat_on() when int handlers could be executed with DAT off under Kasan. If this happens the kernel ends up in pgm check loop due to Kasan shadow memory access attempts. With Kasan enabled paging is initialized much earlier and DAT flag has to be on at all times instrumented code is executed. 
Make sure int handlers are set up to be called with DAT on right away in this case. Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 419a0604959f..dc4d461095cc 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -372,8 +372,12 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { + unsigned long int_psw_mask = PSW_KERNEL_BITS; struct lowcore *lc; + if (IS_ENABLED(CONFIG_KASAN)) + int_psw_mask |= PSW_MASK_DAT; + /* * Setup lowcore for boot cpu */ @@ -385,15 +389,15 @@ static void __init setup_lowcore_dat_off(void) lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; lc->nodat_stack = ((unsigned long) &init_thread_union) -- cgit v1.2.3 From 100a980c174bed82e0178766809ac664e59ca037 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 27 Sep 2020 01:34:25 +0200 Subject: s390: remove orphaned extern variables declarations arch/s390/kernel/entry.h: suspend_zero_pages - only declaration left after commit 394216275c7d ("s390: remove broken hibernate / power management support") arch/s390/include/asm/setup.h: vmhalt_cmd - only declaration left after commit 99ca4e582d4a ("[S390] kernel: Shutdown Actions Interface") arch/s390/include/asm/setup.h: vmpoff_cmd - only declaration left after commit 99ca4e582d4a ("[S390] kernel: Shutdown Actions Interface") Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/setup.h | 3 --- arch/s390/kernel/entry.h | 1 - 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 396db1638417..bdb242a1544e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -121,9 +121,6 @@ extern unsigned int console_mode; extern unsigned int console_devno; extern unsigned int console_irq; -extern char vmhalt_cmd[]; -extern char vmpoff_cmd[]; - #define CONSOLE_IS_UNDEFINED (console_mode == 0) #define CONSOLE_IS_SCLP (console_mode == 1) #define CONSOLE_IS_3215 (console_mode == 2) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 412a54e6aa81..6475a885cd60 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -9,7 +9,6 @@ #include extern void *restart_stack; -extern unsigned long suspend_zero_pages; void system_call(void); void pgm_check_handler(void); -- cgit v1.2.3 From 4ec95ed312c4ce877fb04084991754cb8cd33f01 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 1 Oct 2020 02:22:54 +0200 Subject: s390/startup: correct early pgm check 
info formatting Early sclp console messages are printed in line mode on z/VM and LPAR, but under kvm newlines matter. Add a missing newline between "kernel version" and "Kernel fault". Signed-off-by: Vasily Gorbik --- arch/s390/boot/pgm_check_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index d3ab20ec517e..a3c9862bcede 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -33,7 +33,8 @@ void print_pgm_check_info(void) char *p; add_str(buf, "Linux version "); - strlcat(buf, kernel_version, sizeof(buf)); + strlcat(buf, kernel_version, sizeof(buf) - 1); + strlcat(buf, "\n", sizeof(buf)); sclp_early_printk(buf); p = add_str(buf, "Kernel fault: interruption code "); -- cgit v1.2.3 From 0671cc1048744c9a6f1c896baa85966a5abc42a0 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 27 Jul 2020 14:34:57 +0200 Subject: s390/sclp: Add support for SCLP AP adapter config/deconfig Add support for AP bus adapter config and deconfig to the sclp core code. The code is statically build into the kernel when ZCRYPT is configured either as module or with static support. This is the base functionality for having configure/deconfigure support in the AP bus and card code. Another patch will exploit this soon. Signed-off-by: Harald Freudenberger Suggested-by: Pierre Morel Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/sclp.h | 2 ++ drivers/s390/char/Makefile | 2 ++ drivers/s390/char/sclp.h | 2 +- drivers/s390/char/sclp_ap.c | 63 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 drivers/s390/char/sclp_ap.c (limited to 'arch') diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 90f34c7e2752..a7bdd128d85b 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -128,6 +128,8 @@ int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); +int sclp_ap_configure(u32 apid); +int sclp_ap_deconfigure(u32 apid); int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid); int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 845e12ac5954..c6fdb81a068a 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -34,6 +34,8 @@ obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o obj-$(CONFIG_PCI) += sclp_pci.o +obj-$(subst m,y,$(CONFIG_ZCRYPT)) += sclp_ap.o + obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o obj-$(CONFIG_VMCP) += vmcp.o diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index ccc2d759c575..69d9cde9ff5a 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -229,7 +229,7 @@ static inline void sclp_fill_core_info(struct sclp_core_info *info, #define SCLP_HAS_CPU_INFO (sclp.facilities & 0x0800000000000000ULL) #define SCLP_HAS_CPU_RECONFIG (sclp.facilities & 0x0400000000000000ULL) #define SCLP_HAS_PCI_RECONFIG (sclp.facilities & 0x0000000040000000ULL) - +#define SCLP_HAS_AP_RECONFIG (sclp.facilities & 0x0000000100000000ULL) struct gds_subvector { u8 length; diff --git a/drivers/s390/char/sclp_ap.c b/drivers/s390/char/sclp_ap.c new file mode 100644 index 000000000000..0dd1ca712795 --- /dev/null +++ b/drivers/s390/char/sclp_ap.c @@ 
-0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * s390 crypto adapter related sclp functions. + * + * Copyright IBM Corp. 2020 + */ +#define KMSG_COMPONENT "sclp_cmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include "sclp.h" + +#define SCLP_CMDW_CONFIGURE_AP 0x001f0001 +#define SCLP_CMDW_DECONFIGURE_AP 0x001e0001 + +struct ap_cfg_sccb { + struct sccb_header header; +} __packed; + +static int do_ap_configure(sclp_cmdw_t cmd, u32 apid) +{ + struct ap_cfg_sccb *sccb; + int rc; + + if (!SCLP_HAS_AP_RECONFIG) + return -EOPNOTSUPP; + + sccb = (struct ap_cfg_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!sccb) + return -ENOMEM; + + sccb->header.length = PAGE_SIZE; + cmd |= (apid & 0xFF) << 8; + rc = sclp_sync_request(cmd, sccb); + if (rc) + goto out; + switch (sccb->header.response_code) { + case 0x0020: case 0x0120: case 0x0440: case 0x0450: + break; + default: + pr_warn("configure AP adapter %u failed: cmd=0x%08x response=0x%04x\n", + apid, cmd, sccb->header.response_code); + rc = -EIO; + break; + } +out: + free_page((unsigned long) sccb); + return rc; +} + +int sclp_ap_configure(u32 apid) +{ + return do_ap_configure(SCLP_CMDW_CONFIGURE_AP, apid); +} +EXPORT_SYMBOL(sclp_ap_configure); + +int sclp_ap_deconfigure(u32 apid) +{ + return do_ap_configure(SCLP_CMDW_DECONFIGURE_AP, apid); +} +EXPORT_SYMBOL(sclp_ap_deconfigure); -- cgit v1.2.3 From 4aa32ee3c058847ff935d1a65da309b67b65354a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 2 Oct 2020 11:16:49 +0200 Subject: s390/lib: fix kernel doc for memcmp() s/count/n Signed-off-by: Julian Wiedmann Acked-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/lib/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 0e30e6e43b0c..93b3209b94a2 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -333,7 +333,7 @@ EXPORT_SYMBOL(memchr); * memcmp - Compare two areas of memory * @s1: One area of memory * @s2: Another area of memory - * @count: The size of the area. + * @n: The size of the area. */ #ifdef __HAVE_ARCH_MEMCMP int memcmp(const void *s1, const void *s2, size_t n) -- cgit v1.2.3 From eefc69a09ca5b441ee136f9fb68ab5970cfc2d51 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 2 Oct 2020 11:18:19 +0200 Subject: s390/sie: fix typo in SIGP code description s/ait address/at address Signed-off-by: Julian Wiedmann Acked-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/sie.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 6ca1e68d7103..ede318653c87 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ -- cgit v1.2.3 From b61e1f3281c5a53f24f47849873463514f58c1b8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Sep 2020 10:26:19 +0200 Subject: s390/kprobes: move insn_page to text segment Move the in-kernel kprobes insn page to text segment. 
Rationale: having that page in the rw data segment is suboptimal, since as soon as a kprobe is set, this will split the 1:1 kernel mapping for a single page which gets new permissions. Note: there is always at least one kprobe present for the kretprobe trampoline, so the mapping will always be split into smaller 4k mappings because of this. Moving the kprobes insn page into the text segment makes sure that the page is mapped RO/X in any case, and avoids splitting the 1:1 mapping. The kprobe insn_page is defined as a dummy function which is filled with "br %r14" instructions. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/Makefile | 1 + arch/s390/kernel/entry.h | 2 ++ arch/s390/kernel/kprobes.c | 6 ++---- arch/s390/kernel/kprobes_insn_page.S | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 arch/s390/kernel/kprobes_insn_page.S (limited to 'arch') diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index efca70970761..dd73b7f07423 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KPROBES) += kprobes_insn_page.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 6475a885cd60..0f7e4e9176e0 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -87,4 +87,6 @@ void set_fs_fixup(void); unsigned long stack_alloc(void); void stack_free(unsigned long stack); +extern char kprobes_insn_page[]; + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index b34fa4eef742..6574774d404e 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -22,6 +22,7 @@ #include #include #include +#include "entry.h" DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -31,7 +32,6 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { }; DEFINE_INSN_CACHE_OPS(s390_insn); static int insn_page_in_use; -static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE); void *alloc_insn_page(void) { @@ -53,13 +53,11 @@ static void *alloc_s390_insn_page(void) { if (xchg(&insn_page_in_use, 1) == 1) return NULL; - __set_memory((unsigned long) &insn_page, 1, SET_MEMORY_RO | SET_MEMORY_X); - return &insn_page; + return &kprobes_insn_page; } static void free_s390_insn_page(void *page) { - __set_memory((unsigned long) page, 1, SET_MEMORY_RW | SET_MEMORY_NX); xchg(&insn_page_in_use, 0); } diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S new file mode 100644 index 000000000000..f6cb022ef8c8 --- /dev/null +++ b/arch/s390/kernel/kprobes_insn_page.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +/* + * insn_page is a special 4k aligned dummy function for kprobes. + * It will contain all kprobed instructions that are out-of-line executed. + * The page must be within the kernel image to guarantee that the + * out-of-line instructions are within 2GB distance of their original + * location.
Using a dummy function ensures that the insn_page is within + * the text section of the kernel and mapped read-only/executable from + * the beginning on, thus avoiding to split large mappings if the page + * would be in the data section instead. + */ + .section .kprobes.text, "ax" + .align 4096 +ENTRY(kprobes_insn_page) + .rept 2048 + .word 0x07fe + .endr +ENDPROC(kprobes_insn_page) + .previous -- cgit v1.2.3 From db5273975622af17e265d5d96b41427cde4c25b0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 8 Oct 2020 16:28:15 +0200 Subject: s390/uaccess: add default cases for __put_user_fn()/__get_user_fn() Add default cases for __put_user_fn()/__get_user_fn(). This doesn't fix anything since the functions are only called with sane values. However we get rid of smatch warnings: ./arch/s390/include/asm/uaccess.h:143 __get_user_fn() error: uninitialized symbol 'rc'. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 23c85801cf04..bf47d93ff1c6 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -60,6 +60,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); #define INLINE_COPY_TO_USER #endif +int __put_user_bad(void) __attribute__((noreturn)); +int __get_user_bad(void) __attribute__((noreturn)); + #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES #define __put_get_user_asm(to, from, size, spec) \ @@ -109,6 +112,9 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon (unsigned long *)x, size, spec); break; + default: + __put_user_bad(); + break; } return rc; } @@ -139,6 +145,9 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign (unsigned long __user *)ptr, size, spec); break; + default: + __get_user_bad(); + break; } return rc; } @@ -190,8 +199,6 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s }) -int __put_user_bad(void) __attribute__((noreturn)); - #define __get_user(x, ptr) \ ({ \ int __gu_err = -EFAULT; \ @@ -238,8 +245,6 @@ int __put_user_bad(void) __attribute__((noreturn)); __get_user(x, ptr); \ }) -int __get_user_bad(void) __attribute__((noreturn)); - unsigned long __must_check raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); -- cgit v1.2.3 From 10e5afb3d260f2d2521889d87ebdefb7fc3d4087 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 8 Oct 2020 16:43:17 +0200 Subject: s390/uaccess: fix indentation Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bf47d93ff1c6..c868e7ee49b3 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s default: \ __put_user_bad(); \ break; \ - } \ + } \ __builtin_expect(__pu_err, 0); \ }) -- cgit v1.2.3