summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/Makefile13
-rw-r--r--arch/s390/boot/startup.c1
-rw-r--r--arch/s390/boot/uv.c1
-rw-r--r--arch/s390/include/asm/ap.h220
-rw-r--r--arch/s390/include/asm/cpu_mcf.h36
-rw-r--r--arch/s390/include/asm/ctl_reg.h2
-rw-r--r--arch/s390/include/asm/elf.h15
-rw-r--r--arch/s390/include/asm/entry-common.h1
-rw-r--r--arch/s390/include/asm/linkage.h2
-rw-r--r--arch/s390/include/asm/nmi.h6
-rw-r--r--arch/s390/include/asm/preempt.h16
-rw-r--r--arch/s390/include/asm/ptrace.h24
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/softirq_stack.h13
-rw-r--r--arch/s390/include/asm/stacktrace.h116
-rw-r--r--arch/s390/include/asm/uv.h8
-rw-r--r--arch/s390/include/asm/vdso.h25
-rw-r--r--arch/s390/include/asm/vdso/gettimeofday.h1
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/asm-offsets.c6
-rw-r--r--arch/s390/kernel/compat_signal.c13
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/entry.S141
-rw-r--r--arch/s390/kernel/irq.c28
-rw-r--r--arch/s390/kernel/kprobes.c9
-rw-r--r--arch/s390/kernel/machine_kexec.c3
-rw-r--r--arch/s390/kernel/nmi.c129
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c1026
-rw-r--r--arch/s390/kernel/perf_cpum_cf_common.c27
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c1148
-rw-r--r--arch/s390/kernel/process.c6
-rw-r--r--arch/s390/kernel/setup.c3
-rw-r--r--arch/s390/kernel/signal.c39
-rw-r--r--arch/s390/kernel/smp.c15
-rw-r--r--arch/s390/kernel/syscall.c34
-rw-r--r--arch/s390/kernel/traps.c14
-rw-r--r--arch/s390/kernel/uv.c10
-rw-r--r--arch/s390/kernel/vdso.c63
-rw-r--r--arch/s390/kernel/vdso32/.gitignore2
-rw-r--r--arch/s390/kernel/vdso32/Makefile75
-rwxr-xr-xarch/s390/kernel/vdso32/gen_vdso_offsets.sh15
-rw-r--r--arch/s390/kernel/vdso32/note.S13
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S141
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S15
-rw-r--r--arch/s390/kernel/vdso32/vdso_user_wrapper.S21
-rw-r--r--arch/s390/kernel/vdso64/Makefile8
-rwxr-xr-xarch/s390/kernel/vdso64/gen_vdso_offsets.sh15
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S5
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S17
-rw-r--r--arch/s390/lib/string.c2
-rw-r--r--arch/s390/lib/test_unwind.c5
-rw-r--r--arch/s390/lib/uaccess.c2
-rw-r--r--arch/s390/mm/fault.c49
-rw-r--r--arch/s390/mm/maccess.c13
55 files changed, 1971 insertions, 1651 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 07b2328f29e0..a0e2130f0100 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -163,7 +163,6 @@ config S390
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO
select HAVE_IOREMAP_PROT if PCI
- select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@@ -438,6 +437,7 @@ config COMPAT
select COMPAT_OLD_SIGACTION
select HAVE_UID16
depends on MULTIUSER
+ depends on !CC_IS_CLANG
help
Select this option if you want to enable your system kernel to
handle system-calls from ELF binaries for 31 bit ESA. This option
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 098abe3a56f3..95c75e653e43 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -166,6 +166,19 @@ archheaders:
archprepare:
$(Q)$(MAKE) $(build)=$(syscalls) kapi
$(Q)$(MAKE) $(build)=$(tools) kapi
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+ $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
+ $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
+ $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+endif
# Don't use tabs in echo arguments
define archhelp
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index bbe4df6c2f8b..d0cf21641e3a 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -23,6 +23,7 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size);
+int __bootdata(is_full_image) = 1;
u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 82b99b916243..f6b0c4f43c99 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -36,6 +36,7 @@ void uv_query_info(void)
uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
+ uv_info.uv_feature_indications = uvcb.uv_feature_indications;
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 837d1699b109..3afbee21dc1f 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -53,18 +53,20 @@ struct ap_queue_status {
*/
static inline bool ap_instructions_available(void)
{
- register unsigned long reg0 asm ("0") = AP_MKQID(0, 0);
- register unsigned long reg1 asm ("1") = 0;
- register unsigned long reg2 asm ("2") = 0;
+ unsigned long reg0 = AP_MKQID(0, 0);
+ unsigned long reg1 = 0;
asm volatile(
- " .long 0xb2af0000\n" /* PQAP(TAPQ) */
- "0: la %0,1\n"
+ " lgr 0,%[reg0]\n" /* qid into gr0 */
+ " lghi 1,0\n" /* 0 into gr1 */
+ " lghi 2,0\n" /* 0 into gr2 */
+ " .long 0xb2af0000\n" /* PQAP(TAPQ) */
+ "0: la %[reg1],1\n" /* 1 into reg1 */
"1:\n"
EX_TABLE(0b, 1b)
- : "+d" (reg1), "+d" (reg2)
- : "d" (reg0)
- : "cc");
+ : [reg1] "+&d" (reg1)
+ : [reg0] "d" (reg0)
+ : "cc", "0", "1", "2");
return reg1 != 0;
}
@@ -77,14 +79,18 @@ static inline bool ap_instructions_available(void)
*/
static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
{
- register unsigned long reg0 asm ("0") = qid;
- register struct ap_queue_status reg1 asm ("1");
- register unsigned long reg2 asm ("2");
-
- asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */
- : "=d" (reg1), "=d" (reg2)
- : "d" (reg0)
- : "cc");
+ struct ap_queue_status reg1;
+ unsigned long reg2;
+
+ asm volatile(
+ " lgr 0,%[qid]\n" /* qid into gr0 */
+ " lghi 2,0\n" /* 0 into gr2 */
+ " .long 0xb2af0000\n" /* PQAP(TAPQ) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg2],2\n" /* gr2 into reg2 */
+ : [reg1] "=&d" (reg1), [reg2] "=&d" (reg2)
+ : [qid] "d" (qid)
+ : "cc", "0", "1", "2");
if (info)
*info = reg2;
return reg1;
@@ -115,14 +121,16 @@ static inline struct ap_queue_status ap_test_queue(ap_qid_t qid,
*/
static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
{
- register unsigned long reg0 asm ("0") = qid | (1UL << 24);
- register struct ap_queue_status reg1 asm ("1");
+ unsigned long reg0 = qid | (1UL << 24); /* fc 1UL is RAPQ */
+ struct ap_queue_status reg1;
asm volatile(
- ".long 0xb2af0000" /* PQAP(RAPQ) */
- : "=d" (reg1)
- : "d" (reg0)
- : "cc");
+ " lgr 0,%[reg0]\n" /* qid arg into gr0 */
+ " .long 0xb2af0000\n" /* PQAP(RAPQ) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ : [reg1] "=&d" (reg1)
+ : [reg0] "d" (reg0)
+ : "cc", "0", "1");
return reg1;
}
@@ -134,14 +142,16 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
*/
static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
{
- register unsigned long reg0 asm ("0") = qid | (2UL << 24);
- register struct ap_queue_status reg1 asm ("1");
+ unsigned long reg0 = qid | (2UL << 24); /* fc 2UL is ZAPQ */
+ struct ap_queue_status reg1;
asm volatile(
- ".long 0xb2af0000" /* PQAP(ZAPQ) */
- : "=d" (reg1)
- : "d" (reg0)
- : "cc");
+ " lgr 0,%[reg0]\n" /* qid arg into gr0 */
+ " .long 0xb2af0000\n" /* PQAP(ZAPQ) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ : [reg1] "=&d" (reg1)
+ : [reg0] "d" (reg0)
+ : "cc", "0", "1");
return reg1;
}
@@ -172,18 +182,20 @@ struct ap_config_info {
*/
static inline int ap_qci(struct ap_config_info *config)
{
- register unsigned long reg0 asm ("0") = 4UL << 24;
- register unsigned long reg1 asm ("1") = -EOPNOTSUPP;
- register struct ap_config_info *reg2 asm ("2") = config;
+ unsigned long reg0 = 4UL << 24; /* fc 4UL is QCI */
+ unsigned long reg1 = -EOPNOTSUPP;
+ struct ap_config_info *reg2 = config;
asm volatile(
- ".long 0xb2af0000\n" /* PQAP(QCI) */
- "0: la %0,0\n"
+ " lgr 0,%[reg0]\n" /* QCI fc into gr0 */
+ " lgr 2,%[reg2]\n" /* ptr to config into gr2 */
+ " .long 0xb2af0000\n" /* PQAP(QCI) */
+ "0: la %[reg1],0\n" /* good case, QCI fc available */
"1:\n"
EX_TABLE(0b, 1b)
- : "+d" (reg1)
- : "d" (reg0), "d" (reg2)
- : "cc", "memory");
+ : [reg1] "+&d" (reg1)
+ : [reg0] "d" (reg0), [reg2] "d" (reg2)
+ : "cc", "memory", "0", "2");
return reg1;
}
@@ -220,21 +232,25 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl,
void *ind)
{
- register unsigned long reg0 asm ("0") = qid | (3UL << 24);
- register union {
+ unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */
+ union {
unsigned long value;
struct ap_qirq_ctrl qirqctrl;
struct ap_queue_status status;
- } reg1 asm ("1");
- register void *reg2 asm ("2") = ind;
+ } reg1;
+ void *reg2 = ind;
reg1.qirqctrl = qirqctrl;
asm volatile(
- ".long 0xb2af0000" /* PQAP(AQIC) */
- : "+d" (reg1)
- : "d" (reg0), "d" (reg2)
- : "cc");
+ " lgr 0,%[reg0]\n" /* qid param into gr0 */
+ " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
+ " lgr 2,%[reg2]\n" /* ni addr into gr2 */
+ " .long 0xb2af0000\n" /* PQAP(AQIC) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ : [reg1] "+&d" (reg1)
+ : [reg0] "d" (reg0), [reg2] "d" (reg2)
+ : "cc", "0", "1", "2");
return reg1.status;
}
@@ -268,21 +284,24 @@ union ap_qact_ap_info {
static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
union ap_qact_ap_info *apinfo)
{
- register unsigned long reg0 asm ("0") = qid | (5UL << 24)
- | ((ifbit & 0x01) << 22);
- register union {
+ unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
+ union {
unsigned long value;
struct ap_queue_status status;
- } reg1 asm ("1");
- register unsigned long reg2 asm ("2");
+ } reg1;
+ unsigned long reg2;
reg1.value = apinfo->val;
asm volatile(
- ".long 0xb2af0000" /* PQAP(QACT) */
- : "+d" (reg1), "=d" (reg2)
- : "d" (reg0)
- : "cc");
+ " lgr 0,%[reg0]\n" /* qid param into gr0 */
+ " lgr 1,%[reg1]\n" /* qact in info into gr1 */
+ " .long 0xb2af0000\n" /* PQAP(QACT) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg2],2\n" /* qact out info into reg2 */
+ : [reg1] "+&d" (reg1), [reg2] "=&d" (reg2)
+ : [reg0] "d" (reg0)
+ : "cc", "0", "1", "2");
apinfo->val = reg2;
return reg1.status;
}
@@ -303,19 +322,24 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
unsigned long long psmid,
void *msg, size_t length)
{
- register unsigned long reg0 asm ("0") = qid | 0x40000000UL;
- register struct ap_queue_status reg1 asm ("1");
- register unsigned long reg2 asm ("2") = (unsigned long) msg;
- register unsigned long reg3 asm ("3") = (unsigned long) length;
- register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32);
- register unsigned long reg5 asm ("5") = psmid & 0xffffffff;
+ unsigned long reg0 = qid | 0x40000000UL; /* 0x4... is last msg part */
+ union register_pair nqap_r1, nqap_r2;
+ struct ap_queue_status reg1;
+
+ nqap_r1.even = (unsigned int)(psmid >> 32);
+ nqap_r1.odd = psmid & 0xffffffff;
+ nqap_r2.even = (unsigned long)msg;
+ nqap_r2.odd = (unsigned long)length;
asm volatile (
- "0: .long 0xb2ad0042\n" /* NQAP */
- " brc 2,0b"
- : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3)
- : "d" (reg4), "d" (reg5)
- : "cc", "memory");
+ " lgr 0,%[reg0]\n" /* qid param in gr0 */
+ "0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n"
+ " brc 2,0b\n" /* handle partial completion */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1),
+ [nqap_r2] "+&d" (nqap_r2.pair)
+ : [nqap_r1] "d" (nqap_r1.pair)
+ : "cc", "memory", "0", "1");
return reg1;
}
@@ -325,6 +349,8 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
* @psmid: Pointer to program supplied message identifier
* @msg: The message text
* @length: The message length
+ * @reslength: Resitual length on return
+ * @resgr0: input: gr0 value (only used if != 0), output: resitual gr0 content
*
* Returns AP queue status structure.
* Condition code 1 on DQAP means the receive has taken place
@@ -336,27 +362,65 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
* Note that gpr2 is used by the DQAP instruction to keep track of
* any 'residual' length, in case the instruction gets interrupted.
* Hence it gets zeroed before the instruction.
+ * If the message does not fit into the buffer, this function will
+ * return with a truncated message and the reply in the firmware queue
+ * is not removed. This is indicated to the caller with an
+ * ap_queue_status response_code value of all bits on (0xFF) and (if
+ * the reslength ptr is given) the remaining length is stored in
+ * *reslength and (if the resgr0 ptr is given) the updated gr0 value
+ * for further processing of this msg entry is stored in *resgr0. The
+ * caller needs to detect this situation and should invoke ap_dqap
+ * with a valid resgr0 ptr and a value in there != 0 to indicate that
+ * *resgr0 is to be used instead of qid to further process this entry.
*/
static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
unsigned long long *psmid,
- void *msg, size_t length)
+ void *msg, size_t length,
+ size_t *reslength,
+ unsigned long *resgr0)
{
- register unsigned long reg0 asm("0") = qid | 0x80000000UL;
- register struct ap_queue_status reg1 asm ("1");
- register unsigned long reg2 asm("2") = 0UL;
- register unsigned long reg4 asm("4") = (unsigned long) msg;
- register unsigned long reg5 asm("5") = (unsigned long) length;
- register unsigned long reg6 asm("6") = 0UL;
- register unsigned long reg7 asm("7") = 0UL;
+ unsigned long reg0 = resgr0 && *resgr0 ? *resgr0 : qid | 0x80000000UL;
+ struct ap_queue_status reg1;
+ unsigned long reg2;
+ union register_pair rp1, rp2;
+ rp1.even = 0UL;
+ rp1.odd = 0UL;
+ rp2.even = (unsigned long)msg;
+ rp2.odd = (unsigned long)length;
asm volatile(
- "0: .long 0xb2ae0064\n" /* DQAP */
- " brc 6,0b\n"
- : "+d" (reg0), "=d" (reg1), "+d" (reg2),
- "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7)
- : : "cc", "memory");
- *psmid = (((unsigned long long) reg6) << 32) + reg7;
+ " lgr 0,%[reg0]\n" /* qid param into gr0 */
+ " lghi 2,0\n" /* 0 into gr2 (res length) */
+ "0: ltgr %N[rp2],%N[rp2]\n" /* check buf len */
+ " jz 2f\n" /* go out if buf len is 0 */
+ "1: .insn rre,0xb2ae0000,%[rp1],%[rp2]\n"
+ " brc 6,0b\n" /* handle partial complete */
+ "2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */
+ : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), [reg2] "=&d" (reg2),
+ [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair)
+ :
+ : "cc", "memory", "0", "1", "2");
+
+ if (reslength)
+ *reslength = reg2;
+ if (reg2 != 0 && rp2.odd == 0) {
+ /*
+ * Partially complete, status in gr1 is not set.
+ * Signal the caller that this dqap is only partially received
+ * with a special status response code 0xFF and *resgr0 updated
+ */
+ reg1.response_code = 0xFF;
+ if (resgr0)
+ *resgr0 = reg0;
+ } else {
+ *psmid = (((unsigned long long)rp1.even) << 32) + rp1.odd;
+ if (resgr0)
+ *resgr0 = 0;
+ }
+
return reg1;
}
diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h
index 4dcefddb7751..ca0e0e5ddbc4 100644
--- a/arch/s390/include/asm/cpu_mcf.h
+++ b/arch/s390/include/asm/cpu_mcf.h
@@ -32,39 +32,22 @@ static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
[CPUMF_CTR_SET_MT_DIAG] = 0x20,
};
-static inline void ctr_set_enable(u64 *state, int ctr_set)
-{
- *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
-}
-static inline void ctr_set_disable(u64 *state, int ctr_set)
-{
- *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT);
-}
-static inline void ctr_set_start(u64 *state, int ctr_set)
-{
- *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
-}
-static inline void ctr_set_stop(u64 *state, int ctr_set)
-{
- *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT);
-}
-
-static inline void ctr_set_multiple_enable(u64 *state, u64 ctrsets)
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
{
*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
}
-static inline void ctr_set_multiple_disable(u64 *state, u64 ctrsets)
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
{
*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
}
-static inline void ctr_set_multiple_start(u64 *state, u64 ctrsets)
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
{
*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
}
-static inline void ctr_set_multiple_stop(u64 *state, u64 ctrsets)
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
{
*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
}
@@ -92,8 +75,15 @@ struct cpu_cf_events {
struct cpumf_ctr_info info;
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
atomic64_t alert;
- u64 state;
+ u64 state; /* For perf_event_open SVC */
+ u64 dev_state; /* For /dev/hwctr */
unsigned int flags;
+ size_t used; /* Bytes used in data */
+ size_t usedss; /* Bytes used in start/stop */
+ unsigned char start[PAGE_SIZE]; /* Counter set at event add */
+ unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
+ unsigned int sets; /* # Counter set saved in memory */
};
DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
@@ -124,4 +114,6 @@ static inline int stccm_avail(void)
size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
struct cpumf_ctr_info *info);
+int cfset_online_cpu(unsigned int cpu);
+int cfset_offline_cpu(unsigned int cpu);
#endif /* _ASM_S390_CPU_MCF_H */
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index ed5efbb531c4..adc0179fa34e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -21,8 +21,6 @@
#define CR0_INTERRUPT_KEY_SUBMASK BIT(63 - 57)
#define CR0_MEASUREMENT_ALERT_SUBMASK BIT(63 - 58)
-#define CR2_GUARDED_STORAGE BIT(63 - 59)
-
#define CR14_UNUSED_32 BIT(63 - 32)
#define CR14_UNUSED_33 BIT(63 - 33)
#define CR14_CHANNEL_REPORT_SUBMASK BIT(63 - 35)
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 66d51ad090ab..bd00c94620d3 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -144,10 +144,6 @@ typedef s390_compat_regs compat_elf_gregset_t;
#include <linux/sched/mm.h> /* for task_struct */
#include <asm/mmu_context.h>
-#include <asm/vdso.h>
-
-extern unsigned int vdso_enabled;
-
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
@@ -176,7 +172,7 @@ struct arch_elf_state {
!current->mm->context.alloc_pgste) { \
set_thread_flag(TIF_PGSTE); \
set_pt_regs_flag(task_pt_regs(current), \
- PIF_SYSCALL_RESTART); \
+ PIF_EXECVE_PGSTE_RESTART); \
_state->rc = -EAGAIN; \
} \
_state->rc; \
@@ -268,11 +264,10 @@ do { \
#define STACK_RND_MASK MMAP_RND_MASK
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
-#define ARCH_DLINFO \
-do { \
- if (vdso_enabled) \
- NEW_AUX_ENT(AT_SYSINFO_EHDR, \
- (unsigned long)current->mm->context.vdso_base); \
+#define ARCH_DLINFO \
+do { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (unsigned long)current->mm->context.vdso_base); \
} while (0)
struct linux_binprm;
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index baa8005090c3..17aead80aadb 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -14,7 +14,6 @@
#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
void do_per_trap(struct pt_regs *regs);
-void do_syscall(struct pt_regs *regs);
#ifdef CONFIG_DEBUG_ENTRY
static __always_inline void arch_check_user_regs(struct pt_regs *regs)
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index a0a7a2c72bd4..24e8fed150cf 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -5,7 +5,7 @@
#include <asm/asm-const.h>
#include <linux/stringify.h>
-#define __ALIGN .align 4, 0x07
+#define __ALIGN .align 16, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
/*
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 20e51c9ff240..2db45d7e68aa 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -23,12 +23,16 @@
#define MCCK_CODE_SYSTEM_DAMAGE BIT(63)
#define MCCK_CODE_EXT_DAMAGE BIT(63 - 5)
#define MCCK_CODE_CP BIT(63 - 9)
-#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46)
+#define MCCK_CODE_STG_ERROR BIT(63 - 16)
+#define MCCK_CODE_STG_KEY_ERROR BIT(63 - 18)
+#define MCCK_CODE_STG_DEGRAD BIT(63 - 19)
#define MCCK_CODE_PSW_MWP_VALID BIT(63 - 20)
#define MCCK_CODE_PSW_IA_VALID BIT(63 - 23)
+#define MCCK_CODE_STG_FAIL_ADDR BIT(63 - 24)
#define MCCK_CODE_CR_VALID BIT(63 - 29)
#define MCCK_CODE_GS_VALID BIT(63 - 36)
#define MCCK_CODE_FC_VALID BIT(63 - 43)
+#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46)
#ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 23ff51be7e29..d9d5350cc3ec 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -29,12 +29,6 @@ static inline void preempt_count_set(int pc)
old, new) != old);
}
-#define init_task_preempt_count(p) do { } while (0)
-
-#define init_idle_preempt_count(p, cpu) do { \
- S390_lowcore.preempt_count = PREEMPT_DISABLED; \
-} while (0)
-
static inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
@@ -88,12 +82,6 @@ static inline void preempt_count_set(int pc)
S390_lowcore.preempt_count = pc;
}
-#define init_task_preempt_count(p) do { } while (0)
-
-#define init_idle_preempt_count(p, cpu) do { \
- S390_lowcore.preempt_count = PREEMPT_DISABLED; \
-} while (0)
-
static inline void set_preempt_need_resched(void)
{
}
@@ -130,6 +118,10 @@ static inline bool should_resched(int preempt_offset)
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#define init_task_preempt_count(p) do { } while (0)
+/* Deferred to CPU bringup time */
+#define init_idle_preempt_count(p, cpu) do { } while (0)
+
#ifdef CONFIG_PREEMPTION
extern void preempt_schedule(void);
#define __preempt_schedule() preempt_schedule()
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index c7850d649373..61b22aa990e7 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -11,15 +11,15 @@
#include <uapi/asm/ptrace.h>
#include <asm/tpi.h>
-#define PIF_SYSCALL 0 /* inside a system call */
-#define PIF_SYSCALL_RESTART 1 /* restart the current system call */
-#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
-#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
+#define PIF_SYSCALL 0 /* inside a system call */
+#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */
+#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
+#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
-#define _PIF_SYSCALL BIT(PIF_SYSCALL)
-#define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART)
-#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
-#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
+#define _PIF_SYSCALL BIT(PIF_SYSCALL)
+#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART)
+#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
+#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
#ifndef __ASSEMBLY__
@@ -162,6 +162,14 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
return !!(regs->flags & (1UL << flag));
}
+static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ int ret = test_pt_regs_flag(regs, flag);
+
+ clear_pt_regs_flag(regs, flag);
+ return ret;
+}
+
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index a8b75da3c1b8..3a77aa96d092 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -159,6 +159,8 @@ static inline unsigned long kaslr_offset(void)
return __kaslr_offset;
}
+extern int is_full_image;
+
static inline u32 gen_lpswe(unsigned long addr)
{
BUILD_BUG_ON(addr > 0xfff);
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
new file mode 100644
index 000000000000..fd17f25704bd
--- /dev/null
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_S390_SOFTIRQ_STACK_H
+#define __ASM_S390_SOFTIRQ_STACK_H
+
+#include <asm/lowcore.h>
+#include <asm/stacktrace.h>
+
+static inline void do_softirq_own_stack(void)
+{
+ call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
+}
+
+#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 76c6034428be..3d8a4b94c620 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -74,23 +74,6 @@ struct stack_frame {
((unsigned long)__builtin_frame_address(0) - \
offsetof(struct stack_frame, back_chain))
-#define CALL_ARGS_0() \
- register unsigned long r2 asm("2")
-#define CALL_ARGS_1(arg1) \
- register unsigned long r2 asm("2") = (unsigned long)(arg1)
-#define CALL_ARGS_2(arg1, arg2) \
- CALL_ARGS_1(arg1); \
- register unsigned long r3 asm("3") = (unsigned long)(arg2)
-#define CALL_ARGS_3(arg1, arg2, arg3) \
- CALL_ARGS_2(arg1, arg2); \
- register unsigned long r4 asm("4") = (unsigned long)(arg3)
-#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \
- CALL_ARGS_3(arg1, arg2, arg3); \
- register unsigned long r4 asm("5") = (unsigned long)(arg4)
-#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \
- CALL_ARGS_4(arg1, arg2, arg3, arg4); \
- register unsigned long r4 asm("6") = (unsigned long)(arg5)
-
/*
* To keep this simple mark register 2-6 as being changed (volatile)
* by the called function, even though register 6 is saved/nonvolatile.
@@ -109,34 +92,113 @@ struct stack_frame {
#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
#define CALL_CLOBBER_0 CALL_CLOBBER_1
-#define CALL_ON_STACK(fn, stack, nr, args...) \
+#define CALL_LARGS_0(...) \
+ long dummy = 0
+#define CALL_LARGS_1(t1, a1) \
+ long arg1 = (long)(t1)(a1)
+#define CALL_LARGS_2(t1, a1, t2, a2) \
+ CALL_LARGS_1(t1, a1); \
+ long arg2 = (long)(t2)(a2)
+#define CALL_LARGS_3(t1, a1, t2, a2, t3, a3) \
+ CALL_LARGS_2(t1, a1, t2, a2); \
+ long arg3 = (long)(t3)(a3)
+#define CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4) \
+ CALL_LARGS_3(t1, a1, t2, a2, t3, a3); \
+ long arg4 = (long)(t4)(a4)
+#define CALL_LARGS_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) \
+ CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4); \
+ long arg5 = (long)(t5)(a5)
+
+#define CALL_REGS_0 \
+ register long r2 asm("2") = dummy
+#define CALL_REGS_1 \
+ register long r2 asm("2") = arg1
+#define CALL_REGS_2 \
+ CALL_REGS_1; \
+ register long r3 asm("3") = arg2
+#define CALL_REGS_3 \
+ CALL_REGS_2; \
+ register long r4 asm("4") = arg3
+#define CALL_REGS_4 \
+ CALL_REGS_3; \
+ register long r5 asm("5") = arg4
+#define CALL_REGS_5 \
+ CALL_REGS_4; \
+ register long r6 asm("6") = arg5
+
+#define CALL_TYPECHECK_0(...)
+#define CALL_TYPECHECK_1(t, a, ...) \
+ typecheck(t, a)
+#define CALL_TYPECHECK_2(t, a, ...) \
+ CALL_TYPECHECK_1(__VA_ARGS__); \
+ typecheck(t, a)
+#define CALL_TYPECHECK_3(t, a, ...) \
+ CALL_TYPECHECK_2(__VA_ARGS__); \
+ typecheck(t, a)
+#define CALL_TYPECHECK_4(t, a, ...) \
+ CALL_TYPECHECK_3(__VA_ARGS__); \
+ typecheck(t, a)
+#define CALL_TYPECHECK_5(t, a, ...) \
+ CALL_TYPECHECK_4(__VA_ARGS__); \
+ typecheck(t, a)
+
+#define CALL_PARM_0(...) void
+#define CALL_PARM_1(t, a, ...) t
+#define CALL_PARM_2(t, a, ...) t, CALL_PARM_1(__VA_ARGS__)
+#define CALL_PARM_3(t, a, ...) t, CALL_PARM_2(__VA_ARGS__)
+#define CALL_PARM_4(t, a, ...) t, CALL_PARM_3(__VA_ARGS__)
+#define CALL_PARM_5(t, a, ...) t, CALL_PARM_4(__VA_ARGS__)
+#define CALL_PARM_6(t, a, ...) t, CALL_PARM_5(__VA_ARGS__)
+
+/*
+ * Use call_on_stack() to call a function switching to a specified
+ * stack. Proper sign and zero extension of function arguments is
+ * done. Usage:
+ *
+ * rc = call_on_stack(nr, stack, rettype, fn, t1, a1, t2, a2, ...)
+ *
+ * - nr specifies the number of function arguments of fn.
+ * - stack specifies the stack to be used.
+ * - fn is the function to be called.
+ * - rettype is the return type of fn.
+ * - t1, a1, ... are pairs, where t1 must match the type of the first
+ * argument of fn, t2 the second, etc. a1 is the corresponding
+ * first function argument (not name), etc.
+ */
+#define call_on_stack(nr, stack, rettype, fn, ...) \
({ \
+ rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = fn; \
unsigned long frame = current_frame_address(); \
- CALL_ARGS_##nr(args); \
+ unsigned long __stack = stack; \
unsigned long prev; \
+ CALL_LARGS_##nr(__VA_ARGS__); \
+ CALL_REGS_##nr; \
\
+ CALL_TYPECHECK_##nr(__VA_ARGS__); \
asm volatile( \
- " la %[_prev],0(15)\n" \
+ " lgr %[_prev],15\n" \
" lg 15,%[_stack]\n" \
" stg %[_frame],%[_bc](15)\n" \
" brasl 14,%[_fn]\n" \
- " la 15,0(%[_prev])\n" \
- : [_prev] "=&a" (prev), CALL_FMT_##nr \
- : [_stack] "R" (stack), \
+ " lgr 15,%[_prev]\n" \
+ : [_prev] "=&d" (prev), CALL_FMT_##nr \
+ : [_stack] "R" (__stack), \
[_bc] "i" (offsetof(struct stack_frame, back_chain)), \
[_frame] "d" (frame), \
- [_fn] "X" (fn) : CALL_CLOBBER_##nr); \
- r2; \
+ [_fn] "X" (__fn) : CALL_CLOBBER_##nr); \
+ (rettype)r2; \
})
-#define CALL_ON_STACK_NORETURN(fn, stack) \
+#define call_on_stack_noreturn(fn, stack) \
({ \
+ void (*__fn)(void) = fn; \
+ \
asm volatile( \
" la 15,0(%[_stack])\n" \
" xc %[_bc](8,15),%[_bc](15)\n" \
" brasl 14,%[_fn]\n" \
::[_bc] "i" (offsetof(struct stack_frame, back_chain)), \
- [_stack] "a" (stack), [_fn] "X" (fn)); \
+ [_stack] "a" (stack), [_fn] "X" (__fn)); \
BUG(); \
})
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 7b98d4caee77..12c5f006c136 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -73,6 +73,10 @@ enum uv_cmds_inst {
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
};
+enum uv_feat_ind {
+ BIT_UV_FEAT_MISC = 0,
+};
+
struct uv_cb_header {
u16 len;
u16 cmd; /* Command Code */
@@ -97,7 +101,8 @@ struct uv_cb_qui {
u64 max_guest_stor_addr;
u8 reserved88[158 - 136];
u16 max_guest_cpu_id;
- u8 reserveda0[200 - 160];
+ u64 uv_feature_indications;
+ u8 reserveda0[200 - 168];
} __packed __aligned(8);
/* Initialize Ultravisor */
@@ -274,6 +279,7 @@ struct uv_info {
unsigned long max_sec_stor_addr;
unsigned int max_num_sec_conf;
unsigned short max_guest_cpu_id;
+ unsigned long uv_feature_indications;
};
extern struct uv_info uv_info;
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index b45e3dddd2c2..53165aa7813a 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -4,18 +4,31 @@
#include <vdso/datapage.h>
-/* Default link address for the vDSO */
-#define VDSO64_LBASE 0
+#ifndef __ASSEMBLY__
-#define __VVAR_PAGES 2
+#include <generated/vdso64-offsets.h>
+#ifdef CONFIG_COMPAT
+#include <generated/vdso32-offsets.h>
+#endif
-#define VDSO_VERSION_STRING LINUX_2.6.29
-
-#ifndef __ASSEMBLY__
+#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
+#ifdef CONFIG_COMPAT
+#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
+#else
+#define VDSO32_SYMBOL(tsk, name) (-1UL)
+#endif
extern struct vdso_data *vdso_data;
int vdso_getcpu_init(void);
#endif /* __ASSEMBLY__ */
+
+/* Default link address for the vDSO */
+#define VDSO_LBASE 0
+
+#define __VVAR_PAGES 2
+
+#define VDSO_VERSION_STRING LINUX_2.6.29
+
#endif /* __S390_VDSO_H__ */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index 383c53c3dddd..d6465b22ffe3 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -8,7 +8,6 @@
#include <asm/timex.h>
#include <asm/unistd.h>
-#include <asm/vdso.h>
#include <linux/compiler.h>
#define vdso_calc_delta __arch_vdso_calc_delta
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 68ca1834316f..4a44ba5a2d73 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -71,10 +71,10 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
+obj-$(CONFIG_COMPAT) += vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f53605a3dfcd..77ff2130cb04 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -14,8 +14,6 @@
#include <linux/pgtable.h>
#include <asm/idle.h>
#include <asm/gmap.h>
-#include <asm/nmi.h>
-#include <asm/setup.h>
#include <asm/stacktrace.h>
int main(void)
@@ -108,7 +106,6 @@ int main(void)
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
- OFFSET(__LC_CLOCK_COMPARATOR, lowcore, clock_comparator);
OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
@@ -145,9 +142,6 @@ int main(void)
OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
BLANK();
- /* extended machine check save area */
- OFFSET(__MCESA_GS_SAVE_AREA, mcesa, guarded_storage_save_area);
- BLANK();
/* gmap/sie offsets */
OFFSET(__GMAP_ASCE, gmap, asce);
OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 1d0e17ec93eb..cca142fbb516 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -28,6 +28,7 @@
#include <linux/uaccess.h>
#include <asm/lowcore.h>
#include <asm/switch_to.h>
+#include <asm/vdso.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
#include "entry.h"
@@ -118,7 +119,6 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
return 0;
}
@@ -304,11 +304,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
restorer = (unsigned long __force)
ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- /* Signal frames without vectors registers are short ! */
- __u16 __user *svc = (void __user *) frame + frame_size - 2;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
- return -EFAULT;
- restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+ restorer = VDSO32_SYMBOL(current, sigreturn);
}
/* Set up registers for signal handler */
@@ -371,10 +367,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
restorer = (unsigned long __force)
ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- __u16 __user *svc = &frame->svc_insn;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
- return -EFAULT;
- restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+ restorer = VDSO32_SYMBOL(current, rt_sigreturn);
}
/* Create siginfo on the signal stack */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c2cf79d353cf..fb84e3fc1686 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -33,6 +33,8 @@
#include <asm/switch_to.h>
#include "entry.h"
+int __bootdata(is_full_image);
+
static void __init reset_tod_clock(void)
{
union tod_clock clk;
@@ -279,7 +281,7 @@ static void __init setup_boot_command_line(void)
static void __init check_image_bootable(void)
{
- if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING)))
+ if (is_full_image)
return;
sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3e8c6669373a..5a2f70cbd3a9 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -14,7 +14,6 @@
#include <asm/alternative-asm.h>
#include <asm/processor.h>
#include <asm/cache.h>
-#include <asm/ctl_reg.h>
#include <asm/dwarf.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
@@ -129,6 +128,24 @@ _LPP_OFFSET = __LC_LPP
"jnz .+8; .long 0xb2e8d000", 82
.endm
+ /*
+ * The CHKSTG macro jumps to the provided label in case the
+ * machine check interruption code reports one of unrecoverable
+ * storage errors:
+ * - Storage error uncorrected
+ * - Storage key error uncorrected
+ * - Storage degradation with Failing-storage-address validity
+ */
+ .macro CHKSTG errlabel
+ TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR)
+ jnz \errlabel
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD
+ jz oklabel\@
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR
+ jnz \errlabel
+oklabel\@:
+ .endm
+
#if IS_ENABLED(CONFIG_KVM)
/*
* The OUTSIDE macro jumps to the provided label in case the value
@@ -148,6 +165,13 @@ _LPP_OFFSET = __LC_LPP
clgr %r14,%r13
jhe \outside_label
.endm
+
+ .macro SIEEXIT
+ lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ .endm
#endif
GEN_BR_THUNK %r14
@@ -235,7 +259,6 @@ ENTRY(sie64a)
# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
# Other instructions between sie64a and .Lsie_done should not cause program
# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
-# See also .Lcleanup_sie
.Lrewind_pad6:
nopr 7
.Lrewind_pad4:
@@ -341,10 +364,7 @@ ENTRY(pgm_check_handler)
#if IS_ENABLED(CONFIG_KVM)
# cleanup critical section for program checks in sie64a
OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f
- lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
- ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- larl %r9,sie_exit # skip forward to sie_exit
+ SIEEXIT
lghi %r10,_PIF_GUEST_FAULT
#endif
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
@@ -410,7 +430,8 @@ ENTRY(\name)
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f
- brasl %r14,.Lcleanup_sie
+ BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ SIEEXIT
#endif
0: CHECK_STACK __LC_SAVE_AREA_ASYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
@@ -484,8 +505,6 @@ ENTRY(mcck_int_handler)
BPOFF
la %r1,4095 # validate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
- sckc __LC_CLOCK_COMPARATOR # validate comparator
- lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
lg %r12,__LC_CURRENT
lmg %r8,%r9,__LC_MCK_OLD_PSW
@@ -496,41 +515,7 @@ ENTRY(mcck_int_handler)
la %r14,4095
lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs
ptlb
- lg %r11,__LC_MCESAD-4095(%r14) # extended machine check save area
- nill %r11,0xfc00 # MCESA_ORIGIN_MASK
- TSTMSK __LC_CREGS_SAVE_AREA+16-4095(%r14),CR2_GUARDED_STORAGE
- jno 0f
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_GS_VALID
- jno 0f
- .insn rxy,0xe3000000004d,0,__MCESA_GS_SAVE_AREA(%r11) # LGSC
-0: l %r14,__LC_FP_CREG_SAVE_AREA-4095(%r14)
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_FC_VALID
- jo 0f
- sr %r14,%r14
-0: sfpc %r14
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- jo 0f
- lghi %r14,__LC_FPREGS_SAVE_AREA
- ld %f0,0(%r14)
- ld %f1,8(%r14)
- ld %f2,16(%r14)
- ld %f3,24(%r14)
- ld %f4,32(%r14)
- ld %f5,40(%r14)
- ld %f6,48(%r14)
- ld %f7,56(%r14)
- ld %f8,64(%r14)
- ld %f9,72(%r14)
- ld %f10,80(%r14)
- ld %f11,88(%r14)
- ld %f12,96(%r14)
- ld %f13,104(%r14)
- ld %f14,112(%r14)
- ld %f15,120(%r14)
- j 1f
-0: VLM %v0,%v15,0,%r11
- VLM %v16,%v31,256,%r11
-1: lghi %r14,__LC_CPU_TIMER_SAVE_AREA
+ lghi %r14,__LC_CPU_TIMER_SAVE_AREA
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
jo 3f
@@ -546,24 +531,29 @@ ENTRY(mcck_int_handler)
3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
- jnz 4f
+ jnz 6f
TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
-4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- tmhh %r8,0x0001 # interrupting from user ?
- jnz .Lmcck_user
#if IS_ENABLED(CONFIG_KVM)
- OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_stack
- OUTSIDE %r9,.Lsie_entry,.Lsie_skip,5f
+ OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f
+ OUTSIDE %r9,.Lsie_entry,.Lsie_skip,4f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-5: brasl %r14,.Lcleanup_sie
-#endif
+ j 5f
+4: CHKSTG .Lmcck_panic
+5: larl %r14,.Lstosm_tmp
+ stosm 0(%r14),0x04 # turn dat on, keep irqs off
+ BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ SIEEXIT
j .Lmcck_stack
-.Lmcck_user:
+#endif
+6: CHKSTG .Lmcck_panic
+ larl %r14,.Lstosm_tmp
+ stosm 0(%r14),0x04 # turn dat on, keep irqs off
+ tmhh %r8,0x0001 # interrupting from user ?
+ jz .Lmcck_stack
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
.Lmcck_stack:
lg %r15,__LC_MCCK_STACK
-.Lmcck_skip:
la %r11,STACK_FRAME_OVERHEAD(%r15)
stctg %c1,%c1,__PT_CR1(%r11)
lctlg %c1,%c1,__LC_KERNEL_ASCE
@@ -605,8 +595,33 @@ ENTRY(mcck_int_handler)
b __LC_RETURN_MCCK_LPSWE
.Lmcck_panic:
- lg %r15,__LC_NODAT_STACK
- j .Lmcck_skip
+ /*
+ * Iterate over all possible CPU addresses in the range 0..0xffff
+ * and stop each CPU using signal processor. Use compare and swap
+ * to allow just one CPU-stopper and prevent concurrent CPUs from
+ * stopping each other while leaving the others running.
+ */
+ lhi %r5,0
+ lhi %r6,1
+ larl %r7,.Lstop_lock
+ cs %r5,%r6,0(%r7) # single CPU-stopper only
+ jnz 4f
+ larl %r7,.Lthis_cpu
+ stap 0(%r7) # this CPU address
+ lh %r4,0(%r7)
+ nilh %r4,0
+ lhi %r0,1
+ sll %r0,16 # CPU counter
+ lhi %r3,0 # next CPU address
+0: cr %r3,%r4
+ je 2f
+1: sigp %r1,%r3,SIGP_STOP # stop next CPU
+ brc SIGP_CC_BUSY,1b
+2: ahi %r3,1
+ brct %r0,0b
+3: sigp %r1,%r4,SIGP_STOP # stop this CPU
+ brc SIGP_CC_BUSY,3b
+4: j 4b
ENDPROC(mcck_int_handler)
#
@@ -657,15 +672,11 @@ ENTRY(stack_overflow)
ENDPROC(stack_overflow)
#endif
-#if IS_ENABLED(CONFIG_KVM)
-.Lcleanup_sie:
- BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
- lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
- ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- larl %r9,sie_exit # skip forward to sie_exit
- BR_EX %r14,%r13
-#endif
+ .section .data, "aw"
+ .align 4
+.Lstop_lock: .long 0
+.Lthis_cpu: .short 0
+.Lstosm_tmp: .byte 0
.section .rodata, "a"
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
.globl sys_call_table
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index c0df4060d28d..234d085257eb 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -110,15 +110,17 @@ static int on_async_stack(void)
{
unsigned long frame = current_frame_address();
- return !!!((S390_lowcore.async_stack - frame) >> (PAGE_SHIFT + THREAD_SIZE_ORDER));
+ return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
}
static void do_irq_async(struct pt_regs *regs, int irq)
{
- if (on_async_stack())
+ if (on_async_stack()) {
do_IRQ(regs, irq);
- else
- CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq);
+ } else {
+ call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+ struct pt_regs *, regs, int, irq);
+ }
}
static int irq_pending(struct pt_regs *regs)
@@ -266,24 +268,6 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
}
/*
- * Switch to the asynchronous interrupt stack for softirq execution.
- */
-void do_softirq_own_stack(void)
-{
- unsigned long old, new;
-
- old = current_stack_pointer();
- /* Check against async. stack address range. */
- new = S390_lowcore.async_stack;
- if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
- CALL_ON_STACK(__do_softirq, new, 0);
- } else {
- /* We are already on the async stack. */
- __do_softirq();
- }
-}
-
-/*
* ext_int_hash[index] is the list head for all external interrupts that hash
* to this index.
*/
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 528bb31815c3..52d056a5f89f 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -92,11 +92,6 @@ static void copy_instruction(struct kprobe *p)
}
NOKPROBE_SYMBOL(copy_instruction);
-static inline int is_kernel_addr(void *addr)
-{
- return addr < (void *)_end;
-}
-
static int s390_get_insn_slot(struct kprobe *p)
{
/*
@@ -105,7 +100,7 @@ static int s390_get_insn_slot(struct kprobe *p)
* field can be patched and executed within the insn slot.
*/
p->ainsn.insn = NULL;
- if (is_kernel_addr(p->addr))
+ if (is_kernel((unsigned long)p->addr))
p->ainsn.insn = get_s390_insn_slot();
else if (is_module_addr(p->addr))
p->ainsn.insn = get_insn_slot();
@@ -117,7 +112,7 @@ static void s390_free_insn_slot(struct kprobe *p)
{
if (!p->ainsn.insn)
return;
- if (is_kernel_addr(p->addr))
+ if (is_kernel((unsigned long)p->addr))
free_s390_insn_slot(p->ainsn.insn, 0);
else
free_insn_slot(p->ainsn.insn, 0);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index d91989c7bd6a..1005a6935fbe 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -132,7 +132,8 @@ static bool kdump_csum_valid(struct kimage *image)
int rc;
preempt_disable();
- rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
+ rc = call_on_stack(1, S390_lowcore.nodat_stack, unsigned long, do_start_kdump,
+ unsigned long, (unsigned long)image);
preempt_enable();
return rc == 0;
#else
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 11f8c296f60d..20f8e1868853 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -189,12 +189,16 @@ void noinstr s390_handle_mcck(void)
* returns 0 if all required registers are available
* returns 1 otherwise
*/
-static int notrace s390_check_registers(union mci mci, int umode)
+static int notrace s390_validate_registers(union mci mci, int umode)
{
+ struct mcesa *mcesa;
+ void *fpt_save_area;
union ctlreg2 cr2;
int kill_task;
+ u64 zero;
kill_task = 0;
+ zero = 0;
if (!mci.gr) {
/*
@@ -205,14 +209,6 @@ static int notrace s390_check_registers(union mci mci, int umode)
s390_handle_damage();
kill_task = 1;
}
- /* Check control registers */
- if (!mci.cr) {
- /*
- * Control registers have unknown contents.
- * Can't recover and therefore stopping machine.
- */
- s390_handle_damage();
- }
if (!mci.fp) {
/*
* Floating point registers can't be restored. If the
@@ -225,35 +221,89 @@ static int notrace s390_check_registers(union mci mci, int umode)
if (!test_cpu_flag(CIF_FPU))
kill_task = 1;
}
+ fpt_save_area = &S390_lowcore.floating_pt_save_area;
if (!mci.fc) {
/*
* Floating point control register can't be restored.
* If the kernel currently uses the floating pointer
* registers and needs the FPC register the system is
* stopped. If the process has its floating pointer
- * registers loaded it is terminated.
+ * registers loaded it is terminated. Otherwise the
+ * FPC is just validated.
*/
if (S390_lowcore.fpu_flags & KERNEL_FPC)
s390_handle_damage();
+ asm volatile(
+ " lfpc %0\n"
+ :
+ : "Q" (zero));
if (!test_cpu_flag(CIF_FPU))
kill_task = 1;
+ } else {
+ asm volatile(
+ " lfpc %0\n"
+ :
+ : "Q" (S390_lowcore.fpt_creg_save_area));
}
- if (MACHINE_HAS_VX) {
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ if (!MACHINE_HAS_VX) {
+ /* Validate floating point registers */
+ asm volatile(
+ " ld 0,0(%0)\n"
+ " ld 1,8(%0)\n"
+ " ld 2,16(%0)\n"
+ " ld 3,24(%0)\n"
+ " ld 4,32(%0)\n"
+ " ld 5,40(%0)\n"
+ " ld 6,48(%0)\n"
+ " ld 7,56(%0)\n"
+ " ld 8,64(%0)\n"
+ " ld 9,72(%0)\n"
+ " ld 10,80(%0)\n"
+ " ld 11,88(%0)\n"
+ " ld 12,96(%0)\n"
+ " ld 13,104(%0)\n"
+ " ld 14,112(%0)\n"
+ " ld 15,120(%0)\n"
+ :
+ : "a" (fpt_save_area)
+ : "memory");
+ } else {
+ /* Validate vector registers */
+ union ctlreg0 cr0;
+
if (!mci.vr) {
/*
* Vector registers can't be restored. If the kernel
* currently uses vector registers the system is
* stopped. If the process has its vector registers
- * loaded it is terminated.
+ * loaded it is terminated. Otherwise just validate
+ * the registers.
*/
if (S390_lowcore.fpu_flags & KERNEL_VXR)
s390_handle_damage();
if (!test_cpu_flag(CIF_FPU))
kill_task = 1;
}
+ cr0.val = S390_lowcore.cregs_save_area[0];
+ cr0.afp = cr0.vx = 1;
+ __ctl_load(cr0.val, 0, 0);
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
+ :
+ : "Q" (*(struct vx_array *)mcesa->vector_save_area)
+ : "1");
+ __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
}
- /* Check if access registers are valid */
+ /* Validate access registers */
+ asm volatile(
+ " lam 0,15,0(%0)\n"
+ :
+ : "a" (&S390_lowcore.access_regs_save_area)
+ : "memory");
if (!mci.ar) {
/*
* Access registers have unknown contents.
@@ -261,7 +311,7 @@ static int notrace s390_check_registers(union mci mci, int umode)
*/
kill_task = 1;
}
- /* Check guarded storage registers */
+ /* Validate guarded storage registers */
cr2.val = S390_lowcore.cregs_save_area[2];
if (cr2.gse) {
if (!mci.gs) {
@@ -271,31 +321,26 @@ static int notrace s390_check_registers(union mci mci, int umode)
* It has to be terminated.
*/
kill_task = 1;
+ } else {
+ load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
}
}
- /* Check if old PSW is valid */
- if (!mci.wp) {
- /*
- * Can't tell if we come from user or kernel mode
- * -> stopping machine.
- */
- s390_handle_damage();
- }
- /* Check for invalid kernel instruction address */
- if (!mci.ia && !umode) {
- /*
- * The instruction address got lost while running
- * in the kernel -> stopping machine.
- */
- s390_handle_damage();
- }
+ /*
+ * The getcpu vdso syscall reads CPU number from the programmable
+ * field of the TOD clock. Disregard the TOD programmable register
+ * validity bit and load the CPU number into the TOD programmable
+ * field unconditionally.
+ */
+ set_tod_programmable_field(raw_smp_processor_id());
+ /* Validate clock comparator register */
+ set_clock_comparator(S390_lowcore.clock_comparator);
if (!mci.ms || !mci.pm || !mci.ia)
kill_task = 1;
return kill_task;
}
-NOKPROBE_SYMBOL(s390_check_registers);
+NOKPROBE_SYMBOL(s390_validate_registers);
/*
* Backup the guest's machine check info to its description block
@@ -353,11 +398,6 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
mci.val = S390_lowcore.mcck_interruption_code;
mcck = this_cpu_ptr(&cpu_mcck);
- if (mci.sd) {
- /* System damage -> stopping machine */
- s390_handle_damage();
- }
-
/*
* Reinject the instruction processing damages' machine checks
* including Delayed Access Exception into the guest
@@ -398,7 +438,7 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
s390_handle_damage();
}
}
- if (s390_check_registers(mci, user_mode(regs))) {
+ if (s390_validate_registers(mci, user_mode(regs))) {
/*
* Couldn't restore all register contents for the
* user space process -> mark task for termination.
@@ -428,21 +468,6 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
mcck_pending = 1;
}
- /*
- * Reinject storage related machine checks into the guest if they
- * happen when the guest is running.
- */
- if (!test_cpu_flag(CIF_MCCK_GUEST)) {
- if (mci.se)
- /* Storage error uncorrected */
- s390_handle_damage();
- if (mci.ke)
- /* Storage key-error uncorrected */
- s390_handle_damage();
- if (mci.ds && mci.fa)
- /* Storage degradation */
- s390_handle_damage();
- }
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1b7a0525fbed..975a00c8c564 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,8 +2,9 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2019
+ * Copyright IBM Corp. 2012, 2021
* Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ * Thomas Richter <tmricht@linux.ibm.com>
*/
#define KMSG_COMPONENT "cpum_cf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
@@ -14,7 +15,223 @@
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/miscdevice.h>
+
#include <asm/cpu_mcf.h>
+#include <asm/hwctrset.h>
+#include <asm/debug.h>
+
+static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
+static debug_info_t *cf_dbg;
+
+#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
+ /* interval in seconds */
+
+/* Counter sets are stored as data stream in a page sized memory buffer and
+ * exported to user space via raw data attached to the event sample data.
+ * Each counter set starts with an eight byte header consisting of:
+ * - a two byte eye catcher (0xfeef)
+ * - a one byte counter set number
+ * - a two byte counter set size (indicates the number of counters in this set)
+ * - a three byte reserved value (must be zero) to make the header the same
+ * size as a counter value.
+ * All counter values are eight byte in size.
+ *
+ * All counter sets are followed by a 64 byte trailer.
+ * The trailer consists of a:
+ * - flag field indicating valid fields when corresponding bit set
+ * - the counter facility first and second version number
+ * - the CPU speed if nonzero
+ * - the time stamp the counter sets have been collected
+ * - the time of day (TOD) base value
+ * - the machine type.
+ *
+ * The counter sets are saved when the process is prepared to be executed on a
+ * CPU and saved again when the process is going to be removed from a CPU.
+ * The difference of both counter sets are calculated and stored in the event
+ * sample data area.
+ */
+struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int set:16; /* 16-31 Counter set identifier */
+ unsigned int ctr:16; /* 32-47 Number of stored counters */
+ unsigned int res1:16; /* 48-63 Reserved */
+};
+
+struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
+ /* 0 - 7 */
+ union {
+ struct {
+ unsigned int clock_base:1; /* TOD clock base set */
+ unsigned int speed:1; /* CPU speed set */
+ /* Measurement alerts */
+ unsigned int mtda:1; /* Loss of MT ctr. data alert */
+ unsigned int caca:1; /* Counter auth. change alert */
+ unsigned int lcda:1; /* Loss of counter data alert */
+ };
+ unsigned long flags; /* 0-63 All indicators */
+ };
+ /* 8 - 15 */
+ unsigned int cfvn:16; /* 64-79 Ctr First Version */
+ unsigned int csvn:16; /* 80-95 Ctr Second Version */
+ unsigned int cpu_speed:32; /* 96-127 CPU speed */
+ /* 16 - 23 */
+ unsigned long timestamp; /* 128-191 Timestamp (TOD) */
+ /* 24 - 55 */
+ union {
+ struct {
+ unsigned long progusage1;
+ unsigned long progusage2;
+ unsigned long progusage3;
+ unsigned long tod_base;
+ };
+ unsigned long progusage[4];
+ };
+ /* 56 - 63 */
+ unsigned int mach_type:16; /* Machine type */
+ unsigned int res1:16; /* Reserved */
+ unsigned int res2:32; /* Reserved */
+};
+
+/* Create the trailer data at the end of a page. */
+static void cfdiag_trailer(struct cf_trailer_entry *te)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpuid cpuid;
+
+ te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */
+ te->csvn = cpuhw->info.csvn;
+
+ get_cpu_id(&cpuid); /* Machine type */
+ te->mach_type = cpuid.machine;
+ te->cpu_speed = cfdiag_cpu_speed;
+ if (te->cpu_speed)
+ te->speed = 1;
+ te->clock_base = 1; /* Save clock base */
+ te->tod_base = tod_clock_base.tod;
+ te->timestamp = get_tod_clock_fast();
+}
+
+/* Read a counter set. The counter set number determines the counter set and
+ * the CPUM-CF first and second version number determine the number of
+ * available counters in each counter set.
+ * Each counter set starts with header containing the counter set number and
+ * the number of eight byte counters.
+ *
+ * The functions returns the number of bytes occupied by this counter set
+ * including the header.
+ * If there is no counter in the counter set, this counter set is useless and
+ * zero is returned on this case.
+ *
+ * Note that the counter sets may not be enabled or active and the stcctm
+ * instruction might return error 3. Depending on error_ok value this is ok,
+ * for example when called from cpumf_pmu_start() call back function.
+ */
+static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
+ size_t room, bool error_ok)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ size_t ctrset_size, need = 0;
+ int rc = 3; /* Assume write failure */
+
+ ctrdata->def = CF_DIAG_CTRSET_DEF;
+ ctrdata->set = ctrset;
+ ctrdata->res1 = 0;
+ ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
+
+ if (ctrset_size) { /* Save data */
+ need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
+ if (need <= room) {
+ rc = ctr_stcctm(ctrset, ctrset_size,
+ (u64 *)(ctrdata + 1));
+ }
+ if (rc != 3 || error_ok)
+ ctrdata->ctr = ctrset_size;
+ else
+ need = 0;
+ }
+
+ debug_sprintf_event(cf_dbg, 3,
+ "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
+ " need %zd rc %d\n", __func__, ctrset, ctrset_size,
+ cpuhw->info.cfvn, cpuhw->info.csvn, need, rc);
+ return need;
+}
+
+/* Read out all counter sets and save them in the provided data buffer.
+ * The last 64 byte host an artificial trailer entry.
+ */
+static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
+ bool error_ok)
+{
+ struct cf_trailer_entry *trailer;
+ size_t offset = 0, done;
+ int i;
+
+ memset(data, 0, sz);
+ sz -= sizeof(*trailer); /* Always room for trailer */
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ struct cf_ctrset_entry *ctrdata = data + offset;
+
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue; /* Counter set not authorized */
+
+ done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
+ offset += done;
+ }
+ trailer = data + offset;
+ cfdiag_trailer(trailer);
+ return offset + sizeof(*trailer);
+}
+
+/* Calculate the difference for each counter in a counter set. */
+static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
+{
+ for (; --counters >= 0; ++pstart, ++pstop)
+ if (*pstop >= *pstart)
+ *pstop -= *pstart;
+ else
+ *pstop = *pstart - *pstop + 1;
+}
+
+/* Scan the counter sets and calculate the difference of each counter
+ * in each set. The result is the increment of each counter during the
+ * period the counter set has been activated.
+ *
+ * Return true on success.
+ */
+static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
+{
+ struct cf_trailer_entry *trailer_start, *trailer_stop;
+ struct cf_ctrset_entry *ctrstart, *ctrstop;
+ size_t offset = 0;
+
+ auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
+ do {
+ ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
+ ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+
+ if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
+ pr_err_once("cpum_cf_diag counter set compare error "
+ "in set %i\n", ctrstart->set);
+ return 0;
+ }
+ auth &= ~cpumf_ctr_ctl[ctrstart->set];
+ if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
+ cfdiag_diffctrset((u64 *)(ctrstart + 1),
+ (u64 *)(ctrstop + 1), ctrstart->ctr);
+ offset += ctrstart->ctr * sizeof(u64) +
+ sizeof(*ctrstart);
+ }
+ } while (ctrstart->def && auth);
+
+ /* Save time_stamp from start of event in stop's trailer */
+ trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
+ trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
+ trailer_stop->progusage[0] = trailer_start->timestamp;
+
+ return 1;
+}
static enum cpumf_ctr_set get_counter_set(u64 event)
{
@@ -34,7 +251,8 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
return set;
}
-static int validate_ctr_version(const struct hw_perf_event *hwc)
+static int validate_ctr_version(const struct hw_perf_event *hwc,
+ enum cpumf_ctr_set set)
{
struct cpu_cf_events *cpuhw;
int err = 0;
@@ -43,7 +261,7 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
cpuhw = &get_cpu_var(cpu_cf_events);
/* check required version for counter sets */
- switch (hwc->config_base) {
+ switch (set) {
case CPUMF_CTR_SET_BASIC:
case CPUMF_CTR_SET_USER:
if (cpuhw->info.cfvn < 1)
@@ -86,6 +304,8 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
(cpuhw->info.act_ctl & mtdiag_ctl)))
err = -EOPNOTSUPP;
break;
+ case CPUMF_CTR_SET_MAX:
+ err = -EOPNOTSUPP;
}
put_cpu_var(cpu_cf_events);
@@ -95,7 +315,6 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
static int validate_ctr_auth(const struct hw_perf_event *hwc)
{
struct cpu_cf_events *cpuhw;
- u64 ctrs_state;
int err = 0;
cpuhw = &get_cpu_var(cpu_cf_events);
@@ -105,8 +324,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc)
* return with -ENOENT in order to fall back to other
* PMUs that might suffice the event request.
*/
- ctrs_state = cpumf_ctr_ctl[hwc->config_base];
- if (!(ctrs_state & cpuhw->info.auth_ctl))
+ if (!(hwc->config_base & cpuhw->info.auth_ctl))
err = -ENOENT;
put_cpu_var(cpu_cf_events);
@@ -126,7 +344,7 @@ static void cpumf_pmu_enable(struct pmu *pmu)
if (cpuhw->flags & PMU_F_ENABLED)
return;
- err = lcctl(cpuhw->state);
+ err = lcctl(cpuhw->state | cpuhw->dev_state);
if (err) {
pr_err("Enabling the performance measuring unit "
"failed with rc=%x\n", err);
@@ -151,6 +369,7 @@ static void cpumf_pmu_disable(struct pmu *pmu)
return;
inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ inactive |= cpuhw->dev_state;
err = lcctl(inactive);
if (err) {
pr_err("Disabling the performance measuring unit "
@@ -199,6 +418,14 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
+static void cpumf_hw_inuse(void)
+{
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_inc_return(&num_events) == 1)
+ __kernel_cpumcf_begin();
+ mutex_unlock(&pmc_reserve_mutex);
+}
+
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
@@ -258,11 +485,11 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
/*
* Use the hardware perf event structure to store the
* counter number in the 'config' member and the counter
- * set number in the 'config_base'. The counter set number
- * is then later used to enable/disable the counter(s).
+ * set number in the 'config_base' as bit mask.
+ * It is later used to enable/disable the counter(s).
*/
hwc->config = ev;
- hwc->config_base = set;
+ hwc->config_base = cpumf_ctr_ctl[set];
break;
case CPUMF_CTR_SET_MAX:
/* The counter could not be associated to a counter set */
@@ -270,22 +497,13 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
}
/* Initialize for using the CPU-measurement counter facility */
- if (!atomic_inc_not_zero(&num_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&num_events) == 0 && __kernel_cpumcf_begin())
- err = -EBUSY;
- else
- atomic_inc(&num_events);
- mutex_unlock(&pmc_reserve_mutex);
- }
- if (err)
- return err;
+ cpumf_hw_inuse();
event->destroy = hw_perf_event_destroy;
/* Finally, validate version and authorization of the counter set */
err = validate_ctr_auth(hwc);
if (!err)
- err = validate_ctr_version(hwc);
+ err = validate_ctr_version(hwc, set);
return err;
}
@@ -361,6 +579,7 @@ static void cpumf_pmu_start(struct perf_event *event, int flags)
{
struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
struct hw_perf_event *hwc = &event->hw;
+ int i;
if (!(hwc->state & PERF_HES_STOPPED))
return;
@@ -376,29 +595,92 @@ static void cpumf_pmu_start(struct perf_event *event, int flags)
* needs to be synchronized. At this point, the counter set can be in
* the inactive or disabled state.
*/
- hw_perf_event_reset(event);
+ if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
+ cpuhw->usedss = cfdiag_getctr(cpuhw->start,
+ sizeof(cpuhw->start),
+ hwc->config_base, true);
+ } else {
+ hw_perf_event_reset(event);
+ }
- /* increment refcount for this counter set */
- atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+ /* Increment refcount for counter sets */
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
+ if ((hwc->config_base & cpumf_ctr_ctl[i]))
+ atomic_inc(&cpuhw->ctr_set[i]);
+}
+
+/* Create perf event sample with the counter sets as raw data. The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int cfdiag_push_sample(struct perf_event *event,
+ struct cpu_cf_events *cpuhw)
+{
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ int overflow;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+
+ if (event->attr.sample_type & PERF_SAMPLE_CPU)
+ data.cpu_entry.cpu = event->cpu;
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = cpuhw->usedss;
+ raw.frag.data = cpuhw->stop;
+ raw.size = raw.frag.size;
+ data.raw = &raw;
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ debug_sprintf_event(cf_dbg, 3,
+ "%s event %#llx sample_type %#llx raw %d ov %d\n",
+ __func__, event->hw.config,
+ event->attr.sample_type, raw.size, overflow);
+ if (overflow)
+ event->pmu->stop(event, 0);
+
+ perf_event_update_userpage(event);
+ return overflow;
}
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
struct hw_perf_event *hwc = &event->hw;
+ int i;
if (!(hwc->state & PERF_HES_STOPPED)) {
/* Decrement reference count for this counter set and if this
* is the last used counter in the set, clear activation
* control and set the counter set state to inactive.
*/
- if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
- ctr_set_stop(&cpuhw->state, hwc->config_base);
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (!(hwc->config_base & cpumf_ctr_ctl[i]))
+ continue;
+ if (!atomic_dec_return(&cpuhw->ctr_set[i]))
+ ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]);
+ }
hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- hw_perf_event_update(event);
+ if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
+ local64_inc(&event->count);
+ cpuhw->usedss = cfdiag_getctr(cpuhw->stop,
+ sizeof(cpuhw->stop),
+ event->hw.config_base,
+ false);
+ if (cfdiag_diffctr(cpuhw, event->hw.config_base))
+ cfdiag_push_sample(event, cpuhw);
+ } else
+ hw_perf_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
@@ -419,6 +701,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ int i;
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -430,8 +713,9 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
* clear enable control and resets all counters in a set. Therefore,
* cpumf_pmu_start() always has to reenable a counter set.
*/
- if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
- ctr_set_disable(&cpuhw->state, event->hw.config_base);
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
+ if (!atomic_read(&cpuhw->ctr_set[i]))
+ ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]);
}
/* Performance monitoring unit for s390x */
@@ -448,6 +732,7 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
+static int cfset_init(void);
static int __init cpumf_pmu_init(void)
{
int rc;
@@ -455,10 +740,689 @@ static int __init cpumf_pmu_init(void)
if (!kernel_cpumcf_avail())
return -ENODEV;
+ /* Setup s390dbf facility */
+ cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
+ if (!cf_dbg) {
+ pr_err("Registration of s390dbf(cpum_cf) failed\n");
+ return -ENOMEM;
+ };
+ debug_register_view(cf_dbg, &debug_sprintf_view);
+
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
- if (rc)
+ if (rc) {
+ debug_unregister_view(cf_dbg, &debug_sprintf_view);
+ debug_unregister(cf_dbg);
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ } else if (stccm_avail()) { /* Setup counter set device */
+ cfset_init();
+ }
+ return rc;
+}
+
+/* Support for the CPU Measurement Facility counter set extraction using
+ * device /dev/hwctr. This allows user space programs to extract complete
+ * counter set via normal file operations.
+ */
+
+static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Excl. access */
+static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
+struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
+ unsigned int sets; /* Counter set bit mask */
+ atomic_t cpus_ack; /* # CPUs successfully executed func */
+};
+
+static struct cfset_request { /* CPUs and counter set bit mask */
+ unsigned long ctrset; /* Bit mask of counter set to read */
+ cpumask_t mask; /* CPU mask to read from */
+} cfset_request;
+
+static void cfset_ctrset_clear(void)
+{
+ cpumask_clear(&cfset_request.mask);
+ cfset_request.ctrset = 0;
+}
+
+/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
+ * path is currently used.
+ * The cpu_cf_events::dev_state is used to denote counter sets in use by this
+ * interface. It is always or'ed in. If this interface is not active, its
+ * value is zero and no additional counter sets will be included.
+ *
+ * The cpu_cf_events::state is used by the perf_event_open SVC and remains
+ * unchanged.
+ *
+ * perf_pmu_enable() and perf_pmu_enable() and its call backs
+ * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
+ * performance measurement subsystem to enable per process
+ * CPU Measurement counter facility.
+ * The XXX_enable() and XXX_disable functions are used to turn off
+ * x86 performance monitoring interrupt (PMI) during scheduling.
+ * s390 uses these calls to temporarily stop and resume the active CPU
+ * counters sets during scheduling.
+ *
+ * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
+ * device access. The perf_event_open() SVC interface makes a lot of effort
+ * to only run the counters while the calling process is actively scheduled
+ * to run.
+ * When /dev/hwctr interface is also used at the same time, the counter sets
+ * will keep running, even when the process is scheduled off a CPU.
+ * However this is not a problem and does not lead to wrong counter values
+ * for the perf_event_open() SVC. The current counter value will be recorded
+ * during schedule-in. At schedule-out time the current counter value is
+ * extracted again and the delta is calculated and added to the event.
+ */
+/* Stop all counter sets via ioctl interface */
+static void cfset_ioctl_off(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cfset_call_on_cpu_parm *p = parm;
+ int rc;
+
+ cpuhw->dev_state = 0;
+ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+ if ((p->sets & cpumf_ctr_ctl[rc]))
+ atomic_dec(&cpuhw->ctr_set[rc]);
+ rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
+ if (rc)
+ pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
+ cpuhw->state, S390_HWCTR_DEVICE, rc);
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
+ __func__, rc, cpuhw->state, cpuhw->dev_state);
+}
+
+/* Start counter sets on particular CPU */
+static void cfset_ioctl_on(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cfset_call_on_cpu_parm *p = parm;
+ int rc;
+
+ cpuhw->flags |= PMU_F_IN_USE;
+ ctr_set_enable(&cpuhw->dev_state, p->sets);
+ ctr_set_start(&cpuhw->dev_state, p->sets);
+ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+ if ((p->sets & cpumf_ctr_ctl[rc]))
+ atomic_inc(&cpuhw->ctr_set[rc]);
+ rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */
+ if (!rc)
+ atomic_inc(&p->cpus_ack);
+ else
+ pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
+ cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
+ debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
+ __func__, rc, cpuhw->state, cpuhw->dev_state);
+}
+
+static void cfset_release_cpu(void *p)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ int rc;
+
+ debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
+ __func__, cpuhw->state, cpuhw->dev_state);
+ rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
+ if (rc)
+ pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
+ cpuhw->state, S390_HWCTR_DEVICE, rc);
+ cpuhw->dev_state = 0;
+}
+
+/* Release function is also called when application gets terminated without
+ * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
+ */
+static int cfset_release(struct inode *inode, struct file *file)
+{
+ on_each_cpu(cfset_release_cpu, NULL, 1);
+ hw_perf_event_destroy(NULL);
+ cfset_ctrset_clear();
+ atomic_set(&cfset_opencnt, 0);
+ return 0;
+}
+
+static int cfset_open(struct inode *inode, struct file *file)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /* Only one user space program can open /dev/hwctr */
+ if (atomic_xchg(&cfset_opencnt, 1))
+ return -EBUSY;
+
+ cpumf_hw_inuse();
+ file->private_data = NULL;
+ /* nonseekable_open() never fails */
+ return nonseekable_open(inode, file);
+}
+
+static int cfset_all_stop(void)
+{
+ struct cfset_call_on_cpu_parm p = {
+ .sets = cfset_request.ctrset,
+ };
+ cpumask_var_t mask;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_and(mask, &cfset_request.mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
+ free_cpumask_var(mask);
+ return 0;
+}
+
+static int cfset_all_start(void)
+{
+ struct cfset_call_on_cpu_parm p = {
+ .sets = cfset_request.ctrset,
+ .cpus_ack = ATOMIC_INIT(0),
+ };
+ cpumask_var_t mask;
+ int rc = 0;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_and(mask, &cfset_request.mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
+ if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
+ on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
+ rc = -EIO;
+ debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__);
+ }
+ free_cpumask_var(mask);
+ return rc;
+}
+
+
+/* Return the maximum required space for all possible CPUs in case one
+ * CPU will be onlined during the START, READ, STOP cycles.
+ * To find out the size of the counter sets, any one CPU will do. They
+ * all have the same counter sets.
+ */
+static size_t cfset_needspace(unsigned int sets)
+{
+ struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
+ size_t bytes = 0;
+ int i;
+
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (!(sets & cpumf_ctr_ctl[i]))
+ continue;
+ bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+ sizeof(((struct s390_ctrset_setdata *)0)->set) +
+ sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
+ }
+ bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
+ (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
+ sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
+ put_cpu_ptr(&cpu_cf_events);
+ return bytes;
+}
+
+static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
+{
+ struct s390_ctrset_read __user *ctrset_read;
+ unsigned int cpu, cpus, rc;
+ void __user *uptr;
+
+ ctrset_read = (struct s390_ctrset_read __user *)arg;
+ uptr = ctrset_read->data;
+ for_each_cpu(cpu, mask) {
+ struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+ struct s390_ctrset_cpudata __user *ctrset_cpudata;
+
+ ctrset_cpudata = uptr;
+ rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
+ rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
+ rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
+ cpuhw->used);
+ if (rc)
+ return -EFAULT;
+ uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
+ cond_resched();
+ }
+ cpus = cpumask_weight(mask);
+ if (put_user(cpus, &ctrset_read->no_cpus))
+ return -EFAULT;
+ debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__,
+ uptr - (void __user *)ctrset_read->data);
+ return 0;
+}
+
+static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
+ int ctrset_size, size_t room)
+{
+ size_t need = 0;
+ int rc = -1;
+
+ need = sizeof(*p) + sizeof(u64) * ctrset_size;
+ if (need <= room) {
+ p->set = cpumf_ctr_ctl[ctrset];
+ p->no_cnts = ctrset_size;
+ rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
+ if (rc == 3) /* Nothing stored */
+ need = 0;
+ }
+ return need;
+}
+
+/* Read all counter sets. */
+static void cfset_cpu_read(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cfset_call_on_cpu_parm *p = parm;
+ int set, set_size;
+ size_t space;
+
+ /* No data saved yet */
+ cpuhw->used = 0;
+ cpuhw->sets = 0;
+ memset(cpuhw->data, 0, sizeof(cpuhw->data));
+
+ /* Scan the counter sets */
+ for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
+ struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
+ cpuhw->used;
+
+ if (!(p->sets & cpumf_ctr_ctl[set]))
+ continue; /* Counter set not in list */
+ set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
+ space = sizeof(cpuhw->data) - cpuhw->used;
+ space = cfset_cpuset_read(sp, set, set_size, space);
+ if (space) {
+ cpuhw->used += space;
+ cpuhw->sets += 1;
+ }
+ }
+ debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
+ cpuhw->sets, cpuhw->used);
+}
+
+static int cfset_all_read(unsigned long arg)
+{
+ struct cfset_call_on_cpu_parm p;
+ cpumask_var_t mask;
+ int rc;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ p.sets = cfset_request.ctrset;
+ cpumask_and(mask, &cfset_request.mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
+ rc = cfset_all_copy(arg, mask);
+ free_cpumask_var(mask);
return rc;
}
-subsys_initcall(cpumf_pmu_init);
+
+static long cfset_ioctl_read(unsigned long arg)
+{
+ struct s390_ctrset_read read;
+ int ret = 0;
+
+ if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
+ return -EFAULT;
+ ret = cfset_all_read(arg);
+ return ret;
+}
+
+static long cfset_ioctl_stop(void)
+{
+ int ret = ENXIO;
+
+ if (cfset_request.ctrset) {
+ ret = cfset_all_stop();
+ cfset_ctrset_clear();
+ }
+ return ret;
+}
+
+static long cfset_ioctl_start(unsigned long arg)
+{
+ struct s390_ctrset_start __user *ustart;
+ struct s390_ctrset_start start;
+ void __user *umask;
+ unsigned int len;
+ int ret = 0;
+ size_t need;
+
+ if (cfset_request.ctrset)
+ return -EBUSY;
+ ustart = (struct s390_ctrset_start __user *)arg;
+ if (copy_from_user(&start, ustart, sizeof(start)))
+ return -EFAULT;
+ if (start.version != S390_HWCTR_START_VERSION)
+ return -EINVAL;
+ if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
+ return -EINVAL; /* Invalid counter set */
+ if (!start.counter_sets)
+ return -EINVAL; /* No counter set at all? */
+ cpumask_clear(&cfset_request.mask);
+ len = min_t(u64, start.cpumask_len, cpumask_size());
+ umask = (void __user *)start.cpumask;
+ if (copy_from_user(&cfset_request.mask, umask, len))
+ return -EFAULT;
+ if (cpumask_empty(&cfset_request.mask))
+ return -EINVAL;
+ need = cfset_needspace(start.counter_sets);
+ if (put_user(need, &ustart->data_bytes))
+ ret = -EFAULT;
+ if (ret)
+ goto out;
+ cfset_request.ctrset = start.counter_sets;
+ ret = cfset_all_start();
+out:
+ if (ret)
+ cfset_ctrset_clear();
+ debug_sprintf_event(cf_dbg, 4, "%s sets %#lx need %ld ret %d\n",
+ __func__, cfset_request.ctrset, need, ret);
+ return ret;
+}
+
+/* Entry point to the /dev/hwctr device interface.
+ * The ioctl system call supports three subcommands:
+ * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
+ * counter set keeps running until explicitly stopped. Returns the number
+ * of bytes needed to store the counter values. If another S390_HWCTR_START
+ * ioctl subcommand is called without a previous S390_HWCTR_STOP stop
+ * command, -EBUSY is returned.
+ * S390_HWCTR_READ: Read the counter set values from specified CPU list given
+ * with the S390_HWCTR_START command.
+ * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
+ * previous S390_HWCTR_START subcommand.
+ */
+static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ get_online_cpus();
+ mutex_lock(&cfset_ctrset_mutex);
+ switch (cmd) {
+ case S390_HWCTR_START:
+ ret = cfset_ioctl_start(arg);
+ break;
+ case S390_HWCTR_STOP:
+ ret = cfset_ioctl_stop();
+ break;
+ case S390_HWCTR_READ:
+ ret = cfset_ioctl_read(arg);
+ break;
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ put_online_cpus();
+ return ret;
+}
+
+static const struct file_operations cfset_fops = {
+ .owner = THIS_MODULE,
+ .open = cfset_open,
+ .release = cfset_release,
+ .unlocked_ioctl = cfset_ioctl,
+ .compat_ioctl = cfset_ioctl,
+ .llseek = no_llseek
+};
+
+static struct miscdevice cfset_dev = {
+ .name = S390_HWCTR_DEVICE,
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &cfset_fops,
+};
+
+int cfset_online_cpu(unsigned int cpu)
+{
+ struct cfset_call_on_cpu_parm p;
+
+ mutex_lock(&cfset_ctrset_mutex);
+ if (cfset_request.ctrset) {
+ p.sets = cfset_request.ctrset;
+ cfset_ioctl_on(&p);
+ cpumask_set_cpu(cpu, &cfset_request.mask);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
+}
+
+int cfset_offline_cpu(unsigned int cpu)
+{
+ struct cfset_call_on_cpu_parm p;
+
+ mutex_lock(&cfset_ctrset_mutex);
+ if (cfset_request.ctrset) {
+ p.sets = cfset_request.ctrset;
+ cfset_ioctl_off(&p);
+ cpumask_clear_cpu(cpu, &cfset_request.mask);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
+}
+
+static void cfdiag_read(struct perf_event *event)
+{
+ debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__,
+ event->attr.config, local64_read(&event->count));
+}
+
+static int get_authctrsets(void)
+{
+ struct cpu_cf_events *cpuhw;
+ unsigned long auth = 0;
+ enum cpumf_ctr_set i;
+
+ cpuhw = &get_cpu_var(cpu_cf_events);
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+ auth |= cpumf_ctr_ctl[i];
+ }
+ put_cpu_var(cpu_cf_events);
+ return auth;
+}
+
+/* Setup the event. Test for authorized counter sets and only include counter
+ * sets which are authorized at the time of the setup. Including unauthorized
+ * counter sets result in specification exception (and panic).
+ */
+static int cfdiag_event_init2(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ int err = 0;
+
+ /* Set sample_period to indicate sampling */
+ event->hw.config = attr->config;
+ event->hw.sample_period = attr->sample_period;
+ local64_set(&event->hw.period_left, event->hw.sample_period);
+ local64_set(&event->count, 0);
+ event->hw.last_period = event->hw.sample_period;
+
+ /* Add all authorized counter sets to config_base. The
+ * the hardware init function is either called per-cpu or just once
+ * for all CPUS (event->cpu == -1). This depends on the whether
+ * counting is started for all CPUs or on a per workload base where
+ * the perf event moves from one CPU to another CPU.
+ * Checking the authorization on any CPU is fine as the hardware
+ * applies the same authorization settings to all CPUs.
+ */
+ event->hw.config_base = get_authctrsets();
+
+ /* No authorized counter sets, nothing to count/sample */
+ if (!event->hw.config_base)
+ err = -EINVAL;
+
+ debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n",
+ __func__, err, event->hw.config_base);
+ return err;
+}
+
+static int cfdiag_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ int err = -ENOENT;
+
+ if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
+ event->attr.type != event->pmu->type)
+ goto out;
+
+ /* Raw events are used to access counters directly,
+ * hence do not permit excludes.
+ * This event is useless without PERF_SAMPLE_RAW to return counter set
+ * values as raw data.
+ */
+ if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
+ !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* Initialize for using the CPU-measurement counter facility */
+ cpumf_hw_inuse();
+ event->destroy = hw_perf_event_destroy;
+
+ err = cfdiag_event_init2(event);
+ if (unlikely(err))
+ event->destroy(event);
+out:
+ return err;
+}
+
+/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used
+ * to collect the complete counter sets for a scheduled process. Target
+ * are complete counter sets attached as raw data to the artificial event.
+ * This results in complete counter sets available when a process is
+ * scheduled. Contains the delta of every counter while the process was
+ * running.
+ */
+CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
+
+static struct attribute *cfdiag_events_attr[] = {
+ CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cfdiag_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cfdiag_events_group = {
+ .name = "events",
+ .attrs = cfdiag_events_attr,
+};
+static struct attribute_group cfdiag_format_group = {
+ .name = "format",
+ .attrs = cfdiag_format_attr,
+};
+static const struct attribute_group *cfdiag_attr_groups[] = {
+ &cfdiag_events_group,
+ &cfdiag_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for event CF_DIAG. Since this event
+ * is also started and stopped via the perf_event_open() system call, use
+ * the same event enable/disable call back functions. They do not
+ * have a pointer to the perf_event strcture as first parameter.
+ *
+ * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
+ * Reuse them and distinguish the event (always first parameter) via
+ * 'config' member.
+ */
+static struct pmu cf_diag = {
+ .task_ctx_nr = perf_sw_context,
+ .event_init = cfdiag_event_init,
+ .pmu_enable = cpumf_pmu_enable,
+ .pmu_disable = cpumf_pmu_disable,
+ .add = cpumf_pmu_add,
+ .del = cpumf_pmu_del,
+ .start = cpumf_pmu_start,
+ .stop = cpumf_pmu_stop,
+ .read = cfdiag_read,
+
+ .attr_groups = cfdiag_attr_groups
+};
+
+/* Calculate memory needed to store all counter sets together with header and
+ * trailer data. This is independent of the counter set authorization which
+ * can vary depending on the configuration.
+ */
+static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
+{
+ size_t max_size = sizeof(struct cf_trailer_entry);
+ enum cpumf_ctr_set i;
+
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ size_t size = cpum_cf_ctrset_size(i, info);
+
+ if (size)
+ max_size += size * sizeof(u64) +
+ sizeof(struct cf_ctrset_entry);
+ }
+ return max_size;
+}
+
+/* Get the CPU speed, try sampling facility first and CPU attributes second. */
+static void cfdiag_get_cpu_speed(void)
+{
+ if (cpum_sf_avail()) { /* Sampling facility first */
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (!qsi(&si)) {
+ cfdiag_cpu_speed = si.cpu_speed;
+ return;
+ }
+ }
+
+ /* Fallback: CPU speed extract static part. Used in case
+ * CPU Measurement Sampling Facility is turned off.
+ */
+ if (test_facility(34)) {
+ unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+
+ if (mhz != -1UL)
+ cfdiag_cpu_speed = mhz & 0xffffffff;
+ }
+}
+
+static int cfset_init(void)
+{
+ struct cpumf_ctr_info info;
+ size_t need;
+ int rc;
+
+ if (qctri(&info))
+ return -ENODEV;
+
+ cfdiag_get_cpu_speed();
+ /* Make sure the counter set data fits into predefined buffer. */
+ need = cfdiag_maxsize(&info);
+ if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
+ pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
+ need);
+ return -ENOMEM;
+ }
+
+ rc = misc_register(&cfset_dev);
+ if (rc) {
+ pr_err("Registration of /dev/%s failed rc=%i\n",
+ cfset_dev.name, rc);
+ goto out;
+ }
+
+ rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
+ if (rc) {
+ misc_deregister(&cfset_dev);
+ pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
+ rc);
+ }
+out:
+ return rc;
+}
+
+device_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
index 2300fbaac556..30f0242de4a5 100644
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ b/arch/s390/kernel/perf_cpum_cf_common.c
@@ -29,7 +29,11 @@ DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = {
},
.alert = ATOMIC64_INIT(0),
.state = 0,
+ .dev_state = 0,
.flags = 0,
+ .used = 0,
+ .usedss = 0,
+ .sets = 0
};
/* Indicator whether the CPU-Measurement Counter Facility Support is ready */
static bool cpum_cf_initalized;
@@ -96,25 +100,10 @@ bool kernel_cpumcf_avail(void)
}
EXPORT_SYMBOL(kernel_cpumcf_avail);
-
-/* Reserve/release functions for sharing perf hardware */
-static DEFINE_SPINLOCK(cpumcf_owner_lock);
-static void *cpumcf_owner;
-
/* Initialize the CPU-measurement counter facility */
int __kernel_cpumcf_begin(void)
{
int flags = PMC_INIT;
- int err = 0;
-
- spin_lock(&cpumcf_owner_lock);
- if (cpumcf_owner)
- err = -EBUSY;
- else
- cpumcf_owner = __builtin_return_address(0);
- spin_unlock(&cpumcf_owner_lock);
- if (err)
- return err;
on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
@@ -144,10 +133,6 @@ void __kernel_cpumcf_end(void)
on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- spin_lock(&cpumcf_owner_lock);
- cpumcf_owner = NULL;
- spin_unlock(&cpumcf_owner_lock);
}
EXPORT_SYMBOL(__kernel_cpumcf_end);
@@ -161,11 +146,13 @@ static int cpum_cf_setup(unsigned int cpu, int flags)
static int cpum_cf_online_cpu(unsigned int cpu)
{
- return cpum_cf_setup(cpu, PMC_INIT);
+ cpum_cf_setup(cpu, PMC_INIT);
+ return cfset_online_cpu(cpu);
}
static int cpum_cf_offline_cpu(unsigned int cpu)
{
+ cfset_offline_cpu(cpu);
return cpum_cf_setup(cpu, PMC_RELEASE);
}
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
deleted file mode 100644
index 08c985c1097c..000000000000
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ /dev/null
@@ -1,1148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Performance event support for s390x - CPU-measurement Counter Sets
- *
- * Copyright IBM Corp. 2019, 2021
- * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
- * Thomas Richer <tmricht@linux.ibm.com>
- */
-#define KMSG_COMPONENT "cpum_cf_diag"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/processor.h>
-#include <linux/miscdevice.h>
-#include <linux/mutex.h>
-
-#include <asm/ctl_reg.h>
-#include <asm/irq.h>
-#include <asm/cpu_mcf.h>
-#include <asm/timex.h>
-#include <asm/debug.h>
-
-#include <asm/hwctrset.h>
-
-#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
- /* interval in seconds */
-static unsigned int cf_diag_cpu_speed;
-static debug_info_t *cf_diag_dbg;
-
-struct cf_diag_csd { /* Counter set data per CPU */
- size_t used; /* Bytes used in data/start */
- unsigned char start[PAGE_SIZE]; /* Counter set at event start */
- unsigned char data[PAGE_SIZE]; /* Counter set at event delete */
- unsigned int sets; /* # Counter set saved in data */
-};
-static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd);
-
-/* Counter sets are stored as data stream in a page sized memory buffer and
- * exported to user space via raw data attached to the event sample data.
- * Each counter set starts with an eight byte header consisting of:
- * - a two byte eye catcher (0xfeef)
- * - a one byte counter set number
- * - a two byte counter set size (indicates the number of counters in this set)
- * - a three byte reserved value (must be zero) to make the header the same
- * size as a counter value.
- * All counter values are eight byte in size.
- *
- * All counter sets are followed by a 64 byte trailer.
- * The trailer consists of a:
- * - flag field indicating valid fields when corresponding bit set
- * - the counter facility first and second version number
- * - the CPU speed if nonzero
- * - the time stamp the counter sets have been collected
- * - the time of day (TOD) base value
- * - the machine type.
- *
- * The counter sets are saved when the process is prepared to be executed on a
- * CPU and saved again when the process is going to be removed from a CPU.
- * The difference of both counter sets are calculated and stored in the event
- * sample data area.
- */
-
-struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
- unsigned int def:16; /* 0-15 Data Entry Format */
- unsigned int set:16; /* 16-31 Counter set identifier */
- unsigned int ctr:16; /* 32-47 Number of stored counters */
- unsigned int res1:16; /* 48-63 Reserved */
-};
-
-struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
- /* 0 - 7 */
- union {
- struct {
- unsigned int clock_base:1; /* TOD clock base set */
- unsigned int speed:1; /* CPU speed set */
- /* Measurement alerts */
- unsigned int mtda:1; /* Loss of MT ctr. data alert */
- unsigned int caca:1; /* Counter auth. change alert */
- unsigned int lcda:1; /* Loss of counter data alert */
- };
- unsigned long flags; /* 0-63 All indicators */
- };
- /* 8 - 15 */
- unsigned int cfvn:16; /* 64-79 Ctr First Version */
- unsigned int csvn:16; /* 80-95 Ctr Second Version */
- unsigned int cpu_speed:32; /* 96-127 CPU speed */
- /* 16 - 23 */
- unsigned long timestamp; /* 128-191 Timestamp (TOD) */
- /* 24 - 55 */
- union {
- struct {
- unsigned long progusage1;
- unsigned long progusage2;
- unsigned long progusage3;
- unsigned long tod_base;
- };
- unsigned long progusage[4];
- };
- /* 56 - 63 */
- unsigned int mach_type:16; /* Machine type */
- unsigned int res1:16; /* Reserved */
- unsigned int res2:32; /* Reserved */
-};
-
-/* Create the trailer data at the end of a page. */
-static void cf_diag_trailer(struct cf_trailer_entry *te)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- struct cpuid cpuid;
-
- te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */
- te->csvn = cpuhw->info.csvn;
-
- get_cpu_id(&cpuid); /* Machine type */
- te->mach_type = cpuid.machine;
- te->cpu_speed = cf_diag_cpu_speed;
- if (te->cpu_speed)
- te->speed = 1;
- te->clock_base = 1; /* Save clock base */
- te->tod_base = tod_clock_base.tod;
- te->timestamp = get_tod_clock_fast();
-}
-
-/*
- * Change the CPUMF state to active.
- * Enable and activate the CPU-counter sets according
- * to the per-cpu control state.
- */
-static void cf_diag_enable(struct pmu *pmu)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- int err;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s pmu %p cpu %d flags %#x state %#llx\n",
- __func__, pmu, smp_processor_id(), cpuhw->flags,
- cpuhw->state);
- if (cpuhw->flags & PMU_F_ENABLED)
- return;
-
- err = lcctl(cpuhw->state);
- if (err) {
- pr_err("Enabling the performance measuring unit "
- "failed with rc=%x\n", err);
- return;
- }
- cpuhw->flags |= PMU_F_ENABLED;
-}
-
-/*
- * Change the CPUMF state to inactive.
- * Disable and enable (inactive) the CPU-counter sets according
- * to the per-cpu control state.
- */
-static void cf_diag_disable(struct pmu *pmu)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- u64 inactive;
- int err;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s pmu %p cpu %d flags %#x state %#llx\n",
- __func__, pmu, smp_processor_id(), cpuhw->flags,
- cpuhw->state);
- if (!(cpuhw->flags & PMU_F_ENABLED))
- return;
-
- inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
- err = lcctl(inactive);
- if (err) {
- pr_err("Disabling the performance measuring unit "
- "failed with rc=%x\n", err);
- return;
- }
- cpuhw->flags &= ~PMU_F_ENABLED;
-}
-
-/* Number of perf events counting hardware events */
-static atomic_t cf_diag_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(cf_diag_reserve_mutex);
-
-/* Release the PMU if event is the last perf event */
-static void cf_diag_perf_event_destroy(struct perf_event *event)
-{
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d cf_diag_events %d\n",
- __func__, event, smp_processor_id(),
- atomic_read(&cf_diag_events));
- if (atomic_dec_return(&cf_diag_events) == 0)
- __kernel_cpumcf_end();
-}
-
-static int get_authctrsets(void)
-{
- struct cpu_cf_events *cpuhw;
- unsigned long auth = 0;
- enum cpumf_ctr_set i;
-
- cpuhw = &get_cpu_var(cpu_cf_events);
- for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
- auth |= cpumf_ctr_ctl[i];
- }
- put_cpu_var(cpu_cf_events);
- return auth;
-}
-
-/* Setup the event. Test for authorized counter sets and only include counter
- * sets which are authorized at the time of the setup. Including unauthorized
- * counter sets result in specification exception (and panic).
- */
-static int __hw_perf_event_init(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- int err = 0;
-
- debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__,
- event, event->cpu);
-
- event->hw.config = attr->config;
-
- /* Add all authorized counter sets to config_base. The
- * the hardware init function is either called per-cpu or just once
- * for all CPUS (event->cpu == -1). This depends on the whether
- * counting is started for all CPUs or on a per workload base where
- * the perf event moves from one CPU to another CPU.
- * Checking the authorization on any CPU is fine as the hardware
- * applies the same authorization settings to all CPUs.
- */
- event->hw.config_base = get_authctrsets();
-
- /* No authorized counter sets, nothing to count/sample */
- if (!event->hw.config_base) {
- err = -EINVAL;
- goto out;
- }
-
- /* Set sample_period to indicate sampling */
- event->hw.sample_period = attr->sample_period;
- local64_set(&event->hw.period_left, event->hw.sample_period);
- event->hw.last_period = event->hw.sample_period;
-out:
- debug_sprintf_event(cf_diag_dbg, 5, "%s err %d config_base %#lx\n",
- __func__, err, event->hw.config_base);
- return err;
-}
-
-/* Return 0 if the CPU-measurement counter facility is currently free
- * and an error otherwise.
- */
-static int cf_diag_perf_event_inuse(void)
-{
- int err = 0;
-
- if (!atomic_inc_not_zero(&cf_diag_events)) {
- mutex_lock(&cf_diag_reserve_mutex);
- if (atomic_read(&cf_diag_events) == 0 &&
- __kernel_cpumcf_begin())
- err = -EBUSY;
- else
- err = atomic_inc_return(&cf_diag_events);
- mutex_unlock(&cf_diag_reserve_mutex);
- }
- return err;
-}
-
-static int cf_diag_event_init(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- int err = -ENOENT;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d config %#llx type:%u "
- "sample_type %#llx cf_diag_events %d\n", __func__,
- event, event->cpu, attr->config, event->pmu->type,
- attr->sample_type, atomic_read(&cf_diag_events));
-
- if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
- event->attr.type != event->pmu->type)
- goto out;
-
- /* Raw events are used to access counters directly,
- * hence do not permit excludes.
- * This event is usesless without PERF_SAMPLE_RAW to return counter set
- * values as raw data.
- */
- if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
- !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
- err = -EOPNOTSUPP;
- goto out;
- }
-
- /* Initialize for using the CPU-measurement counter facility */
- err = cf_diag_perf_event_inuse();
- if (err < 0)
- goto out;
- event->destroy = cf_diag_perf_event_destroy;
-
- err = __hw_perf_event_init(event);
- if (unlikely(err))
- event->destroy(event);
-out:
- debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err);
- return err;
-}
-
-static void cf_diag_read(struct perf_event *event)
-{
- debug_sprintf_event(cf_diag_dbg, 5, "%s event %p\n", __func__, event);
-}
-
-/* Calculate memory needed to store all counter sets together with header and
- * trailer data. This is independend of the counter set authorization which
- * can vary depending on the configuration.
- */
-static size_t cf_diag_ctrset_maxsize(struct cpumf_ctr_info *info)
-{
- size_t max_size = sizeof(struct cf_trailer_entry);
- enum cpumf_ctr_set i;
-
- for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- size_t size = cpum_cf_ctrset_size(i, info);
-
- if (size)
- max_size += size * sizeof(u64) +
- sizeof(struct cf_ctrset_entry);
- }
- debug_sprintf_event(cf_diag_dbg, 5, "%s max_size %zu\n", __func__,
- max_size);
-
- return max_size;
-}
-
-/* Read a counter set. The counter set number determines which counter set and
- * the CPUM-CF first and second version number determine the number of
- * available counters in this counter set.
- * Each counter set starts with header containing the counter set number and
- * the number of 8 byte counters.
- *
- * The functions returns the number of bytes occupied by this counter set
- * including the header.
- * If there is no counter in the counter set, this counter set is useless and
- * zero is returned on this case.
- */
-static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
- size_t room)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- size_t ctrset_size, need = 0;
- int rc = 3; /* Assume write failure */
-
- ctrdata->def = CF_DIAG_CTRSET_DEF;
- ctrdata->set = ctrset;
- ctrdata->res1 = 0;
- ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
-
- if (ctrset_size) { /* Save data */
- need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
- if (need <= room)
- rc = ctr_stcctm(ctrset, ctrset_size,
- (u64 *)(ctrdata + 1));
- if (rc != 3)
- ctrdata->ctr = ctrset_size;
- else
- need = 0;
- }
-
- debug_sprintf_event(cf_diag_dbg, 6,
- "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
- " need %zd rc %d\n",
- __func__, ctrset, ctrset_size, cpuhw->info.cfvn,
- cpuhw->info.csvn, need, rc);
- return need;
-}
-
-/* Read out all counter sets and save them in the provided data buffer.
- * The last 64 byte host an artificial trailer entry.
- */
-static size_t cf_diag_getctr(void *data, size_t sz, unsigned long auth)
-{
- struct cf_trailer_entry *trailer;
- size_t offset = 0, done;
- int i;
-
- memset(data, 0, sz);
- sz -= sizeof(*trailer); /* Always room for trailer */
- for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- struct cf_ctrset_entry *ctrdata = data + offset;
-
- if (!(auth & cpumf_ctr_ctl[i]))
- continue; /* Counter set not authorized */
-
- done = cf_diag_getctrset(ctrdata, i, sz - offset);
- offset += done;
- debug_sprintf_event(cf_diag_dbg, 6,
- "%s ctrset %d offset %zu done %zu\n",
- __func__, i, offset, done);
- }
- trailer = data + offset;
- cf_diag_trailer(trailer);
- return offset + sizeof(*trailer);
-}
-
-/* Calculate the difference for each counter in a counter set. */
-static void cf_diag_diffctrset(u64 *pstart, u64 *pstop, int counters)
-{
- for (; --counters >= 0; ++pstart, ++pstop)
- if (*pstop >= *pstart)
- *pstop -= *pstart;
- else
- *pstop = *pstart - *pstop;
-}
-
-/* Scan the counter sets and calculate the difference of each counter
- * in each set. The result is the increment of each counter during the
- * period the counter set has been activated.
- *
- * Return true on success.
- */
-static int cf_diag_diffctr(struct cf_diag_csd *csd, unsigned long auth)
-{
- struct cf_trailer_entry *trailer_start, *trailer_stop;
- struct cf_ctrset_entry *ctrstart, *ctrstop;
- size_t offset = 0;
-
- auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
- do {
- ctrstart = (struct cf_ctrset_entry *)(csd->start + offset);
- ctrstop = (struct cf_ctrset_entry *)(csd->data + offset);
-
- if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
- pr_err("cpum_cf_diag counter set compare error "
- "in set %i\n", ctrstart->set);
- return 0;
- }
- auth &= ~cpumf_ctr_ctl[ctrstart->set];
- if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
- cf_diag_diffctrset((u64 *)(ctrstart + 1),
- (u64 *)(ctrstop + 1), ctrstart->ctr);
- offset += ctrstart->ctr * sizeof(u64) +
- sizeof(*ctrstart);
- }
- debug_sprintf_event(cf_diag_dbg, 6,
- "%s set %d ctr %d offset %zu auth %lx\n",
- __func__, ctrstart->set, ctrstart->ctr,
- offset, auth);
- } while (ctrstart->def && auth);
-
- /* Save time_stamp from start of event in stop's trailer */
- trailer_start = (struct cf_trailer_entry *)(csd->start + offset);
- trailer_stop = (struct cf_trailer_entry *)(csd->data + offset);
- trailer_stop->progusage[0] = trailer_start->timestamp;
-
- return 1;
-}
-
-/* Create perf event sample with the counter sets as raw data. The sample
- * is then pushed to the event subsystem and the function checks for
- * possible event overflows. If an event overflow occurs, the PMU is
- * stopped.
- *
- * Return non-zero if an event overflow occurred.
- */
-static int cf_diag_push_sample(struct perf_event *event,
- struct cf_diag_csd *csd)
-{
- struct perf_sample_data data;
- struct perf_raw_record raw;
- struct pt_regs regs;
- int overflow;
-
- /* Setup perf sample */
- perf_sample_data_init(&data, 0, event->hw.last_period);
- memset(&regs, 0, sizeof(regs));
- memset(&raw, 0, sizeof(raw));
-
- if (event->attr.sample_type & PERF_SAMPLE_CPU)
- data.cpu_entry.cpu = event->cpu;
- if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.frag.size = csd->used;
- raw.frag.data = csd->data;
- raw.size = csd->used;
- data.raw = &raw;
- }
-
- overflow = perf_event_overflow(event, &data, &regs);
- debug_sprintf_event(cf_diag_dbg, 6,
- "%s event %p cpu %d sample_type %#llx raw %d "
- "ov %d\n", __func__, event, event->cpu,
- event->attr.sample_type, raw.size, overflow);
- if (overflow)
- event->pmu->stop(event, 0);
-
- perf_event_update_userpage(event);
- return overflow;
-}
-
-static void cf_diag_start(struct perf_event *event, int flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd);
- struct hw_perf_event *hwc = &event->hw;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d flags %#x hwc-state %#x\n",
- __func__, event, event->cpu, flags, hwc->state);
- if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
- return;
-
- /* (Re-)enable and activate all counter sets */
- lcctl(0); /* Reset counter sets */
- hwc->state = 0;
- ctr_set_multiple_enable(&cpuhw->state, hwc->config_base);
- lcctl(cpuhw->state); /* Enable counter sets */
- csd->used = cf_diag_getctr(csd->start, sizeof(csd->start),
- event->hw.config_base);
- ctr_set_multiple_start(&cpuhw->state, hwc->config_base);
- /* Function cf_diag_enable() starts the counter sets. */
-}
-
-static void cf_diag_stop(struct perf_event *event, int flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd);
- struct hw_perf_event *hwc = &event->hw;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d flags %#x hwc-state %#x\n",
- __func__, event, event->cpu, flags, hwc->state);
-
- /* Deactivate all counter sets */
- ctr_set_multiple_stop(&cpuhw->state, hwc->config_base);
- local64_inc(&event->count);
- csd->used = cf_diag_getctr(csd->data, sizeof(csd->data),
- event->hw.config_base);
- if (cf_diag_diffctr(csd, event->hw.config_base))
- cf_diag_push_sample(event, csd);
- hwc->state |= PERF_HES_STOPPED;
-}
-
-static int cf_diag_add(struct perf_event *event, int flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- int err = 0;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d flags %#x cpuhw %p\n",
- __func__, event, event->cpu, flags, cpuhw);
-
- if (cpuhw->flags & PMU_F_IN_USE) {
- err = -EAGAIN;
- goto out;
- }
-
- event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- cpuhw->flags |= PMU_F_IN_USE;
- if (flags & PERF_EF_START)
- cf_diag_start(event, PERF_EF_RELOAD);
-out:
- debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err);
- return err;
-}
-
-static void cf_diag_del(struct perf_event *event, int flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d flags %#x\n",
- __func__, event, event->cpu, flags);
-
- cf_diag_stop(event, PERF_EF_UPDATE);
- ctr_set_multiple_stop(&cpuhw->state, event->hw.config_base);
- ctr_set_multiple_disable(&cpuhw->state, event->hw.config_base);
- cpuhw->flags &= ~PMU_F_IN_USE;
-}
-
-/* Default counter set events and format attribute groups */
-
-CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
-
-static struct attribute *cf_diag_events_attr[] = {
- CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
- NULL,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-63");
-
-static struct attribute *cf_diag_format_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group cf_diag_events_group = {
- .name = "events",
- .attrs = cf_diag_events_attr,
-};
-static struct attribute_group cf_diag_format_group = {
- .name = "format",
- .attrs = cf_diag_format_attr,
-};
-static const struct attribute_group *cf_diag_attr_groups[] = {
- &cf_diag_events_group,
- &cf_diag_format_group,
- NULL,
-};
-
-/* Performance monitoring unit for s390x */
-static struct pmu cf_diag = {
- .task_ctx_nr = perf_sw_context,
- .pmu_enable = cf_diag_enable,
- .pmu_disable = cf_diag_disable,
- .event_init = cf_diag_event_init,
- .add = cf_diag_add,
- .del = cf_diag_del,
- .start = cf_diag_start,
- .stop = cf_diag_stop,
- .read = cf_diag_read,
-
- .attr_groups = cf_diag_attr_groups
-};
-
-/* Get the CPU speed, try sampling facility first and CPU attributes second. */
-static void cf_diag_get_cpu_speed(void)
-{
- if (cpum_sf_avail()) { /* Sampling facility first */
- struct hws_qsi_info_block si;
-
- memset(&si, 0, sizeof(si));
- if (!qsi(&si)) {
- cf_diag_cpu_speed = si.cpu_speed;
- return;
- }
- }
-
- if (test_facility(34)) { /* CPU speed extract static part */
- unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
-
- if (mhz != -1UL)
- cf_diag_cpu_speed = mhz & 0xffffffff;
- }
-}
-
-/* Code to create device and file I/O operations */
-static atomic_t ctrset_opencnt = ATOMIC_INIT(0); /* Excl. access */
-
-static int cf_diag_open(struct inode *inode, struct file *file)
-{
- int err = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (atomic_xchg(&ctrset_opencnt, 1))
- return -EBUSY;
-
- /* Avoid concurrent access with perf_event_open() system call */
- mutex_lock(&cf_diag_reserve_mutex);
- if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin())
- err = -EBUSY;
- mutex_unlock(&cf_diag_reserve_mutex);
- if (err) {
- atomic_set(&ctrset_opencnt, 0);
- return err;
- }
- file->private_data = NULL;
- debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
- /* nonseekable_open() never fails */
- return nonseekable_open(inode, file);
-}
-
-/* Variables for ioctl() interface support */
-static DEFINE_MUTEX(cf_diag_ctrset_mutex);
-static struct cf_diag_ctrset {
- unsigned long ctrset; /* Bit mask of counter set to read */
- cpumask_t mask; /* CPU mask to read from */
-} cf_diag_ctrset;
-
-static void cf_diag_ctrset_clear(void)
-{
- cpumask_clear(&cf_diag_ctrset.mask);
- cf_diag_ctrset.ctrset = 0;
-}
-
-static void cf_diag_release_cpu(void *p)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__,
- smp_processor_id());
- lcctl(0); /* Reset counter sets */
- cpuhw->state = 0; /* Save state in CPU hardware state */
-}
-
-/* Release function is also called when application gets terminated without
- * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
- * Since only one application is allowed to open the device, simple stop all
- * CPU counter sets.
- */
-static int cf_diag_release(struct inode *inode, struct file *file)
-{
- on_each_cpu(cf_diag_release_cpu, NULL, 1);
- cf_diag_ctrset_clear();
- atomic_set(&ctrset_opencnt, 0);
- __kernel_cpumcf_end();
- debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
- return 0;
-}
-
-struct cf_diag_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
- unsigned int sets; /* Counter set bit mask */
- atomic_t cpus_ack; /* # CPUs successfully executed func */
-};
-
-static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask)
-{
- struct s390_ctrset_read __user *ctrset_read;
- unsigned int cpu, cpus, rc;
- void __user *uptr;
-
- ctrset_read = (struct s390_ctrset_read __user *)arg;
- uptr = ctrset_read->data;
- for_each_cpu(cpu, mask) {
- struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu);
- struct s390_ctrset_cpudata __user *ctrset_cpudata;
-
- ctrset_cpudata = uptr;
- debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n",
- __func__, cpu, csd->used);
- rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
- rc |= put_user(csd->sets, &ctrset_cpudata->no_sets);
- rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used);
- if (rc)
- return -EFAULT;
- uptr += sizeof(struct s390_ctrset_cpudata) + csd->used;
- cond_resched();
- }
- cpus = cpumask_weight(mask);
- if (put_user(cpus, &ctrset_read->no_cpus))
- return -EFAULT;
- debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n",
- __func__, uptr - (void __user *)ctrset_read->data);
- return 0;
-}
-
-static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
- int ctrset_size, size_t room)
-{
- size_t need = 0;
- int rc = -1;
-
- need = sizeof(*p) + sizeof(u64) * ctrset_size;
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s room %zd need %zd set %#x set_size %d\n",
- __func__, room, need, ctrset, ctrset_size);
- if (need <= room) {
- p->set = cpumf_ctr_ctl[ctrset];
- p->no_cnts = ctrset_size;
- rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
- if (rc == 3) /* Nothing stored */
- need = 0;
- }
- debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__,
- need, rc);
- return need;
-}
-
-/* Read all counter sets. Since the perf_event_open() system call with
- * event cpum_cf_diag/.../ is blocked when this interface is active, reuse
- * the perf_event_open() data buffer to store the counter sets.
- */
-static void cf_diag_cpu_read(void *parm)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd);
- struct cf_diag_call_on_cpu_parm *p = parm;
- int set, set_size;
- size_t space;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s new %#x flags %#x state %#llx\n",
- __func__, p->sets, cpuhw->flags,
- cpuhw->state);
- /* No data saved yet */
- csd->used = 0;
- csd->sets = 0;
- memset(csd->data, 0, sizeof(csd->data));
-
- /* Scan the counter sets */
- for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
- struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used;
-
- if (!(p->sets & cpumf_ctr_ctl[set]))
- continue; /* Counter set not in list */
- set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
- space = sizeof(csd->data) - csd->used;
- space = cf_diag_cpuset_read(sp, set, set_size, space);
- if (space) {
- csd->used += space;
- csd->sets += 1;
- }
- debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n",
- __func__, sp, space);
- }
- debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__,
- csd->sets, csd->used);
-}
-
-static int cf_diag_all_read(unsigned long arg)
-{
- struct cf_diag_call_on_cpu_parm p;
- cpumask_var_t mask;
- int rc;
-
- debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
-
- p.sets = cf_diag_ctrset.ctrset;
- cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
- on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1);
- rc = cf_diag_all_copy(arg, mask);
- free_cpumask_var(mask);
- debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc);
- return rc;
-}
-
-/* Stop all counter sets via ioctl interface */
-static void cf_diag_ioctl_off(void *parm)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- struct cf_diag_call_on_cpu_parm *p = parm;
- int rc;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s new %#x flags %#x state %#llx\n",
- __func__, p->sets, cpuhw->flags,
- cpuhw->state);
-
- ctr_set_multiple_disable(&cpuhw->state, p->sets);
- ctr_set_multiple_stop(&cpuhw->state, p->sets);
- rc = lcctl(cpuhw->state); /* Stop counter sets */
- if (!cpuhw->state)
- cpuhw->flags &= ~PMU_F_IN_USE;
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s rc %d flags %#x state %#llx\n", __func__,
- rc, cpuhw->flags, cpuhw->state);
-}
-
-/* Start counter sets on particular CPU */
-static void cf_diag_ioctl_on(void *parm)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- struct cf_diag_call_on_cpu_parm *p = parm;
- int rc;
-
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s new %#x flags %#x state %#llx\n",
- __func__, p->sets, cpuhw->flags,
- cpuhw->state);
-
- if (!(cpuhw->flags & PMU_F_IN_USE))
- cpuhw->state = 0;
- cpuhw->flags |= PMU_F_IN_USE;
- rc = lcctl(cpuhw->state); /* Reset unused counter sets */
- ctr_set_multiple_enable(&cpuhw->state, p->sets);
- ctr_set_multiple_start(&cpuhw->state, p->sets);
- rc |= lcctl(cpuhw->state); /* Start counter sets */
- if (!rc)
- atomic_inc(&p->cpus_ack);
- debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n",
- __func__, rc, cpuhw->state);
-}
-
-static int cf_diag_all_stop(void)
-{
- struct cf_diag_call_on_cpu_parm p = {
- .sets = cf_diag_ctrset.ctrset,
- };
- cpumask_var_t mask;
-
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
- cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
- on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
- free_cpumask_var(mask);
- return 0;
-}
-
-static int cf_diag_all_start(void)
-{
- struct cf_diag_call_on_cpu_parm p = {
- .sets = cf_diag_ctrset.ctrset,
- .cpus_ack = ATOMIC_INIT(0),
- };
- cpumask_var_t mask;
- int rc = 0;
-
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
- cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
- on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1);
- if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
- on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
- rc = -EIO;
- }
- free_cpumask_var(mask);
- return rc;
-}
-
-/* Return the maximum required space for all possible CPUs in case one
- * CPU will be onlined during the START, READ, STOP cycles.
- * To find out the size of the counter sets, any one CPU will do. They
- * all have the same counter sets.
- */
-static size_t cf_diag_needspace(unsigned int sets)
-{
- struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
- size_t bytes = 0;
- int i;
-
- for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- if (!(sets & cpumf_ctr_ctl[i]))
- continue;
- bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
- sizeof(((struct s390_ctrset_setdata *)0)->set) +
- sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
- }
- bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
- (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
- sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
- debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__,
- bytes);
- put_cpu_ptr(&cpu_cf_events);
- return bytes;
-}
-
-static long cf_diag_ioctl_read(unsigned long arg)
-{
- struct s390_ctrset_read read;
- int ret = 0;
-
- debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
- if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
- return -EFAULT;
- ret = cf_diag_all_read(arg);
- debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
- return ret;
-}
-
-static long cf_diag_ioctl_stop(void)
-{
- int ret;
-
- debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
- ret = cf_diag_all_stop();
- cf_diag_ctrset_clear();
- debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
- return ret;
-}
-
-static long cf_diag_ioctl_start(unsigned long arg)
-{
- struct s390_ctrset_start __user *ustart;
- struct s390_ctrset_start start;
- void __user *umask;
- unsigned int len;
- int ret = 0;
- size_t need;
-
- if (cf_diag_ctrset.ctrset)
- return -EBUSY;
- ustart = (struct s390_ctrset_start __user *)arg;
- if (copy_from_user(&start, ustart, sizeof(start)))
- return -EFAULT;
- if (start.version != S390_HWCTR_START_VERSION)
- return -EINVAL;
- if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
- cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
- cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
- cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
- cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
- return -EINVAL; /* Invalid counter set */
- if (!start.counter_sets)
- return -EINVAL; /* No counter set at all? */
- cpumask_clear(&cf_diag_ctrset.mask);
- len = min_t(u64, start.cpumask_len, cpumask_size());
- umask = (void __user *)start.cpumask;
- if (copy_from_user(&cf_diag_ctrset.mask, umask, len))
- return -EFAULT;
- if (cpumask_empty(&cf_diag_ctrset.mask))
- return -EINVAL;
- need = cf_diag_needspace(start.counter_sets);
- if (put_user(need, &ustart->data_bytes))
- ret = -EFAULT;
- if (ret)
- goto out;
- cf_diag_ctrset.ctrset = start.counter_sets;
- ret = cf_diag_all_start();
-out:
- if (ret)
- cf_diag_ctrset_clear();
- debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n",
- __func__, cf_diag_ctrset.ctrset, need, ret);
- return ret;
-}
-
-static long cf_diag_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- int ret;
-
- debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__,
- cmd, arg);
- get_online_cpus();
- mutex_lock(&cf_diag_ctrset_mutex);
- switch (cmd) {
- case S390_HWCTR_START:
- ret = cf_diag_ioctl_start(arg);
- break;
- case S390_HWCTR_STOP:
- ret = cf_diag_ioctl_stop();
- break;
- case S390_HWCTR_READ:
- ret = cf_diag_ioctl_read(arg);
- break;
- default:
- ret = -ENOTTY;
- break;
- }
- mutex_unlock(&cf_diag_ctrset_mutex);
- put_online_cpus();
- debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret);
- return ret;
-}
-
-static const struct file_operations cf_diag_fops = {
- .owner = THIS_MODULE,
- .open = cf_diag_open,
- .release = cf_diag_release,
- .unlocked_ioctl = cf_diag_ioctl,
- .compat_ioctl = cf_diag_ioctl,
- .llseek = no_llseek
-};
-
-static struct miscdevice cf_diag_dev = {
- .name = S390_HWCTR_DEVICE,
- .minor = MISC_DYNAMIC_MINOR,
- .fops = &cf_diag_fops,
-};
-
-static int cf_diag_online_cpu(unsigned int cpu)
-{
- struct cf_diag_call_on_cpu_parm p;
-
- mutex_lock(&cf_diag_ctrset_mutex);
- if (!cf_diag_ctrset.ctrset)
- goto out;
- p.sets = cf_diag_ctrset.ctrset;
- cf_diag_ioctl_on(&p);
-out:
- mutex_unlock(&cf_diag_ctrset_mutex);
- return 0;
-}
-
-static int cf_diag_offline_cpu(unsigned int cpu)
-{
- struct cf_diag_call_on_cpu_parm p;
-
- mutex_lock(&cf_diag_ctrset_mutex);
- if (!cf_diag_ctrset.ctrset)
- goto out;
- p.sets = cf_diag_ctrset.ctrset;
- cf_diag_ioctl_off(&p);
-out:
- mutex_unlock(&cf_diag_ctrset_mutex);
- return 0;
-}
-
-/* Initialize the counter set PMU to generate complete counter set data as
- * event raw data. This relies on the CPU Measurement Counter Facility device
- * already being loaded and initialized.
- */
-static int __init cf_diag_init(void)
-{
- struct cpumf_ctr_info info;
- size_t need;
- int rc;
-
- if (!kernel_cpumcf_avail() || !stccm_avail() || qctri(&info))
- return -ENODEV;
- cf_diag_get_cpu_speed();
-
- /* Make sure the counter set data fits into predefined buffer. */
- need = cf_diag_ctrset_maxsize(&info);
- if (need > sizeof(((struct cf_diag_csd *)0)->start)) {
- pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
- need);
- return -ENOMEM;
- }
-
- rc = misc_register(&cf_diag_dev);
- if (rc) {
- pr_err("Registration of /dev/" S390_HWCTR_DEVICE
- "failed rc=%d\n", rc);
- goto out;
- }
-
- /* Setup s390dbf facility */
- cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
- if (!cf_diag_dbg) {
- pr_err("Registration of s390dbf(cpum_cf_diag) failed\n");
- rc = -ENOMEM;
- goto out_dbf;
- }
- debug_register_view(cf_diag_dbg, &debug_sprintf_view);
-
- rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
- if (rc) {
- pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
- rc);
- goto out_perf;
- }
- rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE,
- "perf/s390/cfd:online",
- cf_diag_online_cpu, cf_diag_offline_cpu);
- if (!rc)
- goto out;
-
- pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n",
- rc);
- perf_pmu_unregister(&cf_diag);
-out_perf:
- debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
- debug_unregister(cf_diag_dbg);
-out_dbf:
- misc_deregister(&cf_diag_dev);
-out:
- return rc;
-}
-device_initcall(cf_diag_init);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7ae5dde9c54d..350e94d0cac2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -166,6 +166,12 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
p->thread.acrs[1] = (unsigned int)tls;
}
}
+ /*
+ * s390 stores the svc return address in arch_data when calling
+ * sigreturn()/restart_syscall() via vdso. 1 means no valid address
+ * stored.
+ */
+ p->restart_block.arch_data = 1;
return 0;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 5486d82470b5..ff0f9e838916 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -354,7 +354,7 @@ void __init arch_call_rest_init(void)
set_task_stack_end_magic(current);
stack += STACK_INIT_OFFSET;
S390_lowcore.kernel_stack = stack;
- CALL_ON_STACK_NORETURN(rest_init, stack);
+ call_on_stack_noreturn(rest_init, stack);
}
static void __init setup_lowcore_dat_off(void)
@@ -442,6 +442,7 @@ static void __init setup_lowcore_dat_off(void)
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+ lc->preempt_count = PREEMPT_DISABLED;
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 080e7aed181f..78ef53b29958 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <asm/lowcore.h>
#include <asm/switch_to.h>
+#include <asm/vdso.h>
#include "entry.h"
/*
@@ -171,7 +172,6 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
return 0;
}
@@ -334,15 +334,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
+ if (ka->sa.sa_flags & SA_RESTORER)
restorer = (unsigned long) ka->sa.sa_restorer;
- } else {
- /* Signal frame without vector registers are short ! */
- __u16 __user *svc = (void __user *) frame + frame_size - 2;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
- return -EFAULT;
- restorer = (unsigned long) svc;
- }
+ else
+ restorer = VDSO64_SYMBOL(current, sigreturn);
/* Set up registers for signal handler */
regs->gprs[14] = restorer;
@@ -397,14 +392,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = (unsigned long) ksig->ka.sa.sa_restorer;
- } else {
- __u16 __user *svc = &frame->svc_insn;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
- return -EFAULT;
- restorer = (unsigned long) svc;
- }
+ else
+ restorer = VDSO64_SYMBOL(current, rt_sigreturn);
/* Create siginfo on the signal stack */
if (copy_siginfo_to_user(&frame->info, &ksig->info))
@@ -501,7 +492,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
}
/* No longer in a system call */
clear_pt_regs_flag(regs, PIF_SYSCALL);
- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
+
rseq_signal_deliver(&ksig, regs);
if (is_compat_task())
handle_signal32(&ksig, oldset, regs);
@@ -517,14 +508,20 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
switch (regs->gprs[2]) {
case -ERESTART_RESTARTBLOCK:
/* Restart with sys_restart_syscall */
- regs->int_code = __NR_restart_syscall;
- fallthrough;
+ regs->gprs[2] = regs->orig_gpr2;
+ current->restart_block.arch_data = regs->psw.addr;
+ if (is_compat_task())
+ regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall);
+ else
+ regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall);
+ if (test_thread_flag(TIF_SINGLE_STEP))
+ clear_thread_flag(TIF_PER_TRAP);
+ break;
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
- /* Restart system call with magic TIF bit. */
regs->gprs[2] = regs->orig_gpr2;
- set_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
if (test_thread_flag(TIF_SINGLE_STEP))
clear_thread_flag(TIF_PER_TRAP);
break;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index ff42d3aa0f00..8984711f72ed 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -210,6 +210,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+ lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_per_cpu(lc))
goto out;
lowcore_ptr[cpu] = lc;
@@ -300,24 +301,28 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
}
+typedef void (pcpu_delegate_fn)(void *);
+
/*
* Call function via PSW restart on pcpu and stop the current cpu.
*/
-static void __pcpu_delegate(void (*func)(void*), void *data)
+static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
{
func(data); /* should not return */
}
static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu,
- void (*func)(void *),
+ pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
unsigned long source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
- if (pcpu->address == source_cpu)
- CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data);
+ if (pcpu->address == source_cpu) {
+ call_on_stack(2, stack, void, __pcpu_delegate,
+ pcpu_delegate_fn *, func, void *, data);
+ }
/* Stop target cpu (if func returns this stops the current cpu). */
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
/* Restart func on the target cpu and stop the current cpu. */
@@ -898,7 +903,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
S390_lowcore.restart_source = -1UL;
__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
- CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack);
+ call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack);
}
/* Upping and downing of CPUs */
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 76f7916cc30f..8fe2d23b64f4 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -108,7 +108,7 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
-void do_syscall(struct pt_regs *regs)
+static void do_syscall(struct pt_regs *regs)
{
unsigned long nr;
@@ -121,6 +121,10 @@ void do_syscall(struct pt_regs *regs)
regs->gprs[2] = nr;
+ if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
+ regs->psw.addr = current->restart_block.arch_data;
+ current->restart_block.arch_data = 1;
+ }
nr = syscall_enter_from_user_mode_work(regs, nr);
/*
@@ -130,13 +134,16 @@ void do_syscall(struct pt_regs *regs)
* work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
* and if set, the syscall will be skipped.
*/
- if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) {
- regs->gprs[2] = -ENOSYS;
- if (likely(nr < NR_syscalls))
- regs->gprs[2] = current->thread.sys_call_table[nr](regs);
- } else {
- clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET);
- }
+
+ if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
+ goto out;
+ regs->gprs[2] = -ENOSYS;
+ if (likely(nr >= NR_syscalls))
+ goto out;
+ do {
+ regs->gprs[2] = current->thread.sys_call_table[nr](regs);
+ } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART));
+out:
syscall_exit_to_user_mode_work(regs);
}
@@ -154,13 +161,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
if (per_trap)
set_thread_flag(TIF_PER_TRAP);
- for (;;) {
- regs->flags = 0;
- set_pt_regs_flag(regs, PIF_SYSCALL);
- do_syscall(regs);
- if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART))
- break;
- local_irq_enable();
- }
+ regs->flags = 0;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ do_syscall(regs);
exit_to_user_mode();
}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 019c5748b607..76947275fe8b 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -277,6 +277,8 @@ static void __init test_monitor_call(void)
{
int val = 1;
+ if (!IS_ENABLED(CONFIG_BUG))
+ return;
asm volatile(
" mc 0,0\n"
"0: xgr %0,%0\n"
@@ -299,10 +301,9 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
void noinstr __do_pgm_check(struct pt_regs *regs)
{
unsigned long last_break = S390_lowcore.breaking_event_addr;
- unsigned int trapnr, syscall_redirect = 0;
+ unsigned int trapnr;
irqentry_state_t state;
- add_random_kstack_offset();
regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc;
regs->int_parm_long = S390_lowcore.trans_exc_code;
@@ -344,18 +345,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
trapnr = regs->int_code & PGM_INT_CODE_MASK;
if (trapnr)
pgm_check_table[trapnr](regs);
- syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL);
out:
local_irq_disable();
irqentry_exit(regs, state);
-
- if (syscall_redirect) {
- enter_from_user_mode(regs);
- local_irq_enable();
- regs->orig_gpr2 = regs->gprs[2];
- do_syscall(regs);
- exit_to_user_mode();
- }
}
/*
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 6be2167943bb..aeb0a15bcbb7 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -358,6 +358,15 @@ static ssize_t uv_query_facilities(struct kobject *kobj,
static struct kobj_attribute uv_query_facilities_attr =
__ATTR(facilities, 0444, uv_query_facilities, NULL);
+static ssize_t uv_query_feature_indications(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
+}
+
+static struct kobj_attribute uv_query_feature_indications_attr =
+ __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
+
static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -390,6 +399,7 @@ static struct kobj_attribute uv_query_max_guest_addr_attr =
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
+ &uv_query_feature_indications_attr.attr,
&uv_query_max_guest_cpus_attr.attr,
&uv_query_max_guest_vms_attr.attr,
&uv_query_max_guest_addr_attr.attr,
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 8c4e07d533c8..99694260cac9 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -20,7 +20,7 @@
#include <asm/vdso.h>
extern char vdso64_start[], vdso64_end[];
-static unsigned int vdso_pages;
+extern char vdso32_start[], vdso32_end[];
static struct vm_special_mapping vvar_mapping;
@@ -37,18 +37,6 @@ enum vvar_pages {
VVAR_NR_PAGES,
};
-unsigned int __read_mostly vdso_enabled = 1;
-
-static int __init vdso_setup(char *str)
-{
- bool enabled;
-
- if (!kstrtobool(str, &enabled))
- vdso_enabled = enabled;
- return 1;
-}
-__setup("vdso=", vdso_setup);
-
#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
@@ -155,7 +143,12 @@ static struct vm_special_mapping vvar_mapping = {
.fault = vvar_fault,
};
-static struct vm_special_mapping vdso_mapping = {
+static struct vm_special_mapping vdso64_mapping = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+};
+
+static struct vm_special_mapping vdso32_mapping = {
.name = "[vdso]",
.mremap = vdso_mremap,
};
@@ -171,16 +164,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
unsigned long vdso_text_len, vdso_mapping_len;
unsigned long vvar_start, vdso_text_start;
+ struct vm_special_mapping *vdso_mapping;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int rc;
BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
- if (!vdso_enabled || is_compat_task())
- return 0;
if (mmap_write_lock_killable(mm))
return -EINTR;
- vdso_text_len = vdso_pages << PAGE_SHIFT;
+
+ if (is_compat_task()) {
+ vdso_text_len = vdso32_end - vdso32_start;
+ vdso_mapping = &vdso32_mapping;
+ } else {
+ vdso_text_len = vdso64_end - vdso64_start;
+ vdso_mapping = &vdso64_mapping;
+ }
vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
rc = vvar_start;
@@ -198,7 +197,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_mapping);
+ vdso_mapping);
if (IS_ERR(vma)) {
do_munmap(mm, vvar_start, PAGE_SIZE, NULL);
rc = PTR_ERR(vma);
@@ -211,21 +210,25 @@ out:
return rc;
}
-static int __init vdso_init(void)
+static struct page ** __init vdso_setup_pages(void *start, void *end)
{
- struct page **pages;
+ int pages = (end - start) >> PAGE_SHIFT;
+ struct page **pagelist;
int i;
- vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT;
- pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
- if (!pages) {
- vdso_enabled = 0;
- return -ENOMEM;
- }
- for (i = 0; i < vdso_pages; i++)
- pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE);
- pages[vdso_pages] = NULL;
- vdso_mapping.pages = pages;
+ pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!pagelist)
+ panic("%s: Cannot allocate page list for VDSO", __func__);
+ for (i = 0; i < pages; i++)
+ pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+ return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+ vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
+ if (IS_ENABLED(CONFIG_COMPAT))
+ vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
return 0;
}
arch_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
new file mode 100644
index 000000000000..5167384843b9
--- /dev/null
+++ b/arch/s390/kernel/vdso32/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..b2349a3f4fa3
--- /dev/null
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso
+
+KCOV_INSTRUMENT := n
+ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
+ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
+
+include $(srctree)/lib/vdso/Makefile
+obj-vdso32 = vdso_user_wrapper-32.o note-32.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_32 += -m31 -s
+
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+
+LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \
+ --hash-style=both --build-id=sha1 -melf_s390 -T
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+
+obj-y += vdso32_wrapper.o
+CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
+ $(call if_changed,ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+$(obj-vdso32): %-32.o: %.S FORCE
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso32.so
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
+ $(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..9c4f951e227d
--- /dev/null
+++ b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
new file mode 100644
index 000000000000..db19d0680a0a
--- /dev/null
+++ b/arch/s390/kernel/vdso32/note.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..bff50b6acd6d
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_compat_restart_syscall;
+ __kernel_compat_rt_sigreturn;
+ __kernel_compat_sigreturn;
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000000..de2fb930471a
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/s390/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
new file mode 100644
index 000000000000..3f42f27f978c
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+
+.macro vdso_syscall func,syscall
+ .globl __kernel_compat_\func
+ .type __kernel_compat_\func,@function
+ .align 8
+__kernel_compat_\func:
+ CFI_STARTPROC
+ svc \syscall
+ /* Make sure we notice when a syscall returns, which shouldn't happen */
+ .word 0
+ CFI_ENDPROC
+ .size __kernel_compat_\func,.-__kernel_compat_\func
+.endm
+
+vdso_syscall restart_syscall,__NR_restart_syscall
+vdso_syscall sigreturn,__NR_sigreturn
+vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index a6e0fb6b91d6..2a2092ce19f1 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -74,3 +74,11 @@ vdso64.so: $(obj)/vdso64.so.dbg
$(call cmd,vdso_install)
vdso_install: vdso64.so
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
+ $(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh
new file mode 100755
index 000000000000..37f05cb38dad
--- /dev/null
+++ b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 518f1ea405f4..d4fb336d747b 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -17,7 +17,7 @@ SECTIONS
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
- . = VDSO64_LBASE + SIZEOF_HEADERS;
+ . = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -137,6 +137,9 @@ VERSION
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_getcpu;
+ __kernel_restart_syscall;
+ __kernel_rt_sigreturn;
+ __kernel_sigreturn;
local: *;
};
}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index f773505c7e63..97f0c0a669a5 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -37,3 +37,20 @@ vdso_func gettimeofday
vdso_func clock_getres
vdso_func clock_gettime
vdso_func getcpu
+
+.macro vdso_syscall func,syscall
+ .globl __kernel_\func
+ .type __kernel_\func,@function
+ .align 8
+__kernel_\func:
+ CFI_STARTPROC
+ svc \syscall
+ /* Make sure we notice when a syscall returns, which shouldn't happen */
+ .word 0
+ CFI_ENDPROC
+ .size __kernel_\func,.-__kernel_\func
+.endm
+
+vdso_syscall restart_syscall,__NR_restart_syscall
+vdso_syscall sigreturn,__NR_sigreturn
+vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index ec5b76bde4d8..cfcdf76d6a95 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -162,7 +162,7 @@ char *strcat(char *dest, const char *src)
" jo 0b\n"
"1: mvst %[dummy],%[src]\n"
" jo 1b\n"
- : [dummy] "=&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src)
+ : [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src)
:
: "cc", "memory", "0");
return ret;
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 2f32802f79ce..ecf327d743a0 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -120,7 +120,7 @@ static struct unwindme *unwindme;
#define UWM_REGS 0x2 /* Pass regs to test_unwind(). */
#define UWM_SP 0x4 /* Pass sp to test_unwind(). */
#define UWM_CALLER 0x8 /* Unwind starting from caller. */
-#define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */
+#define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */
#define UWM_IRQ 0x20 /* Unwind from irq context. */
#define UWM_PGM 0x40 /* Unwind from program check handler. */
@@ -211,7 +211,8 @@ static noinline int unwindme_func2(struct unwindme *u)
if (u->flags & UWM_SWITCH_STACK) {
local_irq_save(flags);
local_mcck_disable();
- rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u);
+ rc = call_on_stack(1, S390_lowcore.nodat_stack,
+ int, unwindme_func3, struct unwindme *, u);
local_mcck_enable();
local_irq_restore(flags);
return rc;
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 67606d932825..7ec8b1fa0f08 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -224,7 +224,7 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use
EX_TABLE(0b,3b)
: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
: [spec] "d" (0x810081UL)
- : "cc", "memory");
+ : "cc", "memory", "0");
return size;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 8ae3dc5783fd..e33c43b38afe 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -285,26 +285,6 @@ static noinline void do_sigbus(struct pt_regs *regs)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline int signal_return(struct pt_regs *regs)
-{
- u16 instruction;
- int rc;
-
- rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
- if (rc)
- return rc;
- if (instruction == 0x0a77) {
- set_pt_regs_flag(regs, PIF_SYSCALL);
- regs->int_code = 0x00040077;
- return 0;
- } else if (instruction == 0x0aad) {
- set_pt_regs_flag(regs, PIF_SYSCALL);
- regs->int_code = 0x000400ad;
- return 0;
- }
- return -EACCES;
-}
-
static noinline void do_fault_error(struct pt_regs *regs, int access,
vm_fault_t fault)
{
@@ -312,9 +292,6 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
switch (fault) {
case VM_FAULT_BADACCESS:
- if (access == VM_EXEC && signal_return(regs) == 0)
- break;
- fallthrough;
case VM_FAULT_BADMAP:
/* Bad memory access. Check if it is kernel or user space. */
if (user_mode(regs)) {
@@ -792,6 +769,32 @@ void do_secure_storage_access(struct pt_regs *regs)
struct page *page;
int rc;
+ /*
+ * bit 61 tells us if the address is valid, if it's not we
+ * have a major problem and should stop the kernel or send a
+ * SIGSEGV to the process. Unfortunately bit 61 is not
+ * reliable without the misc UV feature so we need to check
+ * for that as well.
+ */
+ if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+ !test_bit_inv(61, &regs->int_parm_long)) {
+ /*
+ * When this happens, userspace did something that it
+ * was not supposed to do, e.g. branching into secure
+ * memory. Trigger a segmentation fault.
+ */
+ if (user_mode(regs)) {
+ send_sig(SIGSEGV, current, 0);
+ return;
+ }
+
+ /*
+ * The kernel should never run into this case and we
+ * have no way out of this situation.
+ */
+ panic("Unexpected PGM 0x3d with TEID bit 61=0");
+ }
+
switch (get_fault_type(regs)) {
case USER_FAULT:
mm = current->mm;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 1f1f906344ff..a0f54bd5e98a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -125,12 +125,18 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
*/
int memcpy_real(void *dest, void *src, size_t count)
{
+ unsigned long _dest = (unsigned long)dest;
+ unsigned long _src = (unsigned long)src;
+ unsigned long _count = (unsigned long)count;
int rc;
if (S390_lowcore.nodat_stack != 0) {
preempt_disable();
- rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3,
- dest, src, count);
+ rc = call_on_stack(3, S390_lowcore.nodat_stack,
+ unsigned long, _memcpy_real,
+ unsigned long, _dest,
+ unsigned long, _src,
+ unsigned long, _count);
preempt_enable();
return rc;
}
@@ -139,8 +145,7 @@ int memcpy_real(void *dest, void *src, size_t count)
* not set up yet. Just call _memcpy_real on the early boot
* stack
*/
- return _memcpy_real((unsigned long) dest,(unsigned long) src,
- (unsigned long) count);
+ return _memcpy_real(_dest, _src, _count);
}
/*