From d8346b7d9bab37e6cc712ff1622c65ff98bdfef8 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 20 Dec 2012 15:32:08 +0100
Subject: KVM: s390: Support for I/O interrupts.

Add support for handling I/O interrupts (standard, subchannel-related
ones and rudimentary adapter interrupts).

The subchannel-identifying parameters are encoded into the interrupt
type.

I/O interrupts are floating, so they can't be injected on a specific
vcpu.

Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/uapi/linux/kvm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e6e5d4b13708..54540bdd3340 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -401,6 +401,15 @@ struct kvm_s390_psw {
 #define KVM_S390_INT_SERVICE		0xffff2401u
 #define KVM_S390_INT_EMERGENCY		0xffff1201u
 #define KVM_S390_INT_EXTERNAL_CALL	0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
+	(((schid)) |			       \
+	 ((ssid) << 16) |		       \
+	 ((cssid) << 18) |		       \
+	 ((ai) << 26))
+#define KVM_S390_INT_IO_MIN		0x00000000u
+#define KVM_S390_INT_IO_MAX		0xfffdffffu
+
 
 struct kvm_s390_interrupt {
 	__u32 type;
-- 
cgit v1.2.3


From 48a3e950f4cee6a345ffbe9baf599f1e9a54c479 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 20 Dec 2012 15:32:09 +0100
Subject: KVM: s390: Add support for machine checks.

Add support for injecting machine checks (only repressible
conditions for now).

This is a bit more involved than I/O interrupts, for these reasons:

- Machine checks come in both floating and cpu varieties.
- We don't have a bit for machine checks enabling, but have to use
  a roundabout approach with trapping PSW changing instructions and
  watching for opened machine checks.

Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 Documentation/virtual/kvm/api.txt |   4 ++
 arch/s390/include/asm/kvm_host.h  |   8 +++
 arch/s390/kvm/intercept.c         |   2 +
 arch/s390/kvm/interrupt.c         | 112 +++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.h          |   3 +
 arch/s390/kvm/priv.c              | 135 ++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/trace-s390.h        |   6 +-
 include/uapi/linux/kvm.h          |   1 +
 8 files changed, 268 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 83bd92b52936..8a0de309932f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2073,6 +2073,10 @@ KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an
     I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
     I/O interruption parameters in parm (subchannel) and parm64 (intparm,
     interruption subclass)
+KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
+                           machine check interrupt code in parm64 (note that
+                           machine checks needing further payload are not
+                           supported by this ioctl)
 
 Note that the vcpu ioctl is asynchronous to vcpu execution.
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a8e35c43df78..29363d155cd5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -75,8 +75,10 @@ struct kvm_s390_sie_block {
 	__u8	reserved40[4];		/* 0x0040 */
 #define LCTL_CR0	0x8000
 #define LCTL_CR6	0x0200
+#define LCTL_CR14	0x0002
 	__u16   lctl;			/* 0x0044 */
 	__s16	icpua;			/* 0x0046 */
+#define ICTL_LPSW 0x00400000
 	__u32	ictl;			/* 0x0048 */
 	__u32	eca;			/* 0x004c */
 	__u8	icptcode;		/* 0x0050 */
@@ -187,6 +189,11 @@ struct kvm_s390_emerg_info {
 	__u16 code;
 };
 
+struct kvm_s390_mchk_info {
+	__u64 cr14;
+	__u64 mcic;
+};
+
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	u64	type;
@@ -197,6 +204,7 @@ struct kvm_s390_interrupt_info {
 		struct kvm_s390_emerg_info emerg;
 		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_mchk_info mchk;
 	};
 };
 
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index df6c0ad085aa..950c13ecaf60 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -97,10 +97,12 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t instruction_handlers[256] = {
 	[0x01] = kvm_s390_handle_01,
+	[0x82] = kvm_s390_handle_lpsw,
 	[0x83] = kvm_s390_handle_diag,
 	[0xae] = kvm_s390_handle_sigp,
 	[0xb2] = kvm_s390_handle_b2,
 	[0xb7] = handle_lctl,
+	[0xb9] = kvm_s390_handle_b9,
 	[0xe5] = kvm_s390_handle_e5,
 	[0xeb] = handle_lctlg,
 };
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 52cdf20906ab..b3b4748485ee 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -41,6 +41,11 @@ static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO);
 }
 
+static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK);
+}
+
 static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 {
 	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
@@ -82,6 +87,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 	case KVM_S390_SIGP_SET_PREFIX:
 	case KVM_S390_RESTART:
 		return 1;
+	case KVM_S390_MCHK:
+		if (psw_mchk_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
+			return 1;
+		return 0;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 		if (psw_ioint_disabled(vcpu))
 			return 0;
@@ -116,6 +127,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
 		&vcpu->arch.sie_block->cpuflags);
 	vcpu->arch.sie_block->lctl = 0x0000;
+	vcpu->arch.sie_block->ictl &= ~ICTL_LPSW;
 }
 
 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -139,6 +151,12 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	case KVM_S390_SIGP_STOP:
 		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
 		break;
+	case KVM_S390_MCHK:
+		if (psw_mchk_disabled(vcpu))
+			vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+		else
+			vcpu->arch.sie_block->lctl |= LCTL_CR14;
+		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 		if (psw_ioint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_IO_INT);
@@ -326,6 +344,32 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 			exception = 1;
 		break;
 
+	case KVM_S390_MCHK:
+		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+			   inti->mchk.mcic);
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+						 inti->mchk.cr14,
+						 inti->mchk.mcic);
+		rc = kvm_s390_vcpu_store_status(vcpu,
+						KVM_S390_STORE_STATUS_PREFIXED);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
+				   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				     __LC_MCK_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 	{
 		__u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
@@ -588,6 +632,61 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 	}
 }
 
+void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct kvm_s390_interrupt_info  *n, *inti = NULL;
+	int deliver;
+
+	__reset_intercept_indicators(vcpu);
+	if (atomic_read(&li->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&li->lock);
+			list_for_each_entry_safe(inti, n, &li->list, list) {
+				if ((inti->type == KVM_S390_MCHK) &&
+				    __interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&li->list))
+				atomic_set(&li->active, 0);
+			spin_unlock_bh(&li->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+
+	if (atomic_read(&fi->active)) {
+		do {
+			deliver = 0;
+			spin_lock(&fi->lock);
+			list_for_each_entry_safe(inti, n, &fi->list, list) {
+				if ((inti->type == KVM_S390_MCHK) &&
+				    __interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&fi->list))
+				atomic_set(&fi->active, 0);
+			spin_unlock(&fi->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+}
+
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -641,6 +740,13 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	case KVM_S390_INT_EMERGENCY:
 		kfree(inti);
 		return -EINVAL;
+	case KVM_S390_MCHK:
+		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+			 s390int->parm64);
+		inti->type = s390int->type;
+		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
+		inti->mchk.mcic = s390int->parm64;
+		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 		if (s390int->type & IOINT_AI_MASK)
 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
@@ -749,6 +855,12 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		inti->type = s390int->type;
 		inti->emerg.code = s390int->parm;
 		break;
+	case KVM_S390_MCHK:
+		VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+			   s390int->parm64);
+		inti->type = s390int->type;
+		inti->mchk.mcic = s390int->parm64;
+		break;
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index dccc0242b7ca..1f7cc6ccf102 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -106,6 +106,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
 void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
 int kvm_s390_inject_vm(struct kvm *kvm,
 		struct kvm_s390_interrupt *s390int);
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -117,6 +118,8 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d715842f56ca..d3cbcd3c9ada 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -18,6 +18,8 @@
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
 #include <asm/sysinfo.h>
+#include <asm/ptrace.h>
+#include <asm/compat.h>
 #include "gaccess.h"
 #include "kvm-s390.h"
 #include "trace.h"
@@ -166,6 +168,99 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static void handle_new_psw(struct kvm_vcpu *vcpu)
+{
+	/* Check whether the new psw is enabled for machine checks. */
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
+		kvm_s390_deliver_pending_machine_checks(vcpu);
+}
+
+#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
+#define PSW_ADDR_24 0x00000000000fffffUL
+#define PSW_ADDR_31 0x000000007fffffffUL
+
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
+{
+	u64 addr;
+	psw_compat_t new_psw;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu,
+						   PGM_PRIVILEGED_OPERATION);
+
+	addr = kvm_s390_get_base_disp_s(vcpu);
+
+	if (addr & 7) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	if (!(new_psw.mask & PSW32_MASK_BASE)) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	vcpu->arch.sie_block->gpsw.mask =
+		(new_psw.mask & ~PSW32_MASK_BASE) << 32;
+	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
+
+	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
+	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
+	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
+	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
+	     PSW_MASK_EA)) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	handle_new_psw(vcpu);
+out:
+	return 0;
+}
+
+static int handle_lpswe(struct kvm_vcpu *vcpu)
+{
+	u64 addr;
+	psw_t new_psw;
+
+	addr = kvm_s390_get_base_disp_s(vcpu);
+
+	if (addr & 7) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
+	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
+
+	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
+	    (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
+	      PSW_MASK_BA) &&
+	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
+	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
+	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
+	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
+	     PSW_MASK_EA)) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	handle_new_psw(vcpu);
+out:
+	return 0;
+}
+
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
 	u64 operand2;
@@ -292,6 +387,7 @@ static const intercept_handler_t priv_handlers[256] = {
 	[0x5f] = handle_chsc,
 	[0x7d] = handle_stsi,
 	[0xb1] = handle_stfl,
+	[0xb2] = handle_lpswe,
 };
 
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
@@ -316,6 +412,45 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }
 
+static int handle_epsw(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2;
+
+	reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24;
+	reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+
+	/* This basically extracts the mask half of the psw. */
+	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
+	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
+	if (reg2) {
+		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000;
+		vcpu->run->s.regs.gprs[reg2] |=
+			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff;
+	}
+	return 0;
+}
+
+static const intercept_handler_t b9_handlers[256] = {
+	[0x8d] = handle_epsw,
+};
+
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	/* This is handled just as for the B2 instructions. */
+	handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+	if (handler) {
+		if ((handler != handle_epsw) &&
+		    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE))
+			return kvm_s390_inject_program_int(vcpu,
+						   PGM_PRIVILEGED_OPERATION);
+		else
+			return handler(vcpu);
+	}
+	return -EOPNOTSUPP;
+}
+
 static int handle_tprot(struct kvm_vcpu *vcpu)
 {
 	u64 address1, address2;
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 90fdf85b5ff7..95fbc1ab88dc 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -141,13 +141,13 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
  * Trace point for the actual delivery of interrupts.
  */
 TRACE_EVENT(kvm_s390_deliver_interrupt,
-	    TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1),
+	    TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
 	    TP_ARGS(id, type, data0, data1),
 
 	    TP_STRUCT__entry(
 		    __field(int, id)
 		    __field(__u32, inttype)
-		    __field(__u32, data0)
+		    __field(__u64, data0)
 		    __field(__u64, data1)
 		    ),
 
@@ -159,7 +159,7 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
 		    ),
 
 	    TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "	\
-		      "data:%08x %016llx",
+		      "data:%08llx %016llx",
 		      __entry->id, __entry->inttype,
 		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
 		      __entry->data0, __entry->data1)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 54540bdd3340..80bb3b801116 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -397,6 +397,7 @@ struct kvm_s390_psw {
 #define KVM_S390_PROGRAM_INT		0xfffe0001u
 #define KVM_S390_SIGP_SET_PREFIX	0xfffe0002u
 #define KVM_S390_RESTART		0xfffe0003u
+#define KVM_S390_MCHK			0xfffe1000u
 #define KVM_S390_INT_VIRTIO		0xffff2603u
 #define KVM_S390_INT_SERVICE		0xffff2401u
 #define KVM_S390_INT_EMERGENCY		0xffff1201u
-- 
cgit v1.2.3


From fa6b7fe9928d50444c29b29c8563746c6b0c6299 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 20 Dec 2012 15:32:12 +0100
Subject: KVM: s390: Add support for channel I/O instructions.

Add a new capability, KVM_CAP_S390_CSS_SUPPORT, which will pass
intercepts for channel I/O instructions to userspace. Only I/O
instructions interacting with I/O interrupts need to be handled
in-kernel:

- TEST PENDING INTERRUPTION (tpi) dequeues and stores pending
  interrupts entirely in-kernel.
- TEST SUBCHANNEL (tsch) dequeues pending interrupts in-kernel
  and exits via KVM_EXIT_S390_TSCH to userspace for subchannel-
  related processing.

Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 30 +++++++++++++
 arch/s390/include/asm/kvm_host.h  |  1 +
 arch/s390/kvm/intercept.c         |  1 +
 arch/s390/kvm/interrupt.c         | 37 ++++++++++++++++
 arch/s390/kvm/kvm-s390.c          | 12 ++++++
 arch/s390/kvm/kvm-s390.h          |  2 +
 arch/s390/kvm/priv.c              | 91 +++++++++++++++++++++++++++++++++++++--
 arch/s390/kvm/trace-s390.h        | 20 +++++++++
 include/trace/events/kvm.h        |  2 +-
 include/uapi/linux/kvm.h          | 11 +++++
 10 files changed, 202 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 73bd159c5559..f2d6391178b9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2350,6 +2350,22 @@ The possible hypercalls are defined in the Power Architecture Platform
 Requirements (PAPR) document available from www.power.org (free
 developer registration required to access it).
 
+		/* KVM_EXIT_S390_TSCH */
+		struct {
+			__u16 subchannel_id;
+			__u16 subchannel_nr;
+			__u32 io_int_parm;
+			__u32 io_int_word;
+			__u32 ipb;
+			__u8 dequeued;
+		} s390_tsch;
+
+s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled
+and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O
+interrupt for the target subchannel has been dequeued and subchannel_id,
+subchannel_nr, io_int_parm and io_int_word contain the parameters for that
+interrupt. ipb is needed for instruction parameter decoding.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -2471,3 +2487,17 @@ For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
    where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
  - The tsize field of mas1 shall be set to 4K on TLB0, even though the
    hardware ignores this value for TLB0.
+
+6.4 KVM_CAP_S390_CSS_SUPPORT
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables support for handling of channel I/O instructions.
+
+TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are
+handled in-kernel, while the other I/O instructions are passed to userspace.
+
+When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST
+SUBCHANNEL intercepts.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 29363d155cd5..16bd5d169cdb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -262,6 +262,7 @@ struct kvm_arch{
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
 	struct gmap *gmap;
+	int css_support;
 };
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 71af87dbb42c..f26ff1e31bdb 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -264,6 +264,7 @@ static const intercept_handler_t intercept_funcs[] = {
 	[0x0C >> 2] = handle_instruction_and_prog,
 	[0x10 >> 2] = handle_noop,
 	[0x14 >> 2] = handle_noop,
+	[0x18 >> 2] = handle_noop,
 	[0x1C >> 2] = kvm_s390_handle_wait,
 	[0x20 >> 2] = handle_validity,
 	[0x28 >> 2] = handle_stop,
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b3b4748485ee..9a128357fd15 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -709,6 +709,43 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	return 0;
 }
 
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 cr6, u64 schid)
+{
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info *inti, *iter;
+
+	if ((!schid && !cr6) || (schid && cr6))
+		return NULL;
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	inti = NULL;
+	list_for_each_entry(iter, &fi->list, list) {
+		if (!is_ioint(iter->type))
+			continue;
+		if (cr6 && ((cr6 & iter->io.io_int_word) == 0))
+			continue;
+		if (schid) {
+			if (((schid & 0x00000000ffff0000) >> 16) !=
+			    iter->io.subchannel_id)
+				continue;
+			if ((schid & 0x000000000000ffff) !=
+			    iter->io.subchannel_nr)
+				continue;
+		}
+		inti = iter;
+		break;
+	}
+	if (inti)
+		list_del_init(&inti->list);
+	if (list_empty(&fi->list))
+		atomic_set(&fi->active, 0);
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+	return inti;
+}
+
 int kvm_s390_inject_vm(struct kvm *kvm,
 		       struct kvm_s390_interrupt *s390int)
 {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 5ff26033825c..5b01f0953900 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -141,6 +141,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SYNC_REGS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_S390_CSS_SUPPORT:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -235,6 +236,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		if (!kvm->arch.gmap)
 			goto out_nogmap;
 	}
+
+	kvm->arch.css_support = 0;
+
 	return 0;
 out_nogmap:
 	debug_unregister(kvm->arch.dbf);
@@ -658,6 +662,7 @@ rerun_vcpu:
 	case KVM_EXIT_INTR:
 	case KVM_EXIT_S390_RESET:
 	case KVM_EXIT_S390_UCONTROL:
+	case KVM_EXIT_S390_TSCH:
 		break;
 	default:
 		BUG();
@@ -818,6 +823,13 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	switch (cap->cap) {
+	case KVM_CAP_S390_CSS_SUPPORT:
+		if (!vcpu->kvm->arch.css_support) {
+			vcpu->kvm->arch.css_support = 1;
+			trace_kvm_s390_enable_css(vcpu->kvm);
+		}
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 211b340385a7..3e05deff21b6 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -113,6 +113,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		struct kvm_s390_interrupt *s390int);
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 cr6, u64 schid);
 
 /* implemented in priv.c */
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8ad776f87856..0ef9894606e5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -127,15 +127,98 @@ static int handle_skey(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int handle_io_inst(struct kvm_vcpu *vcpu)
+static int handle_tpi(struct kvm_vcpu *vcpu)
 {
-	VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
-	/* condition code 3 */
+	u64 addr;
+	struct kvm_s390_interrupt_info *inti;
+	int cc;
+
+	addr = kvm_s390_get_base_disp_s(vcpu);
+
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
+	if (inti) {
+		if (addr) {
+			/*
+			 * Store the two-word I/O interruption code into the
+			 * provided area.
+			 */
+			put_guest_u16(vcpu, addr, inti->io.subchannel_id);
+			put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr);
+			put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm);
+		} else {
+			/*
+			 * Store the three-word I/O interruption code into
+			 * the appropriate lowcore area.
+			 */
+			put_guest_u16(vcpu, 184, inti->io.subchannel_id);
+			put_guest_u16(vcpu, 186, inti->io.subchannel_nr);
+			put_guest_u32(vcpu, 188, inti->io.io_int_parm);
+			put_guest_u32(vcpu, 192, inti->io.io_int_word);
+		}
+		cc = 1;
+	} else
+		cc = 0;
+	kfree(inti);
+	/* Set condition code and we're done. */
 	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
-	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
 	return 0;
 }
 
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_interrupt_info *inti;
+
+	inti = kvm_s390_get_io_int(vcpu->kvm, 0,
+				   vcpu->run->s.regs.gprs[1]);
+
+	/*
+	 * Prepare exit to userspace.
+	 * We indicate whether we dequeued a pending I/O interrupt
+	 * so that userspace can re-inject it if the instruction gets
+	 * a program check. While this may re-order the pending I/O
+	 * interrupts, this is no problem since the priority is kept
+	 * intact.
+	 */
+	vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+	vcpu->run->s390_tsch.dequeued = !!inti;
+	if (inti) {
+		vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+		vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+		vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+		vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+	}
+	vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+	kfree(inti);
+	return -EREMOTE;
+}
+
+static int handle_io_inst(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+
+	if (vcpu->kvm->arch.css_support) {
+		/*
+		 * Most I/O instructions will be handled by userspace.
+		 * Exceptions are tpi and the interrupt portion of tsch.
+		 */
+		if (vcpu->arch.sie_block->ipa == 0xb236)
+			return handle_tpi(vcpu);
+		if (vcpu->arch.sie_block->ipa == 0xb235)
+			return handle_tsch(vcpu);
+		/* Handle in userspace. */
+		return -EOPNOTSUPP;
+	} else {
+		/*
+		 * Set condition code 3 to stop the guest from issueing channel
+		 * I/O instructions.
+		 */
+		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+		vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+		return 0;
+	}
+}
+
 static int handle_stfl(struct kvm_vcpu *vcpu)
 {
 	unsigned int facility_list;
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 95fbc1ab88dc..13f30f58a2df 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -204,6 +204,26 @@ TRACE_EVENT(kvm_s390_stop_request,
 	);
 
 
+/*
+ * Trace point for enabling channel I/O instruction support.
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+	    TP_PROTO(void *kvm),
+	    TP_ARGS(kvm),
+
+	    TP_STRUCT__entry(
+		    __field(void *, kvm)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->kvm = kvm;
+		    ),
+
+	    TP_printk("enabling channel I/O support (kvm @ %p)\n",
+		      __entry->kvm)
+	);
+
+
 #endif /* _TRACE_KVMS390_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7ef9e759f499..a23f47c884cf 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -14,7 +14,7 @@
 	ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR),	\
 	ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
 	ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL),	\
-	ERSN(S390_UCONTROL)
+	ERSN(S390_UCONTROL), ERSN(S390_TSCH)
 
 TRACE_EVENT(kvm_userspace_exit,
 	    TP_PROTO(__u32 reason, int errno),
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 80bb3b801116..8bb0bf83afc5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -168,6 +168,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_PAPR_HCALL	  19
 #define KVM_EXIT_S390_UCONTROL	  20
 #define KVM_EXIT_WATCHDOG         21
+#define KVM_EXIT_S390_TSCH        22
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -285,6 +286,15 @@ struct kvm_run {
 			__u64 ret;
 			__u64 args[9];
 		} papr_hcall;
+		/* KVM_EXIT_S390_TSCH */
+		struct {
+			__u16 subchannel_id;
+			__u16 subchannel_nr;
+			__u32 io_int_parm;
+			__u32 io_int_word;
+			__u32 ipb;
+			__u8 dequeued;
+		} s390_tsch;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -645,6 +655,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQFD_RESAMPLE 82
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
+#define KVM_CAP_S390_CSS_SUPPORT 85
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 1c810636556c8d53a37406b34a64d9b9b0161aa6 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 4 Jan 2013 18:12:48 +0100
Subject: KVM: PPC: BookE: Implement EPR exit

The External Proxy Facility in FSL BookE chips allows the interrupt
controller to automatically acknowledge an interrupt as soon as a
core gets its pending external interrupt delivered.

Today, user space implements the interrupt controller, so we need to
check on it during such a cycle.

This patch implements logic for user space to enable EPR exiting,
disable EPR exiting and EPR exiting itself, so that user space can
acknowledge an interrupt when an external interrupt has successfully
been delivered into the guest vcpu.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   | 40 +++++++++++++++++++++++++++++++++++--
 arch/powerpc/include/asm/kvm_host.h |  2 ++
 arch/powerpc/include/asm/kvm_ppc.h  |  9 +++++++++
 arch/powerpc/kvm/booke.c            | 14 ++++++++++++-
 arch/powerpc/kvm/powerpc.c          | 10 ++++++++++
 include/linux/kvm_host.h            |  1 +
 include/uapi/linux/kvm.h            |  6 ++++++
 7 files changed, 79 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4fc2bfcb16d5..a98ed09269d7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2246,8 +2246,8 @@ executed a memory-mapped I/O instruction which could not be satisfied
 by kvm.  The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
 
-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR
-      and KVM_EXIT_PAPR the corresponding
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR,
+      KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding
 operations are complete (and guest state is consistent) only after userspace
 has re-entered the kernel with KVM_RUN.  The kernel side will first finish
 incomplete operations and then check for pending signals.  Userspace
@@ -2366,6 +2366,25 @@ interrupt for the target subchannel has been dequeued and subchannel_id,
 subchannel_nr, io_int_parm and io_int_word contain the parameters for that
 interrupt. ipb is needed for instruction parameter decoding.
 
+		/* KVM_EXIT_EPR */
+		struct {
+			__u32 epr;
+		} epr;
+
+On FSL BookE PowerPC chips, the interrupt controller has a fast patch
+interrupt acknowledge path to the core. When the core successfully
+delivers an interrupt, it automatically populates the EPR register with
+the interrupt vector number and acknowledges the interrupt inside
+the interrupt controller.
+
+In case the interrupt controller lives in user space, we need to do
+the interrupt acknowledge cycle through it to fetch the next to be
+delivered interrupt vector using this exit.
+
+It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
+external interrupt has just been delivered into the guest. User space
+should put the acknowledged interrupt vector into the 'epr' field.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -2501,3 +2520,20 @@ handled in-kernel, while the other I/O instructions are passed to userspace.
 
 When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST
 SUBCHANNEL intercepts.
+
+6.5 KVM_CAP_PPC_EPR
+
+Architectures: ppc
+Parameters: args[0] defines whether the proxy facility is active
+Returns: 0 on success; -1 on error
+
+This capability enables or disables the delivery of interrupts through the
+external proxy facility.
+
+When enabled (args[0] != 0), every time the guest gets an external interrupt
+delivered, it automatically exits into user space with a KVM_EXIT_EPR exit
+to receive the topmost interrupt vector.
+
+When disabled (args[0] == 0), behavior is as if this facility is unsupported.
+
+When this capability is enabled, KVM_EXIT_EPR can occur.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ab49c6cf891c..8a72d59467eb 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -520,6 +520,8 @@ struct kvm_vcpu_arch {
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
+	u8 epr_enabled;
+	u8 epr_needed;
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5f5f69abd281..493630e209c8 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -264,6 +264,15 @@ static inline void kvm_linear_init(void)
 {}
 #endif
 
+static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+	mtspr(SPRN_GEPR, epr);
+#elif defined(CONFIG_BOOKE)
+	vcpu->arch.epr = epr;
+#endif
+}
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg);
 int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 964f4475f55c..940ec806187e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -306,7 +306,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 {
 	int allowed = 0;
 	ulong msr_mask = 0;
-	bool update_esr = false, update_dear = false;
+	bool update_esr = false, update_dear = false, update_epr = false;
 	ulong crit_raw = vcpu->arch.shared->critical;
 	ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
 	bool crit;
@@ -330,6 +330,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		keep_irq = true;
 	}
 
+	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
+		update_epr = true;
+
 	switch (priority) {
 	case BOOKE_IRQPRIO_DTLB_MISS:
 	case BOOKE_IRQPRIO_DATA_STORAGE:
@@ -408,6 +411,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 			set_guest_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
 			set_guest_dear(vcpu, vcpu->arch.queued_dear);
+		if (update_epr == true)
+			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
 
 		new_msr &= msr_mask;
 #if defined(CONFIG_64BIT)
@@ -615,6 +620,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 		r = 0;
 	}
 
+	if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) {
+		vcpu->run->epr.epr = 0;
+		vcpu->arch.epr_needed = true;
+		vcpu->run->exit_reason = KVM_EXIT_EPR;
+		r = 0;
+	}
+
 	return r;
 }
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e2225e5b8a4c..934413cd3a1b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -306,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_SREGS:
 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+	case KVM_CAP_PPC_EPR:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
 	case KVM_CAP_PPC_HIOR:
@@ -721,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		for (i = 0; i < 9; ++i)
 			kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
 		vcpu->arch.hcall_needed = 0;
+#ifdef CONFIG_BOOKE
+	} else if (vcpu->arch.epr_needed) {
+		kvmppc_set_epr(vcpu, run->epr.epr);
+		vcpu->arch.epr_needed = 0;
+#endif
 	}
 
 	r = kvmppc_vcpu_run(run, vcpu);
@@ -762,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+	case KVM_CAP_PPC_EPR:
+		r = 0;
+		vcpu->arch.epr_enabled = cap->args[0];
+		break;
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
 		r = 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cbe0d683e2e5..4dd7d7531e69 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -122,6 +122,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_WATCHDOG          18
 #define KVM_REQ_MASTERCLOCK_UPDATE 19
 #define KVM_REQ_MCLOCK_INPROGRESS 20
+#define KVM_REQ_EPR_EXIT          21
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8bb0bf83afc5..9a2db5767ed5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -169,6 +169,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_S390_UCONTROL	  20
 #define KVM_EXIT_WATCHDOG         21
 #define KVM_EXIT_S390_TSCH        22
+#define KVM_EXIT_EPR              23
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -295,6 +296,10 @@ struct kvm_run {
 			__u32 ipb;
 			__u8 dequeued;
 		} s390_tsch;
+		/* KVM_EXIT_EPR */
+		struct {
+			__u32 epr;
+		} epr;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -656,6 +661,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
 #define KVM_CAP_S390_CSS_SUPPORT 85
+#define KVM_CAP_PPC_EPR 86
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3