diff options
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r-- | arch/s390/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/s390/kvm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/kvm/diag.c | 12 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.c | 95 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.h | 38 | ||||
-rw-r--r-- | arch/s390/kvm/guestdbg.c | 8 | ||||
-rw-r--r-- | arch/s390/kvm/intercept.c | 85 | ||||
-rw-r--r-- | arch/s390/kvm/interrupt.c | 242 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 661 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 35 | ||||
-rw-r--r-- | arch/s390/kvm/priv.c | 20 | ||||
-rw-r--r-- | arch/s390/kvm/sigp.c | 8 | ||||
-rw-r--r-- | arch/s390/kvm/trace-s390.h | 6 |
13 files changed, 806 insertions, 407 deletions
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 5fce52cf0e57..5ea5af3c7db7 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -29,6 +29,7 @@ config KVM select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select SRCU + select KVM_VFIO ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index b3b553469650..d42fa38c2429 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88f8477..1ea4095b67d7 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -14,6 +14,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" @@ -155,10 +156,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +166,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7207df..66938d283b77 100644 --- a/arch/s390/kvm/gaccess.c +++ 
b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if (old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = 
READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } @@ -357,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) } static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, - int write) + enum gacc_mode mode) { union alet alet; struct ale ale; @@ -438,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, } } - if (ale.fo == 1 && write) + if (ale.fo == 1 && mode == GACC_STORE) return PGM_PROTECTION; asce->val = aste.asce; @@ -461,25 +477,28 @@ enum { }; static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, int write) + ar_t ar, enum gacc_mode mode) { int rc; - psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; struct trans_exc_code_bits *tec_bits; memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw_bits(*psw).as; + tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw.as; - if (!psw_bits(*psw).t) { + if (!psw.t) { asce->val = 0; asce->r = 1; return 0; } - switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { + if (mode == GACC_IFETCH) + psw.as = psw.as == PSW_AS_HOME ? 
PSW_AS_HOME : PSW_AS_PRIMARY; + + switch (psw.as) { case PSW_AS_PRIMARY: asce->val = vcpu->arch.sie_block->gcr[1]; return 0; @@ -490,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, asce->val = vcpu->arch.sie_block->gcr[13]; return 0; case PSW_AS_ACCREG: - rc = ar_translation(vcpu, asce, ar, write); + rc = ar_translation(vcpu, asce, ar, mode); switch (rc) { case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -522,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @gva: guest virtual address * @gpa: points to where guest physical (absolute) address should be stored * @asce: effective asce - * @write: indicates if access is a write access + * @mode: indicates the access mode to be used * * Translate a guest virtual address into a guest absolute address by means * of dynamic address translation as specified by the architecture. @@ -538,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, unsigned long *gpa, const union asce asce, - int write) + enum gacc_mode mode) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; @@ -683,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, real_address: raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); absolute_address: - if (write && dat_protection) + if (mode == GACC_STORE && dat_protection) return PGM_PROTECTION; if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) return PGM_ADDRESSING; @@ -712,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, - const union asce asce, int write) + const union asce asce, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -724,13 +743,13 @@ 
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); tec_bits->addr = ga >> PAGE_SHIFT; - if (write && lap_enabled && is_low_address(ga)) { + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { pgm->code = PGM_PROTECTION; return pgm->code; } ga &= PAGE_MASK; if (psw_bits(*psw).t) { - rc = guest_translate(vcpu, ga, pages, asce, write); + rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; if (rc == PGM_PROTECTION) @@ -752,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, } int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write) + unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long _len, nr_pages, gpa, idx; @@ -764,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -776,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); + rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode == GACC_STORE) rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); else rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); @@ -796,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, } int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write) + void *data, unsigned long len, enum gacc_mode 
mode) { unsigned long _len, gpa; int rc = 0; @@ -804,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode) rc = write_guest_abs(vcpu, gpa, data, _len); else rc = read_guest_abs(vcpu, gpa, data, _len); @@ -825,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * has to take care of this. */ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long *gpa, int write) + unsigned long *gpa, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -835,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, gva = kvm_s390_logical_to_effective(vcpu, gva); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (write) { + if (mode == GACC_STORE) { rc = pgm->code = PGM_PROTECTION; return rc; } } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? 
*/ - rc = guest_translate(vcpu, gva, gpa, asce, write); + rc = guest_translate(vcpu, gva, gpa, asce, mode); if (rc > 0) { if (rc == PGM_PROTECTION) tec->b61 = 1; @@ -867,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, * check_gva_range - test a range of guest virtual addresses for accessibility */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write) + unsigned long length, enum gacc_mode mode) { unsigned long gpa; unsigned long currlen; @@ -876,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, ipte_lock(vcpu); while (length > 0 && !rc) { currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); + rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); gva += currlen; length -= currlen; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index ef03726cc661..df0a79dd8159 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, return kvm_read_guest(vcpu->kvm, gpa, data, len); } +enum gacc_mode { + GACC_FETCH, + GACC_STORE, + GACC_IFETCH, +}; + int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - ar_t ar, unsigned long *gpa, int write); + ar_t ar, unsigned long *gpa, enum gacc_mode mode); int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write); + unsigned long length, enum gacc_mode mode); int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write); + unsigned long len, enum gacc_mode mode); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write); + void *data, unsigned long len, enum gacc_mode mode); /** * write_guest - copy data from kernel space to guest space @@ -215,7 +221,7 @@ static 
inline __must_check int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 1); + return access_guest(vcpu, ga, ar, data, len, GACC_STORE); } /** @@ -235,7 +241,27 @@ static inline __must_check int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 0); + return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); +} + +/** + * read_guest_instr - copy instruction data from guest space to kernel space + * @vcpu: virtual cpu + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from the current psw address (guest space) to @data (kernel + * space). + * + * The behaviour of read_guest_instr is identical to read_guest, except that + * instruction data will be read from primary space when in home-space or + * address-space mode. + */ +static inline __must_check +int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) +{ + return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, + GACC_IFETCH); } /** diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 47518a324d75..e8c6843b9600 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -17,7 +17,7 @@ /* * Extends the address range given by *start and *stop to include the address * range starting with estart and the length len. Takes care of overflowing - * intervals and tries to minimize the overall intervall size. + * intervals and tries to minimize the overall interval size. */ static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) { @@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu) return; /* - * If the guest is not interrested in branching events, we can savely + * If the guest is not interested in branching events, we can safely * limit them to the PER address range. 
*/ if (!(*cr9 & PER_EVENT_BRANCH)) @@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu) if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) { *cr9 &= ~PER_CONTROL_ALTERATION; *cr10 = 0; - *cr11 = PSW_ADDR_INSN; + *cr11 = -1UL; } else { *cr9 &= ~PER_CONTROL_ALTERATION; *cr9 |= PER_EVENT_STORE; @@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gcr[0] &= ~0x800ul; vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH; vcpu->arch.sie_block->gcr[10] = 0; - vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN; + vcpu->arch.sie_block->gcr[11] = -1UL; } if (guestdbg_hw_bp_enabled(vcpu)) { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa110cec..2e6b54e4d3f9 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,25 +38,37 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + u8 ilen = 0; - /* Use the length of the EXECUTE instruction if necessary */ - if (sie_block->icptstatus & 1) { - ilc = (sie_block->icptstatus >> 4) & 0x6; - if (!ilc) - ilc = 4; + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* instruction only stored for these icptcodes */ + ilen = insn_length(vcpu->arch.sie_block->ipa >> 8); + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilen = (sie_block->icptstatus >> 4) & 0x6; + if (!ilen) + ilen = 4; + } + break; + case ICPT_PROGI: + /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */ + ilen = vcpu->arch.sie_block->pgmilc & 0x6; + break; } - sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); + return ilen; } static int handle_noop(struct kvm_vcpu *vcpu) { switch 
(vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -124,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } -static void __extract_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) { - memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); - pgm_info->code = vcpu->arch.sie_block->iprcc; + struct kvm_s390_pgm_info pgm_info = { + .code = vcpu->arch.sie_block->iprcc, + /* the PSW has already been rewound */ + .flags = KVM_S390_PGM_FLAGS_NO_REWIND, + }; switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { case PGM_AFX_TRANSLATION: @@ -141,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; break; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -149,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: @@ -157,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; - pgm_info->op_access_id = vcpu->arch.sie_block->oai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.op_access_id = vcpu->arch.sie_block->oai; break; case PGM_MONITOR: - pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; - pgm_info->mon_code = 
vcpu->arch.sie_block->tecmc; + pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn; + pgm_info.mon_code = vcpu->arch.sie_block->tecmc; break; case PGM_VECTOR_PROCESSING: case PGM_DATA: - pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; + pgm_info.data_exc_code = vcpu->arch.sie_block->dxc; break; case PGM_PROTECTION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; default: break; } if (vcpu->arch.sie_block->iprcc & PGM_PER) { - pgm_info->per_code = vcpu->arch.sie_block->perc; - pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; - pgm_info->per_address = vcpu->arch.sie_block->peraddr; - pgm_info->per_access_id = vcpu->arch.sie_block->peraid; + pgm_info.per_code = vcpu->arch.sie_block->perc; + pgm_info.per_atmid = vcpu->arch.sie_block->peratmid; + pgm_info.per_address = vcpu->arch.sie_block->peraddr; + pgm_info.per_access_id = vcpu->arch.sie_block->peraid; } + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } /* @@ -211,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu) static int handle_prog(struct kvm_vcpu *vcpu) { - struct kvm_s390_pgm_info pgm_info; psw_t psw; int rc; @@ -237,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) if (rc) return rc; - __extract_prog_irq(vcpu, &pgm_info); - return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + return inject_prog_on_prog_intercept(vcpu); } /** @@ -305,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the source is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - reg2, &srcaddr, 0); + reg2, &srcaddr, GACC_FETCH); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); @@ -314,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the destination is paged-in */ rc = guest_translate_address(vcpu, 
vcpu->run->s.regs.gprs[reg1], - reg1, &dstaddr, 1); + reg1, &dstaddr, GACC_STORE); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); if (rc != 0) return rc; - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); return 0; } @@ -338,8 +351,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 373e32346d68..84efc2ba6a90 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -23,6 +23,7 @@ #include <asm/uaccess.h> #include <asm/sclp.h> #include <asm/isc.h> +#include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -34,6 +35,106 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + int c, scn; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (src_id) + *src_id = scn; + + return c; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + int expect, rc; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + 
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (rc != expect) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc, expect; + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + WARN_ON(rc != expect); /* cannot clear? 
*/ +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -82,8 +183,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) { - return (vcpu->arch.sie_block->cputm >> 63) && - cpu_timer_interrupts_enabled(vcpu); + if (!cpu_timer_interrupts_enabled(vcpu)) + return 0; + return kvm_s390_get_cpu_timer(vcpu) >> 63; } static inline int is_ioirq(unsigned long irq_type) @@ -235,23 +337,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu) set_intercept_indicators_stop(vcpu); } -static u16 get_ilc(struct kvm_vcpu *vcpu) -{ - switch (vcpu->arch.sie_block->icptcode) { - case ICPT_INST: - case ICPT_INSTPROGI: - case ICPT_OPEREXC: - case ICPT_PARTEXEC: - case ICPT_IOINST: - /* last instruction only stored for these icptcodes */ - return insn_length(vcpu->arch.sie_block->ipa >> 8); - case ICPT_PROGI: - return vcpu->arch.sie_block->pgmilc; - default: - return 0; - } -} - static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -399,9 +484,9 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); rc = write_guest_lc(vcpu, - offsetof(struct _lowcore, restart_old_psw), + offsetof(struct lowcore, restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); return rc ? 
-EFAULT : 0; @@ -488,7 +573,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_pgm_info pgm_info; int rc = 0, nullifying = false; - u16 ilc = get_ilc(vcpu); + u16 ilen; spin_lock(&li->lock); pgm_info = li->irq.pgm; @@ -496,8 +581,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) memset(&li->irq.pgm, 0, sizeof(pgm_info)); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", - pgm_info.code, ilc); + ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; + VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", + pgm_info.code, ilen); vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info.code, 0); @@ -581,10 +667,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *) __LC_PER_ACCESS_ID); } - if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) - kvm_s390_rewind_psw(vcpu, ilc); + if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND)) + kvm_s390_rewind_psw(vcpu, ilen); - rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + /* bit 1+2 of the target are the ilc, so we can directly use ilen */ + rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, @@ -792,13 +879,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ 
-825,9 +910,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } +static u64 __calculate_sltime(struct kvm_vcpu *vcpu) +{ + u64 now, cputm, sltime = 0; + + if (ckc_interrupts_enabled(vcpu)) { + now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + /* already expired or overflow? */ + if (!sltime || vcpu->arch.sie_block->ckc <= now) + return 0; + if (cpu_timer_interrupts_enabled(vcpu)) { + cputm = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (cputm >> 63) + return 0; + return min(sltime, tod_to_ns(cputm)); + } + } else if (cpu_timer_interrupts_enabled(vcpu)) { + sltime = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (sltime >> 63) + return 0; + } + return sltime; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { - u64 now, sltime; + u64 sltime; vcpu->stat.exit_wait_state++; @@ -840,22 +951,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } - if (!ckc_interrupts_enabled(vcpu)) { + if (!ckc_interrupts_enabled(vcpu) && + !cpu_timer_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); __set_cpu_idle(vcpu); goto no_timer; } - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - - /* underflow */ - if (vcpu->arch.sie_block->ckc < now) + sltime = __calculate_sltime(vcpu); + if (!sltime) return 0; __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); - VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); + VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); @@ -868,13 +977,13 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - if (waitqueue_active(&vcpu->wq)) { + if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, 
mark it as a good * yield-candidate. */ vcpu->preempted = true; - wake_up_interruptible(&vcpu->wq); + swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -882,18 +991,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; - u64 now, sltime; + u64 sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + sltime = __calculate_sltime(vcpu); /* * If the monotonic clock runs faster than the tod clock we might be * woken up too early and have to go back to sleep to avoid deadlocks. */ - if (vcpu->arch.sie_block->ckc > now && - hrtimer_forward_now(timer, ns_to_ktime(sltime))) + if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) return HRTIMER_RESTART; kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; @@ -909,9 +1016,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -963,8 +1068,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, irq->u.pgm.code, 0); + if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { + /* auto detection if no valid ILC was given */ + irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK; + irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu); + irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID; + } + if (irq->u.pgm.code == PGM_PER) { li->irq.pgm.code |= PGM_PER; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify PER related information */ li->irq.pgm.per_address = irq->u.pgm.per_address; 
li->irq.pgm.per_code = irq->u.pgm.per_code; @@ -973,6 +1086,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) } else if (!(irq->u.pgm.code & PGM_PER)) { li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | irq->u.pgm.code; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify non-PER information */ li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; li->irq.pgm.mon_code = irq->u.pgm.mon_code; @@ -1003,21 +1117,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1030,12 +1129,11 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) src_id, 0); /* sending vcpu invalid */ - if (src_id >= KVM_MAX_VCPUS || - kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -1110,6 +1208,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, irq->u.emerg.code, 0); + /* sending vcpu invalid */ + if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL) + return -EINVAL; + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); 
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_or(CPUSTAT_EXT_INT, li->cpuflags); @@ -2200,7 +2302,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2240,14 +2342,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8fe2f1c722dc..668c087513e5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include <asm/lowcore.h> #include <asm/etr.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/nmi.h> #include <asm/switch_to.h> #include <asm/isc.h> @@ -158,6 +159,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, kvm->arch.epoch -= *delta; kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.sie_block->epoch -= *delta; + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start += *delta; } } return NOTIFY_OK; @@ -246,7 +249,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? 
KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -257,6 +261,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VECTOR_REGISTERS: r = MACHINE_HAS_VX; break; + case KVM_CAP_S390_RI: + r = test_facility(64); + break; default: r = 0; } @@ -270,19 +277,22 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, unsigned long address; struct gmap *gmap = kvm->arch.gmap; - down_read(&gmap->mm->mmap_sem); /* Loop over all guest pages */ last_gfn = memslot->base_gfn + memslot->npages; for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { address = gfn_to_hva_memslot(memslot, cur_gfn); - if (gmap_test_and_clear_dirty(address, gmap)) + if (test_and_clear_guest_dirty(gmap->mm, address)) mark_page_dirty(kvm, cur_gfn); + if (fatal_signal_pending(current)) + return; + cond_resched(); } - up_read(&gmap->mm->mmap_sem); } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -342,15 +352,33 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - if (MACHINE_HAS_VX) { - set_kvm_facility(kvm->arch.model.fac->mask, 129); - set_kvm_facility(kvm->arch.model.fac->list, 129); + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (MACHINE_HAS_VX) { + set_kvm_facility(kvm->arch.model.fac_mask, 129); + set_kvm_facility(kvm->arch.model.fac_list, 129); r = 0; } else r = -EINVAL; + mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", r ? 
"(not available)" : "(success)"); break; + case KVM_CAP_S390_RI: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(64)) { + set_kvm_facility(kvm->arch.model.fac_mask, 64); + set_kvm_facility(kvm->arch.model.fac_list, 64); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -371,8 +399,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -424,9 +452,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -443,7 +479,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -617,7 +655,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, 
sizeof(struct cpuid)); kvm->arch.model.ibc = proc->ibc; - memcpy(kvm->arch.model.fac->list, proc->fac_list, + memcpy(kvm->arch.model.fac_list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); } else ret = -EFAULT; @@ -651,7 +689,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) } memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); proc->ibc = kvm->arch.model.ibc; - memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); + memcpy(&proc->fac_list, kvm->arch.model.fac_list, + S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -671,7 +710,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) } get_cpu_id((struct cpuid *) &mach->cpuid); mach->ibc = sclp.ibc; - memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, + memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); @@ -1020,7 +1059,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) @@ -1048,16 +1087,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) cpu_id->version = 0xff; } -static int kvm_s390_crypto_init(struct kvm *kvm) +static void kvm_s390_crypto_init(struct kvm *kvm) { if (!test_kvm_facility(kvm, 76)) - return 0; - - kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), - GFP_KERNEL | GFP_DMA); - if (!kvm->arch.crypto.crycb) - return -ENOMEM; + return; + kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); /* Enable AES/DEA protected key functions by default */ @@ -1067,8 +1102,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); +} - return 0; +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) @@ -1094,14 +1136,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1110,37 +1155,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_err; - /* - * The architectural maximum amount of facilities is 16 kbit. To store - * this amount, 2 kbyte of memory is required. Thus we need a full - * page to hold the guest facility list (arch.model.fac->list) and the - * facility mask (arch.model.fac->mask). Its address size has to be - * 31 bits and word aligned. - */ - kvm->arch.model.fac = - (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!kvm->arch.model.fac) + kvm->arch.sie_page2 = + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!kvm->arch.sie_page2) goto out_err; /* Populate the facility mask initially. 
*/ - memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, + memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) - kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; + kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; else - kvm->arch.model.fac->mask[i] = 0UL; + kvm->arch.model.fac_mask[i] = 0UL; } /* Populate the facility list initially. */ - memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, + kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; + memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); kvm->arch.model.ibc = sclp.ibc & 0x0fff; - if (kvm_s390_crypto_init(kvm) < 0) - goto out_err; + kvm_s390_crypto_init(kvm); spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) @@ -1153,8 +1191,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -1166,14 +1210,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: - kfree(kvm->arch.crypto.crycb); - free_page((unsigned long)kvm->arch.model.fac); + free_page((unsigned long)kvm->arch.sie_page2); debug_unregister(kvm->arch.dbf); - free_page((unsigned 
long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1184,14 +1227,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; - } - smp_mb(); + if (!kvm_is_ucontrol(vcpu->kvm)) + sca_del_vcpu(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); @@ -1223,15 +1260,14 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); - free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); - kfree(kvm->arch.crypto.crycb); + free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1245,6 +1281,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block 
*sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA 
(0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); + return 0; +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1255,8 +1402,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; - if (test_kvm_facility(vcpu->kvm, 129)) + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + /* fprs can be synchronized via vrs, even if the guest has no vx. With + * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. + */ + if (MACHINE_HAS_VX) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; + else + vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; if (kvm_is_ucontrol(vcpu->kvm)) return __kvm_ucontrol_vcpu_init(vcpu); @@ -1264,44 +1418,105 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -/* - * Backs up the current FP/VX register save area on a particular - * destination. Used to switch between different register save - * areas. - */ -static inline void save_fpu_to(struct fpu *dst) +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) { - dst->fpc = current->thread.fpu.fpc; - dst->regs = current->thread.fpu.regs; + WARN_ON_ONCE(vcpu->arch.cputm_start != 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + vcpu->arch.cputm_start = get_tod_clock_fast(); + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); } -/* - * Switches the FP/VX register save area from which to lazy - * restore register contents. 
- */ -static inline void load_fpu_from(struct fpu *from) +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) { - current->thread.fpu.fpc = from->fpc; - current->thread.fpu.regs = from->regs; + WARN_ON_ONCE(vcpu->arch.cputm_start == 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; + vcpu->arch.cputm_start = 0; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); } -void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) { - /* Save host register state */ - save_fpu_regs(); - save_fpu_to(&vcpu->arch.host_fpregs); + WARN_ON_ONCE(vcpu->arch.cputm_enabled); + vcpu->arch.cputm_enabled = true; + __start_cpu_timer_accounting(vcpu); +} - if (test_kvm_facility(vcpu->kvm, 129)) { - current->thread.fpu.fpc = vcpu->run->s.regs.fpc; +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!vcpu->arch.cputm_enabled); + __stop_cpu_timer_accounting(vcpu); + vcpu->arch.cputm_enabled = false; +} + +static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __enable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __disable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +/* set the cpu timer - may only be called from the VCPU thread itself */ +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + if 
(vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start = get_tod_clock_fast(); + vcpu->arch.sie_block->cputm = cputm; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); + preempt_enable(); +} + +/* update and get the cpu timer - can also be called from other VCPU threads */ +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) +{ + unsigned int seq; + __u64 value; + + if (unlikely(!vcpu->arch.cputm_enabled)) + return vcpu->arch.sie_block->cputm; + + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + do { + seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); /* - * Use the register save area in the SIE-control block - * for register restore and save in kvm_arch_vcpu_put() + * If the writer would ever execute a read in the critical + * section, e.g. in irq context, we have a deadlock. */ - current->thread.fpu.vxrs = - (__vector128 *)&vcpu->run->s.regs.vrs; - } else - load_fpu_from(&vcpu->arch.guest_fpregs); + WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); + value = vcpu->arch.sie_block->cputm; + /* if cputm_start is 0, accounting is being started/stopped */ + if (likely(vcpu->arch.cputm_start)) + value -= get_tod_clock_fast() - vcpu->arch.cputm_start; + } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); + preempt_enable(); + return value; +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + /* Save host register state */ + save_fpu_regs(); + vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; + vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; + if (MACHINE_HAS_VX) + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + else + current->thread.fpu.regs = vcpu->run->s.regs.fprs; + current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; @@ -1310,26 +1525,26 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_access_regs(vcpu->run->s.regs.acrs); 
gmap_enable(vcpu->arch.gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) + __start_cpu_timer_accounting(vcpu); + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + vcpu->cpu = -1; + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) + __stop_cpu_timer_accounting(vcpu); atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); + /* Save guest register state */ save_fpu_regs(); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - if (test_kvm_facility(vcpu->kvm, 129)) - /* - * kvm_arch_vcpu_load() set up the register save area to - * the &vcpu->run->s.regs.vrs and, thus, the vector registers - * are already saved. Only the floating-point control must be - * copied. - */ - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - else - save_fpu_to(&vcpu->arch.guest_fpregs); - load_fpu_from(&vcpu->arch.host_fpregs); + /* Restore host register state */ + current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; + current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); @@ -1341,14 +1556,15 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw.mask = 0UL; vcpu->arch.sie_block->gpsw.addr = 0UL; kvm_s390_set_prefix(vcpu, 0); - vcpu->arch.sie_block->cputm = 0UL; + kvm_s390_set_cpu_timer(vcpu, 0); vcpu->arch.sie_block->ckc = 0UL; vcpu->arch.sie_block->todpr = 0; memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); vcpu->arch.sie_block->gcr[0] = 0xE0UL; vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; - vcpu->arch.guest_fpregs.fpc = 0; - asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + /* make sure the new fpc will be lazily loaded */ + save_fpu_regs(); + current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1365,8 
+1581,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu); + } + } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1407,7 +1626,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.cpu_id = model->cpu_id; vcpu->arch.sie_block->ibc = model->ibc; - vcpu->arch.sie_block->fac = (int) (long) model->fac->list; + if (test_kvm_facility(vcpu->kvm, 7)) + vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) @@ -1435,10 +1655,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->arch.sie_block->ecb3 |= 0x01; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { @@ -1461,7 +1684,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1478,42 +1701,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = 
(__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); - } - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - - /* - * Allocate a save area for floating-point registers. If the vector - * extension is available, register contents are saved in the SIE - * control block. The allocated save area is still required in - * particular places, for example, in kvm_s390_vcpu_store_status(). - */ - vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, - GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; - goto out_free_sie_block; - } + seqcount_init(&vcpu->arch.cputm_seqcount); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); @@ -1608,7 +1805,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = put_user(vcpu->arch.sie_block->cputm, + r = put_user(kvm_s390_get_cpu_timer(vcpu), (u64 __user *)reg->addr); break; case KVM_REG_S390_CLOCK_COMP: @@ -1646,6 +1843,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { int r = -EINVAL; + __u64 val; switch (reg->id) { case KVM_REG_S390_TODPR: @@ -1657,8 +1855,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = get_user(vcpu->arch.sie_block->cputm, - (u64 __user *)reg->addr); + r = get_user(val, (u64 __user *)reg->addr); + if (!r) + kvm_s390_set_cpu_timer(vcpu, val); break; case KVM_REG_S390_CLOCK_COMP: r = get_user(vcpu->arch.sie_block->ckc, @@ -1730,19 +1929,27 @@ int 
kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + /* make sure the new values will be lazily loaded */ + save_fpu_regs(); if (test_fp_ctl(fpu->fpc)) return -EINVAL; - memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - vcpu->arch.guest_fpregs.fpc = fpu->fpc; - save_fpu_regs(); - load_fpu_from(&vcpu->arch.guest_fpregs); + current->thread.fpu.fpc = fpu->fpc; + if (MACHINE_HAS_VX) + convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + else + memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); - fpu->fpc = vcpu->arch.guest_fpregs.fpc; + /* make sure we have the latest values */ + save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + else + memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + fpu->fpc = current->thread.fpu.fpc; return 0; } @@ -2009,7 +2216,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2042,8 +2250,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - u8 opcode; + struct kvm_s390_pgm_info pgm_info = { + .code = PGM_ADDRESSING, + }; + u8 opcode, ilen; int rc; VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); @@ -2057,18 +2267,25 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) * to look up the current opcode to get the length of the instruction * to be able to forward the PSW. 
*/ - rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - psw->addr = __rewind_psw(*psw, -insn_length(opcode)); - - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = read_guest_instr(vcpu, &opcode, 1); + ilen = insn_length(opcode); + if (rc < 0) { + return rc; + } else if (rc) { + /* Instruction-Fetching Exceptions - we can't detect the ilen. + * Forward by arbitrary ilc, injection will take care of + * nullification if necessary. + */ + pgm_info = vcpu->arch.pgm; + ilen = 4; + } + pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; + kvm_s390_forward_psw(vcpu, ilen); + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2076,40 +2293,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { 
trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); - } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2134,10 +2347,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) */ local_irq_disable(); __kvm_guest_enter(); + __disable_cpu_timer_accounting(vcpu); local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); local_irq_disable(); + __enable_cpu_timer_accounting(vcpu); __kvm_guest_exit(); local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2161,7 +2376,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { - vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; + kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; @@ -2183,7 +2398,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); - kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; + kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; @@ -2215,6 +2430,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } sync_regs(vcpu, kvm_run); + enable_cpu_timer_accounting(vcpu); might_fault(); rc = __vcpu_run(vcpu); @@ -2229,21 +2445,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } + disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); if (vcpu->sigset_active) @@ -2262,41 +2469,51 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) { unsigned char archmode = 1; + freg_t fprs[NUM_FPRS]; unsigned int px; - u64 clkcomp; + u64 clkcomp, cputm; int rc; + px = kvm_s390_get_prefix(vcpu); if (gpa == KVM_S390_STORE_STATUS_NOADDR) { if (write_guest_abs(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = SAVE_AREA_BASE; + gpa = 0; } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { if (write_guest_real(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); + gpa = px; + } else + gpa -= __LC_FPREGS_SAVE_AREA; + + /* manually convert vector registers if necessary */ + if (MACHINE_HAS_VX) { + convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + fprs, 128); + } else { + 
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + vcpu->run->s.regs.fprs, 128); } - rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), - vcpu->arch.guest_fpregs.fprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), + rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, &vcpu->arch.sie_block->gpsw, 16); - px = kvm_s390_get_prefix(vcpu); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, &px, 4); - rc |= write_guest_abs(vcpu, - gpa + offsetof(struct save_area, fp_ctrl_reg), - &vcpu->arch.guest_fpregs.fpc, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, + &vcpu->run->s.regs.fpc, 4); + rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), - &vcpu->arch.sie_block->cputm, 8); + cputm = kvm_s390_get_cpu_timer(vcpu); + rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, + &cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), + rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, &vcpu->run->s.regs.acrs, 64); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, &vcpu->arch.sie_block->gcr, 128); return rc ? 
-EFAULT : 0; } @@ -2309,19 +2526,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * it into the save area */ save_fpu_regs(); - if (test_kvm_facility(vcpu->kvm, 129)) { - /* - * If the vector extension is available, the vector registers - * which overlaps with floating-point registers are saved in - * the SIE-control block. Hence, extract the floating-point - * registers and the FPC value and store them in the - * guest_fpregs structure. - */ - vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; - convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, - current->thread.fpu.vxrs); - } else - save_fpu_to(&vcpu->arch.guest_fpregs); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); @@ -2508,7 +2713,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_FETCH); break; } r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); @@ -2519,7 +2725,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_STORE); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { @@ -2732,6 +2939,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } @@ -2763,6 +2973,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return 
kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00d3c5e..8621ab00ec8e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,6 +19,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/facility.h> +#include <asm/processor.h> typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); @@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } +static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT; +} + static inline int kvm_is_ucontrol(struct kvm *kvm) { #ifdef CONFIG_KVM_S390_UCONTROL @@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) /* test availability of facility in a kvm instance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { - return __test_facility(nr, kvm->arch.model.fac->mask) && - __test_facility(nr, kvm->arch.model.fac->list); + return __test_facility(nr, kvm->arch.model.fac_mask) && + __test_facility(nr, kvm->arch.model.fac_list); } static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) @@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm, int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in intercept.c */ -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu); int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); +static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen); +} +static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen) +{ + kvm_s390_rewind_psw(vcpu, -ilen); +} +static inline void kvm_s390_retry_instr(struct kvm_vcpu 
*vcpu) +{ + kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); +} /* implemented in priv.c */ int is_valid_psw(psw_t *psw); @@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); unsigned long kvm_s390_fac_list_mask_size(void); extern unsigned long kvm_s390_fac_list_mask[]; +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); @@ -340,4 +362,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; +} #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 77191b85ea7a..0a1591d3d25d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -23,6 +23,7 @@ #include <asm/sysinfo.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> @@ -173,7 +174,7 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -184,7 +185,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu) if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); 
return 0; } @@ -354,8 +355,8 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * We need to shift the lower 32 facility bits (bit 0-31) from a u64 * into a u32 memory representation. They will remain bits 0-31. */ - fac = *vcpu->kvm->arch.model.fac->list >> 32; - rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), + fac = *vcpu->kvm->arch.model.fac_list >> 32; + rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) return rc; @@ -660,7 +661,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); - if (!MACHINE_HAS_PFMF) + if (!test_kvm_facility(vcpu->kvm, 8)) return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) @@ -759,8 +760,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* Rewind PSW to repeat the ESSA instruction */ - kvm_s390_rewind_psw(vcpu, 4); + /* Retry the ESSA instruction */ + kvm_s390_retry_instr(vcpu); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); @@ -981,11 +982,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); + ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); if (ret == PGM_PROTECTION) { /* Write protected? Try again with read-only...
*/ cc = 1; - ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); + ret = guest_translate_address(vcpu, address1, ar, &gpa, + GACC_FETCH); } if (ret) { if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index da690b69f9fe..77c22d685c7a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, u16 cpu_addr, u32 parameter, u64 *status_reg) { int rc; - struct kvm_vcpu *dst_vcpu; + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; @@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); if (order_code == SIGP_EXTERNAL_CALL) { - dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); kvm_s390_vcpu_wakeup(dest_vcpu); diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c68356f..396485bca191 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); |