From 72e1ad4200d5ed5c5adf120b77fb2900a29a48e5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 19 Sep 2017 12:34:06 +0200 Subject: KVM: s390: document memory ordering for kvm_s390_vcpu_wakeup swait_active does not enforce any ordering and it can therefore trigger some subtle races when the CPU moves the read for the check before a previous store and that store is then used on another CPU that is preparing the swait. On s390 there is a call to swait_active in kvm_s390_vcpu_wakeup. The good thing is, on s390 none of the potential races can happen because every caller of kvm_s390_vcpu_wakeup either does not store (no race) or uses an atomic operation, which handles memory ordering. Since this is not guaranteed by the Linux semantics (but by the implementation on s390) let's add smp_mb__after_atomic to make this obvious and document the ordering. Suggested-by: Paolo Bonzini Acked-by: Halil Pasic Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a832ad031cee..23d8fb25c5dd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1074,6 +1074,12 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) * in kvm_vcpu_block without having the waitqueue set (polling) */ vcpu->valid_wakeup = true; + /* + * This is mostly to document, that the read in swait_active could + * be moved before other stores, leading to subtle races. 
+ * All current users do not store or use an atomic like update + */ + smp_mb__after_atomic(); if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good -- cgit v1.2.3 From ba850a8e64fbbd5f6407d5931124d00ced0528cc Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 13 Oct 2017 13:38:46 -0400 Subject: KVM: s390: SIE considerations for AP Queue virtualization The Crypto Control Block (CRYCB) is referenced by the SIE state description and controls KVM guest access to the Adjunct Processor (AP) adapters, usage domains and control domains. This patch defines the AP control blocks to be used for controlling guest access to the AP adapters and domains. Signed-off-by: Tony Krowiak Message-Id: <1507916344-3896-2-git-send-email-akrowiak@linux.vnet.ibm.com> Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index fd006a272024..f3a9b5a445b6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -685,11 +685,28 @@ struct kvm_s390_crypto { __u8 dea_kw; }; +#define APCB0_MASK_SIZE 1 +struct kvm_s390_apcb0 { + __u64 apm[APCB0_MASK_SIZE]; /* 0x0000 */ + __u64 aqm[APCB0_MASK_SIZE]; /* 0x0008 */ + __u64 adm[APCB0_MASK_SIZE]; /* 0x0010 */ + __u64 reserved18; /* 0x0018 */ +}; + +#define APCB1_MASK_SIZE 4 +struct kvm_s390_apcb1 { + __u64 apm[APCB1_MASK_SIZE]; /* 0x0000 */ + __u64 aqm[APCB1_MASK_SIZE]; /* 0x0020 */ + __u64 adm[APCB1_MASK_SIZE]; /* 0x0040 */ + __u64 reserved60[4]; /* 0x0060 */ +}; + struct kvm_s390_crypto_cb { - __u8 reserved00[72]; /* 0x0000 */ - __u8 dea_wrapping_key_mask[24]; /* 0x0048 */ - __u8 aes_wrapping_key_mask[32]; /* 0x0060 */ - __u8 reserved80[128]; /* 0x0080 */ + struct kvm_s390_apcb0 apcb0; /* 0x0000 */ + __u8 reserved20[0x0048 - 0x0020]; /* 0x0020 */ + __u8 
dea_wrapping_key_mask[24]; /* 0x0048 */ + __u8 aes_wrapping_key_mask[32]; /* 0x0060 */ + struct kvm_s390_apcb1 apcb1; /* 0x0080 */ }; /* -- cgit v1.2.3 From f7a6509fe002e3909cb41c09e807b7f3ca4a361b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 1 Sep 2017 17:11:43 +0200 Subject: KVM: s390: vsie: use common code functions for pinning We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE for all errors). So we can also get rid of special handling in the callers of pin_guest_page() and always assume that it is a g2 error. As also kvm_s390_inject_program_int() should never fail, we can simplify pin_scb(), too. Signed-off-by: David Hildenbrand Message-Id: <20170901151143.22714-1-david@redhat.com> Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/vsie.c | 50 +++++++++++++++++------------------------------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 4 ++-- 3 files changed, 21 insertions(+), 34 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index b18b5652e5c5..a311938b63b3 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -443,22 +443,14 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * * Returns: - 0 on success * - -EINVAL if the gpa is not valid guest storage - * - -ENOMEM if out of memory */ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) { struct page *page; - hva_t hva; - int rc; - hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - if (kvm_is_error_hva(hva)) + page = gfn_to_page(kvm, gpa_to_gfn(gpa)); + if (is_error_page(page)) return -EINVAL; - rc = get_user_pages_fast(hva, 1, 1, &page); - if (rc < 0) - return rc; - else if (rc != 1) - return -ENOMEM; *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); return 0; } @@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. 
*/ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) { - struct page *page; - - page = virt_to_page(hpa); - set_page_dirty_lock(page); - put_page(page); + kvm_release_pfn_dirty(hpa >> PAGE_SHIFT); /* mark the page always as dirty for migration */ mark_page_dirty(kvm, gpa_to_gfn(gpa)); } @@ -557,7 +545,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x003bU); if (!rc) { rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) rc = set_validity_icpt(scb_s, 0x0034U); } if (rc) @@ -574,10 +562,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } /* 256 bytes cannot cross page boundaries */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x0080U); - if (rc) goto unpin; + } scb_s->itdba = hpa; } @@ -592,10 +580,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * if this block gets bigger, we have to shadow it. 
*/ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x1310U); - if (rc) goto unpin; + } scb_s->gvrd = hpa; } @@ -607,11 +595,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } /* 64 bytes cannot cross page boundaries */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x0043U); - /* Validity 0x0044 will be checked by SIE */ - if (rc) goto unpin; + } + /* Validity 0x0044 will be checked by SIE */ scb_s->riccbd = hpa; } if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { @@ -635,10 +623,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * cross page boundaries */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x10b0U); - if (rc) goto unpin; + } scb_s->sdnxo = hpa | sdnxc; } return 0; @@ -663,7 +651,6 @@ static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, * * Returns: - 0 if the scb was pinned. 
* - > 0 if control has to be given to guest 2 - * - -ENOMEM if out of memory */ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, gpa_t gpa) @@ -672,14 +659,13 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, int rc; rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) { + if (rc) { rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - if (!rc) - rc = 1; + WARN_ON_ONCE(rc); + return 1; } - if (!rc) - vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; - return rc; + vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + return 0; } /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6882538eda32..2e754b7c282c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -667,6 +667,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); void kvm_release_pfn_clean(kvm_pfn_t pfn); +void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); void kvm_get_pfn(kvm_pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9deb5a245b83..37731f661be5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -122,7 +122,6 @@ static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); -static void kvm_release_pfn_dirty(kvm_pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); __visible bool kvm_rebooting; @@ -1679,11 +1678,12 @@ void kvm_release_page_dirty(struct page *page) } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); -static void kvm_release_pfn_dirty(kvm_pfn_t pfn) +void kvm_release_pfn_dirty(kvm_pfn_t pfn) { kvm_set_pfn_dirty(pfn); kvm_release_pfn_clean(pfn); } +EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); void kvm_set_pfn_dirty(kvm_pfn_t pfn) { -- cgit v1.2.3 From ee739f4b216e9394281cf99e6d93c67bdf4a37d2 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Mon, 3 Jul 2017 15:32:50 +0200 
Subject: KVM: s390: abstract conversion between isc and enum irq_types Abstracting the conversion between an isc value and an irq_type into the functions isc_to_irq_type() and irq_type_to_isc() makes the respective operations clearer where they are used. Signed-off-by: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: Pierre Morel Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 23d8fb25c5dd..a3da4f3065aa 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -213,6 +213,16 @@ static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) vcpu->arch.local_int.pending_irqs; } +static inline int isc_to_irq_type(unsigned long isc) +{ + return IRQ_PEND_IO_ISC_0 + isc; +} + +static inline int irq_type_to_isc(unsigned long irq_type) +{ + return irq_type - IRQ_PEND_IO_ISC_0; +} + static unsigned long disable_iscs(struct kvm_vcpu *vcpu, unsigned long active_mask) { @@ -220,7 +230,7 @@ static unsigned long disable_iscs(struct kvm_vcpu *vcpu, for (i = 0; i <= MAX_ISC; i++) if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i))) - active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i)); + active_mask &= ~(1UL << (isc_to_irq_type(i))); return active_mask; } @@ -901,7 +911,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, fi = &vcpu->kvm->arch.float_int; spin_lock(&fi->lock); - isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0]; + isc_list = &fi->lists[irq_type_to_isc(irq_type)]; inti = list_first_entry_or_null(isc_list, struct kvm_s390_interrupt_info, list); @@ -1401,7 +1411,7 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, list_del_init(&iter->list); fi->counters[FIRQ_CNTR_IO] -= 1; if (list_empty(isc_list)) - clear_bit(IRQ_PEND_IO_ISC_0 
+ isc, &fi->pending_irqs); + clear_bit(isc_to_irq_type(isc), &fi->pending_irqs); spin_unlock(&fi->lock); return iter; } @@ -1528,7 +1538,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); - set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + set_bit(isc_to_irq_type(isc), &fi->pending_irqs); spin_unlock(&fi->lock); return 0; } -- cgit v1.2.3 From 4dd6f17eb913d3d23dd6c07950627ac2c3068dca Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Thu, 6 Jul 2017 14:22:20 +0200 Subject: KVM: s390: clear_io_irq() requests are not expected for adapter interrupts There is a chance to delete not yet delivered I/O interrupts if an exploiter uses the subsystem identification word 0x0000 while processing a KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl. -EINVAL will be returned now instead in that case. Classic interrupts will always have bit 0x10000 set in the schid while adapter interrupts have a zero schid. The clear_io_irq interface is only useful for classic interrupts (as adapter interrupts belong to many devices). Let's make this interface more strict and forbid a schid of 0. Signed-off-by: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/devices/s390_flic.txt | 3 +++ arch/s390/kvm/interrupt.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 2f1cbf1301d2..27ad53c7149d 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -156,3 +156,6 @@ FLIC with an unknown group or attribute gives the error code EINVAL (instead of ENXIO, as specified in the API documentation). 
It is not possible to conclude that a FLIC operation is unavailable based on the error code resulting from a usage attempt. + +Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero +schid is specified. diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a3da4f3065aa..c8aacced23fb 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2191,6 +2191,8 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid))) return -EFAULT; + if (!schid) + return -EINVAL; kfree(kvm_s390_get_io_int(kvm, isc_mask, schid)); /* * If userspace is conforming to the architecture, we can have at most -- cgit v1.2.3 From da9a1446d248f673a8560ce46251ff620214ab7b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 9 Nov 2017 10:00:45 +0100 Subject: KVM: s390: provide a capability for AIS state migration The AIS capability was introduced in 4.12, while the interface to migrate the state was added in 4.13. Unfortunately it is not possible for userspace to detect the migration capability without creating a flic kvm device. As in QEMU the cpu model detection runs on the "none" machine this will result in cpu model issues regarding the "ais" capability. To get the "ais" capability properly let's add a new KVM capability that tells userspace that AIS states can be migrated. 
Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Acked-by: Halil Pasic --- Documentation/virtual/kvm/api.txt | 9 +++++++++ Documentation/virtual/kvm/devices/s390_flic.txt | 2 ++ arch/s390/kvm/kvm-s390.c | 1 + include/uapi/linux/kvm.h | 1 + 4 files changed, 13 insertions(+) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e63a35fafef0..49540e53c4bd 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4347,3 +4347,12 @@ This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its value is used to denote the target vcpu for a SynIC interrupt. For compatibilty, KVM initializes this msr to KVM's internal vcpu index. When this capability is absent, userspace can still query this msr's value. + +8.13 KVM_CAP_S390_AIS_MIGRATION + +Architectures: s390 +Parameters: none + +This capability indicates if the flic device will be able to get/set the +AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows +to discover this without having to create a flic device. diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 27ad53c7149d..a4e20a090174 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -151,6 +151,8 @@ struct kvm_s390_ais_all { to an ISC (MSB0 bit 0 to ISC 0 and so on). The combination of simm bit and nimm bit presents AIS mode for a ISC. + KVM_DEV_FLIC_AISM_ALL is indicated by KVM_CAP_S390_AIS_MIGRATION. + Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on FLIC with an unknown group or attribute gives the error code EINVAL (instead of ENXIO, as specified in the API documentation). 
It is not possible to conclude diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index de6a5b790da0..8f4b655f65d7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -395,6 +395,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_USER_INSTR0: case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: + case KVM_CAP_S390_AIS_MIGRATION: r = 1; break; case KVM_CAP_S390_MEM_OP: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 838887587411..b60595696836 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -930,6 +930,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_SMT_POSSIBLE 147 #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 +#define KVM_CAP_S390_AIS_MIGRATION 150 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3