diff options
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/irq.h | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/kvm_host.h | 98 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/processor.h | 1 | ||||
-rw-r--r-- | arch/s390/include/uapi/asm/kvm.h | 43 | ||||
-rw-r--r-- | arch/s390/kernel/irq.c | 1 | ||||
-rw-r--r-- | arch/s390/kvm/Kconfig | 4 | ||||
-rw-r--r-- | arch/s390/kvm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/kvm/diag.c | 84 | ||||
-rw-r--r-- | arch/s390/kvm/interrupt.c | 704 | ||||
-rw-r--r-- | arch/s390/kvm/irq.h | 22 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 212 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 7 | ||||
-rw-r--r-- | arch/s390/kvm/priv.c | 7 | ||||
-rw-r--r-- | arch/s390/kvm/sigp.c | 157 | ||||
-rw-r--r-- | arch/s390/kvm/trace.h | 46 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 26 |
17 files changed, 1204 insertions, 213 deletions
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 5f8bcc5fe423..35f0faab5361 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -53,6 +53,7 @@ enum interruption_class { IRQIO_PCI, IRQIO_MSI, IRQIO_VIR, + IRQIO_VAI, NMI_NMI, CPU_RST, NR_ARCH_IRQS diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9bf95bb30f1a..154b60089be9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -16,12 +16,22 @@ #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/kvm_host.h> +#include <linux/kvm.h> #include <asm/debug.h> #include <asm/cpu.h> +#include <asm/isc.h> #define KVM_MAX_VCPUS 64 #define KVM_USER_MEM_SLOTS 32 +/* + * These seem to be used for allocating ->chip in the routing table, + * which we don't use. 4096 is an out-of-thin-air value. If we need + * to look at ->chip later on, we'll need to revisit this. + */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 4096 + struct sca_entry { atomic_t scn; __u32 reserved; @@ -108,7 +118,9 @@ struct kvm_s390_sie_block { __u32 fac; /* 0x01a0 */ __u8 reserved1a4[20]; /* 0x01a4 */ __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[40]; /* 0x01c0 */ + __u8 reserved1c0[30]; /* 0x01c0 */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ __u8 reserved1f0[16]; /* 0x01f0 */ } __attribute__((packed)); @@ -171,18 +183,6 @@ struct kvm_vcpu_stat { u32 diagnose_9c; }; -struct kvm_s390_io_info { - __u16 subchannel_id; /* 0x0b8 */ - __u16 subchannel_nr; /* 0x0ba */ - __u32 io_int_parm; /* 0x0bc */ - __u32 io_int_word; /* 0x0c0 */ -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u64 ext_params2; -}; - #define PGM_OPERATION 0x01 #define PGM_PRIVILEGED_OP 0x02 #define PGM_EXECUTE 0x03 @@ -191,27 +191,6 @@ struct kvm_s390_ext_info { #define PGM_SPECIFICATION 0x06 #define PGM_DATA 0x07 -struct kvm_s390_pgm_info { - __u16 code; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; - -struct kvm_s390_extcall_info { - __u16 code; -}; - -struct kvm_s390_emerg_info { - __u16 code; -}; - -struct kvm_s390_mchk_info { - __u64 cr14; - __u64 mcic; -}; - struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -246,9 +225,8 @@ struct kvm_s390_float_interrupt { struct list_head list; atomic_t active; int next_rr_cpu; - unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) - / sizeof(long)]; - struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; + unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int irq_count; }; @@ -265,6 +243,10 @@ struct kvm_vcpu_arch { u64 stidp_data; }; struct gmap *gmap; +#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) + unsigned long pfault_token; + unsigned long pfault_select; + unsigned long pfault_compare; }; struct kvm_vm_stat { @@ -274,12 +256,36 @@ struct kvm_vm_stat { struct kvm_arch_memory_slot { }; +struct s390_map_info { + struct list_head list; + __u64 guest_addr; + __u64 addr; + struct page *page; +}; + +struct s390_io_adapter { + unsigned int id; + int isc; + bool maskable; + bool masked; + bool swap; + struct rw_semaphore maps_lock; + struct list_head maps; + atomic_t nr_maps; +}; + +#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) +#define MAX_S390_ADAPTER_MAPS 256 + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; + struct kvm_device *flic; struct gmap *gmap; int css_support; + int use_irqchip; + struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; }; #define KVM_HVA_ERR_BAD (-1UL) @@ -290,6 +296,24 @@ static inline bool kvm_is_error_hva(unsigned long addr) return IS_ERR_VALUE(addr); } +#define ASYNC_PF_PER_VCPU 64 +struct kvm_vcpu; +struct kvm_async_pf; +struct kvm_arch_async_pf { + unsigned long pfault_token; +}; + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; #endif diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1ab75eaacbd4..50a75d96f939 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -782,6 +782,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry) * @table: pointer to the page directory * @asce: address space control element for gmap page table * @crst_list: list of all crst tables used in the guest address space + * @pfault_enabled: defines if pfaults are applicable for the guest */ struct gmap { struct list_head list; @@ -790,6 +791,7 @@ struct gmap { unsigned long asce; void *private; struct list_head crst_list; + bool pfault_enabled; }; /** diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0a876bc543d3..dc5fc4f90e52 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -79,6 +79,7 @@ struct thread_struct { unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ unsigned long per_flags; /* Flags to control debug behavior */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index d25da598ec62..c003c6a73b1e 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -16,6 +16,44 @@ #define __KVM_S390 +/* Device control API: s390-specific devices */ +#define KVM_DEV_FLIC_GET_ALL_IRQS 1 +#define KVM_DEV_FLIC_ENQUEUE 2 +#define KVM_DEV_FLIC_CLEAR_IRQS 3 +#define KVM_DEV_FLIC_APF_ENABLE 4 +#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 +#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 +#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 +/* + * We can have up to 4*64k pending subchannels + 8 adapter interrupts, + * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. + * There are also sclp and machine checks. This gives us + * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000 + * Lets round up to 8192 pages. + */ +#define KVM_S390_MAX_FLOAT_IRQS 266250 +#define KVM_S390_FLIC_MAX_BUFFER 0x2000000 + +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + +#define KVM_S390_IO_ADAPTER_MASK 1 +#define KVM_S390_IO_ADAPTER_MAP 2 +#define KVM_S390_IO_ADAPTER_UNMAP 3 + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ @@ -57,4 +95,9 @@ struct kvm_sync_regs { #define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) #define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9) #endif diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index a770be97db4d..d42b14cc72a4 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -85,6 +85,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, }; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 70b46eacf8e1..10d529ac9821 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -23,6 +23,10 @@ config KVM select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_EVENTFD + select KVM_ASYNC_PF + select KVM_ASYNC_PF_SYNC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 40b4c6470f88..d3adb37e93a4 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 6f9cfa500372..03a05ffb662f 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -18,6 +18,7 @@ #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" +#include "gaccess.h" static int diag_release_pages(struct kvm_vcpu *vcpu) { @@ -47,6 +48,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) return 0; } +static int __diag_page_ref_service(struct kvm_vcpu *vcpu) +{ + struct prs_parm { + u16 code; + u16 subcode; + u16 parm_len; + u16 parm_version; + u64 token_addr; + u64 select_mask; + u64 compare_mask; + u64 zarch; + }; + struct prs_parm parm; + int rc; + u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; + u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); + unsigned long hva_token = KVM_HVA_ERR_BAD; + + if (vcpu->run->s.regs.gprs[rx] & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (parm.subcode) { + case 0: /* TOKEN */ + if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { + /* + * If the pagefault handshake is already activated, + * the token must not be changed. We have to return + * decimal 8 instead, as mandated in SC24-6084. + */ + vcpu->run->s.regs.gprs[ry] = 8; + return 0; + } + + if ((parm.compare_mask & parm.select_mask) != parm.compare_mask || + parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr)); + if (kvm_is_error_hva(hva_token)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + vcpu->arch.pfault_token = parm.token_addr; + vcpu->arch.pfault_select = parm.select_mask; + vcpu->arch.pfault_compare = parm.compare_mask; + vcpu->run->s.regs.gprs[ry] = 0; + rc = 0; + break; + case 1: /* + * CANCEL + * Specification allows to let already pending tokens survive + * the cancel, therefore to reduce code complexity, we assume + * all outstanding tokens are already pending. + */ + if (parm.token_addr || parm.select_mask || + parm.compare_mask || parm.zarch) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + vcpu->run->s.regs.gprs[ry] = 0; + /* + * If the pfault handling was not established or is already + * canceled SC24-6084 requests to return decimal 4. + */ + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + vcpu->run->s.regs.gprs[ry] = 4; + else + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + + rc = 0; + break; + default: + rc = -EOPNOTSUPP; + break; + } + + return rc; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -153,6 +235,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return __diag_time_slice_end(vcpu); case 0x9c: return __diag_time_slice_end_directed(vcpu); + case 0x258: + return __diag_page_ref_service(vcpu); case 0x308: return __diag_ipl_functions(vcpu); case 0x500: diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5f79d2d79ca7..200a8f9390b6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,7 +1,7 @@ /* * handling kvm guest interrupts * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -13,6 +13,7 @@ #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> +#include <linux/mmu_context.h> #include <linux/signal.h> #include <linux/slab.h> #include <asm/asm-offsets.h> @@ -31,7 +32,7 @@ static int is_ioint(u64 type) return ((type & 0xfffe0000u) != 0xfffe0000u); } -static int psw_extint_disabled(struct kvm_vcpu *vcpu) +int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); } @@ -78,11 +79,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 1; return 0; case KVM_S390_INT_SERVICE: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) - return 1; - return 0; + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: if (psw_extint_disabled(vcpu)) return 0; @@ -117,14 +115,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, static void __set_cpu_idle(struct kvm_vcpu *vcpu) { - BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { - BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } @@ -150,6 +146,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: if (psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); @@ -223,6 +221,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, rc |= put_guest(vcpu, inti->ext.ext_params, (u32 __user *)__LC_EXT_PARAMS); break; + case KVM_S390_INT_PFAULT_INIT: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *) __LC_EXT_PARAMS2); + break; + case KVM_S390_INT_PFAULT_DONE: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *) __LC_EXT_PARAMS2); + break; case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); @@ -357,7 +379,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) return 1; } -static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; @@ -482,11 +504,26 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + vcpu->preempted = true; tasklet_schedule(&vcpu->arch.tasklet); return HRTIMER_NORESTART; } +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + list_del(&inti->list); + kfree(inti); + } + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); +} + void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -528,6 +565,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) list_for_each_entry_safe(inti, n, &fi->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); + fi->irq_count--; deliver = 1; break; } @@ -583,6 +621,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) if ((inti->type == KVM_S390_MCHK) && __interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); + fi->irq_count--; deliver = 1; break; } @@ -650,8 +689,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, inti = iter; break; } - if (inti) + if (inti) { list_del_init(&inti->list); + fi->irq_count--; + } if (list_empty(&fi->list)) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); @@ -659,53 +700,101 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, return inti; } -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int) +static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti, *iter; + struct kvm_s390_interrupt_info *iter; + struct kvm_vcpu *dst_vcpu = NULL; int sigcpu; + int rc = 0; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + rc = -EINVAL; + goto unlock_fi; + } + fi->irq_count++; + if (!is_ioint(inti->type)) { + list_add_tail(&inti->list, &fi->list); + } else { + u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (int_word_to_isc_bits(iter->io.io_int_word) + <= isc_bits) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (kvm_get_vcpu(kvm, sigcpu) == NULL); + } + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + li = &dst_vcpu->arch.local_int; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + kvm_get_vcpu(kvm, sigcpu)->preempted = true; + spin_unlock_bh(&li->lock); +unlock_fi: + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return rc; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_interrupt_info *inti; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) return -ENOMEM; - switch (s390int->type) { + inti->type = s390int->type; + switch (inti->type) { case KVM_S390_INT_VIRTIO: VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", s390int->parm, s390int->parm64); - inti->type = s390int->type; inti->ext.ext_params = s390int->parm; inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_INT_SERVICE: VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); - inti->type = s390int->type; inti->ext.ext_params = s390int->parm; break; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: - kfree(inti); - return -EINVAL; + case KVM_S390_INT_PFAULT_DONE: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; case KVM_S390_MCHK: VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", s390int->parm64); - inti->type = s390int->type; inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (s390int->type & IOINT_AI_MASK) + if (inti->type & IOINT_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); else VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", s390int->type & IOINT_CSSID_MASK, s390int->type & IOINT_SSID_MASK, s390int->type & IOINT_SCHID_MASK); - inti->type = s390int->type; inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; @@ -718,43 +807,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 2); - mutex_lock(&kvm->lock); - fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - if (!is_ioint(inti->type)) - list_add_tail(&inti->list, &fi->list); - else { - u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); - - /* Keep I/O interrupts sorted in isc order. */ - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (int_word_to_isc_bits(iter->io.io_int_word) - <= isc_bits) - continue; - break; - } - list_add_tail(&inti->list, &iter->list); - } - atomic_set(&fi->active, 1); - sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); - if (sigcpu == KVM_MAX_VCPUS) { - do { - sigcpu = fi->next_rr_cpu++; - if (sigcpu == KVM_MAX_VCPUS) - sigcpu = fi->next_rr_cpu = 0; - } while (fi->local_int[sigcpu] == NULL); - } - li = fi->local_int[sigcpu]; - spin_lock_bh(&li->lock); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - spin_unlock_bh(&li->lock); - spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); - return 0; + return __inject_vm(kvm, inti); } int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, @@ -814,6 +867,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, inti->type = s390int->type; inti->mchk.mcic = s390int->parm64; break; + case KVM_S390_INT_PFAULT_INIT: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: @@ -837,7 +894,528 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); + vcpu->preempted = true; spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; } + +static void clear_floating_interrupts(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + list_del(&inti->list); + kfree(inti); + } + fi->irq_count = 0; + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, + u8 *addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + struct kvm_s390_irq irq = {0}; + + irq.type = inti->type; + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + irq.u.ext = inti->ext; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + irq.u.io = inti->io; + break; + case KVM_S390_MCHK: + irq.u.mchk = inti->mchk; + break; + default: + return -EINVAL; + } + + if (copy_to_user(uptr, &irq, sizeof(irq))) + return -EFAULT; + + return 0; +} + +static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +{ + struct kvm_s390_interrupt_info *inti; + struct kvm_s390_float_interrupt *fi; + int ret = 0; + int n = 0; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + + list_for_each_entry(inti, &fi->list, list) { + if (len < sizeof(struct kvm_s390_irq)) { + /* signal userspace to try again */ + ret = -ENOMEM; + break; + } + ret = copy_irq_to_user(inti, buf); + if (ret) + break; + buf += sizeof(struct kvm_s390_irq); + len -= sizeof(struct kvm_s390_irq); + n++; + } + + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + + return ret < 0 ? ret : n; +} + +static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_FLIC_GET_ALL_IRQS: + r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + attr->attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, + u64 addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + void *target = NULL; + void __user *source; + u64 size; + + if (get_user(inti->type, (u64 __user *)addr)) + return -EFAULT; + + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + target = (void *) &inti->ext; + source = &uptr->u.ext; + size = sizeof(inti->ext); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + target = (void *) &inti->io; + source = &uptr->u.io; + size = sizeof(inti->io); + break; + case KVM_S390_MCHK: + target = (void *) &inti->mchk; + source = &uptr->u.mchk; + size = sizeof(inti->mchk); + break; + default: + return -EINVAL; + } + + if (copy_from_user(target, source, size)) + return -EFAULT; + + return 0; +} + +static int enqueue_floating_irq(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int r = 0; + int len = attr->attr; + + if (len % sizeof(struct kvm_s390_irq) != 0) + return -EINVAL; + else if (len > KVM_S390_FLIC_MAX_BUFFER) + return -EINVAL; + + while (len >= sizeof(struct kvm_s390_irq)) { + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + r = copy_irq_from_user(inti, attr->addr); + if (r) { + kfree(inti); + return r; + } + r = __inject_vm(dev->kvm, inti); + if (r) { + kfree(inti); + return r; + } + len -= sizeof(struct kvm_s390_irq); + attr->addr += sizeof(struct kvm_s390_irq); + } + + return r; +} + +static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id) +{ + if (id >= MAX_S390_IO_ADAPTERS) + return NULL; + return kvm->arch.adapters[id]; +} + +static int register_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct s390_io_adapter *adapter; + struct kvm_s390_io_adapter adapter_info; + + if (copy_from_user(&adapter_info, + (void __user *)attr->addr, sizeof(adapter_info))) + return -EFAULT; + + if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) || + (dev->kvm->arch.adapters[adapter_info.id] != NULL)) + return -EINVAL; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + INIT_LIST_HEAD(&adapter->maps); + init_rwsem(&adapter->maps_lock); + atomic_set(&adapter->nr_maps, 0); + adapter->id = adapter_info.id; + adapter->isc = adapter_info.isc; + adapter->maskable = adapter_info.maskable; + adapter->masked = false; + adapter->swap = adapter_info.swap; + dev->kvm->arch.adapters[adapter->id] = adapter; + + return 0; +} + +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked) +{ + int ret; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter || !adapter->maskable) + return -EINVAL; + ret = adapter->masked; + adapter->masked = masked; + return ret; +} + +static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map; + int ret; + + if (!adapter || !addr) + return -EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&map->list); + map->guest_addr = addr; + map->addr = gmap_translate(addr, kvm->arch.gmap); + if (map->addr == -EFAULT) { + ret = -EFAULT; + goto out; + } + ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + if (ret < 0) + goto out; + BUG_ON(ret != 1); + down_write(&adapter->maps_lock); + if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) { + list_add_tail(&map->list, &adapter->maps); + ret = 0; + } else { + put_page(map->page); + ret = -EINVAL; + } + up_write(&adapter->maps_lock); +out: + if (ret) + kfree(map); + return ret; +} + +static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map, *tmp; + int found = 0; + + if (!adapter || !addr) + return -EINVAL; + + down_write(&adapter->maps_lock); + list_for_each_entry_safe(map, tmp, &adapter->maps, list) { + if (map->guest_addr == addr) { + found = 1; + atomic_dec(&adapter->nr_maps); + list_del(&map->list); + put_page(map->page); + kfree(map); + break; + } + } + up_write(&adapter->maps_lock); + + return found ? 0 : -EINVAL; +} + +void kvm_s390_destroy_adapters(struct kvm *kvm) +{ + int i; + struct s390_map_info *map, *tmp; + + for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) { + if (!kvm->arch.adapters[i]) + continue; + list_for_each_entry_safe(map, tmp, + &kvm->arch.adapters[i]->maps, list) { + list_del(&map->list); + put_page(map->page); + kfree(map); + } + kfree(kvm->arch.adapters[i]); + } +} + +static int modify_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_io_adapter_req req; + struct s390_io_adapter *adapter; + int ret; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + adapter = get_io_adapter(dev->kvm, req.id); + if (!adapter) + return -EINVAL; + switch (req.type) { + case KVM_S390_IO_ADAPTER_MASK: + ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask); + if (ret > 0) + ret = 0; + break; + case KVM_S390_IO_ADAPTER_MAP: + ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr); + break; + case KVM_S390_IO_ADAPTER_UNMAP: + ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r = 0; + unsigned int i; + struct kvm_vcpu *vcpu; + + switch (attr->group) { + case KVM_DEV_FLIC_ENQUEUE: + r = enqueue_floating_irq(dev, attr); + break; + case KVM_DEV_FLIC_CLEAR_IRQS: + r = 0; + clear_floating_interrupts(dev->kvm); + break; + case KVM_DEV_FLIC_APF_ENABLE: + dev->kvm->arch.gmap->pfault_enabled = 1; + break; + case KVM_DEV_FLIC_APF_DISABLE_WAIT: + dev->kvm->arch.gmap->pfault_enabled = 0; + /* + * Make sure no async faults are in transition when + * clearing the queues. So we don't need to worry + * about late coming workers. + */ + synchronize_srcu(&dev->kvm->srcu); + kvm_for_each_vcpu(i, vcpu, dev->kvm) + kvm_clear_async_pf_completion_queue(vcpu); + break; + case KVM_DEV_FLIC_ADAPTER_REGISTER: + r = register_io_adapter(dev, attr); + break; + case KVM_DEV_FLIC_ADAPTER_MODIFY: + r = modify_io_adapter(dev, attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static int flic_create(struct kvm_device *dev, u32 type) +{ + if (!dev) + return -EINVAL; + if (dev->kvm->arch.flic) + return -EINVAL; + dev->kvm->arch.flic = dev; + return 0; +} + +static void flic_destroy(struct kvm_device *dev) +{ + dev->kvm->arch.flic = NULL; + kfree(dev); +} + +/* s390 floating irq controller (flic) */ +struct kvm_device_ops kvm_flic_ops = { + .name = "kvm-flic", + .get_attr = flic_get_attr, + .set_attr = flic_set_attr, + .create = flic_create, + .destroy = flic_destroy, +}; + +static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) +{ + unsigned long bit; + + bit = bit_nr + (addr % PAGE_SIZE) * 8; + + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; +} + +static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter, + u64 addr) +{ + struct s390_map_info *map; + + if (!adapter) + return NULL; + + list_for_each_entry(map, &adapter->maps, list) { + if (map->guest_addr == addr) + return map; + } + return NULL; +} + +static int adapter_indicators_set(struct kvm *kvm, + struct s390_io_adapter *adapter, + struct kvm_s390_adapter_int *adapter_int) +{ + unsigned long bit; + int summary_set, idx; + struct s390_map_info *info; + void *map; + + info = get_map_info(adapter, adapter_int->ind_addr); + if (!info) + return -1; + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap); + set_bit(bit, map); + idx = srcu_read_lock(&kvm->srcu); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + info = get_map_info(adapter, adapter_int->summary_addr); + if (!info) { + srcu_read_unlock(&kvm->srcu, idx); + return -1; + } + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->summary_offset, + adapter->swap); + summary_set = test_and_set_bit(bit, map); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + srcu_read_unlock(&kvm->srcu, idx); + return summary_set ? 0 : 1; +} + +/* + * < 0 - not injected due to error + * = 0 - coalesced, summary indicator already active + * > 0 - injected interrupt + */ +static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + int ret; + struct s390_io_adapter *adapter; + + /* We're only interested in the 0->1 transition. */ + if (!level) + return 0; + adapter = get_io_adapter(kvm, e->adapter.adapter_id); + if (!adapter) + return -1; + down_read(&adapter->maps_lock); + ret = adapter_indicators_set(kvm, adapter, &e->adapter); + up_read(&adapter->maps_lock); + if ((ret > 0) && !adapter->masked) { + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + ret = kvm_s390_inject_vm(kvm, &s390int); + if (ret == 0) + ret = 1; + } + return ret; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int ret; + + switch (ue->type) { + case KVM_IRQ_ROUTING_S390_ADAPTER: + e->set = set_adapter_int; + e->adapter.summary_addr = ue->u.adapter.summary_addr; + e->adapter.ind_addr = ue->u.adapter.ind_addr; + e->adapter.summary_offset = ue->u.adapter.summary_offset; + e->adapter.ind_offset = ue->u.adapter.ind_offset; + e->adapter.adapter_id = ue->u.adapter.adapter_id; + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + return -EINVAL; +} diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h new file mode 100644 index 000000000000..d98e4159643d --- /dev/null +++ b/arch/s390/kvm/irq.h @@ -0,0 +1,22 @@ +/* + * s390 irqchip routines + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_IRQ_H +#define __KVM_IRQ_H + +#include <linux/kvm_host.h> + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} + +#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 10b5db3c9bc4..b3ecb8f5b6ce 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -153,11 +153,14 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_KVM_S390_UCONTROL case KVM_CAP_S390_UCONTROL: #endif + case KVM_CAP_ASYNC_PF: case KVM_CAP_SYNC_REGS: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_ENABLE_CAP_VM: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -186,6 +189,25 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, return 0; } +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_S390_IRQCHIP: + kvm->arch.use_irqchip = 1; + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -203,6 +225,26 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_inject_vm(kvm, &s390int); break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } + case KVM_CREATE_IRQCHIP: { + struct kvm_irq_routing_entry routing; + + r = -EINVAL; + if (kvm->arch.use_irqchip) { + /* Set up dummy routing. */ + memset(&routing, 0, sizeof(routing)); + kvm_set_irq_routing(kvm, &routing, 0, 0); + r = 0; + } + break; + } default: r = -ENOTTY; } @@ -214,6 +256,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int rc; char debug_name[16]; + static unsigned long sca_offset; rc = -EINVAL; #ifdef CONFIG_KVM_S390_UCONTROL @@ -235,6 +278,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; + spin_lock(&kvm_lock); + sca_offset = (sca_offset + 16) & 0x7f0; + kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -255,9 +302,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.gmap) goto out_nogmap; kvm->arch.gmap->private = kvm; + kvm->arch.gmap->pfault_enabled = 0; } kvm->arch.css_support = 0; + kvm->arch.use_irqchip = 0; return 0; out_nogmap: @@ -272,6 +321,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); + kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_is_ucontrol(vcpu->kvm)) { clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); @@ -320,11 +370,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm) debug_unregister(kvm->arch.dbf); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); + kvm_s390_destroy_adapters(kvm); } /* Section: vcpu related */ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = gmap_alloc(current->mm); if (!vcpu->arch.gmap) @@ -385,7 +438,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.guest_fpregs.fpc = 0; asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_local_irqs(vcpu); } int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) @@ -466,11 +523,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, spin_lock_init(&vcpu->arch.local_int.lock); INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; - spin_lock(&kvm->arch.float_int.lock); - kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - spin_unlock(&kvm->arch.float_int.lock); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) @@ -490,9 +544,7 @@ out: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - /* kvm common code refers to this, but never calls it */ - BUG(); - return 0; + return kvm_cpu_has_interrupt(vcpu); } void s390_vcpu_block(struct kvm_vcpu *vcpu) @@ -568,6 +620,26 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, r = put_user(vcpu->arch.sie_block->ckc, (u64 __user *)reg->addr); break; + case KVM_REG_S390_PFTOKEN: + r = put_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = put_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = put_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PP: + r = put_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_GBEA: + r = put_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; default: break; } @@ -597,6 +669,26 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, r = get_user(vcpu->arch.sie_block->ckc, (u64 __user *)reg->addr); break; + case KVM_REG_S390_PFTOKEN: + r = get_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = get_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = get_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PP: + r = get_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_GBEA: + r = get_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; default: break; } @@ -715,10 +807,100 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) return 0; } +static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu) +{ + long rc; + hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); + struct mm_struct *mm = current->mm; + down_read(&mm->mmap_sem); + rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL); + up_read(&mm->mmap_sem); + return rc; +} + +static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, + unsigned long token) +{ + struct kvm_s390_interrupt inti; + inti.parm64 = token; + + if (start_token) { + inti.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + } else { + inti.type = KVM_S390_INT_PFAULT_DONE; + WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); + } +} + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); +} + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); +} + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + /* s390 will always inject the page directly */ +} + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +{ + /* + * s390 will always inject the page directly, + * but we still want check_async_completion to cleanup + */ + return true; +} + +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) +{ + hva_t hva; + struct kvm_arch_async_pf arch; + int rc; + + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + return 0; + if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != + vcpu->arch.pfault_compare) + return 0; + if (psw_extint_disabled(vcpu)) + return 0; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + return 0; + if (!vcpu->arch.gmap->pfault_enabled) + return 0; + + hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); + if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8)) + return 0; + + rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); + return rc; +} + static int vcpu_pre_run(struct kvm_vcpu *vcpu) { int rc, cpuflags; + /* + * On s390 notifications for arriving pages will be delivered directly + * to the guest but the house keeping for completed pfaults is + * handled outside the worker. + */ + kvm_check_async_pf_completion(vcpu); + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); if (need_resched()) @@ -744,7 +926,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc; + int rc = -1; VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); @@ -758,7 +940,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; rc = -EREMOTE; - } else { + + } else if (current->thread.gmap_pfault) { + trace_kvm_s390_major_guest_pfault(vcpu); + current->thread.gmap_pfault = 0; + if (kvm_arch_setup_async_pf(vcpu) || + (kvm_arch_fault_in_sync(vcpu) >= 0)) + rc = 0; + } + + if (rc == -1) { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); @@ -768,7 +959,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (rc == 0) { if (kvm_is_ucontrol(vcpu->kvm)) - rc = -EOPNOTSUPP; + /* Don't exit for host interrupts. */ + rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; else rc = kvm_handle_sie_intercept(vcpu); } @@ -831,8 +1023,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); - BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); - switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: case KVM_EXIT_UNKNOWN: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 564514f410f4..3c1e2274d9ea 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -129,6 +129,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); int __must_check kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, @@ -136,6 +137,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); @@ -161,4 +163,9 @@ bool kvm_enabled_cmma(void); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); +/* implemented in interrupt.c */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int psw_extint_disabled(struct kvm_vcpu *vcpu); +void kvm_s390_destroy_adapters(struct kvm *kvm); + #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index aacb6b129914..476e9e218f43 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -396,15 +396,10 @@ static int handle_stidp(struct kvm_vcpu *vcpu) static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int cpus = 0; int n; - spin_lock(&fi->lock); - for (n = 0; n < KVM_MAX_VCPUS; n++) - if (fi->local_int[n]) - cpus++; - spin_unlock(&fi->lock); + cpus = atomic_read(&vcpu->kvm->online_vcpus); /* deal with other level 3 hypervisors */ if (stsi(mem, 3, 2, 2)) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 87c2b3a3bd3e..26caeb530a78 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -23,29 +23,30 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int cpuflags; int rc; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - if (fi->local_int[cpu_addr] == NULL) - rc = SIGP_CC_NOT_OPERATIONAL; - else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) - & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + cpuflags = atomic_read(li->cpuflags); + if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) rc = SIGP_CC_ORDER_CODE_ACCEPTED; else { *reg &= 0xffffffff00000000UL; - if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_ECALL_PEND) + if (cpuflags & CPUSTAT_ECALL_PEND) *reg |= SIGP_STATUS_EXT_CALL_PENDING; - if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_STOPPED) + if (cpuflags & CPUSTAT_STOPPED) *reg |= SIGP_STATUS_STOPPED; rc = SIGP_CC_STATUS_STORED; } - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); return rc; @@ -53,12 +54,13 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; struct kvm_s390_interrupt_info *inti; - int rc; + struct kvm_vcpu *dst_vcpu = NULL; - if (cpu_addr >= KVM_MAX_VCPUS) + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -68,13 +70,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EMERGENCY; inti->emerg.code = vcpu->vcpu_id; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - kfree(inti); - goto unlock; - } + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); @@ -82,11 +78,9 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); - rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); -unlock: - spin_unlock(&fi->lock); - return rc; + + return SIGP_CC_ORDER_CODE_ACCEPTED; } static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, @@ -122,12 +116,13 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; struct kvm_s390_interrupt_info *inti; - int rc; + struct kvm_vcpu *dst_vcpu = NULL; - if (cpu_addr >= KVM_MAX_VCPUS) + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -137,13 +132,7 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EXTERNAL_CALL; inti->extcall.code = vcpu->vcpu_id; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - kfree(inti); - goto unlock; - } + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); @@ -151,11 +140,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); - rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); -unlock: - spin_unlock(&fi->lock); - return rc; + + return SIGP_CC_ORDER_CODE_ACCEPTED; } static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) @@ -189,31 +176,26 @@ out: static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; int rc; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - goto unlock; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; rc = __inject_sigp_stop(li, action); -unlock: - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { /* If the CPU has already been stopped, we still have * to save the status when doing stop-and-store. This * has to be done after unlocking all spinlocks. */ - struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); rc = kvm_s390_store_status_unloaded(dst_vcpu, KVM_S390_STORE_STATUS_NOADDR); } @@ -224,6 +206,8 @@ unlock: static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) { int rc; + unsigned int i; + struct kvm_vcpu *v; switch (parameter & 0xff) { case 0: @@ -231,6 +215,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) break; case 1: case 2: + kvm_for_each_vcpu(i, v, vcpu->kvm) { + v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(v); + } + rc = SIGP_CC_ORDER_CODE_ACCEPTED; break; default: @@ -242,12 +231,18 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, u64 *reg) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct kvm_s390_local_interrupt *li = NULL; + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; struct kvm_s390_interrupt_info *inti; int rc; u8 tmp; + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + /* make sure that the new value is valid memory */ address = address & 0x7fffe000u; if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || @@ -261,18 +256,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return SIGP_CC_BUSY; - spin_lock(&fi->lock); - if (cpu_addr < KVM_MAX_VCPUS) - li = fi->local_int[cpu_addr]; - - if (li == NULL) { - *reg &= 0xffffffff00000000UL; - *reg |= SIGP_STATUS_INCORRECT_STATE; - rc = SIGP_CC_STATUS_STORED; - kfree(inti); - goto out_fi; - } - spin_lock_bh(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { @@ -295,8 +278,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); out_li: spin_unlock_bh(&li->lock); -out_fi: - spin_unlock(&fi->lock); return rc; } @@ -334,28 +315,26 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; int rc; - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - if (fi->local_int[cpu_addr] == NULL) - rc = SIGP_CC_NOT_OPERATIONAL; - else { - if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { - /* running */ - rc = SIGP_CC_ORDER_CODE_ACCEPTED; - } else { - /* not running */ - *reg &= 0xffffffff00000000UL; - *reg |= SIGP_STATUS_NOT_RUNNING; - rc = SIGP_CC_STATUS_STORED; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + /* running */ + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_NOT_RUNNING; + rc = SIGP_CC_STATUS_STORED; } - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, rc); @@ -366,26 +345,22 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, /* Test whether the destination CPU is available and not busy */ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; + struct kvm_vcpu *dst_vcpu = NULL; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - goto out; - } - + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; spin_unlock_bh(&li->lock); -out: - spin_unlock(&fi->lock); + return rc; } diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 3db76b2daed7..e8e7213d4cc5 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -30,6 +30,52 @@ TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ __entry->pswmask, __entry->pswaddr, p_args) +TRACE_EVENT(kvm_s390_major_guest_pfault, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault") + ); + +TRACE_EVENT(kvm_s390_pfault_init, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token) + ); + +TRACE_EVENT(kvm_s390_pfault_done, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token) + ); + /* * Tracepoints for SIE entry and exit. */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d95265b2719f..88cef505453b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -50,6 +50,7 @@ #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 #define VM_FAULT_SIGNAL 0x080000 +#define VM_FAULT_PFAULT 0x100000 static unsigned long store_indication __read_mostly; @@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) return; } case VM_FAULT_BADCONTEXT: + case VM_FAULT_PFAULT: do_no_context(regs); break; case VM_FAULT_SIGNAL: @@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) */ static inline int do_exception(struct pt_regs *regs, int access) { +#ifdef CONFIG_PGSTE + struct gmap *gmap; +#endif struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; @@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { - address = __gmap_fault(address, - (struct gmap *) S390_lowcore.gmap); + gmap = (struct gmap *) + ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + if (gmap) { + address = __gmap_fault(address, gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; @@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access) fault = VM_FAULT_OOM; goto out_up; } + if (gmap->pfault_enabled) + flags |= FAULT_FLAG_RETRY_NOWAIT; } #endif @@ -371,9 +379,19 @@ retry: regs, address); } if (fault & VM_FAULT_RETRY) { +#ifdef CONFIG_PGSTE + if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { + /* FAULT_FLAG_RETRY_NOWAIT has been set, + * mmap_sem has not been released */ + current->thread.gmap_pfault = 1; + fault = VM_FAULT_PFAULT; + goto out_up; + } +#endif /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags &= ~(FAULT_FLAG_ALLOW_RETRY | + FAULT_FLAG_RETRY_NOWAIT); flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; |