summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/feature-removal-schedule.txt7
-rw-r--r--Documentation/virtual/kvm/api.txt281
-rw-r--r--Documentation/virtual/kvm/cpuid.txt6
-rw-r--r--Documentation/virtual/kvm/msr.txt4
-rw-r--r--arch/alpha/include/asm/kvm_para.h1
-rw-r--r--arch/arm/include/asm/kvm_para.h1
-rw-r--r--arch/avr32/include/asm/kvm_para.h1
-rw-r--r--arch/blackfin/include/asm/kvm_para.h1
-rw-r--r--arch/c6x/include/asm/kvm_para.h1
-rw-r--r--arch/frv/include/asm/kvm_para.h1
-rw-r--r--arch/h8300/include/asm/kvm_para.h1
-rw-r--r--arch/hexagon/include/asm/kvm_para.h1
-rw-r--r--arch/ia64/include/asm/kvm_host.h3
-rw-r--r--arch/ia64/include/asm/kvm_para.h5
-rw-r--r--arch/ia64/kvm/kvm-ia64.c30
-rw-r--r--arch/m68k/include/asm/kvm_para.h1
-rw-r--r--arch/microblaze/include/asm/kvm_para.h1
-rw-r--r--arch/mips/include/asm/kvm_para.h1
-rw-r--r--arch/mn10300/include/asm/kvm_para.h1
-rw-r--r--arch/openrisc/include/asm/kvm_para.h1
-rw-r--r--arch/parisc/include/asm/kvm_para.h1
-rw-r--r--arch/powerpc/include/asm/cputable.h23
-rw-r--r--arch/powerpc/include/asm/dbell.h3
-rw-r--r--arch/powerpc/include/asm/hvcall.h10
-rw-r--r--arch/powerpc/include/asm/hw_irq.h1
-rw-r--r--arch/powerpc/include/asm/kvm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h18
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h3
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h8
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h3
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h49
-rw-r--r--arch/powerpc/include/asm/kvm_e500.h96
-rw-r--r--arch/powerpc/include/asm/kvm_host.h60
-rw-r--r--arch/powerpc/include/asm/kvm_para.h5
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h20
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h6
-rw-r--r--arch/powerpc/include/asm/processor.h3
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/reg_booke.h34
-rw-r--r--arch/powerpc/include/asm/switch_to.h1
-rw-r--r--arch/powerpc/include/asm/time.h1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c19
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S12
-rw-r--r--arch/powerpc/kernel/head_44x.S23
-rw-r--r--arch/powerpc/kernel/head_booke.h69
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S98
-rw-r--r--arch/powerpc/kernel/idle_power7.S7
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c4
-rw-r--r--arch/powerpc/kernel/time.c3
-rw-r--r--arch/powerpc/kvm/44x.c12
-rw-r--r--arch/powerpc/kvm/44x_emulate.c51
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile17
-rw-r--r--arch/powerpc/kvm/book3s.c7
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c31
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c150
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c106
-rw-r--r--arch/powerpc/kvm/book3s_hv.c467
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S185
-rw-r--r--arch/powerpc/kvm/book3s_pr.c59
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c101
-rw-r--r--arch/powerpc/kvm/book3s_segment.S13
-rw-r--r--arch/powerpc/kvm/booke.c471
-rw-r--r--arch/powerpc/kvm/booke.h62
-rw-r--r--arch/powerpc/kvm/booke_emulate.c118
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S8
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S597
-rw-r--r--arch/powerpc/kvm/e500.c372
-rw-r--r--arch/powerpc/kvm/e500.h306
-rw-r--r--arch/powerpc/kvm/e500_emulate.c210
-rw-r--r--arch/powerpc/kvm/e500_tlb.c666
-rw-r--r--arch/powerpc/kvm/e500_tlb.h174
-rw-r--r--arch/powerpc/kvm/e500mc.c342
-rw-r--r--arch/powerpc/kvm/emulate.c197
-rw-r--r--arch/powerpc/kvm/powerpc.c94
-rw-r--r--arch/powerpc/kvm/timing.h6
-rw-r--r--arch/s390/include/asm/kvm.h5
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/include/asm/kvm_para.h5
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/kvm/diag.c29
-rw-r--r--arch/s390/kvm/intercept.c1
-rw-r--r--arch/s390/kvm/kvm-s390.c87
-rw-r--r--arch/s390/kvm/kvm-s390.h1
-rw-r--r--arch/s390/kvm/priv.c31
-rw-r--r--arch/score/include/asm/kvm_para.h1
-rw-r--r--arch/sh/include/asm/kvm_para.h1
-rw-r--r--arch/sparc/include/asm/kvm_para.h1
-rw-r--r--arch/tile/include/asm/kvm_para.h1
-rw-r--r--arch/um/include/asm/kvm_para.h1
-rw-r--r--arch/unicore32/include/asm/kvm_para.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h13
-rw-r--r--arch/x86/include/asm/kvm_para.h24
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/kernel/kvmclock.c20
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c5
-rw-r--r--arch/x86/kvm/emulate.c293
-rw-r--r--arch/x86/kvm/i8254.c31
-rw-r--r--arch/x86/kvm/i8254.h7
-rw-r--r--arch/x86/kvm/lapic.c31
-rw-r--r--arch/x86/kvm/mmu.c345
-rw-r--r--arch/x86/kvm/mmu_audit.c10
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/svm.c9
-rw-r--r--arch/x86/kvm/vmx.c41
-rw-r--r--arch/x86/kvm/x86.c280
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/xtensa/include/asm/kvm_para.h1
-rw-r--r--drivers/s390/char/sclp_cmd.c12
-rw-r--r--include/asm-generic/kvm_para.h22
-rw-r--r--include/linux/kvm.h42
-rw-r--r--include/linux/kvm_host.h55
-rw-r--r--kernel/watchdog.c12
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/ioapic.c10
-rw-r--r--virt/kvm/ioapic.h1
-rw-r--r--virt/kvm/irq_comm.c14
-rw-r--r--virt/kvm/kvm_main.c132
124 files changed, 5419 insertions, 1968 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 50d82ae09e2a..4ba1eb7590a7 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -588,3 +588,10 @@ Why: Remount currently allows changing bound subsystems and
replaced with conventional fsnotify.
----------------------------
+
+What: KVM debugfs statistics
+When: 2013
+Why: KVM tracepoints provide mostly equivalent information in a much more
+ flexible fashion.
+
+----------------------------
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6386f8c0482e..930126698a0f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2,6 +2,7 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation
===================================================================
1. General description
+----------------------
The kvm API is a set of ioctls that are issued to control various aspects
of a virtual machine. The ioctls belong to three classes
@@ -23,7 +24,9 @@ of a virtual machine. The ioctls belong to three classes
Only run vcpu ioctls from the same thread that was used to create the
vcpu.
+
2. File descriptors
+-------------------
The kvm API is centered around file descriptors. An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
@@ -41,7 +44,9 @@ not cause harm to the host, their actual behavior is not guaranteed by
the API. The only supported use is one virtual machine per process,
and one vcpu per thread.
+
3. Extensions
+-------------
As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
incompatible change are allowed. However, there is an extension
@@ -53,7 +58,9 @@ Instead, kvm defines extension identifiers and a facility to query
whether a particular extension identifier is available. If it is, a
set of ioctls is available for application use.
+
4. API description
+------------------
This section describes ioctls that can be used to control kvm guests.
For each ioctl, the following information is provided along with a
@@ -75,6 +82,7 @@ description:
Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
are not detailed, but errors with specific meanings are.
+
4.1 KVM_GET_API_VERSION
Capability: basic
@@ -90,6 +98,7 @@ supported. Applications should refuse to run if KVM_GET_API_VERSION
returns a value other than 12. If this check passes, all ioctls
described as 'basic' will be available.
+
4.2 KVM_CREATE_VM
Capability: basic
@@ -109,6 +118,7 @@ In order to create user controlled virtual machines on S390, check
KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).
+
4.3 KVM_GET_MSR_INDEX_LIST
Capability: basic
@@ -135,6 +145,7 @@ Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
not returned in the MSR list, as different vcpus can have a different number
of banks, as set via the KVM_X86_SETUP_MCE ioctl.
+
4.4 KVM_CHECK_EXTENSION
Capability: basic
@@ -149,6 +160,7 @@ receives an integer that describes the extension availability.
Generally 0 means no and 1 means yes, but some extensions may report
additional information in the integer return value.
+
4.5 KVM_GET_VCPU_MMAP_SIZE
Capability: basic
@@ -161,6 +173,7 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared
memory region. This ioctl returns the size of that region. See the
KVM_RUN documentation for details.
+
4.6 KVM_SET_MEMORY_REGION
Capability: basic
@@ -171,6 +184,7 @@ Returns: 0 on success, -1 on error
This ioctl is obsolete and has been removed.
+
4.7 KVM_CREATE_VCPU
Capability: basic
@@ -223,6 +237,7 @@ machines, the resulting vcpu fd can be memory mapped at page offset
KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
cpu's hardware control block.
+
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
Capability: basic
@@ -246,6 +261,7 @@ since the last call to this ioctl. Bit 0 is the first page in the
memory slot. Ensure the entire structure is cleared to avoid padding
issues.
+
4.9 KVM_SET_MEMORY_ALIAS
Capability: basic
@@ -256,6 +272,7 @@ Returns: 0 (success), -1 (error)
This ioctl is obsolete and has been removed.
+
4.10 KVM_RUN
Capability: basic
@@ -272,6 +289,7 @@ obtained by mmap()ing the vcpu fd at offset 0, with the size given by
KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct
kvm_run' (see below).
+
4.11 KVM_GET_REGS
Capability: basic
@@ -292,6 +310,7 @@ struct kvm_regs {
__u64 rip, rflags;
};
+
4.12 KVM_SET_REGS
Capability: basic
@@ -304,6 +323,7 @@ Writes the general purpose registers into the vcpu.
See KVM_GET_REGS for the data structure.
+
4.13 KVM_GET_SREGS
Capability: basic
@@ -331,6 +351,7 @@ interrupt_bitmap is a bitmap of pending external interrupts. At most
one bit may be set. This interrupt has been acknowledged by the APIC
but not yet injected into the cpu core.
+
4.14 KVM_SET_SREGS
Capability: basic
@@ -342,6 +363,7 @@ Returns: 0 on success, -1 on error
Writes special registers into the vcpu. See KVM_GET_SREGS for the
data structures.
+
4.15 KVM_TRANSLATE
Capability: basic
@@ -365,6 +387,7 @@ struct kvm_translation {
__u8 pad[5];
};
+
4.16 KVM_INTERRUPT
Capability: basic
@@ -413,6 +436,7 @@ c) KVM_INTERRUPT_SET_LEVEL
Note that any value for 'irq' other than the ones stated above is invalid
and incurs unexpected behavior.
+
4.17 KVM_DEBUG_GUEST
Capability: basic
@@ -423,6 +447,7 @@ Returns: -1 on error
Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
+
4.18 KVM_GET_MSRS
Capability: basic
@@ -451,6 +476,7 @@ Application code should set the 'nmsrs' member (which indicates the
size of the entries array) and the 'index' member of each array entry.
kvm will fill in the 'data' member.
+
4.19 KVM_SET_MSRS
Capability: basic
@@ -466,6 +492,7 @@ Application code should set the 'nmsrs' member (which indicates the
size of the entries array), and the 'index' and 'data' members of each
array entry.
+
4.20 KVM_SET_CPUID
Capability: basic
@@ -494,6 +521,7 @@ struct kvm_cpuid {
struct kvm_cpuid_entry entries[0];
};
+
4.21 KVM_SET_SIGNAL_MASK
Capability: basic
@@ -516,6 +544,7 @@ struct kvm_signal_mask {
__u8 sigset[0];
};
+
4.22 KVM_GET_FPU
Capability: basic
@@ -541,6 +570,7 @@ struct kvm_fpu {
__u32 pad2;
};
+
4.23 KVM_SET_FPU
Capability: basic
@@ -566,6 +596,7 @@ struct kvm_fpu {
__u32 pad2;
};
+
4.24 KVM_CREATE_IRQCHIP
Capability: KVM_CAP_IRQCHIP
@@ -579,6 +610,7 @@ ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
only go to the IOAPIC. On ia64, a IOSAPIC is created.
+
4.25 KVM_IRQ_LINE
Capability: KVM_CAP_IRQCHIP
@@ -600,6 +632,7 @@ struct kvm_irq_level {
__u32 level; /* 0 or 1 */
};
+
4.26 KVM_GET_IRQCHIP
Capability: KVM_CAP_IRQCHIP
@@ -621,6 +654,7 @@ struct kvm_irqchip {
} chip;
};
+
4.27 KVM_SET_IRQCHIP
Capability: KVM_CAP_IRQCHIP
@@ -642,6 +676,7 @@ struct kvm_irqchip {
} chip;
};
+
4.28 KVM_XEN_HVM_CONFIG
Capability: KVM_CAP_XEN_HVM
@@ -666,6 +701,7 @@ struct kvm_xen_hvm_config {
__u8 pad2[30];
};
+
4.29 KVM_GET_CLOCK
Capability: KVM_CAP_ADJUST_CLOCK
@@ -684,6 +720,7 @@ struct kvm_clock_data {
__u32 pad[9];
};
+
4.30 KVM_SET_CLOCK
Capability: KVM_CAP_ADJUST_CLOCK
@@ -702,6 +739,7 @@ struct kvm_clock_data {
__u32 pad[9];
};
+
4.31 KVM_GET_VCPU_EVENTS
Capability: KVM_CAP_VCPU_EVENTS
@@ -741,6 +779,7 @@ struct kvm_vcpu_events {
KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
interrupt.shadow contains a valid state. Otherwise, this field is undefined.
+
4.32 KVM_SET_VCPU_EVENTS
Capability: KVM_CAP_VCPU_EVENTS
@@ -767,6 +806,7 @@ If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
the flags field to signal that interrupt.shadow contains a valid state and
shall be written into the VCPU.
+
4.33 KVM_GET_DEBUGREGS
Capability: KVM_CAP_DEBUGREGS
@@ -785,6 +825,7 @@ struct kvm_debugregs {
__u64 reserved[9];
};
+
4.34 KVM_SET_DEBUGREGS
Capability: KVM_CAP_DEBUGREGS
@@ -798,6 +839,7 @@ Writes debug registers into the vcpu.
See KVM_GET_DEBUGREGS for the data structure. The flags field is unused
yet and must be cleared on entry.
+
4.35 KVM_SET_USER_MEMORY_REGION
Capability: KVM_CAP_USER_MEM
@@ -844,6 +886,7 @@ It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
allocation and is deprecated.
+
4.36 KVM_SET_TSS_ADDR
Capability: KVM_CAP_SET_TSS_ADDR
@@ -862,6 +905,7 @@ This ioctl is required on Intel-based hosts. This is needed on Intel hardware
because of a quirk in the virtualization implementation (see the internals
documentation when it pops into existence).
+
4.37 KVM_ENABLE_CAP
Capability: KVM_CAP_ENABLE_CAP
@@ -897,6 +941,7 @@ function properly, this is the place to put them.
__u8 pad[64];
};
+
4.38 KVM_GET_MP_STATE
Capability: KVM_CAP_MP_STATE
@@ -927,6 +972,7 @@ Possible values are:
This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
irqchip, the multiprocessing state must be maintained by userspace.
+
4.39 KVM_SET_MP_STATE
Capability: KVM_CAP_MP_STATE
@@ -941,6 +987,7 @@ arguments.
This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
irqchip, the multiprocessing state must be maintained by userspace.
+
4.40 KVM_SET_IDENTITY_MAP_ADDR
Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
@@ -959,6 +1006,7 @@ This ioctl is required on Intel-based hosts. This is needed on Intel hardware
because of a quirk in the virtualization implementation (see the internals
documentation when it pops into existence).
+
4.41 KVM_SET_BOOT_CPU_ID
Capability: KVM_CAP_SET_BOOT_CPU_ID
@@ -971,6 +1019,7 @@ Define which vcpu is the Bootstrap Processor (BSP). Values are the same
as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
is vcpu 0.
+
4.42 KVM_GET_XSAVE
Capability: KVM_CAP_XSAVE
@@ -985,6 +1034,7 @@ struct kvm_xsave {
This ioctl would copy current vcpu's xsave struct to the userspace.
+
4.43 KVM_SET_XSAVE
Capability: KVM_CAP_XSAVE
@@ -999,6 +1049,7 @@ struct kvm_xsave {
This ioctl would copy userspace's xsave struct to the kernel.
+
4.44 KVM_GET_XCRS
Capability: KVM_CAP_XCRS
@@ -1022,6 +1073,7 @@ struct kvm_xcrs {
This ioctl would copy current vcpu's xcrs to the userspace.
+
4.45 KVM_SET_XCRS
Capability: KVM_CAP_XCRS
@@ -1045,6 +1097,7 @@ struct kvm_xcrs {
This ioctl would set vcpu's xcr to the value userspace specified.
+
4.46 KVM_GET_SUPPORTED_CPUID
Capability: KVM_CAP_EXT_CPUID
@@ -1119,6 +1172,7 @@ support. Instead it is reported via
if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
+
4.47 KVM_PPC_GET_PVINFO
Capability: KVM_CAP_PPC_GET_PVINFO
@@ -1142,6 +1196,7 @@ of 4 instructions that make up a hypercall.
If any additional field gets added to this structure later on, a bit for that
additional piece of information will be set in the flags bitmap.
+
4.48 KVM_ASSIGN_PCI_DEVICE
Capability: KVM_CAP_DEVICE_ASSIGNMENT
@@ -1185,6 +1240,7 @@ Only PCI header type 0 devices with PCI BAR resources are supported by
device assignment. The user requesting this ioctl must have read/write
access to the PCI sysfs resource files associated with the device.
+
4.49 KVM_DEASSIGN_PCI_DEVICE
Capability: KVM_CAP_DEVICE_DEASSIGNMENT
@@ -1198,6 +1254,7 @@ Ends PCI device assignment, releasing all associated resources.
See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
used in kvm_assigned_pci_dev to identify the device.
+
4.50 KVM_ASSIGN_DEV_IRQ
Capability: KVM_CAP_ASSIGN_DEV_IRQ
@@ -1231,6 +1288,7 @@ The following flags are defined:
It is not valid to specify multiple types per host or guest IRQ. However, the
IRQ type of host and guest can differ or can even be null.
+
4.51 KVM_DEASSIGN_DEV_IRQ
Capability: KVM_CAP_ASSIGN_DEV_IRQ
@@ -1245,6 +1303,7 @@ See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
by assigned_dev_id, flags must correspond to the IRQ type specified on
KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
+
4.52 KVM_SET_GSI_ROUTING
Capability: KVM_CAP_IRQ_ROUTING
@@ -1293,6 +1352,7 @@ struct kvm_irq_routing_msi {
__u32 pad;
};
+
4.53 KVM_ASSIGN_SET_MSIX_NR
Capability: KVM_CAP_DEVICE_MSIX
@@ -1314,6 +1374,7 @@ struct kvm_assigned_msix_nr {
#define KVM_MAX_MSIX_PER_DEV 256
+
4.54 KVM_ASSIGN_SET_MSIX_ENTRY
Capability: KVM_CAP_DEVICE_MSIX
@@ -1332,7 +1393,8 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
-4.54 KVM_SET_TSC_KHZ
+
+4.55 KVM_SET_TSC_KHZ
Capability: KVM_CAP_TSC_CONTROL
Architectures: x86
@@ -1343,7 +1405,8 @@ Returns: 0 on success, -1 on error
Specifies the tsc frequency for the virtual machine. The unit of the
frequency is KHz.
-4.55 KVM_GET_TSC_KHZ
+
+4.56 KVM_GET_TSC_KHZ
Capability: KVM_CAP_GET_TSC_KHZ
Architectures: x86
@@ -1355,7 +1418,8 @@ Returns the tsc frequency of the guest. The unit of the return value is
KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
error.
-4.56 KVM_GET_LAPIC
+
+4.57 KVM_GET_LAPIC
Capability: KVM_CAP_IRQCHIP
Architectures: x86
@@ -1371,7 +1435,8 @@ struct kvm_lapic_state {
Reads the Local APIC registers and copies them into the input argument. The
data format and layout are the same as documented in the architecture manual.
-4.57 KVM_SET_LAPIC
+
+4.58 KVM_SET_LAPIC
Capability: KVM_CAP_IRQCHIP
Architectures: x86
@@ -1387,7 +1452,8 @@ struct kvm_lapic_state {
Copies the input argument into the the Local APIC registers. The data format
and layout are the same as documented in the architecture manual.
-4.58 KVM_IOEVENTFD
+
+4.59 KVM_IOEVENTFD
Capability: KVM_CAP_IOEVENTFD
Architectures: all
@@ -1417,7 +1483,8 @@ The following flags are defined:
If datamatch flag is set, the event will be signaled only if the written value
to the registered address is equal to datamatch in struct kvm_ioeventfd.
-4.59 KVM_DIRTY_TLB
+
+4.60 KVM_DIRTY_TLB
Capability: KVM_CAP_SW_TLB
Architectures: ppc
@@ -1449,7 +1516,8 @@ The "num_dirty" field is a performance hint for KVM to determine whether it
should skip processing the bitmap and just invalidate everything. It must
be set to the number of set bits in the bitmap.
-4.60 KVM_ASSIGN_SET_INTX_MASK
+
+4.61 KVM_ASSIGN_SET_INTX_MASK
Capability: KVM_CAP_PCI_2_3
Architectures: x86
@@ -1482,6 +1550,7 @@ See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
evaluated.
+
4.62 KVM_CREATE_SPAPR_TCE
Capability: KVM_CAP_SPAPR_TCE
@@ -1517,6 +1586,7 @@ the entries written by kernel-handled H_PUT_TCE calls, and also lets
userspace update the TCE table directly which is useful in some
circumstances.
+
4.63 KVM_ALLOCATE_RMA
Capability: KVM_CAP_PPC_RMA
@@ -1549,6 +1619,7 @@ is supported; 2 if the processor requires all virtual machines to have
an RMA, or 1 if the processor can use an RMA but doesn't require it,
because it supports the Virtual RMA (VRMA) facility.
+
4.64 KVM_NMI
Capability: KVM_CAP_USER_NMI
@@ -1574,6 +1645,7 @@ following algorithm:
Some guests configure the LINT1 NMI input to cause a panic, aiding in
debugging.
+
4.65 KVM_S390_UCAS_MAP
Capability: KVM_CAP_S390_UCONTROL
@@ -1593,6 +1665,7 @@ This ioctl maps the memory at "user_addr" with the length "length" to
the vcpu's address space starting at "vcpu_addr". All parameters need to
be alligned by 1 megabyte.
+
4.66 KVM_S390_UCAS_UNMAP
Capability: KVM_CAP_S390_UCONTROL
@@ -1612,6 +1685,7 @@ This ioctl unmaps the memory in the vcpu's address space starting at
"vcpu_addr" with the length "length". The field "user_addr" is ignored.
All parameters need to be alligned by 1 megabyte.
+
4.67 KVM_S390_VCPU_FAULT
Capability: KVM_CAP_S390_UCONTROL
@@ -1628,6 +1702,7 @@ table upfront. This is useful to handle validity intercepts for user
controlled virtual machines to fault in the virtual cpu's lowcore pages
prior to calling the KVM_RUN ioctl.
+
4.68 KVM_SET_ONE_REG
Capability: KVM_CAP_ONE_REG
@@ -1653,6 +1728,7 @@ registers, find a list below:
| |
PPC | KVM_REG_PPC_HIOR | 64
+
4.69 KVM_GET_ONE_REG
Capability: KVM_CAP_ONE_REG
@@ -1669,7 +1745,193 @@ at the memory location pointed to by "addr".
The list of registers accessible using this interface is identical to the
list in 4.64.
+
+4.70 KVM_KVMCLOCK_CTRL
+
+Capability: KVM_CAP_KVMCLOCK_CTRL
+Architectures: Any that implement pvclocks (currently x86 only)
+Type: vcpu ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This signals to the host kernel that the specified guest is being paused by
+userspace. The host will set a flag in the pvclock structure that is checked
+from the soft lockup watchdog. The flag is part of the pvclock structure that
+is shared between guest and host, specifically the second bit of the flags
+field of the pvclock_vcpu_time_info structure. It will be set exclusively by
+the host and read/cleared exclusively by the guest. The guest operation of
+checking and clearing the flag must an atomic operation so
+load-link/store-conditional, or equivalent must be used. There are two cases
+where the guest will clear the flag: when the soft lockup watchdog timer resets
+itself or when a soft lockup is detected. This ioctl can be called any time
+after pausing the vcpu, but before it is resumed.
+
+
+4.71 KVM_SIGNAL_MSI
+
+Capability: KVM_CAP_SIGNAL_MSI
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msi (in)
+Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
+
+Directly inject a MSI message. Only valid with in-kernel irqchip that handles
+MSI messages.
+
+struct kvm_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 flags;
+ __u8 pad[16];
+};
+
+No flags are defined so far. The corresponding field must be 0.
+
+
+4.71 KVM_CREATE_PIT2
+
+Capability: KVM_CAP_PIT2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_config (in)
+Returns: 0 on success, -1 on error
+
+Creates an in-kernel device model for the i8254 PIT. This call is only valid
+after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following
+parameters have to be passed:
+
+struct kvm_pit_config {
+ __u32 flags;
+ __u32 pad[15];
+};
+
+Valid flags are:
+
+#define KVM_PIT_SPEAKER_DUMMY 1 /* emulate speaker port stub */
+
+PIT timer interrupts may use a per-VM kernel thread for injection. If it
+exists, this thread will have a name of the following pattern:
+
+kvm-pit/<owner-process-pid>
+
+When running a guest with elevated priorities, the scheduling parameters of
+this thread may have to be adjusted accordingly.
+
+This IOCTL replaces the obsolete KVM_CREATE_PIT.
+
+
+4.72 KVM_GET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (out)
+Returns: 0 on success, -1 on error
+
+Retrieves the state of the in-kernel PIT model. Only valid after
+KVM_CREATE_PIT2. The state is returned in the following structure:
+
+struct kvm_pit_state2 {
+ struct kvm_pit_channel_state channels[3];
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+Valid flags are:
+
+/* disable PIT in HPET legacy mode */
+#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+
+This IOCTL replaces the obsolete KVM_GET_PIT.
+
+
+4.73 KVM_SET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
+See KVM_GET_PIT2 for details on struct kvm_pit_state2.
+
+This IOCTL replaces the obsolete KVM_SET_PIT.
+
+
+4.74 KVM_PPC_GET_SMMU_INFO
+
+Capability: KVM_CAP_PPC_GET_SMMU_INFO
+Architectures: powerpc
+Type: vm ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This populates and returns a structure describing the features of
+the "Server" class MMU emulation supported by KVM.
+This can in turn be used by userspace to generate the appropariate
+device-tree properties for the guest operating system.
+
+The structure contains some global informations, followed by an
+array of supported segment page sizes:
+
+ struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u32 pad;
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+ };
+
+The supported flags are:
+
+ - KVM_PPC_PAGE_SIZES_REAL:
+ When that flag is set, guest page sizes must "fit" the backing
+ store page sizes. When not set, any page size in the list can
+ be used regardless of how they are backed by userspace.
+
+ - KVM_PPC_1T_SEGMENTS
+ The emulated MMU supports 1T segments in addition to the
+ standard 256M ones.
+
+The "slb_size" field indicates how many SLB entries are supported
+
+The "sps" array contains 8 entries indicating the supported base
+page sizes for a segment in increasing order. Each entry is defined
+as follow:
+
+ struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+ };
+
+An entry with a "page_shift" of 0 is unused. Because the array is
+organized in increasing order, a lookup can stop when encoutering
+such an entry.
+
+The "slb_enc" field provides the encoding to use in the SLB for the
+page size. The bits are in positions such as the value can directly
+be OR'ed into the "vsid" argument of the slbmte instruction.
+
+The "enc" array is a list which for each of those segment base page
+size provides the list of supported actual page sizes (which can be
+only larger or equal to the base page size), along with the
+corresponding encoding in the hash PTE. Similarily, the array is
+8 entries sorted by increasing sizes and an entry with a "0" shift
+is an empty entry and a terminator:
+
+ struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+ };
+
+The "pte_enc" field provides a value that can OR'ed into the hash
+PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
+into the hash PTE second double word).
+
5. The kvm_run structure
+------------------------
Application code obtains a pointer to the kvm_run structure by
mmap()ing a vcpu fd. From that point, application code can control
@@ -1910,7 +2172,9 @@ and usually define the validity of a groups of registers. (e.g. one bit
};
+
6. Capabilities that can be enabled
+-----------------------------------
There are certain capabilities that change the behavior of the virtual CPU when
enabled. To enable them, please see section 4.37. Below you can find a list of
@@ -1926,6 +2190,7 @@ The following information is provided along with the description:
Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
are not detailed, but errors with specific meanings are.
+
6.1 KVM_CAP_PPC_OSI
Architectures: ppc
@@ -1939,6 +2204,7 @@ between the guest and the host.
When this capability is enabled, KVM_EXIT_OSI can occur.
+
6.2 KVM_CAP_PPC_PAPR
Architectures: ppc
@@ -1957,6 +2223,7 @@ HTAB invisible to the guest.
When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
+
6.3 KVM_CAP_SW_TLB
Architectures: ppc
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 882068538c9c..83afe65d4966 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -10,11 +10,15 @@ a guest.
KVM cpuid functions are:
function: KVM_CPUID_SIGNATURE (0x40000000)
-returns : eax = 0,
+returns : eax = 0x40000001,
ebx = 0x4b4d564b,
ecx = 0x564b4d56,
edx = 0x4d.
Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
This function queries the presence of KVM cpuid leafs.
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 50317809113d..96b41bd97523 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -109,6 +109,10 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
0 | 24 | multiple cpus are guaranteed to
| | be monotonic
-------------------------------------------------------------
+ | | guest vcpu has been paused by
+ 1 | N/A | the host
+ | | See 4.70 in api.txt
+ -------------------------------------------------------------
Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
leaf prior to usage.
diff --git a/arch/alpha/include/asm/kvm_para.h b/arch/alpha/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/alpha/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/arm/include/asm/kvm_para.h b/arch/arm/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/arm/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/avr32/include/asm/kvm_para.h b/arch/avr32/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/avr32/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/blackfin/include/asm/kvm_para.h b/arch/blackfin/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/blackfin/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/c6x/include/asm/kvm_para.h b/arch/c6x/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/c6x/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/frv/include/asm/kvm_para.h b/arch/frv/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/frv/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/h8300/include/asm/kvm_para.h b/arch/h8300/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/h8300/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/hexagon/include/asm/kvm_para.h b/arch/hexagon/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/hexagon/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index e35b3a84a40b..6d6a5ac48d85 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -365,6 +365,7 @@ struct thash_cb {
};
struct kvm_vcpu_stat {
+ u32 halt_wakeup;
};
struct kvm_vcpu_arch {
@@ -448,6 +449,8 @@ struct kvm_vcpu_arch {
char log_buf[VMM_LOG_LEN];
union context host;
union context guest;
+
+ char mmio_data[8];
};
struct kvm_vm_stat {
diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h
index 1588aee781a2..2019cb99335e 100644
--- a/arch/ia64/include/asm/kvm_para.h
+++ b/arch/ia64/include/asm/kvm_para.h
@@ -26,6 +26,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
#endif
#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 463fb3bbe11e..bd77cb507c1c 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
goto mmio;
vcpu->mmio_needed = 1;
- vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
- vcpu->mmio_size = kvm_run->mmio.len = p->size;
+ vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
+ vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
if (vcpu->mmio_is_write)
- memcpy(vcpu->mmio_data, &p->data, p->size);
+ memcpy(vcpu->arch.mmio_data, &p->data, p->size);
memcpy(kvm_run->mmio.data, &p->data, p->size);
kvm_run->exit_reason = KVM_EXIT_MMIO;
return 0;
@@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
if (!vcpu->mmio_is_write)
- memcpy(&p->data, vcpu->mmio_data, 8);
+ memcpy(&p->data, vcpu->arch.mmio_data, 8);
p->state = STATE_IORESP_READY;
}
@@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
if (vcpu->mmio_needed) {
- memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+ memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
kvm_set_mmio_data(vcpu);
vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
@@ -1872,21 +1872,6 @@ void kvm_arch_hardware_unsetup(void)
{
}
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
- int me;
- int cpu = vcpu->cpu;
-
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
-
- me = get_cpu();
- if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
- if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
- smp_send_reschedule(cpu);
- put_cpu();
-}
-
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
return __apic_accept_irq(vcpu, irq->vector);
@@ -1956,6 +1941,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
(kvm_highest_pending_irq(vcpu) != -1);
}
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
diff --git a/arch/m68k/include/asm/kvm_para.h b/arch/m68k/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/m68k/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/microblaze/include/asm/kvm_para.h b/arch/microblaze/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/microblaze/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/mips/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/mn10300/include/asm/kvm_para.h b/arch/mn10300/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/mn10300/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/openrisc/include/asm/kvm_para.h b/arch/openrisc/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/openrisc/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/parisc/include/asm/kvm_para.h b/arch/parisc/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/parisc/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b9219e99bd2a..50d82c8a037f 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -168,6 +168,7 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000)
#define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000)
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000)
+#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000)
/*
* Add the 64-bit processor unique features in the top half of the word;
@@ -376,7 +377,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_47X (CPU_FTRS_440x6)
#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE)
+ CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
+ CPU_FTR_DEBUG_LVL_EXC)
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
CPU_FTR_NOEXECUTE)
@@ -385,15 +387,15 @@ extern const char *powerpc_base_platform;
CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
- CPU_FTR_DBELL)
+ CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_DEBUG_LVL_EXC)
+ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_DEBUG_LVL_EXC)
+ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
@@ -486,8 +488,10 @@ enum {
CPU_FTRS_E200 |
#endif
#ifdef CONFIG_E500
- CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC |
- CPU_FTRS_E5500 | CPU_FTRS_E6500 |
+ CPU_FTRS_E500 | CPU_FTRS_E500_2 |
+#endif
+#ifdef CONFIG_PPC_E500MC
+ CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 |
#endif
0,
};
@@ -531,9 +535,12 @@ enum {
CPU_FTRS_E200 &
#endif
#ifdef CONFIG_E500
- CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC &
- CPU_FTRS_E5500 & CPU_FTRS_E6500 &
+ CPU_FTRS_E500 & CPU_FTRS_E500_2 &
+#endif
+#ifdef CONFIG_PPC_E500MC
+ CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 &
#endif
+ ~CPU_FTR_EMB_HV & /* can be removed at runtime */
CPU_FTRS_POSSIBLE,
};
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index efa74ac44a35..154c067761b1 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -19,6 +19,9 @@
#define PPC_DBELL_MSG_BRDCAST (0x04000000)
#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36))
+#define PPC_DBELL_TYPE_MASK PPC_DBELL_TYPE(0xf)
+#define PPC_DBELL_LPID(x) ((x) << (63 - 49))
+#define PPC_DBELL_PIR_MASK 0x3fff
enum ppc_dbell {
PPC_DBELL = 0, /* doorbell */
PPC_DBELL_CRIT = 1, /* critical doorbell */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 612252388190..423cf9eaf4a4 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -133,6 +133,16 @@
#define H_PP1 (1UL<<(63-62))
#define H_PP2 (1UL<<(63-63))
+/* Flags for H_REGISTER_VPA subfunction field */
+#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */
+#define H_VPA_FUNC_MASK 7UL
+#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */
+#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */
+#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */
+#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */
+#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */
+#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */
+
/* VASI States */
#define H_VASI_INVALID 0
#define H_VASI_ENABLED 1
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 51010bfc792e..c9aac24b02e2 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -33,6 +33,7 @@
extern void __replay_interrupt(unsigned int vector);
extern void timer_interrupt(struct pt_regs *);
+extern void performance_monitor_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index b921c3f48928..1bea4d8ea6f4 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -277,6 +277,7 @@ struct kvm_sync_regs {
#define KVM_CPU_E500V2 2
#define KVM_CPU_3S_32 3
#define KVM_CPU_3S_64 4
+#define KVM_CPU_E500MC 5
/* for KVM_CAP_SPAPR_TCE */
struct kvm_create_spapr_tce {
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 7b1f0e0fc653..76fdcfef0889 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -20,6 +20,16 @@
#ifndef __POWERPC_KVM_ASM_H__
#define __POWERPC_KVM_ASM_H__
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_64BIT
+#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg)
+#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg)
+#else
+#define PPC_STD(sreg, offset, areg) stw sreg, (offset+4)(areg)
+#define PPC_LD(treg, offset, areg) lwz treg, (offset+4)(areg)
+#endif
+#endif
+
/* IVPR must be 64KiB-aligned. */
#define VCPU_SIZE_ORDER 4
#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12)
@@ -48,6 +58,14 @@
#define BOOKE_INTERRUPT_SPE_FP_DATA 33
#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
+#define BOOKE_INTERRUPT_DOORBELL 36
+#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
+
+/* booke_hv */
+#define BOOKE_INTERRUPT_GUEST_DBELL 38
+#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39
+#define BOOKE_INTERRUPT_HV_SYSCALL 40
+#define BOOKE_INTERRUPT_HV_PRIV 41
/* book3s */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fd07f43d6622..f0e0c6a66d97 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -453,4 +453,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
#define INS_DCBZ 0x7c0007ec
+/* LPIDs we support with this build -- runtime limit may be lower */
+#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
+
#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 1f2f5b6156bd..88609b23b775 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -79,6 +79,9 @@ struct kvmppc_host_state {
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_64_HV
+ u8 hwthread_req;
+ u8 hwthread_state;
+
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
@@ -122,4 +125,9 @@ struct kvmppc_book3s_shadow_vcpu {
#endif /*__ASSEMBLY__ */
+/* Values for kvm_state */
+#define KVM_HWTHREAD_IN_KERNEL 0
+#define KVM_HWTHREAD_IN_NAP 1
+#define KVM_HWTHREAD_IN_KVM 2
+
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index a90e09188777..b7cd3356a532 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -23,6 +23,9 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
+/* LPIDs we support with this build -- runtime limit may be lower */
+#define KVMPPC_NR_LPIDS 64
+
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
vcpu->arch.gpr[num] = val;
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
new file mode 100644
index 000000000000..30a600fa1b6a
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_KVM_BOOKE_HV_ASM_H
+#define ASM_KVM_BOOKE_HV_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * All exceptions from guest state must go through KVM
+ * (except for those which are delivered directly to the guest) --
+ * there are no exceptions for which we fall through directly to
+ * the normal host handler.
+ *
+ * Expected inputs (normal exceptions):
+ * SCRATCH0 = saved r10
+ * r10 = thread struct
+ * r11 = appropriate SRR1 variant (currently used as scratch)
+ * r13 = saved CR
+ * *(r10 + THREAD_NORMSAVE(0)) = saved r11
+ * *(r10 + THREAD_NORMSAVE(2)) = saved r13
+ *
+ * Expected inputs (crit/mcheck/debug exceptions):
+ * appropriate SCRATCH = saved r8
+ * r8 = exception level stack frame
+ * r9 = *(r8 + _CCR) = saved CR
+ * r11 = appropriate SRR1 variant (currently used as scratch)
+ * *(r8 + GPR9) = saved r9
+ * *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
+ * *(r8 + GPR11) = saved r11
+ */
+.macro DO_KVM intno srr1
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, kvmppc_resume_\intno\()_\srr1
+ b kvmppc_handler_\intno\()_\srr1
+kvmppc_resume_\intno\()_\srr1:
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+.endm
+
+#endif /*__ASSEMBLY__ */
+#endif /* ASM_KVM_BOOKE_HV_ASM_H */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
deleted file mode 100644
index 8cd50a514271..000000000000
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Yu Liu, <yu.liu@freescale.com>
- *
- * Description:
- * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
- * by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __ASM_KVM_E500_H__
-#define __ASM_KVM_E500_H__
-
-#include <linux/kvm_host.h>
-
-#define BOOKE_INTERRUPT_SIZE 36
-
-#define E500_PID_NUM 3
-#define E500_TLB_NUM 2
-
-#define E500_TLB_VALID 1
-#define E500_TLB_DIRTY 2
-
-struct tlbe_ref {
- pfn_t pfn;
- unsigned int flags; /* E500_TLB_* */
-};
-
-struct tlbe_priv {
- struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
-};
-
-struct vcpu_id_table;
-
-struct kvmppc_e500_tlb_params {
- int entries, ways, sets;
-};
-
-struct kvmppc_vcpu_e500 {
- /* Unmodified copy of the guest's TLB -- shared with host userspace. */
- struct kvm_book3e_206_tlb_entry *gtlb_arch;
-
- /* Starting entry number in gtlb_arch[] */
- int gtlb_offset[E500_TLB_NUM];
-
- /* KVM internal information associated with each guest TLB entry */
- struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
-
- struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
-
- unsigned int gtlb_nv[E500_TLB_NUM];
-
- /*
- * information associated with each host TLB entry --
- * TLB1 only for now. If/when guest TLB1 entries can be
- * mapped with host TLB0, this will be used for that too.
- *
- * We don't want to use this for guest TLB0 because then we'd
- * have the overhead of doing the translation again even if
- * the entry is still in the guest TLB (e.g. we swapped out
- * and back, and our host TLB entries got evicted).
- */
- struct tlbe_ref *tlb_refs[E500_TLB_NUM];
- unsigned int host_tlb1_nv;
-
- u32 host_pid[E500_PID_NUM];
- u32 pid[E500_PID_NUM];
- u32 svr;
-
- /* vcpu id table */
- struct vcpu_id_table *idt;
-
- u32 l1csr0;
- u32 l1csr1;
- u32 hid0;
- u32 hid1;
- u32 tlb0cfg;
- u32 tlb1cfg;
- u64 mcar;
-
- struct page **shared_tlb_pages;
- int num_shared_tlb_pages;
-
- struct kvm_vcpu vcpu;
-};
-
-static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
-{
- return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
-}
-
-#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 52eb9c1f4fe0..d848cdc49715 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -82,7 +82,7 @@ struct kvm_vcpu;
struct lppaca;
struct slb_shadow;
-struct dtl;
+struct dtl_entry;
struct kvm_vm_stat {
u32 remote_tlb_flush;
@@ -106,6 +106,8 @@ struct kvm_vcpu_stat {
u32 dec_exits;
u32 ext_intr_exits;
u32 halt_wakeup;
+ u32 dbell_exits;
+ u32 gdbell_exits;
#ifdef CONFIG_PPC_BOOK3S
u32 pf_storage;
u32 pf_instruc;
@@ -140,6 +142,7 @@ enum kvm_exit_types {
EMULATED_TLBSX_EXITS,
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
+ EMULATED_RFCI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
@@ -147,6 +150,8 @@ enum kvm_exit_types {
FP_UNAVAIL,
DEBUG_EXITS,
TIMEINGUEST,
+ DBELL_EXITS,
+ GDBELL_EXITS,
__NUMBER_OF_KVM_EXIT_TYPES
};
@@ -217,10 +222,10 @@ struct kvm_arch_memory_slot {
};
struct kvm_arch {
+ unsigned int lpid;
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
- unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
@@ -232,7 +237,6 @@ struct kvm_arch {
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
- struct list_head spapr_tce_tables;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
@@ -240,6 +244,9 @@ struct kvm_arch {
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct list_head spapr_tce_tables;
+#endif
};
/*
@@ -263,6 +270,9 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
spinlock_t lock;
wait_queue_head_t wq;
+ u64 stolen_tb;
+ u64 preempt_tb;
+ struct kvm_vcpu *runner;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
@@ -274,6 +284,19 @@ struct kvmppc_vcore {
#define VCORE_EXITING 2
#define VCORE_SLEEPING 3
+/*
+ * Struct used to manage memory for a virtual processor area
+ * registered by a PAPR guest. There are three types of area
+ * that a guest can register.
+ */
+struct kvmppc_vpa {
+ void *pinned_addr; /* Address in kernel linear mapping */
+ void *pinned_end; /* End of region */
+ unsigned long next_gpa; /* Guest phys addr for update */
+ unsigned long len; /* Number of bytes required */
+ u8 update_pending; /* 1 => update pinned_addr from next_gpa */
+};
+
struct kvmppc_pte {
ulong eaddr;
u64 vpage;
@@ -345,6 +368,17 @@ struct kvm_vcpu_arch {
u64 vsr[64];
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ u32 host_mas4;
+ u32 host_mas6;
+ u32 shadow_epcr;
+ u32 epcr;
+ u32 shadow_msrp;
+ u32 eplc;
+ u32 epsc;
+ u32 oldpir;
+#endif
+
#ifdef CONFIG_PPC_BOOK3S
/* For Gekko paired singles */
u32 qpr[32];
@@ -370,6 +404,7 @@ struct kvm_vcpu_arch {
#endif
u32 vrsave; /* also USPRG0 */
u32 mmucr;
+ /* shadow_msr is unused for BookE HV */
ulong shadow_msr;
ulong csrr0;
ulong csrr1;
@@ -426,8 +461,12 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+ u32 tlbcfg[4];
+ u32 mmucfg;
+ u32 epr;
#endif
gpa_t paddr_accessed;
+ gva_t vaddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
u8 mmio_is_bigendian;
@@ -453,11 +492,6 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;
- struct lppaca *vpa;
- struct slb_shadow *slb_shadow;
- struct dtl *dtl;
- struct dtl *dtl_end;
-
wait_queue_head_t *wqp;
struct kvmppc_vcore *vcore;
int ret;
@@ -482,6 +516,14 @@ struct kvm_vcpu_arch {
struct task_struct *run_task;
struct kvm_run *kvm_run;
pgd_t *pgdir;
+
+ spinlock_t vpa_update_lock;
+ struct kvmppc_vpa vpa;
+ struct kvmppc_vpa dtl;
+ struct dtl_entry *dtl_ptr;
+ unsigned long dtl_index;
+ u64 stolen_logged;
+ struct kvmppc_vpa slb_shadow;
#endif
};
@@ -498,4 +540,6 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
+#define __KVM_HAVE_ARCH_WQP
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 7b754e743003..c18916bff689 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -206,6 +206,11 @@ static inline unsigned int kvm_arch_para_features(void)
return r;
}
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
#endif /* __KERNEL__ */
#endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9d6dee0f7d48..f68c22fa2fce 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -95,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
@@ -107,8 +107,10 @@ extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
-extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
-extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
+ ulong val);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
+ ulong *val);
extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
@@ -126,6 +128,8 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
+extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce);
extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
struct kvm_allocate_rma *rma);
extern struct kvmppc_linear_info *kvm_alloc_rma(void);
@@ -138,6 +142,11 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
+extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
+ struct kvm_ppc_smmu_info *info);
+
+extern int kvmppc_bookehv_init(void);
+extern void kvmppc_bookehv_exit(void);
/*
* Cuts out inst bits with ordering according to spec.
@@ -204,4 +213,9 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
struct kvm_dirty_tlb *cfg);
+long kvmppc_alloc_lpid(void);
+void kvmppc_claim_lpid(long lpid);
+void kvmppc_free_lpid(long lpid);
+void kvmppc_init_lpid(unsigned long nr_lpids);
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index cdb5421877e2..eeabcdbc30f7 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -104,6 +104,8 @@
#define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */
#define MAS4_TSIZED_SHIFT 7
+#define MAS5_SGS 0x80000000
+
#define MAS6_SPID0 0x3FFF0000
#define MAS6_SPID1 0x00007FFE
#define MAS6_ISIZE(x) MAS1_TSIZE(x)
@@ -118,6 +120,10 @@
#define MAS7_RPN 0xFFFFFFFF
+#define MAS8_TGS 0x80000000 /* Guest space */
+#define MAS8_VF 0x40000000 /* Virtualization Fault */
+#define MAS8_TLPID 0x000000ff
+
/* Bit definitions for MMUCFG */
#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */
#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 55e85631c42e..413a5eaef56c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -240,6 +240,9 @@ struct thread_struct {
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+ struct kvm_vcpu *kvm_vcpu;
+#endif
#ifdef CONFIG_PPC64
unsigned long dscr;
int dscr_inherit;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 9d7f0fb69028..f0cb7f461b9d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -257,7 +257,9 @@
#define LPCR_LPES_SH 2
#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */
#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */
+#ifndef SPRN_LPID
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
+#endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
#define SPRN_HMER 0x150 /* Hardware m? error recovery */
#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 8a97aa7289d3..2d916c4982c5 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -56,18 +56,30 @@
#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */
#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */
#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */
+#define SPRN_MSRP 0x137 /* MSR Protect Register */
#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */
#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */
#define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */
#define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */
+#define SPRN_LPID 0x152 /* Logical Partition ID */
#define SPRN_MAS8 0x155 /* MMU Assist Register 8 */
#define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */
#define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */
#define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */
#define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */
#define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */
+#define SPRN_GSPRG0 0x170 /* Guest SPRG0 */
+#define SPRN_GSPRG1 0x171 /* Guest SPRG1 */
+#define SPRN_GSPRG2 0x172 /* Guest SPRG2 */
+#define SPRN_GSPRG3 0x173 /* Guest SPRG3 */
#define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */
#define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */
+#define SPRN_GSRR0 0x17A /* Guest SRR0 */
+#define SPRN_GSRR1 0x17B /* Guest SRR1 */
+#define SPRN_GEPR 0x17C /* Guest EPR */
+#define SPRN_GDEAR 0x17D /* Guest DEAR */
+#define SPRN_GPIR 0x17E /* Guest PIR */
+#define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */
#define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */
#define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */
#define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */
@@ -88,6 +100,13 @@
#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */
#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */
#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */
+#define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */
+#define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */
+#define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */
+#define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */
+#define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */
+#define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */
+#define SPRN_GIVPR 0x1BF /* Guest IVPR */
#define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */
#define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */
#define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */
@@ -240,6 +259,10 @@
#define MCSR_LDG 0x00002000UL /* Guarded Load */
#define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */
#define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */
+
+#define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */
+#define MSRP_DEP 0x00000200 /* Protect MSR[DE] */
+#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */
#endif
#ifdef CONFIG_E200
@@ -594,6 +617,17 @@
#define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates
* for hypervisor */
+/* Bit definitions for EPLC/EPSC */
+#define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */
+#define EPC_EPR_SHIFT 31
+#define EPC_EAS 0x40000000 /* Address Space */
+#define EPC_EAS_SHIFT 30
+#define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */
+#define EPC_EGS_SHIFT 29
+#define EPC_ELPID 0x00ff0000
+#define EPC_ELPID_SHIFT 16
+#define EPC_EPID 0x00003fff
+#define EPC_EPID_SHIFT 0
/*
* The IBM-403 is an even more odd special case, as it is much
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 1a6320290d26..200d763a0a67 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -17,6 +17,7 @@ extern struct task_struct *_switch(struct thread_struct *prev,
struct thread_struct *next);
extern void giveup_fpu(struct task_struct *);
+extern void load_up_fpu(void);
extern void disable_kernel_fp(void);
extern void enable_kernel_fp(void);
extern void flush_fp_to_thread(struct task_struct *);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 2136f58a54e8..3b4b4a8da922 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -23,6 +23,7 @@
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
+extern struct clock_event_device decrementer_clockevent;
struct rtc_time;
extern void to_tm(int tim, struct rtc_time * tm);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4554dc2fe857..52c7ad78242e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -116,6 +116,9 @@ int main(void)
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
+#endif
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
@@ -383,6 +386,7 @@ int main(void)
#ifdef CONFIG_KVM
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
@@ -425,9 +429,11 @@ int main(void)
DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
+
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_64_HV
- DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -440,9 +446,9 @@ int main(void)
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
+ DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
#endif
#ifdef CONFIG_PPC_BOOK3S
- DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
@@ -457,7 +463,6 @@ int main(void)
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
- DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
@@ -533,6 +538,8 @@ int main(void)
HSTATE_FIELD(HSTATE_NAPPING, napping);
#ifdef CONFIG_KVM_BOOK3S_64_HV
+ HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
+ HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
@@ -593,6 +600,12 @@ int main(void)
DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
+ DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
+ DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+#endif
+
#ifdef CONFIG_KVM_EXIT_TIMING
DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbu));
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 8053db02b85e..69fdd2322a66 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -73,6 +73,7 @@ _GLOBAL(__setup_cpu_e500v2)
mtlr r4
blr
_GLOBAL(__setup_cpu_e500mc)
+ mr r5, r4
mflr r4
bl __e500_icache_setup
bl __e500_dcache_setup
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f7bed44ee165..1c06d2971545 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -63,11 +63,13 @@ BEGIN_FTR_SECTION
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_64_HV
- lbz r0,PACAPROCSTART(r13)
- cmpwi r0,0x80
- bne 1f
- li r0,1
- stb r0,PACAPROCSTART(r13)
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
b kvm_start_guest
1:
#endif
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 22d608e8bb7d..7a2e5e421abf 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -248,10 +248,11 @@ _ENTRY(_start);
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
/* Data Storage Interrupt */
@@ -261,7 +262,8 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
+ do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -273,29 +275,32 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
- EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 0e4175388f47..5f051eeb93a2 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -2,6 +2,9 @@
#define __HEAD_BOOKE_H__
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+
/*
* Macros used for common Book-e exception handling
*/
@@ -28,14 +31,15 @@
*/
#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
-#define NORMAL_EXCEPTION_PROLOG \
+#define NORMAL_EXCEPTION_PROLOG(intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
stw r11, THREAD_NORMSAVE(0)(r10); \
stw r13, THREAD_NORMSAVE(2)(r10); \
mfcr r13; /* save CR in r13 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
+ mfspr r11, SPRN_SRR1; \
+ DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
+ andi. r11, r11, MSR_PR; /* check whether user or kernel */\
mr r11, r1; \
beq 1f; \
/* if from user, start at top of this thread's kernel stack */ \
@@ -113,7 +117,7 @@
* registers as the normal prolog above. Instead we use a portion of the
* critical/machine check exception stack at low physical addresses.
*/
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
stw r9,GPR9(r8); /* save various registers */\
@@ -121,8 +125,9 @@
stw r10,GPR10(r8); \
stw r11,GPR11(r8); \
stw r9,_CCR(r8); /* save CR on stack */\
- mfspr r10,exc_level_srr1; /* check whether user or kernel */\
- andi. r10,r10,MSR_PR; \
+ mfspr r11,exc_level_srr1; /* check whether user or kernel */\
+ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
+ andi. r11,r11,MSR_PR; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
@@ -162,12 +167,30 @@
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
-#define CRITICAL_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1)
+#define CRITICAL_EXCEPTION_PROLOG(intno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
#define DEBUG_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1)
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
#define MCHECK_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1)
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+ SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Guest Doorbell -- this is a bit odd in that uses GSRR0/1 despite
+ * being delivered to the host. This exception can only happen
+ * inside a KVM guest -- so we just handle up to the DO_KVM rather
+ * than try to fit this into one of the existing prolog macros.
+ */
+#define GUEST_DOORBELL_EXCEPTION \
+ START_EXCEPTION(GuestDoorbell); \
+ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
+ mfspr r10, SPRN_SPRG_THREAD; \
+ stw r11, THREAD_NORMSAVE(0)(r10); \
+ mfspr r11, SPRN_SRR1; \
+ stw r13, THREAD_NORMSAVE(2)(r10); \
+ mfcr r13; /* save CR in r13 for now */\
+ DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \
+ trap
/*
* Exception vectors.
@@ -181,16 +204,16 @@ label:
.long func; \
.long ret_from_except_full
-#define EXCEPTION(n, label, hdlr, xfer) \
+#define EXCEPTION(n, intno, label, hdlr, xfer) \
START_EXCEPTION(label); \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(intno); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
-#define CRITICAL_EXCEPTION(n, label, hdlr) \
- START_EXCEPTION(label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
+ START_EXCEPTION(label); \
+ CRITICAL_EXCEPTION_PROLOG(intno); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
NOCOPY, crit_transfer_to_handler, \
ret_from_crit_exc)
@@ -302,7 +325,7 @@ label:
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
- CRITICAL_EXCEPTION_PROLOG; \
+ CRITICAL_EXCEPTION_PROLOG(DEBUG); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -355,7 +378,7 @@ label:
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
@@ -363,7 +386,7 @@ label:
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mr r4,r12; /* Pass SRR0 as arg2 */ \
@@ -372,7 +395,7 @@ label:
#define ALIGNMENT_EXCEPTION \
START_EXCEPTION(Alignment) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -380,7 +403,7 @@ label:
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(PROGRAM); \
mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
stw r4,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -388,7 +411,7 @@ label:
#define DECREMENTER_EXCEPTION \
START_EXCEPTION(Decrementer) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -396,7 +419,7 @@ label:
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
beq 1f; \
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index de80e0f9a2bd..1f4434a38608 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -301,19 +301,20 @@ _ENTRY(__early_start)
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
#ifdef CONFIG_E200
/* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -328,7 +329,7 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -342,32 +343,36 @@ interrupt_base:
#else
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ program_check_exception, EXC_XFER_EE)
#else
- EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
+ unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
@@ -375,10 +380,16 @@ interrupt_base:
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
mfspr r10, SPRN_SPRG_THREAD
stw r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
stw r12, THREAD_NORMSAVE(1)(r10)
stw r13, THREAD_NORMSAVE(2)(r10)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -463,10 +474,16 @@ interrupt_base:
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
mfspr r10, SPRN_SPRG_THREAD
stw r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
stw r12, THREAD_NORMSAVE(1)(r10)
stw r13, THREAD_NORMSAVE(2)(r10)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -538,36 +555,54 @@ interrupt_base:
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
bne load_up_spe
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
#else
- EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+ EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
+ SPEFloatingPointException, EXC_XFER_EE);
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ SPEFloatingPointRoundException, EXC_XFER_EE)
#else
- EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
+ unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* Performance Monitor */
- EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
+ performance_monitor_exception, EXC_XFER_STD)
- EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
- CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception)
+ CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
+ CriticalDoorbell, unknown_exception)
/* Debug Interrupt */
DEBUG_DEBUG_EXCEPTION
DEBUG_CRIT_EXCEPTION
+ GUEST_DOORBELL_EXCEPTION
+
+ CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
+ unknown_exception)
+
+ /* Hypercall */
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+
+ /* Embedded Hypervisor Privilege */
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+
/*
* Local functions
*/
@@ -871,8 +906,31 @@ _GLOBAL(__setup_e500mc_ivors)
mtspr SPRN_IVOR36,r3
li r3,CriticalDoorbell@l
mtspr SPRN_IVOR37,r3
+
+ /*
+ * We only want to touch IVOR38-41 if we're running on hardware
+ * that supports category E.HV. The architectural way to determine
+ * this is MMUCFG[LPIDSIZE].
+ */
+ mfspr r3, SPRN_MMUCFG
+ andis. r3, r3, MMUCFG_LPIDSIZE@h
+ beq no_hv
+ li r3,GuestDoorbell@l
+ mtspr SPRN_IVOR38,r3
+ li r3,CriticalGuestDoorbell@l
+ mtspr SPRN_IVOR39,r3
+ li r3,Hypercall@l
+ mtspr SPRN_IVOR40,r3
+ li r3,Ehvpriv@l
+ mtspr SPRN_IVOR41,r3
+skip_hv_ivors:
sync
blr
+no_hv:
+ lwz r3, CPU_SPEC_FEATURES(r5)
+ rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
+ stw r3, CPU_SPEC_FEATURES(r5)
+ b skip_hv_ivors
#ifdef CONFIG_SPE
/*
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 0cdc9a392839..7140d838339e 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -16,6 +16,7 @@
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
#include <asm/hw_irq.h>
+#include <asm/kvm_book3s_asm.h>
#undef DEBUG
@@ -81,6 +82,12 @@ _GLOBAL(power7_idle)
std r9,_MSR(r1)
std r1,PACAR1(r13)
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /* Tell KVM we're napping */
+ li r4,KVM_HWTHREAD_IN_NAP
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+
/* Magic NAP mode enter sequence */
std r0,0(r1)
ptesync
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 786a2700ec2d..d1f2aafcbe8c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -190,3 +190,7 @@ EXPORT_SYMBOL(__arch_hweight16);
EXPORT_SYMBOL(__arch_hweight32);
EXPORT_SYMBOL(__arch_hweight64);
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2c42cd72d0f5..99a995c2a3f2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -100,7 +100,7 @@ static int decrementer_set_next_event(unsigned long evt,
static void decrementer_set_mode(enum clock_event_mode mode,
struct clock_event_device *dev);
-static struct clock_event_device decrementer_clockevent = {
+struct clock_event_device decrementer_clockevent = {
.name = "decrementer",
.rating = 200,
.irq = 0,
@@ -108,6 +108,7 @@ static struct clock_event_device decrementer_clockevent = {
.set_mode = decrementer_set_mode,
.features = CLOCK_EVT_FEAT_ONESHOT,
};
+EXPORT_SYMBOL(decrementer_clockevent);
DEFINE_PER_CPU(u64, decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 7b612a76c701..50e7dbc7356c 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -29,15 +29,18 @@
#include <asm/kvm_ppc.h>
#include "44x_tlb.h"
+#include "booke.h"
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ kvmppc_booke_vcpu_load(vcpu, cpu);
kvmppc_44x_tlb_load(vcpu);
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_44x_tlb_put(vcpu);
+ kvmppc_booke_vcpu_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
@@ -160,6 +163,15 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
static int __init kvmppc_44x_init(void)
{
int r;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 549bb2c9a47a..c8c61578fdfc 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -37,22 +37,19 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int dcrn;
- int ra;
- int rb;
- int rc;
- int rs;
- int rt;
- int ws;
+ int dcrn = get_dcrn(inst);
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ int rc = get_rc(inst);
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
+ int ws = get_ws(inst);
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
case XOP_MFDCR:
- dcrn = get_dcrn(inst);
- rt = get_rt(inst);
-
/* The guest may access CPR0 registers to determine the timebase
* frequency, and it must know the real host frequency because it
* can directly access the timebase registers.
@@ -88,9 +85,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_MTDCR:
- dcrn = get_dcrn(inst);
- rs = get_rs(inst);
-
/* emulate some access in kernel */
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
@@ -108,17 +102,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBWE:
- ra = get_ra(inst);
- rs = get_rs(inst);
- ws = get_ws(inst);
emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
break;
case XOP_TLBSX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
- rc = get_rc(inst);
emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
break;
@@ -141,41 +128,41 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_PID:
- kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
+ kvmppc_set_pid(vcpu, spr_val); break;
case SPRN_MMUCR:
- vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.mmucr = spr_val; break;
case SPRN_CCR0:
- vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.ccr0 = spr_val; break;
case SPRN_CCR1:
- vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
+ vcpu->arch.ccr1 = spr_val; break;
default:
- emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
}
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_PID:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
+ *spr_val = vcpu->arch.pid; break;
case SPRN_MMUCR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
+ *spr_val = vcpu->arch.mmucr; break;
case SPRN_CCR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
+ *spr_val = vcpu->arch.ccr0; break;
case SPRN_CCR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
+ *spr_val = vcpu->arch.ccr1; break;
default:
- emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
}
return emulated;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 8f64709ae331..f4dacb9c57fa 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -90,6 +90,9 @@ config KVM_BOOK3S_64_PR
depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
select KVM_BOOK3S_PR
+config KVM_BOOKE_HV
+ bool
+
config KVM_440
bool "KVM support for PowerPC 440 processors"
depends on EXPERIMENTAL && 44x
@@ -106,7 +109,7 @@ config KVM_440
config KVM_EXIT_TIMING
bool "Detailed exit timing"
- depends on KVM_440 || KVM_E500
+ depends on KVM_440 || KVM_E500V2 || KVM_E500MC
---help---
Calculate elapsed time for every exit/enter cycle. A per-vcpu
report is available in debugfs kvm/vm#_vcpu#_timing.
@@ -115,14 +118,29 @@ config KVM_EXIT_TIMING
If unsure, say N.
-config KVM_E500
- bool "KVM support for PowerPC E500 processors"
- depends on EXPERIMENTAL && E500
+config KVM_E500V2
+ bool "KVM support for PowerPC E500v2 processors"
+ depends on EXPERIMENTAL && E500 && !PPC_E500MC
select KVM
select KVM_MMIO
---help---
Support running unmodified E500 guest kernels in virtual machines on
- E500 host processors.
+ E500v2 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_E500MC
+ bool "KVM support for PowerPC E500MC/E5500 processors"
+ depends on EXPERIMENTAL && PPC_E500MC
+ select KVM
+ select KVM_MMIO
+ select KVM_BOOKE_HV
+ ---help---
+ Support running unmodified E500MC/E5500 (32-bit) guest kernels in
+ virtual machines on E500MC/E5500 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3688aeecc4b2..c2a08636e6d4 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -36,7 +36,17 @@ kvm-e500-objs := \
e500.o \
e500_tlb.o \
e500_emulate.o
-kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
+kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
+
+kvm-e500mc-objs := \
+ $(common-objs-y) \
+ booke.o \
+ booke_emulate.o \
+ bookehv_interrupts.o \
+ e500mc.o \
+ e500_tlb.o \
+ e500_emulate.o
+kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
../../../virt/kvm/coalesced_mmio.o \
@@ -44,6 +54,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
book3s_paired_singles.o \
book3s_pr.o \
book3s_pr_papr.o \
+ book3s_64_vio_hv.o \
book3s_emulate.o \
book3s_interrupts.o \
book3s_mmu_hpte.o \
@@ -68,6 +79,7 @@ kvm-book3s_64-module-objs := \
powerpc.o \
emulate.o \
book3s.o \
+ book3s_64_vio.o \
$(kvm-book3s_64-objs-y)
kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -88,7 +100,8 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_440) += kvm.o
-obj-$(CONFIG_KVM_E500) += kvm.o
+obj-$(CONFIG_KVM_E500V2) += kvm.o
+obj-$(CONFIG_KVM_E500MC) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 7d54f4ed6d96..3f2a8360c857 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
return true;
}
-void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -283,12 +283,17 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
/* Tell the guest about our interrupt status */
kvmppc_update_int_pending(vcpu, *pending, old_pending);
+
+ return 0;
}
pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
+ if (!(vcpu->arch.shared->msr & MSR_SF))
+ mp_pa = (uint32_t)mp_pa;
+
/* Magic page override */
if (unlikely(mp_pa) &&
unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c3beaeef3f60..80a577517584 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -36,13 +36,11 @@
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
-#define NR_LPIDS (LPID_RSVD + 1)
-unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
long kvmppc_alloc_hpt(struct kvm *kvm)
{
unsigned long hpt;
- unsigned long lpid;
+ long lpid;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
@@ -72,14 +70,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
}
kvm->arch.revmap = rev;
- /* Allocate the guest's logical partition ID */
- do {
- lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
- if (lpid >= NR_LPIDS) {
- pr_err("kvm_alloc_hpt: No LPIDs free\n");
- goto out_freeboth;
- }
- } while (test_and_set_bit(lpid, lpid_inuse));
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ goto out_freeboth;
kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
kvm->arch.lpid = lpid;
@@ -96,7 +89,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
void kvmppc_free_hpt(struct kvm *kvm)
{
- clear_bit(kvm->arch.lpid, lpid_inuse);
+ kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
@@ -171,8 +164,7 @@ int kvmppc_mmu_hv_init(void)
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
- memset(lpid_inuse, 0, sizeof(lpid_inuse));
-
+ /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
host_lpid = mfspr(SPRN_LPID); /* POWER7 */
rsvd_lpid = LPID_RSVD;
@@ -181,9 +173,11 @@ int kvmppc_mmu_hv_init(void)
rsvd_lpid = MAX_LPID_970;
}
- set_bit(host_lpid, lpid_inuse);
+ kvmppc_init_lpid(rsvd_lpid + 1);
+
+ kvmppc_claim_lpid(host_lpid);
/* rsvd_lpid is reserved for use in partition switching */
- set_bit(rsvd_lpid, lpid_inuse);
+ kvmppc_claim_lpid(rsvd_lpid);
return 0;
}
@@ -452,7 +446,7 @@ static int instruction_is_store(unsigned int instr)
}
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned long gpa, int is_store)
+ unsigned long gpa, gva_t ea, int is_store)
{
int ret;
u32 last_inst;
@@ -499,6 +493,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
vcpu->arch.paddr_accessed = gpa;
+ vcpu->arch.vaddr_accessed = ea;
return kvmppc_emulate_mmio(run, vcpu);
}
@@ -552,7 +547,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
}
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index f2e6e48ea463..56b983e7b738 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -90,8 +90,6 @@ slb_exit_skip_ ## num:
or r10, r10, r12
slbie r10
- isync
-
/* Fill SLB with our shadow */
lbz r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
new file mode 100644
index 000000000000..72ffc899c082
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -0,0 +1,150 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_host.h>
+#include <asm/udbg.h>
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
+static long kvmppc_stt_npages(unsigned long window_size)
+{
+ return ALIGN((window_size >> SPAPR_TCE_SHIFT)
+ * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+}
+
+static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+{
+ struct kvm *kvm = stt->kvm;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ list_del(&stt->list);
+ for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
+ __free_page(stt->pages[i]);
+ kfree(stt);
+ mutex_unlock(&kvm->lock);
+
+ kvm_put_kvm(kvm);
+}
+
+static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+ struct page *page;
+
+ if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ return VM_FAULT_SIGBUS;
+
+ page = stt->pages[vmf->pgoff];
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
+ .fault = kvm_spapr_tce_fault,
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &kvm_spapr_tce_vm_ops;
+ return 0;
+}
+
+static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
+{
+ struct kvmppc_spapr_tce_table *stt = filp->private_data;
+
+ release_spapr_tce_table(stt);
+ return 0;
+}
+
+static struct file_operations kvm_spapr_tce_fops = {
+ .mmap = kvm_spapr_tce_mmap,
+ .release = kvm_spapr_tce_release,
+};
+
+long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce *args)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ long npages;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Check this LIOBN hasn't been previously allocated */
+ list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt->liobn == args->liobn)
+ return -EBUSY;
+ }
+
+ npages = kvmppc_stt_npages(args->window_size);
+
+ stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!stt)
+ goto fail;
+
+ stt->liobn = args->liobn;
+ stt->window_size = args->window_size;
+ stt->kvm = kvm;
+
+ for (i = 0; i < npages; i++) {
+ stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!stt->pages[i])
+ goto fail;
+ }
+
+ kvm_get_kvm(kvm);
+
+ mutex_lock(&kvm->lock);
+ list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+
+ mutex_unlock(&kvm->lock);
+
+ return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR);
+
+fail:
+ if (stt) {
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
+
+ kfree(stt);
+ }
+ return ret;
+}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index ea0f8c537c28..30c2f3b134c6 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -38,6 +38,9 @@
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+/* WARNING: This will be called in real-mode on HV KVM and virtual
+ * mode on PR KVM
+ */
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 135663a3e4fc..b9a989dc76cc 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -87,6 +87,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
+ int rt = get_rt(inst);
+ int rs = get_rs(inst);
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
switch (get_op(inst)) {
case 19:
@@ -106,21 +110,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- kvmppc_set_gpr(vcpu, get_rt(inst),
- vcpu->arch.shared->msr);
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
break;
case OP_31_XOP_MTMSRD:
{
- ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
+ ulong rs_val = kvmppc_get_gpr(vcpu, rs);
if (inst & 0x10000) {
- vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE);
- vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE);
+ ulong new_msr = vcpu->arch.shared->msr;
+ new_msr &= ~(MSR_RI | MSR_EE);
+ new_msr |= rs_val & (MSR_RI | MSR_EE);
+ vcpu->arch.shared->msr = new_msr;
} else
- kvmppc_set_msr(vcpu, rs);
+ kvmppc_set_msr(vcpu, rs_val);
break;
}
case OP_31_XOP_MTMSR:
- kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
+ kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_MFSR:
{
@@ -130,7 +135,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ kvmppc_set_gpr(vcpu, rt, sr);
}
break;
}
@@ -138,29 +143,29 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
int srnum;
- srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
+ srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf;
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- kvmppc_set_gpr(vcpu, get_rt(inst), sr);
+ kvmppc_set_gpr(vcpu, rt, sr);
}
break;
}
case OP_31_XOP_MTSR:
vcpu->arch.mmu.mtsrin(vcpu,
(inst >> 16) & 0xf,
- kvmppc_get_gpr(vcpu, get_rs(inst)));
+ kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_MTSRIN:
vcpu->arch.mmu.mtsrin(vcpu,
- (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
- kvmppc_get_gpr(vcpu, get_rs(inst)));
+ (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf,
+ kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_TLBIE:
case OP_31_XOP_TLBIEL:
{
bool large = (inst & 0x00200000) ? true : false;
- ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
+ ulong addr = kvmppc_get_gpr(vcpu, rb);
vcpu->arch.mmu.tlbie(vcpu, addr, large);
break;
}
@@ -171,15 +176,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_FAIL;
vcpu->arch.mmu.slbmte(vcpu,
- kvmppc_get_gpr(vcpu, get_rs(inst)),
- kvmppc_get_gpr(vcpu, get_rb(inst)));
+ kvmppc_get_gpr(vcpu, rs),
+ kvmppc_get_gpr(vcpu, rb));
break;
case OP_31_XOP_SLBIE:
if (!vcpu->arch.mmu.slbie)
return EMULATE_FAIL;
vcpu->arch.mmu.slbie(vcpu,
- kvmppc_get_gpr(vcpu, get_rb(inst)));
+ kvmppc_get_gpr(vcpu, rb));
break;
case OP_31_XOP_SLBIA:
if (!vcpu->arch.mmu.slbia)
@@ -191,22 +196,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!vcpu->arch.mmu.slbmfee) {
emulated = EMULATE_FAIL;
} else {
- ulong t, rb;
+ ulong t, rb_val;
- rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- t = vcpu->arch.mmu.slbmfee(vcpu, rb);
- kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ rb_val = kvmppc_get_gpr(vcpu, rb);
+ t = vcpu->arch.mmu.slbmfee(vcpu, rb_val);
+ kvmppc_set_gpr(vcpu, rt, t);
}
break;
case OP_31_XOP_SLBMFEV:
if (!vcpu->arch.mmu.slbmfev) {
emulated = EMULATE_FAIL;
} else {
- ulong t, rb;
+ ulong t, rb_val;
- rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- t = vcpu->arch.mmu.slbmfev(vcpu, rb);
- kvmppc_set_gpr(vcpu, get_rt(inst), t);
+ rb_val = kvmppc_get_gpr(vcpu, rb);
+ t = vcpu->arch.mmu.slbmfev(vcpu, rb_val);
+ kvmppc_set_gpr(vcpu, rt, t);
}
break;
case OP_31_XOP_DCBA:
@@ -214,17 +219,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case OP_31_XOP_DCBZ:
{
- ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
- ulong ra = 0;
+ ulong rb_val = kvmppc_get_gpr(vcpu, rb);
+ ulong ra_val = 0;
ulong addr, vaddr;
u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
u32 dsisr;
int r;
- if (get_ra(inst))
- ra = kvmppc_get_gpr(vcpu, get_ra(inst));
+ if (ra)
+ ra_val = kvmppc_get_gpr(vcpu, ra);
- addr = (ra + rb) & ~31ULL;
+ addr = (ra_val + rb_val) & ~31ULL;
if (!(vcpu->arch.shared->msr & MSR_SF))
addr &= 0xffffffff;
vaddr = addr;
@@ -313,10 +318,9 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
return bat;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SDR1:
@@ -428,7 +432,7 @@ unprivileged:
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
@@ -441,46 +445,46 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
if (sprn % 2)
- kvmppc_set_gpr(vcpu, rt, bat->raw >> 32);
+ *spr_val = bat->raw >> 32;
else
- kvmppc_set_gpr(vcpu, rt, bat->raw);
+ *spr_val = bat->raw;
break;
}
case SPRN_SDR1:
if (!spr_allowed(vcpu, PRIV_HYPER))
goto unprivileged;
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
+ *spr_val = to_book3s(vcpu)->sdr1;
break;
case SPRN_DSISR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr);
+ *spr_val = vcpu->arch.shared->dsisr;
break;
case SPRN_DAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar);
+ *spr_val = vcpu->arch.shared->dar;
break;
case SPRN_HIOR:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
+ *spr_val = to_book3s(vcpu)->hior;
break;
case SPRN_HID0:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
+ *spr_val = to_book3s(vcpu)->hid[0];
break;
case SPRN_HID1:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
+ *spr_val = to_book3s(vcpu)->hid[1];
break;
case SPRN_HID2:
case SPRN_HID2_GEKKO:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
+ *spr_val = to_book3s(vcpu)->hid[2];
break;
case SPRN_HID4:
case SPRN_HID4_GEKKO:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
+ *spr_val = to_book3s(vcpu)->hid[4];
break;
case SPRN_HID5:
- kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
+ *spr_val = to_book3s(vcpu)->hid[5];
break;
case SPRN_CFAR:
case SPRN_PURR:
- kvmppc_set_gpr(vcpu, rt, 0);
+ *spr_val = 0;
break;
case SPRN_GQR0:
case SPRN_GQR1:
@@ -490,8 +494,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_GQR5:
case SPRN_GQR6:
case SPRN_GQR7:
- kvmppc_set_gpr(vcpu, rt,
- to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
+ *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
break;
case SPRN_THRM1:
case SPRN_THRM2:
@@ -506,7 +509,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_PMC3_GEKKO:
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
- kvmppc_set_gpr(vcpu, rt, 0);
+ *spr_val = 0;
break;
default:
unprivileged:
@@ -565,23 +568,22 @@ u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
{
ulong dar = 0;
- ulong ra;
+ ulong ra = get_ra(inst);
+ ulong rb = get_rb(inst);
switch (get_op(inst)) {
case OP_LFS:
case OP_LFD:
case OP_STFD:
case OP_STFS:
- ra = get_ra(inst);
if (ra)
dar = kvmppc_get_gpr(vcpu, ra);
dar += (s32)((s16)inst);
break;
case 31:
- ra = get_ra(inst);
if (ra)
dar = kvmppc_get_gpr(vcpu, ra);
- dar += kvmppc_get_gpr(vcpu, get_rb(inst));
+ dar += kvmppc_get_gpr(vcpu, rb);
break;
default:
printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 108d1f580177..c6af1d623839 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -60,12 +60,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
local_paca->kvm_hstate.kvm_vcpu = vcpu;
- local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
+ local_paca->kvm_hstate.kvm_vcore = vc;
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->stolen_tb += mftb() - vc->preempt_tb;
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->preempt_tb = mftb();
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -134,6 +142,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
vpa->yield_count = 1;
}
+/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
+struct reg_vpa {
+ u32 dummy;
+ union {
+ u16 hword;
+ u32 word;
+ } length;
+};
+
+static int vpa_is_registered(struct kvmppc_vpa *vpap)
+{
+ if (vpap->update_pending)
+ return vpap->next_gpa != 0;
+ return vpap->pinned_addr != NULL;
+}
+
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long flags,
unsigned long vcpuid, unsigned long vpa)
@@ -142,88 +166,182 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long len, nb;
void *va;
struct kvm_vcpu *tvcpu;
- int err = H_PARAMETER;
+ int err;
+ int subfunc;
+ struct kvmppc_vpa *vpap;
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu)
return H_PARAMETER;
- flags >>= 63 - 18;
- flags &= 7;
- if (flags == 0 || flags == 4)
- return H_PARAMETER;
- if (flags < 4) {
- if (vpa & 0x7f)
+ subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
+ if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
+ subfunc == H_VPA_REG_SLB) {
+ /* Registering new area - address must be cache-line aligned */
+ if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
return H_PARAMETER;
- if (flags >= 2 && !tvcpu->arch.vpa)
- return H_RESOURCE;
- /* registering new area; convert logical addr to real */
+
+ /* convert logical addr to kernel addr and read length */
va = kvmppc_pin_guest_page(kvm, vpa, &nb);
if (va == NULL)
return H_PARAMETER;
- if (flags <= 1)
- len = *(unsigned short *)(va + 4);
+ if (subfunc == H_VPA_REG_VPA)
+ len = ((struct reg_vpa *)va)->length.hword;
else
- len = *(unsigned int *)(va + 4);
- if (len > nb)
- goto out_unpin;
- switch (flags) {
- case 1: /* register VPA */
- if (len < 640)
- goto out_unpin;
- if (tvcpu->arch.vpa)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
- tvcpu->arch.vpa = va;
- init_vpa(vcpu, va);
- break;
- case 2: /* register DTL */
- if (len < 48)
- goto out_unpin;
- len -= len % 48;
- if (tvcpu->arch.dtl)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
- tvcpu->arch.dtl = va;
- tvcpu->arch.dtl_end = va + len;
+ len = ((struct reg_vpa *)va)->length.word;
+ kvmppc_unpin_guest_page(kvm, va);
+
+ /* Check length */
+ if (len > nb || len < sizeof(struct reg_vpa))
+ return H_PARAMETER;
+ } else {
+ vpa = 0;
+ len = 0;
+ }
+
+ err = H_PARAMETER;
+ vpap = NULL;
+ spin_lock(&tvcpu->arch.vpa_update_lock);
+
+ switch (subfunc) {
+ case H_VPA_REG_VPA: /* register VPA */
+ if (len < sizeof(struct lppaca))
break;
- case 3: /* register SLB shadow buffer */
- if (len < 16)
- goto out_unpin;
- if (tvcpu->arch.slb_shadow)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
- tvcpu->arch.slb_shadow = va;
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_REG_DTL: /* register DTL */
+ if (len < sizeof(struct dtl_entry))
break;
- }
- } else {
- switch (flags) {
- case 5: /* unregister VPA */
- if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
- return H_RESOURCE;
- if (!tvcpu->arch.vpa)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
- tvcpu->arch.vpa = NULL;
+ len -= len % sizeof(struct dtl_entry);
+
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
break;
- case 6: /* unregister DTL */
- if (!tvcpu->arch.dtl)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
- tvcpu->arch.dtl = NULL;
+
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_REG_SLB: /* register SLB shadow buffer */
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
break;
- case 7: /* unregister SLB shadow buffer */
- if (!tvcpu->arch.slb_shadow)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
- tvcpu->arch.slb_shadow = NULL;
+
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_VPA: /* deregister VPA */
+ /* Check they don't still have a DTL or SLB buf registered */
+ err = H_RESOURCE;
+ if (vpa_is_registered(&tvcpu->arch.dtl) ||
+ vpa_is_registered(&tvcpu->arch.slb_shadow))
break;
- }
+
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_DTL: /* deregister DTL */
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+ }
+
+ if (vpap) {
+ vpap->next_gpa = vpa;
+ vpap->len = len;
+ vpap->update_pending = 1;
}
- return H_SUCCESS;
- out_unpin:
- kvmppc_unpin_guest_page(kvm, va);
+ spin_unlock(&tvcpu->arch.vpa_update_lock);
+
return err;
}
+static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
+{
+ void *va;
+ unsigned long nb;
+
+ vpap->update_pending = 0;
+ va = NULL;
+ if (vpap->next_gpa) {
+ va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+ if (nb < vpap->len) {
+ /*
+ * If it's now too short, it must be that userspace
+ * has changed the mappings underlying guest memory,
+ * so unregister the region.
+ */
+ kvmppc_unpin_guest_page(kvm, va);
+ va = NULL;
+ }
+ }
+ if (vpap->pinned_addr)
+ kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+ vpap->pinned_addr = va;
+ if (va)
+ vpap->pinned_end = va + vpap->len;
+}
+
+static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.vpa.update_pending) {
+ kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
+ init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ }
+ if (vcpu->arch.dtl.update_pending) {
+ kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
+ vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_index = 0;
+ }
+ if (vcpu->arch.slb_shadow.update_pending)
+ kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+}
+
+static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
+{
+ struct dtl_entry *dt;
+ struct lppaca *vpa;
+ unsigned long old_stolen;
+
+ dt = vcpu->arch.dtl_ptr;
+ vpa = vcpu->arch.vpa.pinned_addr;
+ old_stolen = vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = vc->stolen_tb;
+ if (!dt || !vpa)
+ return;
+ memset(dt, 0, sizeof(struct dtl_entry));
+ dt->dispatch_reason = 7;
+ dt->processor_id = vc->pcpu + vcpu->arch.ptid;
+ dt->timebase = mftb();
+ dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
+ dt->srr0 = kvmppc_get_pc(vcpu);
+ dt->srr1 = vcpu->arch.shregs.msr;
+ ++dt;
+ if (dt == vcpu->arch.dtl.pinned_end)
+ dt = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_ptr = dt;
+ /* order writing *dt vs. writing vpa->dtl_idx */
+ smp_wmb();
+ vpa->dtl_idx = ++vcpu->arch.dtl_index;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -468,6 +586,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
/* default to host PVR, since we can't spoof it */
vcpu->arch.pvr = mfspr(SPRN_PVR);
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+ spin_lock_init(&vcpu->arch.vpa_update_lock);
kvmppc_mmu_book3s_hv_init(vcpu);
@@ -486,6 +605,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
init_waitqueue_head(&vcore->wq);
+ vcore->preempt_tb = mftb();
}
kvm->arch.vcores[core] = vcore;
}
@@ -498,6 +618,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
+ vcpu->arch.stolen_logged = vcore->stolen_tb;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
@@ -512,12 +633,14 @@ out:
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.dtl)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
- if (vcpu->arch.slb_shadow)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
- if (vcpu->arch.vpa)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.dtl.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
+ if (vcpu->arch.slb_shadow.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
+ if (vcpu->arch.vpa.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -569,6 +692,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
list_del(&vcpu->arch.run_list);
}
+static int kvmppc_grab_hwthread(int cpu)
+{
+ struct paca_struct *tpaca;
+ long timeout = 1000;
+
+ tpaca = &paca[cpu];
+
+ /* Ensure the thread won't go into the kernel if it wakes */
+ tpaca->kvm_hstate.hwthread_req = 1;
+
+ /*
+ * If the thread is already executing in the kernel (e.g. handling
+ * a stray interrupt), wait for it to get back to nap mode.
+ * The smp_mb() is to ensure that our setting of hwthread_req
+ * is visible before we look at hwthread_state, so if this
+ * races with the code at system_reset_pSeries and the thread
+ * misses our setting of hwthread_req, we are sure to see its
+ * setting of hwthread_state, and vice versa.
+ */
+ smp_mb();
+ while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+ if (--timeout <= 0) {
+ pr_err("KVM: couldn't grab cpu %d\n", cpu);
+ return -EBUSY;
+ }
+ udelay(1);
+ }
+ return 0;
+}
+
+static void kvmppc_release_hwthread(int cpu)
+{
+ struct paca_struct *tpaca;
+
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.hwthread_req = 0;
+ tpaca->kvm_hstate.kvm_vcpu = NULL;
+}
+
static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
int cpu;
@@ -588,8 +750,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (vcpu->arch.ptid) {
- tpaca->cpu_start = 0x80;
- wmb();
+ kvmppc_grab_hwthread(cpu);
xics_wake_cpu(cpu);
++vc->n_woken;
}
@@ -639,7 +800,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
struct kvm_vcpu *vcpu, *vcpu0, *vnext;
long ret;
u64 now;
- int ptid;
+ int ptid, i;
/* don't start if any threads have a signal pending */
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -681,17 +842,29 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
vc->nap_count = 0;
vc->entry_exit_count = 0;
vc->vcore_state = VCORE_RUNNING;
+ vc->stolen_tb += mftb() - vc->preempt_tb;
vc->in_guest = 0;
vc->pcpu = smp_processor_id();
vc->napping_threads = 0;
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
+ if (vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending)
+ kvmppc_update_vpas(vcpu);
+ kvmppc_create_dtl_entry(vcpu, vc);
+ }
+ /* Grab any remaining hw threads so they can't go into the kernel */
+ for (i = ptid; i < threads_per_core; ++i)
+ kvmppc_grab_hwthread(vc->pcpu + i);
preempt_disable();
spin_unlock(&vc->lock);
kvm_guest_enter();
__kvmppc_vcore_entry(NULL, vcpu0);
+ for (i = 0; i < threads_per_core; ++i)
+ kvmppc_release_hwthread(vc->pcpu + i);
spin_lock(&vc->lock);
/* disable sending of IPIs on virtual external irqs */
@@ -737,6 +910,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
spin_lock(&vc->lock);
out:
vc->vcore_state = VCORE_INACTIVE;
+ vc->preempt_tb = mftb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
if (vcpu->arch.ret != RESUME_GUEST) {
@@ -835,6 +1009,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
spin_lock(&vc->lock);
continue;
}
+ vc->runner = vcpu;
n_ceded = 0;
list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
n_ceded += v->arch.ceded;
@@ -854,6 +1029,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
wake_up(&v->arch.cpu_run);
}
}
+ vc->runner = NULL;
}
if (signal_pending(current)) {
@@ -917,115 +1093,6 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
-static long kvmppc_stt_npages(unsigned long window_size)
-{
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
-}
-
-static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
-{
- struct kvm *kvm = stt->kvm;
- int i;
-
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
- __free_page(stt->pages[i]);
- kfree(stt);
- mutex_unlock(&kvm->lock);
-
- kvm_put_kvm(kvm);
-}
-
-static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
- struct page *page;
-
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
- return VM_FAULT_SIGBUS;
-
- page = stt->pages[vmf->pgoff];
- get_page(page);
- vmf->page = page;
- return 0;
-}
-
-static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
- .fault = kvm_spapr_tce_fault,
-};
-
-static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_ops = &kvm_spapr_tce_vm_ops;
- return 0;
-}
-
-static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
-{
- struct kvmppc_spapr_tce_table *stt = filp->private_data;
-
- release_spapr_tce_table(stt);
- return 0;
-}
-
-static struct file_operations kvm_spapr_tce_fops = {
- .mmap = kvm_spapr_tce_mmap,
- .release = kvm_spapr_tce_release,
-};
-
-long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
-{
- struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
- int ret = -ENOMEM;
- int i;
-
- /* Check this LIOBN hasn't been previously allocated */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == args->liobn)
- return -EBUSY;
- }
-
- npages = kvmppc_stt_npages(args->window_size);
-
- stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
- GFP_KERNEL);
- if (!stt)
- goto fail;
-
- stt->liobn = args->liobn;
- stt->window_size = args->window_size;
- stt->kvm = kvm;
-
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
- kvm_get_kvm(kvm);
-
- mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
-
- mutex_unlock(&kvm->lock);
-
- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR);
-
-fail:
- if (stt) {
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
- kfree(stt);
- }
- return ret;
-}
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
@@ -1108,6 +1175,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
+static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+ int linux_psize)
+{
+ struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+
+ if (!def->shift)
+ return;
+ (*sps)->page_shift = def->shift;
+ (*sps)->slb_enc = def->sllp;
+ (*sps)->enc[0].page_shift = def->shift;
+ (*sps)->enc[0].pte_enc = def->penc;
+ (*sps)++;
+}
+
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+{
+ struct kvm_ppc_one_seg_page_size *sps;
+
+ info->flags = KVM_PPC_PAGE_SIZES_REAL;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ info->flags |= KVM_PPC_1T_SEGMENTS;
+ info->slb_size = mmu_slb_size;
+
+ /* We only support these sizes for now, and no muti-size segments */
+ sps = &info->sps[0];
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+
+ return 0;
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1404,12 +1503,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
return EMULATE_FAIL;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
return EMULATE_FAIL;
}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index d3fb4df02c41..84035a528c80 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -68,19 +68,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
rotldi r10,r10,16
mtmsrd r10,1
- /* Save host PMU registers and load guest PMU registers */
+ /* Save host PMU registers */
/* R4 is live here (vcpu pointer) but not r3 or r5 */
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r5, 0
+ mtspr SPRN_MMCRA, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
lbz r5, LPPACA_PMCINUSE(r3)
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
- mfspr r6, SPRN_MMCRA
std r7, HSTATE_MMCR(r13)
std r5, HSTATE_MMCR + 8(r13)
std r6, HSTATE_MMCR + 16(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b70bf22a3ff3..a84aafce2a12 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -26,6 +26,7 @@
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
+#include <asm/kvm_book3s_asm.h>
/*****************************************************************************
* *
@@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
#define XICS_XIRR 4
#define XICS_QIRR 0xc
+#define XICS_IPI 2 /* interrupt source # for IPIs */
/*
* We come in here when wakened from nap mode on a secondary hw thread.
@@ -94,26 +96,54 @@ kvm_start_guest:
subi r1,r1,STACK_FRAME_OVERHEAD
ld r2,PACATOC(r13)
- /* were we napping due to cede? */
- lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,0
- bne kvm_end_cede
+ li r0,KVM_HWTHREAD_IN_KVM
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* get vcpu pointer */
- ld r4, HSTATE_KVM_VCPU(r13)
+ /* NV GPR values from power7_idle() will no longer be valid */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
- /* We got here with an IPI; clear it */
- ld r5, HSTATE_XICS_PHYS(r13)
- li r0, 0xff
- li r6, XICS_QIRR
- li r7, XICS_XIRR
- lwzcix r8, r5, r7 /* ack the interrupt */
+ /* get vcpu pointer, NULL if we have no vcpu to run */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ cmpdi cr1,r4,0
+
+ /* Check the wake reason in SRR1 to see why we got here */
+ mfspr r3,SPRN_SRR1
+ rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
+ cmpwi r3,4 /* was it an external interrupt? */
+ bne 27f
+
+ /*
+ * External interrupt - for now assume it is an IPI, since we
+ * should never get any other interrupts sent to offline threads.
+ * Only do this for secondary threads.
+ */
+ beq cr1,25f
+ lwz r3,VCPU_PTID(r4)
+ cmpwi r3,0
+ beq 27f
+25: ld r5,HSTATE_XICS_PHYS(r13)
+ li r0,0xff
+ li r6,XICS_QIRR
+ li r7,XICS_XIRR
+ lwzcix r8,r5,r7 /* get and ack the interrupt */
sync
- stbcix r0, r5, r6 /* clear it */
- stwcix r8, r5, r7 /* EOI it */
+ clrldi. r9,r8,40 /* get interrupt source ID. */
+ beq 27f /* none there? */
+ cmpwi r9,XICS_IPI
+ bne 26f
+ stbcix r0,r5,r6 /* clear IPI */
+26: stwcix r8,r5,r7 /* EOI the interrupt */
- /* NV GPR values from power7_idle() will no longer be valid */
- stb r0, PACA_NAPSTATELOST(r13)
+27: /* XXX should handle hypervisor maintenance interrupts etc. here */
+
+ /* if we have no vcpu to run, go back to sleep */
+ beq cr1,kvm_no_guest
+
+ /* were we napping due to cede? */
+ lbz r0,HSTATE_NAPPING(r13)
+ cmpwi r0,0
+ bne kvm_end_cede
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -129,24 +159,15 @@ kvmppc_hv_entry:
mflr r0
std r0, HSTATE_VMHANDLER(r13)
- ld r14, VCPU_GPR(r14)(r4)
- ld r15, VCPU_GPR(r15)(r4)
- ld r16, VCPU_GPR(r16)(r4)
- ld r17, VCPU_GPR(r17)(r4)
- ld r18, VCPU_GPR(r18)(r4)
- ld r19, VCPU_GPR(r19)(r4)
- ld r20, VCPU_GPR(r20)(r4)
- ld r21, VCPU_GPR(r21)(r4)
- ld r22, VCPU_GPR(r22)(r4)
- ld r23, VCPU_GPR(r23)(r4)
- ld r24, VCPU_GPR(r24)(r4)
- ld r25, VCPU_GPR(r25)(r4)
- ld r26, VCPU_GPR(r26)(r4)
- ld r27, VCPU_GPR(r27)(r4)
- ld r28, VCPU_GPR(r28)(r4)
- ld r29, VCPU_GPR(r29)(r4)
- ld r30, VCPU_GPR(r30)(r4)
- ld r31, VCPU_GPR(r31)(r4)
+ /* Set partition DABR */
+ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
+ li r5,3
+ ld r6,VCPU_DABR(r4)
+ mtspr SPRN_DABRX,r5
+ mtspr SPRN_DABR,r6
+BEGIN_FTR_SECTION
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Load guest PMU registers */
/* R4 is live here (vcpu pointer) */
@@ -185,6 +206,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/* Load up FP, VMX and VSX registers */
bl kvmppc_load_fp
+ ld r14, VCPU_GPR(r14)(r4)
+ ld r15, VCPU_GPR(r15)(r4)
+ ld r16, VCPU_GPR(r16)(r4)
+ ld r17, VCPU_GPR(r17)(r4)
+ ld r18, VCPU_GPR(r18)(r4)
+ ld r19, VCPU_GPR(r19)(r4)
+ ld r20, VCPU_GPR(r20)(r4)
+ ld r21, VCPU_GPR(r21)(r4)
+ ld r22, VCPU_GPR(r22)(r4)
+ ld r23, VCPU_GPR(r23)(r4)
+ ld r24, VCPU_GPR(r24)(r4)
+ ld r25, VCPU_GPR(r25)(r4)
+ ld r26, VCPU_GPR(r26)(r4)
+ ld r27, VCPU_GPR(r27)(r4)
+ ld r28, VCPU_GPR(r28)(r4)
+ ld r29, VCPU_GPR(r29)(r4)
+ ld r30, VCPU_GPR(r30)(r4)
+ ld r31, VCPU_GPR(r31)(r4)
+
BEGIN_FTR_SECTION
/* Switch DSCR to guest value */
ld r5, VCPU_DSCR(r4)
@@ -226,12 +266,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
mtspr SPRN_DAR, r5
mtspr SPRN_DSISR, r6
- /* Set partition DABR */
- li r5,3
- ld r6,VCPU_DABR(r4)
- mtspr SPRN_DABRX,r5
- mtspr SPRN_DABR,r6
-
BEGIN_FTR_SECTION
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
@@ -925,12 +959,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_AMR,r6
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- /* Restore host DABR and DABRX */
- ld r5,HSTATE_DABR(r13)
- li r6,7
- mtspr SPRN_DABR,r5
- mtspr SPRN_DABRX,r6
-
/* Switch DSCR back to host value */
BEGIN_FTR_SECTION
mfspr r8, SPRN_DSCR
@@ -969,6 +997,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r5, VCPU_SPRG2(r9)
std r6, VCPU_SPRG3(r9)
+ /* save FP state */
+ mr r3, r9
+ bl .kvmppc_save_fp
+
/* Increment yield count if they have a VPA */
ld r8, VCPU_VPA(r9) /* do they have a VPA? */
cmpdi r8, 0
@@ -983,6 +1015,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r4, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ mfspr r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+ /* On P7, clear MMCRA in order to disable SDAR updates */
+ li r7, 0
+ mtspr SPRN_MMCRA, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
beq 21f /* if no VPA, save PMU stuff anyway */
lbz r7, LPPACA_PMCINUSE(r8)
@@ -991,7 +1029,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
b 22f
21: mfspr r5, SPRN_MMCR1
- mfspr r6, SPRN_MMCRA
std r4, VCPU_MMCR(r9)
std r5, VCPU_MMCR + 8(r9)
std r6, VCPU_MMCR + 16(r9)
@@ -1016,17 +1053,20 @@ BEGIN_FTR_SECTION
stw r11, VCPU_PMC + 28(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
22:
- /* save FP state */
- mr r3, r9
- bl .kvmppc_save_fp
/* Secondary threads go off to take a nap on POWER7 */
BEGIN_FTR_SECTION
- lwz r0,VCPU_PTID(r3)
+ lwz r0,VCPU_PTID(r9)
cmpwi r0,0
bne secondary_nap
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ /* Restore host DABR and DABRX */
+ ld r5,HSTATE_DABR(r13)
+ li r6,7
+ mtspr SPRN_DABR,r5
+ mtspr SPRN_DABRX,r6
+
/*
* Reload DEC. HDEC interrupts were disabled when
* we reloaded the host's LPCR value.
@@ -1363,7 +1403,12 @@ bounce_ext_interrupt:
_GLOBAL(kvmppc_h_set_dabr)
std r4,VCPU_DABR(r3)
- mtspr SPRN_DABR,r4
+ /* Work around P7 bug where DABR can get corrupted on mtspr */
+1: mtspr SPRN_DABR,r4
+ mfspr r5, SPRN_DABR
+ cmpd r4, r5
+ bne 1b
+ isync
li r3,0
blr
@@ -1445,8 +1490,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
* Take a nap until a decrementer or external interrupt occurs,
* with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
*/
- li r0,0x80
- stb r0,PACAPROCSTART(r13)
+ li r0,1
+ stb r0,HSTATE_HWTHREAD_REQ(r13)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR,r5
@@ -1463,26 +1508,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
kvm_end_cede:
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATOC(r13)
- /* If we're a secondary thread and we got here by an IPI, ack it */
- ld r4,HSTATE_KVM_VCPU(r13)
- lwz r3,VCPU_PTID(r4)
- cmpwi r3,0
- beq 27f
- mfspr r3,SPRN_SRR1
- rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
- cmpwi r3,4 /* was it an external interrupt? */
- bne 27f
- ld r5, HSTATE_XICS_PHYS(r13)
- li r0,0xff
- li r6,XICS_QIRR
- li r7,XICS_XIRR
- lwzcix r8,r5,r7 /* ack the interrupt */
- sync
- stbcix r0,r5,r6 /* clear it */
- stwcix r8,r5,r7 /* EOI it */
-27:
/* load up FP state */
bl kvmppc_load_fp
@@ -1580,12 +1606,17 @@ secondary_nap:
stwcx. r3, 0, r4
bne 51b
+kvm_no_guest:
+ li r0, KVM_HWTHREAD_IN_NAP
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
isync
- li r0, 0
std r0, HSTATE_SCRATCH0(r13)
ptesync
ld r0, HSTATE_SCRATCH0(r13)
@@ -1599,8 +1630,8 @@ secondary_nap:
* r3 = vcpu pointer
*/
_GLOBAL(kvmppc_save_fp)
- mfmsr r9
- ori r8,r9,MSR_FP
+ mfmsr r5
+ ori r8,r5,MSR_FP
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
oris r8,r8,MSR_VEC@h
@@ -1649,7 +1680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
mfspr r6,SPRN_VRSAVE
stw r6,VCPU_VRSAVE(r3)
- mtmsrd r9
+ mtmsrd r5
isync
blr
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7759053d391b..a1baec340f7e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -120,6 +120,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
/* Unset POW bit after we woke up */
@@ -144,6 +145,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
}
}
+ /*
+ * When switching from 32 to 64-bit, we may have a stale 32-bit
+ * magic page around, we need to flush it. Typically 32-bit magic
+ * page will be instanciated when calling into RTAS. Note: We
+ * assume that such transition only happens while in kernel mode,
+ * ie, we never transition from user 32-bit to kernel 64-bit with
+ * a 32-bit magic page around.
+ */
+ if (vcpu->arch.magic_page_pa &&
+ !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
+ /* going from RTAS to normal kernel code */
+ kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
+ ~0xFFFUL);
+ }
+
/* Preload FPU if it's enabled */
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
@@ -251,6 +267,9 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
+ if (!(vcpu->arch.shared->msr & MSR_SF))
+ mp_pa = (uint32_t)mp_pa;
+
if (unlikely(mp_pa) &&
unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
return 1;
@@ -351,6 +370,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* MMIO */
vcpu->stat.mmio_exits++;
vcpu->arch.paddr_accessed = pte.raddr;
+ vcpu->arch.vaddr_accessed = pte.eaddr;
r = kvmppc_emulate_mmio(run, vcpu);
if ( r == RESUME_HOST_NV )
r = RESUME_HOST;
@@ -528,6 +548,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
+ /* We get here with MSR.EE=0, so enable it to be a nice citizen */
+ __hard_irq_enable();
+
trace_kvm_book3s_exit(exit_nr, vcpu);
preempt_enable();
kvm_resched(vcpu);
@@ -617,10 +640,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/* We're good on these - the host merely wanted to get our attention */
case BOOK3S_INTERRUPT_DECREMENTER:
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
+ case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
+ case BOOK3S_INTERRUPT_EXTERNAL_HV:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -628,6 +654,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_PROGRAM:
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
{
enum emulation_result er;
struct kvmppc_book3s_shadow_vcpu *svcpu;
@@ -1131,6 +1158,31 @@ out:
return r;
}
+#ifdef CONFIG_PPC64
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+{
+ /* No flags */
+ info->flags = 0;
+
+ /* SLB is always 64 entries */
+ info->slb_size = 64;
+
+ /* Standard 4k base page size segment */
+ info->sps[0].page_shift = 12;
+ info->sps[0].slb_enc = 0;
+ info->sps[0].enc[0].page_shift = 12;
+ info->sps[0].enc[0].pte_enc = 0;
+
+ /* Standard 16M large page size segment */
+ info->sps[1].page_shift = 24;
+ info->sps[1].slb_enc = SLB_VSID_L;
+ info->sps[1].enc[0].page_shift = 24;
+ info->sps[1].enc[0].pte_enc = 0;
+
+ return 0;
+}
+#endif /* CONFIG_PPC64 */
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
@@ -1144,11 +1196,18 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
int kvmppc_core_init_vm(struct kvm *kvm)
{
+#ifdef CONFIG_PPC64
+ INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+#endif
+
return 0;
}
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
+#ifdef CONFIG_PPC64
+ WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
}
static int kvmppc_book3s_init(void)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index b9589324797b..3ff9013d6e79 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -15,6 +15,8 @@
* published by the Free Software Foundation.
*/
+#include <linux/anon_inodes.h>
+
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -98,6 +100,83 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+/* Request defs for kvmppc_h_pr_bulk_remove() */
+#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL
+#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL
+#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL
+#define H_BULK_REMOVE_END 0xc000000000000000ULL
+#define H_BULK_REMOVE_CODE 0x3000000000000000ULL
+#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
+#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
+#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
+#define H_BULK_REMOVE_HW 0x3000000000000000ULL
+#define H_BULK_REMOVE_RC 0x0c00000000000000ULL
+#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL
+#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
+#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
+#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL
+#define H_BULK_REMOVE_MAX_BATCH 4
+
+static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
+{
+ int i;
+ int paramnr = 4;
+ int ret = H_SUCCESS;
+
+ for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+ unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
+ unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
+ unsigned long pteg, rb, flags;
+ unsigned long pte[2];
+ unsigned long v = 0;
+
+ if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+ break; /* Exit success */
+ } else if ((tsh & H_BULK_REMOVE_TYPE) !=
+ H_BULK_REMOVE_REQUEST) {
+ ret = H_PARAMETER;
+ break; /* Exit fail */
+ }
+
+ tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+ tsh |= H_BULK_REMOVE_RESPONSE;
+
+ if ((tsh & H_BULK_REMOVE_ANDCOND) &&
+ (tsh & H_BULK_REMOVE_AVPN)) {
+ tsh |= H_BULK_REMOVE_PARM;
+ kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+ ret = H_PARAMETER;
+ break; /* Exit fail */
+ }
+
+ pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
+ copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+
+ /* tsl = AVPN */
+ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
+
+ if ((pte[0] & HPTE_V_VALID) == 0 ||
+ ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) ||
+ ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) {
+ tsh |= H_BULK_REMOVE_NOT_FOUND;
+ } else {
+ /* Splat the pteg in (userland) hpt */
+ copy_to_user((void __user *)pteg, &v, sizeof(v));
+
+ rb = compute_tlbie_rb(pte[0], pte[1],
+ tsh & H_BULK_REMOVE_PTEX);
+ vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+ tsh |= H_BULK_REMOVE_SUCCESS;
+ tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43;
+ }
+ kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+ }
+ kvmppc_set_gpr(vcpu, 3, ret);
+
+ return EMULATE_DONE;
+}
+
static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
{
unsigned long flags = kvmppc_get_gpr(vcpu, 4);
@@ -134,6 +213,20 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ long rc;
+
+ rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
@@ -144,12 +237,12 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
case H_PROTECT:
return kvmppc_h_pr_protect(vcpu);
case H_BULK_REMOVE:
- /* We just flush all PTEs, so user space can
- handle the HPT modifications */
- kvmppc_mmu_pte_flush(vcpu, 0, 0);
- break;
+ return kvmppc_h_pr_bulk_remove(vcpu);
+ case H_PUT_TCE:
+ return kvmppc_h_pr_put_tce(vcpu);
case H_CEDE:
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
}
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 6e6e9cef34a8..798491a268b3 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -128,24 +128,25 @@ no_dcbz32_on:
/* First clear RI in our current MSR value */
li r0, MSR_RI
andc r6, r6, r0
- MTMSR_EERI(r6)
- mtsrr0 r9
- mtsrr1 r4
PPC_LL r0, SVCPU_R0(r3)
PPC_LL r1, SVCPU_R1(r3)
PPC_LL r2, SVCPU_R2(r3)
- PPC_LL r4, SVCPU_R4(r3)
PPC_LL r5, SVCPU_R5(r3)
- PPC_LL r6, SVCPU_R6(r3)
PPC_LL r7, SVCPU_R7(r3)
PPC_LL r8, SVCPU_R8(r3)
- PPC_LL r9, SVCPU_R9(r3)
PPC_LL r10, SVCPU_R10(r3)
PPC_LL r11, SVCPU_R11(r3)
PPC_LL r12, SVCPU_R12(r3)
PPC_LL r13, SVCPU_R13(r3)
+ MTMSR_EERI(r6)
+ mtsrr0 r9
+ mtsrr1 r4
+
+ PPC_LL r4, SVCPU_R4(r3)
+ PPC_LL r6, SVCPU_R6(r3)
+ PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
RFI
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee9e1ee9c858..72f13f4a06e0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -17,6 +17,8 @@
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ * Scott Wood <scottwood@freescale.com>
+ * Varun Sethi <varun.sethi@freescale.com>
*/
#include <linux/errno.h>
@@ -30,9 +32,12 @@
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
-#include "timing.h"
#include <asm/cacheflush.h>
+#include <asm/dbell.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include "timing.h"
#include "booke.h"
unsigned long kvmppc_booke_handlers;
@@ -55,6 +60,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "dec", VCPU_STAT(dec_exits) },
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "doorbell", VCPU_STAT(dbell_exits) },
+ { "guest doorbell", VCPU_STAT(gdbell_exits) },
{ NULL }
};
@@ -121,6 +128,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
u32 old_msr = vcpu->arch.shared->msr;
+#ifdef CONFIG_KVM_BOOKE_HV
+ new_msr |= MSR_GS;
+#endif
+
vcpu->arch.shared->msr = new_msr;
kvmppc_mmu_msr_notify(vcpu, old_msr);
@@ -195,17 +206,87 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GSRR0, srr0);
+ mtspr(SPRN_GSRR1, srr1);
+#else
+ vcpu->arch.shared->srr0 = srr0;
+ vcpu->arch.shared->srr1 = srr1;
+#endif
+}
+
+static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ vcpu->arch.csrr0 = srr0;
+ vcpu->arch.csrr1 = srr1;
+}
+
+static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
+ vcpu->arch.dsrr0 = srr0;
+ vcpu->arch.dsrr1 = srr1;
+ } else {
+ set_guest_csrr(vcpu, srr0, srr1);
+ }
+}
+
+static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+ vcpu->arch.mcsrr0 = srr0;
+ vcpu->arch.mcsrr1 = srr1;
+}
+
+static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GDEAR);
+#else
+ return vcpu->arch.shared->dar;
+#endif
+}
+
+static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GDEAR, dear);
+#else
+ vcpu->arch.shared->dar = dear;
+#endif
+}
+
+static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ return mfspr(SPRN_GESR);
+#else
+ return vcpu->arch.shared->esr;
+#endif
+}
+
+static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GESR, esr);
+#else
+ vcpu->arch.shared->esr = esr;
+#endif
+}
+
/* Deliver the interrupt of the corresponding priority, if possible. */
static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
unsigned int priority)
{
int allowed = 0;
- ulong uninitialized_var(msr_mask);
+ ulong msr_mask = 0;
bool update_esr = false, update_dear = false;
ulong crit_raw = vcpu->arch.shared->critical;
ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
bool crit;
bool keep_irq = false;
+ enum int_class int_class;
/* Truncate crit indicators in 32 bit mode */
if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -241,46 +322,85 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_AP_UNAVAIL:
case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ msr_mask = MSR_CE | MSR_ME | MSR_DE;
+ int_class = INT_CLASS_NONCRIT;
break;
case BOOKE_IRQPRIO_CRITICAL:
- case BOOKE_IRQPRIO_WATCHDOG:
+ case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
+ allowed = allowed && !crit;
msr_mask = MSR_ME;
+ int_class = INT_CLASS_CRIT;
break;
case BOOKE_IRQPRIO_MACHINE_CHECK:
allowed = vcpu->arch.shared->msr & MSR_ME;
- msr_mask = 0;
+ allowed = allowed && !crit;
+ int_class = INT_CLASS_MC;
break;
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
keep_irq = true;
/* fall through */
case BOOKE_IRQPRIO_EXTERNAL:
+ case BOOKE_IRQPRIO_DBELL:
allowed = vcpu->arch.shared->msr & MSR_EE;
allowed = allowed && !crit;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ msr_mask = MSR_CE | MSR_ME | MSR_DE;
+ int_class = INT_CLASS_NONCRIT;
break;
case BOOKE_IRQPRIO_DEBUG:
allowed = vcpu->arch.shared->msr & MSR_DE;
+ allowed = allowed && !crit;
msr_mask = MSR_ME;
+ int_class = INT_CLASS_CRIT;
break;
}
if (allowed) {
- vcpu->arch.shared->srr0 = vcpu->arch.pc;
- vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
+ switch (int_class) {
+ case INT_CLASS_NONCRIT:
+ set_guest_srr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_CRIT:
+ set_guest_csrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_DBG:
+ set_guest_dsrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ case INT_CLASS_MC:
+ set_guest_mcsrr(vcpu, vcpu->arch.pc,
+ vcpu->arch.shared->msr);
+ break;
+ }
+
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
if (update_esr == true)
- vcpu->arch.shared->esr = vcpu->arch.queued_esr;
+ set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
- vcpu->arch.shared->dar = vcpu->arch.queued_dear;
+ set_guest_dear(vcpu, vcpu->arch.queued_dear);
kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
if (!keep_irq)
clear_bit(priority, &vcpu->arch.pending_exceptions);
}
+#ifdef CONFIG_KVM_BOOKE_HV
+ /*
+ * If an interrupt is pending but masked, raise a guest doorbell
+ * so that we are notified when the guest enables the relevant
+ * MSR bit.
+ */
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
+ if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK)
+ kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
+#endif
+
return allowed;
}
@@ -305,7 +425,7 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
}
priority = __ffs(*pending);
- while (priority <= BOOKE_IRQPRIO_MAX) {
+ while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
@@ -319,8 +439,9 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
}
/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
+ int r = 0;
WARN_ON_ONCE(!irqs_disabled());
kvmppc_core_check_exceptions(vcpu);
@@ -328,16 +449,60 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
local_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
- kvmppc_core_check_exceptions(vcpu);
+ r = 1;
};
+
+ return r;
+}
+
+/*
+ * Common checks before entering the guest world. Call with interrupts
+ * disabled.
+ *
+ * returns !0 if a signal is pending and check_signal is true
+ */
+static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ int r = 0;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ while (true) {
+ if (need_resched()) {
+ local_irq_enable();
+ cond_resched();
+ local_irq_disable();
+ continue;
+ }
+
+ if (signal_pending(current)) {
+ r = 1;
+ break;
+ }
+
+ if (kvmppc_core_prepare_to_enter(vcpu)) {
+ /* interrupts got enabled in between, so we
+ are back at square 1 */
+ continue;
+ }
+
+ break;
+ }
+
+ return r;
}
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
+#ifdef CONFIG_PPC_FPU
+ unsigned int fpscr;
+ int fpexc_mode;
+ u64 fpr[32];
+#endif
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -345,17 +510,53 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
local_irq_disable();
-
- kvmppc_core_prepare_to_enter(vcpu);
-
- if (signal_pending(current)) {
+ if (kvmppc_prepare_to_enter(vcpu)) {
kvm_run->exit_reason = KVM_EXIT_INTR;
ret = -EINTR;
goto out;
}
kvm_guest_enter();
+
+#ifdef CONFIG_PPC_FPU
+ /* Save userspace FPU state in stack */
+ enable_kernel_fp();
+ memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
+ fpscr = current->thread.fpscr.val;
+ fpexc_mode = current->thread.fpexc_mode;
+
+ /* Restore guest FPU state to thread */
+ memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
+ current->thread.fpscr.val = vcpu->arch.fpscr;
+
+ /*
+ * Since we can't trap on MSR_FP in GS-mode, we consider the guest
+ * as always using the FPU. Kernel usage of FP (via
+ * enable_kernel_fp()) in this thread must not occur while
+ * vcpu->fpu_active is set.
+ */
+ vcpu->fpu_active = 1;
+
+ kvmppc_load_guest_fp(vcpu);
+#endif
+
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+#ifdef CONFIG_PPC_FPU
+ kvmppc_save_guest_fp(vcpu);
+
+ vcpu->fpu_active = 0;
+
+ /* Save guest FPU state from thread */
+ memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
+ vcpu->arch.fpscr = current->thread.fpscr.val;
+
+ /* Restore userspace FPU state from stack */
+ memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
+ current->thread.fpscr.val = fpscr;
+ current->thread.fpexc_mode = fpexc_mode;
+#endif
+
kvm_guest_exit();
out:
@@ -363,6 +564,84 @@ out:
return ret;
}
+static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* don't overwrite subtypes, just account kvm_stats */
+ kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
+ /* Future optimization: only reload non-volatiles if
+ * they were actually modified by emulation. */
+ return RESUME_GUEST_NV;
+
+ case EMULATE_DO_DCR:
+ run->exit_reason = KVM_EXIT_DCR;
+ return RESUME_HOST;
+
+ case EMULATE_FAIL:
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+ /* For debugging, encode the failing instruction and
+ * report it to userspace. */
+ run->hw.hardware_exit_reason = ~0ULL << 32;
+ run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+ kvmppc_core_queue_program(vcpu, ESR_PIL);
+ return RESUME_HOST;
+
+ default:
+ BUG();
+ }
+}
+
+static void kvmppc_fill_pt_regs(struct pt_regs *regs)
+{
+ ulong r1, ip, msr, lr;
+
+ asm("mr %0, 1" : "=r"(r1));
+ asm("mflr %0" : "=r"(lr));
+ asm("mfmsr %0" : "=r"(msr));
+ asm("bl 1f; 1: mflr %0" : "=r"(ip));
+
+ memset(regs, 0, sizeof(*regs));
+ regs->gpr[1] = r1;
+ regs->nip = ip;
+ regs->msr = msr;
+ regs->link = lr;
+}
+
+static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ struct pt_regs regs;
+
+ switch (exit_nr) {
+ case BOOKE_INTERRUPT_EXTERNAL:
+ kvmppc_fill_pt_regs(&regs);
+ do_IRQ(&regs);
+ break;
+ case BOOKE_INTERRUPT_DECREMENTER:
+ kvmppc_fill_pt_regs(&regs);
+ timer_interrupt(&regs);
+ break;
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
+ case BOOKE_INTERRUPT_DOORBELL:
+ kvmppc_fill_pt_regs(&regs);
+ doorbell_exception(&regs);
+ break;
+#endif
+ case BOOKE_INTERRUPT_MACHINE_CHECK:
+ /* FIXME */
+ break;
+ case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+ kvmppc_fill_pt_regs(&regs);
+ performance_monitor_exception(&regs);
+ break;
+ }
+}
+
/**
* kvmppc_handle_exit
*
@@ -371,12 +650,14 @@ out:
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
- enum emulation_result er;
int r = RESUME_HOST;
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
+ /* restart interrupts if they were meant for the host */
+ kvmppc_restart_interrupt(vcpu, exit_nr);
+
local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -386,62 +667,74 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_MACHINE_CHECK:
printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
kvmppc_dump_vcpu(vcpu);
+ /* For debugging, send invalid exit reason to user space */
+ run->hw.hardware_exit_reason = ~1ULL << 32;
+ run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
r = RESUME_HOST;
break;
case BOOKE_INTERRUPT_EXTERNAL:
kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
- if (need_resched())
- cond_resched();
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DECREMENTER:
- /* Since we switched IVPR back to the host's value, the host
- * handled this interrupt the moment we enabled interrupts.
- * Now we just offer it a chance to reschedule the guest. */
kvmppc_account_exit(vcpu, DEC_EXITS);
- if (need_resched())
- cond_resched();
r = RESUME_GUEST;
break;
+ case BOOKE_INTERRUPT_DOORBELL:
+ kvmppc_account_exit(vcpu, DBELL_EXITS);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
+ kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+ /*
+ * We are here because there is a pending guest interrupt
+ * which could not be delivered as MSR_CE or MSR_ME was not
+ * set. Once we break from here we will retry delivery.
+ */
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_GUEST_DBELL:
+ kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+ /*
+ * We are here because there is a pending guest interrupt
+ * which could not be delivered as MSR_EE was not set. Once
+ * we break from here we will retry delivery.
+ */
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_HV_PRIV:
+ r = emulation_exit(run, vcpu);
+ break;
+
case BOOKE_INTERRUPT_PROGRAM:
- if (vcpu->arch.shared->msr & MSR_PR) {
- /* Program traps generated by user-level software must be handled
- * by the guest kernel. */
+ if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
+ /*
+ * Program traps generated by user-level software must
+ * be handled by the guest kernel.
+ *
+ * In GS mode, hypervisor privileged instructions trap
+ * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
+ * actual program interrupts, handled by the guest.
+ */
kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
r = RESUME_GUEST;
kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- /* don't overwrite subtypes, just account kvm_stats */
- kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
- /* Future optimization: only reload non-volatiles if
- * they were actually modified by emulation. */
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_DO_DCR:
- run->exit_reason = KVM_EXIT_DCR;
- r = RESUME_HOST;
- break;
- case EMULATE_FAIL:
- /* XXX Deliver Program interrupt to guest. */
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, vcpu->arch.pc, vcpu->arch.last_inst);
- /* For debugging, encode the failing instruction and
- * report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
- r = RESUME_HOST;
- break;
- default:
- BUG();
- }
+ r = emulation_exit(run, vcpu);
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -506,6 +799,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
+#ifdef CONFIG_KVM_BOOKE_HV
+ case BOOKE_INTERRUPT_HV_SYSCALL:
+ if (!(vcpu->arch.shared->msr & MSR_PR)) {
+ kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+ } else {
+ /*
+ * hcall from guest userspace -- send privileged
+ * instruction program check.
+ */
+ kvmppc_core_queue_program(vcpu, ESR_PPR);
+ }
+
+ r = RESUME_GUEST;
+ break;
+#else
case BOOKE_INTERRUPT_SYSCALL:
if (!(vcpu->arch.shared->msr & MSR_PR) &&
(((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
@@ -519,6 +827,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
+#endif
case BOOKE_INTERRUPT_DTLB_MISS: {
unsigned long eaddr = vcpu->arch.fault_dear;
@@ -526,7 +835,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
gpa_t gpaddr;
gfn_t gfn;
-#ifdef CONFIG_KVM_E500
+#ifdef CONFIG_KVM_E500V2
if (!(vcpu->arch.shared->msr & MSR_PR) &&
(eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
kvmppc_map_magic(vcpu);
@@ -567,6 +876,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Guest has mapped and accessed a page which is not
* actually RAM. */
vcpu->arch.paddr_accessed = gpaddr;
+ vcpu->arch.vaddr_accessed = eaddr;
r = kvmppc_emulate_mmio(run, vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
@@ -634,15 +944,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
BUG();
}
- local_irq_disable();
-
- kvmppc_core_prepare_to_enter(vcpu);
-
+ /*
+ * To avoid clobbering exit_reason, only check for signals if we
+ * aren't already exiting to userspace for some other reason.
+ */
if (!(r & RESUME_HOST)) {
- /* To avoid clobbering exit_reason, only check for signals if
- * we aren't already exiting to userspace for some other
- * reason. */
- if (signal_pending(current)) {
+ local_irq_disable();
+ if (kvmppc_prepare_to_enter(vcpu)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
kvmppc_account_exit(vcpu, SIGNAL_EXITS);
@@ -659,12 +967,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
int r;
vcpu->arch.pc = 0;
- vcpu->arch.shared->msr = 0;
- vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
vcpu->arch.shared->pir = vcpu->vcpu_id;
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
/* Eye-catching numbers so we know if the guest takes an interrupt
* before it's programmed its own IVPR/IVORs. */
@@ -745,8 +1056,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
sregs->u.e.csrr0 = vcpu->arch.csrr0;
sregs->u.e.csrr1 = vcpu->arch.csrr1;
sregs->u.e.mcsr = vcpu->arch.mcsr;
- sregs->u.e.esr = vcpu->arch.shared->esr;
- sregs->u.e.dear = vcpu->arch.shared->dar;
+ sregs->u.e.esr = get_guest_esr(vcpu);
+ sregs->u.e.dear = get_guest_dear(vcpu);
sregs->u.e.tsr = vcpu->arch.tsr;
sregs->u.e.tcr = vcpu->arch.tcr;
sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
@@ -763,8 +1074,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
vcpu->arch.csrr0 = sregs->u.e.csrr0;
vcpu->arch.csrr1 = sregs->u.e.csrr1;
vcpu->arch.mcsr = sregs->u.e.mcsr;
- vcpu->arch.shared->esr = sregs->u.e.esr;
- vcpu->arch.shared->dar = sregs->u.e.dear;
+ set_guest_esr(vcpu, sregs->u.e.esr);
+ set_guest_dear(vcpu, sregs->u.e.dear);
vcpu->arch.vrsave = sregs->u.e.vrsave;
kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
@@ -932,15 +1243,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
{
}
-int kvmppc_core_init_vm(struct kvm *kvm)
-{
- return 0;
-}
-
-void kvmppc_core_destroy_vm(struct kvm *kvm)
-{
-}
-
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
@@ -968,8 +1270,19 @@ void kvmppc_decrementer_func(unsigned long data)
kvmppc_set_tsr_bits(vcpu, TSR_DIS);
}
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ current->thread.kvm_vcpu = vcpu;
+}
+
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ current->thread.kvm_vcpu = NULL;
+}
+
int __init kvmppc_booke_init(void)
{
+#ifndef CONFIG_KVM_BOOKE_HV
unsigned long ivor[16];
unsigned long max_ivor = 0;
int i;
@@ -1012,7 +1325,7 @@ int __init kvmppc_booke_init(void)
}
flush_icache_range(kvmppc_booke_handlers,
kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
+#endif /* !BOOKE_HV */
return 0;
}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 2fe202705a3f..ba61974c1e20 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
+#include <asm/switch_to.h>
#include "timing.h"
/* interrupt priortity ordering */
@@ -48,7 +49,20 @@
#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
/* Internal pseudo-irqprio for level triggered externals */
#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
-#define BOOKE_IRQPRIO_MAX 20
+#define BOOKE_IRQPRIO_DBELL 21
+#define BOOKE_IRQPRIO_DBELL_CRIT 22
+#define BOOKE_IRQPRIO_MAX 23
+
+#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \
+ (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \
+ (1 << BOOKE_IRQPRIO_DBELL) | \
+ (1 << BOOKE_IRQPRIO_DECREMENTER) | \
+ (1 << BOOKE_IRQPRIO_FIT) | \
+ (1 << BOOKE_IRQPRIO_EXTERNAL))
+
+#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \
+ (1 << BOOKE_IRQPRIO_WATCHDOG) | \
+ (1 << BOOKE_IRQPRIO_CRITICAL))
extern unsigned long kvmppc_booke_handlers;
@@ -61,8 +75,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
-int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
-int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
/* low-level asm code to transfer guest state */
void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
@@ -71,4 +85,46 @@ void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
/* high-level function, manages flags, host state */
void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
+
+enum int_class {
+ INT_CLASS_NONCRIT,
+ INT_CLASS_CRIT,
+ INT_CLASS_MC,
+ INT_CLASS_DBG,
+};
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
+
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also set the MSR_FP in thread so that host know
+ * we're holding FPU, and then host can help to save
+ * guest vcpu FP state if other threads require to use FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
+ load_up_fpu();
+ current->thread.regs->msr |= MSR_FP;
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
+ giveup_fpu(current);
+#endif
+}
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 3e652da36534..6c76397f2af4 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -40,8 +40,8 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int rs;
- int rt;
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
switch (get_op(inst)) {
case 19:
@@ -62,19 +62,16 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- rt = get_rt(inst);
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
case OP_31_XOP_MTMSR:
- rs = get_rs(inst);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_WRTEE:
- rs = get_rs(inst);
vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
@@ -99,22 +96,32 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+/*
+ * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode).
+ * Their backing store is in real registers, and these functions
+ * will return the wrong result if called for them in another context
+ * (such as debugging).
+ */
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_DEAR:
- vcpu->arch.shared->dar = spr_val; break;
+ vcpu->arch.shared->dar = spr_val;
+ break;
case SPRN_ESR:
- vcpu->arch.shared->esr = spr_val; break;
+ vcpu->arch.shared->esr = spr_val;
+ break;
case SPRN_DBCR0:
- vcpu->arch.dbcr0 = spr_val; break;
+ vcpu->arch.dbcr0 = spr_val;
+ break;
case SPRN_DBCR1:
- vcpu->arch.dbcr1 = spr_val; break;
+ vcpu->arch.dbcr1 = spr_val;
+ break;
case SPRN_DBSR:
- vcpu->arch.dbsr &= ~spr_val; break;
+ vcpu->arch.dbsr &= ~spr_val;
+ break;
case SPRN_TSR:
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
@@ -122,20 +129,29 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
kvmppc_set_tcr(vcpu, spr_val);
break;
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
+ /*
+ * Note: SPRG4-7 are user-readable.
+ * These values are loaded into the real SPRGs when resuming the
+ * guest (PR-mode only).
+ */
case SPRN_SPRG4:
- vcpu->arch.shared->sprg4 = spr_val; break;
+ vcpu->arch.shared->sprg4 = spr_val;
+ break;
case SPRN_SPRG5:
- vcpu->arch.shared->sprg5 = spr_val; break;
+ vcpu->arch.shared->sprg5 = spr_val;
+ break;
case SPRN_SPRG6:
- vcpu->arch.shared->sprg6 = spr_val; break;
+ vcpu->arch.shared->sprg6 = spr_val;
+ break;
case SPRN_SPRG7:
- vcpu->arch.shared->sprg7 = spr_val; break;
+ vcpu->arch.shared->sprg7 = spr_val;
+ break;
case SPRN_IVPR:
vcpu->arch.ivpr = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVPR, spr_val);
+#endif
break;
case SPRN_IVOR0:
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
@@ -145,6 +161,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_IVOR2:
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVOR2, spr_val);
+#endif
break;
case SPRN_IVOR3:
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
@@ -163,6 +182,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
break;
case SPRN_IVOR8:
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_GIVOR8, spr_val);
+#endif
break;
case SPRN_IVOR9:
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
@@ -193,75 +215,83 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
return emulated;
}
-int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
int emulated = EMULATE_DONE;
switch (sprn) {
case SPRN_IVPR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
+ *spr_val = vcpu->arch.ivpr;
+ break;
case SPRN_DEAR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
+ *spr_val = vcpu->arch.shared->dar;
+ break;
case SPRN_ESR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break;
+ *spr_val = vcpu->arch.shared->esr;
+ break;
case SPRN_DBCR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
+ *spr_val = vcpu->arch.dbcr0;
+ break;
case SPRN_DBCR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
+ *spr_val = vcpu->arch.dbcr1;
+ break;
case SPRN_DBSR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
+ *spr_val = vcpu->arch.dbsr;
+ break;
case SPRN_TSR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break;
+ *spr_val = vcpu->arch.tsr;
+ break;
case SPRN_TCR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break;
+ *spr_val = vcpu->arch.tcr;
+ break;
case SPRN_IVOR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
break;
case SPRN_IVOR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
break;
case SPRN_IVOR2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
break;
case SPRN_IVOR3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
break;
case SPRN_IVOR4:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
break;
case SPRN_IVOR5:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
break;
case SPRN_IVOR6:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
break;
case SPRN_IVOR7:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
break;
case SPRN_IVOR8:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
break;
case SPRN_IVOR9:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
break;
case SPRN_IVOR10:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
break;
case SPRN_IVOR11:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
break;
case SPRN_IVOR12:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
break;
case SPRN_IVOR13:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
break;
case SPRN_IVOR14:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
break;
case SPRN_IVOR15:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
break;
default:
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index c8c4b878795a..8feec2ff3928 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -419,13 +419,13 @@ lightweight_exit:
* written directly to the shared area, so we
* need to reload them here with the guest's values.
*/
- lwz r3, VCPU_SHARED_SPRG4(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG4, r5)
mtspr SPRN_SPRG4W, r3
- lwz r3, VCPU_SHARED_SPRG5(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG5, r5)
mtspr SPRN_SPRG5W, r3
- lwz r3, VCPU_SHARED_SPRG6(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG6, r5)
mtspr SPRN_SPRG6W, r3
- lwz r3, VCPU_SHARED_SPRG7(r5)
+ PPC_LD(r3, VCPU_SHARED_SPRG7, r5)
mtspr SPRN_SPRG7W, r3
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
new file mode 100644
index 000000000000..6048a00515d7
--- /dev/null
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -0,0 +1,597 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ * Author: Scott Wood <scotwood@freescale.com>
+ *
+ * This file is derived from arch/powerpc/kvm/booke_interrupts.S
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu-44x.h>
+#include <asm/page.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/bitsperlong.h>
+#include <asm/thread_info.h>
+
+#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
+
+#define GET_VCPU(vcpu, thread) \
+ PPC_LL vcpu, THREAD_KVM_VCPU(thread)
+
+#define LONGBYTES (BITS_PER_LONG / 8)
+
+#define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES))
+#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
+
+/* The host stack layout: */
+#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */
+#define HOST_CALLEE_LR (1 * LONGBYTES)
+#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */
+/*
+ * r2 is special: it holds 'current', and it made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option.
+ */
+#define HOST_R2 (3 * LONGBYTES)
+#define HOST_CR (4 * LONGBYTES)
+#define HOST_NV_GPRS (5 * LONGBYTES)
+#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES)
+#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
+
+#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
+#define NEED_DEAR 0x00000002 /* save faulting DEAR */
+#define NEED_ESR 0x00000004 /* save faulting ESR */
+
+/*
+ * On entry:
+ * r4 = vcpu, r5 = srr0, r6 = srr1
+ * saved in vcpu: cr, ctr, r3-r13
+ */
+.macro kvm_handler_common intno, srr0, flags
+ /* Restore host stack pointer */
+ PPC_STL r1, VCPU_GPR(r1)(r4)
+ PPC_STL r2, VCPU_GPR(r2)(r4)
+ PPC_LL r1, VCPU_HOST_STACK(r4)
+ PPC_LL r2, HOST_R2(r1)
+
+ mfspr r10, SPRN_PID
+ lwz r8, VCPU_HOST_PID(r4)
+ PPC_LL r11, VCPU_SHARED(r4)
+ PPC_STL r14, VCPU_GPR(r14)(r4) /* We need a non-volatile GPR. */
+ li r14, \intno
+
+ stw r10, VCPU_GUEST_PID(r4)
+ mtspr SPRN_PID, r8
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save exit time */
+1: mfspr r7, SPRN_TBRU
+ mfspr r8, SPRN_TBRL
+ mfspr r9, SPRN_TBRU
+ cmpw r9, r7
+ stw r8, VCPU_TIMING_EXIT_TBL(r4)
+ bne- 1b
+ stw r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
+ oris r8, r6, MSR_CE@h
+ PPC_STD(r6, VCPU_SHARED_MSR, r11)
+ ori r8, r8, MSR_ME | MSR_RI
+ PPC_STL r5, VCPU_PC(r4)
+
+ /*
+ * Make sure CE/ME/RI are set (if appropriate for exception type)
+ * whether or not the guest had it set. Since mfmsr/mtmsr are
+ * somewhat expensive, skip in the common case where the guest
+ * had all these bits set (and thus they're still set if
+ * appropriate for the exception type).
+ */
+ cmpw r6, r8
+ beq 1f
+ mfmsr r7
+ .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0
+ oris r7, r7, MSR_CE@h
+ .endif
+ .if \srr0 != SPRN_MCSRR0
+ ori r7, r7, MSR_ME | MSR_RI
+ .endif
+ mtmsr r7
+1:
+
+ .if \flags & NEED_EMU
+ /*
+ * This assumes you have external PID support.
+ * To support a bookehv CPU without external PID, you'll
+ * need to look up the TLB entry and create a temporary mapping.
+ *
+ * FIXME: we don't currently handle if the lwepx faults. PR-mode
+ * booke doesn't handle it either. Since Linux doesn't use
+ * broadcast tlbivax anymore, the only way this should happen is
+ * if the guest maps its memory execute-but-not-read, or if we
+ * somehow take a TLB miss in the middle of this entry code and
+ * evict the relevant entry. On e500mc, all kernel lowmem is
+ * bolted into TLB1 large page mappings, and we don't use
+ * broadcast invalidates, so we should not take a TLB miss here.
+ *
+ * Later we'll need to deal with faults here. Disallowing guest
+ * mappings that are execute-but-not-read could be an option on
+ * e500mc, but not on chips with an LRAT if it is used.
+ */
+
+ mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */
+ PPC_STL r15, VCPU_GPR(r15)(r4)
+ PPC_STL r16, VCPU_GPR(r16)(r4)
+ PPC_STL r17, VCPU_GPR(r17)(r4)
+ PPC_STL r18, VCPU_GPR(r18)(r4)
+ PPC_STL r19, VCPU_GPR(r19)(r4)
+ mr r8, r3
+ PPC_STL r20, VCPU_GPR(r20)(r4)
+ rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
+ PPC_STL r21, VCPU_GPR(r21)(r4)
+ rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
+ PPC_STL r22, VCPU_GPR(r22)(r4)
+ rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID
+ PPC_STL r23, VCPU_GPR(r23)(r4)
+ PPC_STL r24, VCPU_GPR(r24)(r4)
+ PPC_STL r25, VCPU_GPR(r25)(r4)
+ PPC_STL r26, VCPU_GPR(r26)(r4)
+ PPC_STL r27, VCPU_GPR(r27)(r4)
+ PPC_STL r28, VCPU_GPR(r28)(r4)
+ PPC_STL r29, VCPU_GPR(r29)(r4)
+ PPC_STL r30, VCPU_GPR(r30)(r4)
+ PPC_STL r31, VCPU_GPR(r31)(r4)
+ mtspr SPRN_EPLC, r8
+
+ /* disable preemption, so we are sure we hit the fixup handler */
+#ifdef CONFIG_PPC64
+ clrrdi r8,r1,THREAD_SHIFT
+#else
+ rlwinm r8,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+#endif
+ li r7, 1
+ stw r7, TI_PREEMPT(r8)
+
+ isync
+
+ /*
+ * In case the read goes wrong, we catch it and write an invalid value
+ * in LAST_INST instead.
+ */
+1: lwepx r9, 0, r5
+2:
+.section .fixup, "ax"
+3: li r9, KVM_INST_FETCH_FAILED
+ b 2b
+.previous
+.section __ex_table,"a"
+ PPC_LONG_ALIGN
+ PPC_LONG 1b,3b
+.previous
+
+ mtspr SPRN_EPLC, r3
+ li r7, 0
+ stw r7, TI_PREEMPT(r8)
+ stw r9, VCPU_LAST_INST(r4)
+ .endif
+
+ .if \flags & NEED_ESR
+ mfspr r8, SPRN_ESR
+ PPC_STL r8, VCPU_FAULT_ESR(r4)
+ .endif
+
+ .if \flags & NEED_DEAR
+ mfspr r9, SPRN_DEAR
+ PPC_STL r9, VCPU_FAULT_DEAR(r4)
+ .endif
+
+ b kvmppc_resume_host
+.endm
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ GET_VCPU(r11, r10)
+ PPC_STL r3, VCPU_GPR(r3)(r11)
+ mfspr r3, SPRN_SPRG_RSCRATCH0
+ PPC_STL r4, VCPU_GPR(r4)(r11)
+ PPC_LL r4, THREAD_NORMSAVE(0)(r10)
+ PPC_STL r5, VCPU_GPR(r5)(r11)
+ stw r13, VCPU_CR(r11)
+ mfspr r5, \srr0
+ PPC_STL r3, VCPU_GPR(r10)(r11)
+ PPC_LL r3, THREAD_NORMSAVE(2)(r10)
+ PPC_STL r6, VCPU_GPR(r6)(r11)
+ PPC_STL r4, VCPU_GPR(r11)(r11)
+ mfspr r6, \srr1
+ PPC_STL r7, VCPU_GPR(r7)(r11)
+ PPC_STL r8, VCPU_GPR(r8)(r11)
+ PPC_STL r9, VCPU_GPR(r9)(r11)
+ PPC_STL r3, VCPU_GPR(r13)(r11)
+ mfctr r7
+ PPC_STL r12, VCPU_GPR(r12)(r11)
+ PPC_STL r7, VCPU_CTR(r11)
+ mr r4, r11
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+.macro kvm_lvl_handler intno scratch srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ mfspr r10, SPRN_SPRG_THREAD
+ GET_VCPU(r11, r10)
+ PPC_STL r3, VCPU_GPR(r3)(r11)
+ mfspr r3, \scratch
+ PPC_STL r4, VCPU_GPR(r4)(r11)
+ PPC_LL r4, GPR9(r8)
+ PPC_STL r5, VCPU_GPR(r5)(r11)
+ stw r9, VCPU_CR(r11)
+ mfspr r5, \srr0
+ PPC_STL r3, VCPU_GPR(r8)(r11)
+ PPC_LL r3, GPR10(r8)
+ PPC_STL r6, VCPU_GPR(r6)(r11)
+ PPC_STL r4, VCPU_GPR(r9)(r11)
+ mfspr r6, \srr1
+ PPC_LL r4, GPR11(r8)
+ PPC_STL r7, VCPU_GPR(r7)(r11)
+ PPC_STL r3, VCPU_GPR(r10)(r11)
+ mfctr r7
+ PPC_STL r12, VCPU_GPR(r12)(r11)
+ PPC_STL r13, VCPU_GPR(r13)(r11)
+ PPC_STL r4, VCPU_GPR(r11)(r11)
+ PPC_STL r7, VCPU_CTR(r11)
+ mr r4, r11
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
+ SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+ SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
+
+
+/* Registers:
+ * SPRG_SCRATCH0: guest r10
+ * r4: vcpu pointer
+ * r11: vcpu->arch.shared
+ * r14: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+ /* Save remaining volatile guest register state to vcpu. */
+ mfspr r3, SPRN_VRSAVE
+ PPC_STL r0, VCPU_GPR(r0)(r4)
+ mflr r5
+ mfspr r6, SPRN_SPRG4
+ PPC_STL r5, VCPU_LR(r4)
+ mfspr r7, SPRN_SPRG5
+ stw r3, VCPU_VRSAVE(r4)
+ PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
+ mfspr r8, SPRN_SPRG6
+ PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
+ mfspr r9, SPRN_SPRG7
+ PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
+ mfxer r3
+ PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
+
+ /* save guest MAS registers and restore host mas4 & mas6 */
+ mfspr r5, SPRN_MAS0
+ PPC_STL r3, VCPU_XER(r4)
+ mfspr r6, SPRN_MAS1
+ stw r5, VCPU_SHARED_MAS0(r11)
+ mfspr r7, SPRN_MAS2
+ stw r6, VCPU_SHARED_MAS1(r11)
+ PPC_STD(r7, VCPU_SHARED_MAS2, r11)
+ mfspr r5, SPRN_MAS3
+ mfspr r6, SPRN_MAS4
+ stw r5, VCPU_SHARED_MAS7_3+4(r11)
+ mfspr r7, SPRN_MAS6
+ stw r6, VCPU_SHARED_MAS4(r11)
+ mfspr r5, SPRN_MAS7
+ lwz r6, VCPU_HOST_MAS4(r4)
+ stw r7, VCPU_SHARED_MAS6(r11)
+ lwz r8, VCPU_HOST_MAS6(r4)
+ mtspr SPRN_MAS4, r6
+ stw r5, VCPU_SHARED_MAS7_3+0(r11)
+ mtspr SPRN_MAS6, r8
+ /* Enable MAS register updates via exception */
+ mfspr r3, SPRN_EPCR
+ rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH
+ mtspr SPRN_EPCR, r3
+ isync
+
+ /* Switch to kernel stack and jump to handler. */
+ PPC_LL r3, HOST_RUN(r1)
+ mr r5, r14 /* intno */
+ mr r14, r4 /* Save vcpu pointer. */
+ bl kvmppc_handle_exit
+
+ /* Restore vcpu pointer and the nonvolatiles we used. */
+ mr r4, r14
+ PPC_LL r14, VCPU_GPR(r14)(r4)
+
+ andi. r5, r3, RESUME_FLAG_NV
+ beq skip_nv_load
+ PPC_LL r15, VCPU_GPR(r15)(r4)
+ PPC_LL r16, VCPU_GPR(r16)(r4)
+ PPC_LL r17, VCPU_GPR(r17)(r4)
+ PPC_LL r18, VCPU_GPR(r18)(r4)
+ PPC_LL r19, VCPU_GPR(r19)(r4)
+ PPC_LL r20, VCPU_GPR(r20)(r4)
+ PPC_LL r21, VCPU_GPR(r21)(r4)
+ PPC_LL r22, VCPU_GPR(r22)(r4)
+ PPC_LL r23, VCPU_GPR(r23)(r4)
+ PPC_LL r24, VCPU_GPR(r24)(r4)
+ PPC_LL r25, VCPU_GPR(r25)(r4)
+ PPC_LL r26, VCPU_GPR(r26)(r4)
+ PPC_LL r27, VCPU_GPR(r27)(r4)
+ PPC_LL r28, VCPU_GPR(r28)(r4)
+ PPC_LL r29, VCPU_GPR(r29)(r4)
+ PPC_LL r30, VCPU_GPR(r30)(r4)
+ PPC_LL r31, VCPU_GPR(r31)(r4)
+skip_nv_load:
+ /* Should we return to the guest? */
+ andi. r5, r3, RESUME_FLAG_HOST
+ beq lightweight_exit
+
+ srawi r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+ /* Not returning to guest. */
+ PPC_LL r5, HOST_STACK_LR(r1)
+ lwz r6, HOST_CR(r1)
+
+ /*
+ * We already saved guest volatile register state; now save the
+ * non-volatiles.
+ */
+
+ PPC_STL r15, VCPU_GPR(r15)(r4)
+ PPC_STL r16, VCPU_GPR(r16)(r4)
+ PPC_STL r17, VCPU_GPR(r17)(r4)
+ PPC_STL r18, VCPU_GPR(r18)(r4)
+ PPC_STL r19, VCPU_GPR(r19)(r4)
+ PPC_STL r20, VCPU_GPR(r20)(r4)
+ PPC_STL r21, VCPU_GPR(r21)(r4)
+ PPC_STL r22, VCPU_GPR(r22)(r4)
+ PPC_STL r23, VCPU_GPR(r23)(r4)
+ PPC_STL r24, VCPU_GPR(r24)(r4)
+ PPC_STL r25, VCPU_GPR(r25)(r4)
+ PPC_STL r26, VCPU_GPR(r26)(r4)
+ PPC_STL r27, VCPU_GPR(r27)(r4)
+ PPC_STL r28, VCPU_GPR(r28)(r4)
+ PPC_STL r29, VCPU_GPR(r29)(r4)
+ PPC_STL r30, VCPU_GPR(r30)(r4)
+ PPC_STL r31, VCPU_GPR(r31)(r4)
+
+ /* Load host non-volatile register state from host stack. */
+ PPC_LL r14, HOST_NV_GPR(r14)(r1)
+ PPC_LL r15, HOST_NV_GPR(r15)(r1)
+ PPC_LL r16, HOST_NV_GPR(r16)(r1)
+ PPC_LL r17, HOST_NV_GPR(r17)(r1)
+ PPC_LL r18, HOST_NV_GPR(r18)(r1)
+ PPC_LL r19, HOST_NV_GPR(r19)(r1)
+ PPC_LL r20, HOST_NV_GPR(r20)(r1)
+ PPC_LL r21, HOST_NV_GPR(r21)(r1)
+ PPC_LL r22, HOST_NV_GPR(r22)(r1)
+ PPC_LL r23, HOST_NV_GPR(r23)(r1)
+ PPC_LL r24, HOST_NV_GPR(r24)(r1)
+ PPC_LL r25, HOST_NV_GPR(r25)(r1)
+ PPC_LL r26, HOST_NV_GPR(r26)(r1)
+ PPC_LL r27, HOST_NV_GPR(r27)(r1)
+ PPC_LL r28, HOST_NV_GPR(r28)(r1)
+ PPC_LL r29, HOST_NV_GPR(r29)(r1)
+ PPC_LL r30, HOST_NV_GPR(r30)(r1)
+ PPC_LL r31, HOST_NV_GPR(r31)(r1)
+
+ /* Return to kvm_vcpu_run(). */
+ mtlr r5
+ mtcr r6
+ addi r1, r1, HOST_STACK_SIZE
+ /* r3 still contains the return code from kvmppc_handle_exit(). */
+ blr
+
+/* Registers:
+ * r3: kvm_run pointer
+ * r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+ stwu r1, -HOST_STACK_SIZE(r1)
+ PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+
+ /* Save host state to stack. */
+ PPC_STL r3, HOST_RUN(r1)
+ mflr r3
+ mfcr r5
+ PPC_STL r3, HOST_STACK_LR(r1)
+
+ stw r5, HOST_CR(r1)
+
+ /* Save host non-volatile register state to stack. */
+ PPC_STL r14, HOST_NV_GPR(r14)(r1)
+ PPC_STL r15, HOST_NV_GPR(r15)(r1)
+ PPC_STL r16, HOST_NV_GPR(r16)(r1)
+ PPC_STL r17, HOST_NV_GPR(r17)(r1)
+ PPC_STL r18, HOST_NV_GPR(r18)(r1)
+ PPC_STL r19, HOST_NV_GPR(r19)(r1)
+ PPC_STL r20, HOST_NV_GPR(r20)(r1)
+ PPC_STL r21, HOST_NV_GPR(r21)(r1)
+ PPC_STL r22, HOST_NV_GPR(r22)(r1)
+ PPC_STL r23, HOST_NV_GPR(r23)(r1)
+ PPC_STL r24, HOST_NV_GPR(r24)(r1)
+ PPC_STL r25, HOST_NV_GPR(r25)(r1)
+ PPC_STL r26, HOST_NV_GPR(r26)(r1)
+ PPC_STL r27, HOST_NV_GPR(r27)(r1)
+ PPC_STL r28, HOST_NV_GPR(r28)(r1)
+ PPC_STL r29, HOST_NV_GPR(r29)(r1)
+ PPC_STL r30, HOST_NV_GPR(r30)(r1)
+ PPC_STL r31, HOST_NV_GPR(r31)(r1)
+
+ /* Load guest non-volatiles. */
+ PPC_LL r14, VCPU_GPR(r14)(r4)
+ PPC_LL r15, VCPU_GPR(r15)(r4)
+ PPC_LL r16, VCPU_GPR(r16)(r4)
+ PPC_LL r17, VCPU_GPR(r17)(r4)
+ PPC_LL r18, VCPU_GPR(r18)(r4)
+ PPC_LL r19, VCPU_GPR(r19)(r4)
+ PPC_LL r20, VCPU_GPR(r20)(r4)
+ PPC_LL r21, VCPU_GPR(r21)(r4)
+ PPC_LL r22, VCPU_GPR(r22)(r4)
+ PPC_LL r23, VCPU_GPR(r23)(r4)
+ PPC_LL r24, VCPU_GPR(r24)(r4)
+ PPC_LL r25, VCPU_GPR(r25)(r4)
+ PPC_LL r26, VCPU_GPR(r26)(r4)
+ PPC_LL r27, VCPU_GPR(r27)(r4)
+ PPC_LL r28, VCPU_GPR(r28)(r4)
+ PPC_LL r29, VCPU_GPR(r29)(r4)
+ PPC_LL r30, VCPU_GPR(r30)(r4)
+ PPC_LL r31, VCPU_GPR(r31)(r4)
+
+
+lightweight_exit:
+ PPC_STL r2, HOST_R2(r1)
+
+ mfspr r3, SPRN_PID
+ stw r3, VCPU_HOST_PID(r4)
+ lwz r3, VCPU_GUEST_PID(r4)
+ mtspr SPRN_PID, r3
+
+ PPC_LL r11, VCPU_SHARED(r4)
+ /* Disable MAS register updates via exception */
+ mfspr r3, SPRN_EPCR
+ oris r3, r3, SPRN_EPCR_DMIUH@h
+ mtspr SPRN_EPCR, r3
+ isync
+ /* Save host mas4 and mas6 and load guest MAS registers */
+ mfspr r3, SPRN_MAS4
+ stw r3, VCPU_HOST_MAS4(r4)
+ mfspr r3, SPRN_MAS6
+ stw r3, VCPU_HOST_MAS6(r4)
+ lwz r3, VCPU_SHARED_MAS0(r11)
+ lwz r5, VCPU_SHARED_MAS1(r11)
+ PPC_LD(r6, VCPU_SHARED_MAS2, r11)
+ lwz r7, VCPU_SHARED_MAS7_3+4(r11)
+ lwz r8, VCPU_SHARED_MAS4(r11)
+ mtspr SPRN_MAS0, r3
+ mtspr SPRN_MAS1, r5
+ mtspr SPRN_MAS2, r6
+ mtspr SPRN_MAS3, r7
+ mtspr SPRN_MAS4, r8
+ lwz r3, VCPU_SHARED_MAS6(r11)
+ lwz r5, VCPU_SHARED_MAS7_3+0(r11)
+ mtspr SPRN_MAS6, r3
+ mtspr SPRN_MAS7, r5
+
+ /*
+ * Host interrupt handlers may have clobbered these guest-readable
+ * SPRGs, so we need to reload them here with the guest's values.
+ */
+ lwz r3, VCPU_VRSAVE(r4)
+ PPC_LD(r5, VCPU_SHARED_SPRG4, r11)
+ mtspr SPRN_VRSAVE, r3
+ PPC_LD(r6, VCPU_SHARED_SPRG5, r11)
+ mtspr SPRN_SPRG4W, r5
+ PPC_LD(r7, VCPU_SHARED_SPRG6, r11)
+ mtspr SPRN_SPRG5W, r6
+ PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
+ mtspr SPRN_SPRG6W, r7
+ mtspr SPRN_SPRG7W, r8
+
+ /* Load some guest volatiles. */
+ PPC_LL r3, VCPU_LR(r4)
+ PPC_LL r5, VCPU_XER(r4)
+ PPC_LL r6, VCPU_CTR(r4)
+ lwz r7, VCPU_CR(r4)
+ PPC_LL r8, VCPU_PC(r4)
+ PPC_LD(r9, VCPU_SHARED_MSR, r11)
+ PPC_LL r0, VCPU_GPR(r0)(r4)
+ PPC_LL r1, VCPU_GPR(r1)(r4)
+ PPC_LL r2, VCPU_GPR(r2)(r4)
+ PPC_LL r10, VCPU_GPR(r10)(r4)
+ PPC_LL r11, VCPU_GPR(r11)(r4)
+ PPC_LL r12, VCPU_GPR(r12)(r4)
+ PPC_LL r13, VCPU_GPR(r13)(r4)
+ mtlr r3
+ mtxer r5
+ mtctr r6
+ mtsrr0 r8
+ mtsrr1 r9
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save enter time */
+1:
+ mfspr r6, SPRN_TBRU
+ mfspr r9, SPRN_TBRL
+ mfspr r8, SPRN_TBRU
+ cmpw r8, r6
+ stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4)
+ bne 1b
+ stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
+ /*
+ * Don't execute any instruction which can change CR after
+ * below instruction.
+ */
+ mtcr r7
+
+ /* Finish loading guest volatiles and jump to guest. */
+ PPC_LL r5, VCPU_GPR(r5)(r4)
+ PPC_LL r6, VCPU_GPR(r6)(r4)
+ PPC_LL r7, VCPU_GPR(r7)(r4)
+ PPC_LL r8, VCPU_GPR(r8)(r4)
+ PPC_LL r9, VCPU_GPR(r9)(r4)
+
+ PPC_LL r3, VCPU_GPR(r3)(r4)
+ PPC_LL r4, VCPU_GPR(r4)(r4)
+ rfi
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ddcd896fa2ff..b479ed77c515 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -20,11 +20,282 @@
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/tlbflush.h>
-#include <asm/kvm_e500.h>
#include <asm/kvm_ppc.h>
+#include "../mm/mmu_decl.h"
#include "booke.h"
-#include "e500_tlb.h"
+#include "e500.h"
+
+struct id {
+ unsigned long val;
+ struct id **pentry;
+};
+
+#define NUM_TIDS 256
+
+/*
+ * This table provide mappings from:
+ * (guestAS,guestTID,guestPR) --> ID of physical cpu
+ * guestAS [0..1]
+ * guestTID [0..255]
+ * guestPR [0..1]
+ * ID [1..255]
+ * Each vcpu keeps one vcpu_id_table.
+ */
+struct vcpu_id_table {
+ struct id id[2][NUM_TIDS][2];
+};
+
+/*
+ * This table provide reversed mappings of vcpu_id_table:
+ * ID --> address of vcpu_id_table item.
+ * Each physical core has one pcpu_id_table.
+ */
+struct pcpu_id_table {
+ struct id *entry[NUM_TIDS];
+};
+
+static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
+
+/* This variable keeps last used shadow ID on local core.
+ * The valid range of shadow ID is [1..255] */
+static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
+
+/*
+ * Allocate a free shadow id and setup a valid sid mapping in given entry.
+ * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_setup_one(struct id *entry)
+{
+ unsigned long sid;
+ int ret = -1;
+
+ sid = ++(__get_cpu_var(pcpu_last_used_sid));
+ if (sid < NUM_TIDS) {
+ __get_cpu_var(pcpu_sids).entry[sid] = entry;
+ entry->val = sid;
+ entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
+ ret = sid;
+ }
+
+ /*
+ * If sid == NUM_TIDS, we've run out of sids. We return -1, and
+ * the caller will invalidate everything and start over.
+ *
+ * sid > NUM_TIDS indicates a race, which we disable preemption to
+ * avoid.
+ */
+ WARN_ON(sid > NUM_TIDS);
+
+ return ret;
+}
+
+/*
+ * Check if given entry contain a valid shadow id mapping.
+ * An ID mapping is considered valid only if
+ * both vcpu and pcpu know this mapping.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_lookup(struct id *entry)
+{
+ if (entry && entry->val != 0 &&
+ __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
+ entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
+ return entry->val;
+ return -1;
+}
+
+/* Invalidate all id mappings on local core -- call with preempt disabled */
+static inline void local_sid_destroy_all(void)
+{
+ __get_cpu_var(pcpu_last_used_sid) = 0;
+ memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
+}
+
+static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
+ return vcpu_e500->idt;
+}
+
+static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->idt);
+ vcpu_e500->idt = NULL;
+}
+
+/* Map guest pid to shadow.
+ * We use PID to keep shadow of current guest non-zero PID,
+ * and use PID1 to keep shadow of guest zero PID.
+ * So that guest tlbe with TID=0 can be accessed at any time */
+static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ preempt_disable();
+ vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu),
+ get_cur_pid(&vcpu_e500->vcpu),
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
+ get_cur_as(&vcpu_e500->vcpu), 0,
+ get_cur_pr(&vcpu_e500->vcpu), 1);
+ preempt_enable();
+}
+
+/* Invalidate all mappings on vcpu */
+static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/* Invalidate one ID mapping on vcpu */
+static inline void kvmppc_e500_id_table_reset_one(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ int as, int pid, int pr)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+
+ BUG_ON(as >= 2);
+ BUG_ON(pid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ idt->id[as][pid][pr].val = 0;
+ idt->id[as][pid][pr].pentry = NULL;
+
+ /* Update shadow pid when mappings are changed */
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/*
+ * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
+ * This function first lookup if a valid mapping exists,
+ * if not, then creates a new one.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ int sid;
+
+ BUG_ON(as >= 2);
+ BUG_ON(gid >= NUM_TIDS);
+ BUG_ON(pr >= 2);
+
+ sid = local_sid_lookup(&idt->id[as][gid][pr]);
+
+ while (sid <= 0) {
+ /* No mapping yet */
+ sid = local_sid_setup_one(&idt->id[as][gid][pr]);
+ if (sid <= 0) {
+ _tlbil_all();
+ local_sid_destroy_all();
+ }
+
+ /* Update shadow pid when mappings are changed */
+ if (!avoid_recursion)
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+
+ return sid;
+}
+
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe),
+ get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ if (vcpu->arch.pid != pid) {
+ vcpu_e500->pid[0] = vcpu->arch.pid = pid;
+ kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ }
+}
+
+/* gtlbe must not be mapped by more than one host tlbe */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ struct vcpu_id_table *idt = vcpu_e500->idt;
+ unsigned int pr, tid, ts, pid;
+ u32 val, eaddr;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+
+ preempt_disable();
+
+ /* One guest ID may be mapped to two shadow IDs */
+ for (pr = 0; pr < 2; pr++) {
+ /*
+ * The shadow PID can have a valid mapping on at most one
+ * host CPU. In the common case, it will be valid on this
+ * CPU, in which case we do a local invalidation of the
+ * specific address.
+ *
+ * If the shadow PID is not valid on the current host CPU,
+ * we invalidate the entire shadow PID.
+ */
+ pid = local_sid_lookup(&idt->id[ts][tid][pr]);
+ if (pid <= 0) {
+ kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
+ continue;
+ }
+
+ /*
+ * The guest is invalidating a 4K entry which is in a PID
+ * that has a valid shadow mapping on this host CPU. We
+ * search host TLB to invalidate it's shadow TLB entry,
+ * similar to __tlbil_va except that we need to look in AS1.
+ */
+ val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+
+ local_irq_restore(flags);
+ }
+
+ preempt_enable();
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kvmppc_e500_id_table_reset_all(vcpu_e500);
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+ /* Recalc shadow pid since MSR changes */
+ kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
+}
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
@@ -36,17 +307,20 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- kvmppc_e500_tlb_load(vcpu, cpu);
+ kvmppc_booke_vcpu_load(vcpu, cpu);
+
+ /* Shadow PID may be expired on local core */
+ kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
- kvmppc_e500_tlb_put(vcpu);
-
#ifdef CONFIG_SPE
if (vcpu->arch.shadow_msr & MSR_SPE)
kvmppc_vcpu_disable_spe(vcpu);
#endif
+
+ kvmppc_booke_vcpu_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
@@ -61,6 +335,23 @@ int kvmppc_core_check_processor_compat(void)
return r;
}
+static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+
+ /* Insert large initial mapping for guest. */
+ tlbe = get_entry(vcpu_e500, 1, 0);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
+ tlbe->mas2 = 0;
+ tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
+
+ /* 4K map for serial output. Used by kernel wrapper. */
+ tlbe = get_entry(vcpu_e500, 1, 1);
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+ tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+ tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -76,32 +367,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
- struct kvm_translation *tr)
-{
- int index;
- gva_t eaddr;
- u8 pid;
- u8 as;
-
- eaddr = tr->linear_address;
- pid = (tr->linear_address >> 32) & 0xff;
- as = (tr->linear_address >> 40) & 0x1;
-
- index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
- if (index < 0) {
- tr->valid = 0;
- return 0;
- }
-
- tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
- /* XXX what does "writeable" and "usermode" even mean? */
- tr->valid = 1;
-
- return 0;
-}
-
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -115,19 +380,6 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
- sregs->u.e.mas0 = vcpu->arch.shared->mas0;
- sregs->u.e.mas1 = vcpu->arch.shared->mas1;
- sregs->u.e.mas2 = vcpu->arch.shared->mas2;
- sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
- sregs->u.e.mas4 = vcpu->arch.shared->mas4;
- sregs->u.e.mas6 = vcpu->arch.shared->mas6;
-
- sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
- sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
- sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
- sregs->u.e.tlbcfg[2] = 0;
- sregs->u.e.tlbcfg[3] = 0;
-
sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
@@ -135,11 +387,13 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
kvmppc_get_sregs_ivor(vcpu, sregs);
+ kvmppc_get_sregs_e500_tlb(vcpu, sregs);
}
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int ret;
if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
@@ -147,14 +401,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
}
- if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
- vcpu->arch.shared->mas0 = sregs->u.e.mas0;
- vcpu->arch.shared->mas1 = sregs->u.e.mas1;
- vcpu->arch.shared->mas2 = sregs->u.e.mas2;
- vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
- vcpu->arch.shared->mas4 = sregs->u.e.mas4;
- vcpu->arch.shared->mas6 = sregs->u.e.mas6;
- }
+ ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+ if (ret < 0)
+ return ret;
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
@@ -193,9 +442,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ goto uninit_vcpu;
+
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ goto uninit_id;
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!vcpu->arch.shared)
@@ -205,6 +457,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_id:
+ kvmppc_e500_id_table_free(vcpu_e500);
uninit_vcpu:
kvm_vcpu_uninit(vcpu);
free_vcpu:
@@ -218,11 +472,21 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
free_page((unsigned long)vcpu->arch.shared);
- kvm_vcpu_uninit(vcpu);
kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvmppc_e500_id_table_free(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+}
+
static int __init kvmppc_e500_init(void)
{
int r, i;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
new file mode 100644
index 000000000000..aa8b81428bf4
--- /dev/null
+++ b/arch/powerpc/kvm/e500.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu <yu.liu@freescale.com>
+ * Scott Wood <scottwood@freescale.com>
+ * Ashish Kalra <ashish.kalra@freescale.com>
+ * Varun Sethi <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h and
+ * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
+ * Copyright IBM Corp. 2007-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef KVM_E500_H
+#define KVM_E500_H
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-book3e.h>
+#include <asm/tlb.h>
+
+#define E500_PID_NUM 3
+#define E500_TLB_NUM 2
+
+#define E500_TLB_VALID 1
+#define E500_TLB_DIRTY 2
+#define E500_TLB_BITMAP 4
+
+struct tlbe_ref {
+ pfn_t pfn;
+ unsigned int flags; /* E500_TLB_* */
+};
+
+struct tlbe_priv {
+ struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
+};
+
+#ifdef CONFIG_KVM_E500V2
+struct vcpu_id_table;
+#endif
+
+struct kvmppc_e500_tlb_params {
+ int entries, ways, sets;
+};
+
+struct kvmppc_vcpu_e500 {
+ struct kvm_vcpu vcpu;
+
+ /* Unmodified copy of the guest's TLB -- shared with host userspace. */
+ struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+ /* Starting entry number in gtlb_arch[] */
+ int gtlb_offset[E500_TLB_NUM];
+
+ /* KVM internal information associated with each guest TLB entry */
+ struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
+
+ struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
+
+ unsigned int gtlb_nv[E500_TLB_NUM];
+
+ /*
+ * information associated with each host TLB entry --
+ * TLB1 only for now. If/when guest TLB1 entries can be
+ * mapped with host TLB0, this will be used for that too.
+ *
+ * We don't want to use this for guest TLB0 because then we'd
+ * have the overhead of doing the translation again even if
+ * the entry is still in the guest TLB (e.g. we swapped out
+ * and back, and our host TLB entries got evicted).
+ */
+ struct tlbe_ref *tlb_refs[E500_TLB_NUM];
+ unsigned int host_tlb1_nv;
+
+ u32 svr;
+ u32 l1csr0;
+ u32 l1csr1;
+ u32 hid0;
+ u32 hid1;
+ u64 mcar;
+
+ struct page **shared_tlb_pages;
+ int num_shared_tlb_pages;
+
+ u64 *g2h_tlb1_map;
+ unsigned int *h2g_tlb1_rmap;
+
+ /* Minimum and maximum address mapped my TLB1 */
+ unsigned long tlb1_min_eaddr;
+ unsigned long tlb1_max_eaddr;
+
+#ifdef CONFIG_KVM_E500V2
+ u32 pid[E500_PID_NUM];
+
+ /* vcpu id table */
+ struct vcpu_id_table *idt;
+#endif
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE 128
+#define KVM_E500_TLB0_WAY_NUM 2
+
+#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE 16
+
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index) ((index) >> 16)
+#define esel_of(index) ((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+ (MAS2_X0 | MAS2_X1)
+#define MAS3_ATTRIB_MASK \
+ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
+ ulong value);
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+
+#ifdef CONFIG_KVM_E500V2
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+ unsigned int as, unsigned int gid,
+ unsigned int pr, int avoid_recursion);
+#endif
+
+/* TLB helper functions */
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 7) & 0x1f;
+}
+
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas2 & 0xfffff000;
+}
+
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ unsigned int pgsize = get_tlb_size(tlbe);
+ return 1ULL << 10 << pgsize;
+}
+
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ u64 bytes = get_tlb_bytes(tlbe);
+ return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return tlbe->mas7_3 & ~0xfffULL;
+}
+
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 16) & 0xff;
+}
+
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 12) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 31) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 >> 30) & 0x1;
+}
+
+static inline unsigned int
+get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
+}
+
+static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.shared->msr & MSR_PR);
+}
+
+static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.shared->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.shared->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * Manual says that tlbsel has 2 bits wide.
+ * Since we only have two TLBs, only lower bit is used.
+ */
+ return (vcpu->arch.shared->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.shared->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
+ return 0;
+#endif
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+static inline struct kvm_book3e_206_tlb_entry *get_entry(
+ struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+ int offset = vcpu_e500->gtlb_offset[tlbsel];
+ return &vcpu_e500->gtlb_arch[offset + entry];
+}
+
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe);
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
+#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
+#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
+#else
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe);
+
+static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf;
+
+ return vcpu_e500->pid[tidseld];
+}
+
+/* Force TS=1 for all guest mappings. */
+#define get_tlb_sts(gtlbe) (MAS1_TS)
+#endif /* !BOOKE_HV */
+
+#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 6d0b2bd54fb0..8b99e076dc81 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -14,27 +14,96 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
-#include <asm/kvm_e500.h>
+#include <asm/dbell.h>
#include "booke.h"
-#include "e500_tlb.h"
+#include "e500.h"
+#define XOP_MSGSND 206
+#define XOP_MSGCLR 238
#define XOP_TLBIVAX 786
#define XOP_TLBSX 914
#define XOP_TLBRE 946
#define XOP_TLBWE 978
+#define XOP_TLBILX 18
+
+#ifdef CONFIG_KVM_E500MC
+static int dbell2prio(ulong param)
+{
+ int msg = param & PPC_DBELL_TYPE_MASK;
+ int prio = -1;
+
+ switch (msg) {
+ case PPC_DBELL_TYPE(PPC_DBELL):
+ prio = BOOKE_IRQPRIO_DBELL;
+ break;
+ case PPC_DBELL_TYPE(PPC_DBELL_CRIT):
+ prio = BOOKE_IRQPRIO_DBELL_CRIT;
+ break;
+ default:
+ break;
+ }
+
+ return prio;
+}
+
+static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
+{
+ ulong param = vcpu->arch.gpr[rb];
+ int prio = dbell2prio(param);
+
+ if (prio < 0)
+ return EMULATE_FAIL;
+
+ clear_bit(prio, &vcpu->arch.pending_exceptions);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
+{
+ ulong param = vcpu->arch.gpr[rb];
+ int prio = dbell2prio(rb);
+ int pir = param & PPC_DBELL_PIR_MASK;
+ int i;
+ struct kvm_vcpu *cvcpu;
+
+ if (prio < 0)
+ return EMULATE_FAIL;
+
+ kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) {
+ int cpir = cvcpu->arch.shared->pir;
+ if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) {
+ set_bit(prio, &cvcpu->arch.pending_exceptions);
+ kvm_vcpu_kick(cvcpu);
+ }
+ }
+
+ return EMULATE_DONE;
+}
+#endif
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
- int ra;
- int rb;
+ int ra = get_ra(inst);
+ int rb = get_rb(inst);
+ int rt = get_rt(inst);
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
+#ifdef CONFIG_KVM_E500MC
+ case XOP_MSGSND:
+ emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
+ break;
+
+ case XOP_MSGCLR:
+ emulated = kvmppc_e500_emul_msgclr(vcpu, rb);
+ break;
+#endif
+
case XOP_TLBRE:
emulated = kvmppc_e500_emul_tlbre(vcpu);
break;
@@ -44,13 +113,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBSX:
- rb = get_rb(inst);
emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
break;
+ case XOP_TLBILX:
+ emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
+ break;
+
case XOP_TLBIVAX:
- ra = get_ra(inst);
- rb = get_rb(inst);
emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
break;
@@ -70,52 +140,63 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
- ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
case SPRN_PID:
kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
if (spr_val != 0)
return EMULATE_FAIL;
- vcpu_e500->pid[1] = spr_val; break;
+ vcpu_e500->pid[1] = spr_val;
+ break;
case SPRN_PID2:
if (spr_val != 0)
return EMULATE_FAIL;
- vcpu_e500->pid[2] = spr_val; break;
+ vcpu_e500->pid[2] = spr_val;
+ break;
case SPRN_MAS0:
- vcpu->arch.shared->mas0 = spr_val; break;
+ vcpu->arch.shared->mas0 = spr_val;
+ break;
case SPRN_MAS1:
- vcpu->arch.shared->mas1 = spr_val; break;
+ vcpu->arch.shared->mas1 = spr_val;
+ break;
case SPRN_MAS2:
- vcpu->arch.shared->mas2 = spr_val; break;
+ vcpu->arch.shared->mas2 = spr_val;
+ break;
case SPRN_MAS3:
vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
vcpu->arch.shared->mas7_3 |= spr_val;
break;
case SPRN_MAS4:
- vcpu->arch.shared->mas4 = spr_val; break;
+ vcpu->arch.shared->mas4 = spr_val;
+ break;
case SPRN_MAS6:
- vcpu->arch.shared->mas6 = spr_val; break;
+ vcpu->arch.shared->mas6 = spr_val;
+ break;
case SPRN_MAS7:
vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
break;
+#endif
case SPRN_L1CSR0:
vcpu_e500->l1csr0 = spr_val;
vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
break;
case SPRN_L1CSR1:
- vcpu_e500->l1csr1 = spr_val; break;
+ vcpu_e500->l1csr1 = spr_val;
+ break;
case SPRN_HID0:
- vcpu_e500->hid0 = spr_val; break;
+ vcpu_e500->hid0 = spr_val;
+ break;
case SPRN_HID1:
- vcpu_e500->hid1 = spr_val; break;
+ vcpu_e500->hid1 = spr_val;
+ break;
case SPRN_MMUCSR0:
emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
@@ -135,81 +216,112 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
-
+#ifdef CONFIG_KVM_BOOKE_HV
+ case SPRN_IVOR36:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val;
+ break;
+ case SPRN_IVOR37:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val;
+ break;
+#endif
default:
- emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
}
return emulated;
}
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
- unsigned long val;
switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
case SPRN_PID:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
+ *spr_val = vcpu_e500->pid[0];
+ break;
case SPRN_PID1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
+ *spr_val = vcpu_e500->pid[1];
+ break;
case SPRN_PID2:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
+ *spr_val = vcpu_e500->pid[2];
+ break;
case SPRN_MAS0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break;
+ *spr_val = vcpu->arch.shared->mas0;
+ break;
case SPRN_MAS1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break;
+ *spr_val = vcpu->arch.shared->mas1;
+ break;
case SPRN_MAS2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break;
+ *spr_val = vcpu->arch.shared->mas2;
+ break;
case SPRN_MAS3:
- val = (u32)vcpu->arch.shared->mas7_3;
- kvmppc_set_gpr(vcpu, rt, val);
+ *spr_val = (u32)vcpu->arch.shared->mas7_3;
break;
case SPRN_MAS4:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break;
+ *spr_val = vcpu->arch.shared->mas4;
+ break;
case SPRN_MAS6:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break;
+ *spr_val = vcpu->arch.shared->mas6;
+ break;
case SPRN_MAS7:
- val = vcpu->arch.shared->mas7_3 >> 32;
- kvmppc_set_gpr(vcpu, rt, val);
+ *spr_val = vcpu->arch.shared->mas7_3 >> 32;
break;
+#endif
case SPRN_TLB0CFG:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
+ *spr_val = vcpu->arch.tlbcfg[0];
+ break;
case SPRN_TLB1CFG:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
+ *spr_val = vcpu->arch.tlbcfg[1];
+ break;
case SPRN_L1CSR0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
+ *spr_val = vcpu_e500->l1csr0;
+ break;
case SPRN_L1CSR1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
+ *spr_val = vcpu_e500->l1csr1;
+ break;
case SPRN_HID0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
+ *spr_val = vcpu_e500->hid0;
+ break;
case SPRN_HID1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
+ *spr_val = vcpu_e500->hid1;
+ break;
case SPRN_SVR:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
+ *spr_val = vcpu_e500->svr;
+ break;
case SPRN_MMUCSR0:
- kvmppc_set_gpr(vcpu, rt, 0); break;
+ *spr_val = 0;
+ break;
case SPRN_MMUCFG:
- kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
+ *spr_val = vcpu->arch.mmucfg;
+ break;
/* extra exceptions */
case SPRN_IVOR32:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
break;
case SPRN_IVOR33:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
break;
case SPRN_IVOR34:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
break;
case SPRN_IVOR35:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ break;
+#ifdef CONFIG_KVM_BOOKE_HV
+ case SPRN_IVOR36:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+ break;
+ case SPRN_IVOR37:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
break;
+#endif
default:
- emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
}
return emulated;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6e53e4164de1..c510fc961302 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -2,6 +2,9 @@
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, yu.liu@freescale.com
+ * Scott Wood, scottwood@freescale.com
+ * Ashish Kalra, ashish.kalra@freescale.com
+ * Varun Sethi, varun.sethi@freescale.com
*
* Description:
* This file is based on arch/powerpc/kvm/44x_tlb.c,
@@ -26,210 +29,15 @@
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <asm/kvm_ppc.h>
-#include <asm/kvm_e500.h>
-#include "../mm/mmu_decl.h"
-#include "e500_tlb.h"
+#include "e500.h"
#include "trace.h"
#include "timing.h"
#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
-struct id {
- unsigned long val;
- struct id **pentry;
-};
-
-#define NUM_TIDS 256
-
-/*
- * This table provide mappings from:
- * (guestAS,guestTID,guestPR) --> ID of physical cpu
- * guestAS [0..1]
- * guestTID [0..255]
- * guestPR [0..1]
- * ID [1..255]
- * Each vcpu keeps one vcpu_id_table.
- */
-struct vcpu_id_table {
- struct id id[2][NUM_TIDS][2];
-};
-
-/*
- * This table provide reversed mappings of vcpu_id_table:
- * ID --> address of vcpu_id_table item.
- * Each physical core has one pcpu_id_table.
- */
-struct pcpu_id_table {
- struct id *entry[NUM_TIDS];
-};
-
-static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
-
-/* This variable keeps last used shadow ID on local core.
- * The valid range of shadow ID is [1..255] */
-static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
-
static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
-static struct kvm_book3e_206_tlb_entry *get_entry(
- struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
-{
- int offset = vcpu_e500->gtlb_offset[tlbsel];
- return &vcpu_e500->gtlb_arch[offset + entry];
-}
-
-/*
- * Allocate a free shadow id and setup a valid sid mapping in given entry.
- * A mapping is only valid when vcpu_id_table and pcpu_id_table are match.
- *
- * The caller must have preemption disabled, and keep it that way until
- * it has finished with the returned shadow id (either written into the
- * TLB or arch.shadow_pid, or discarded).
- */
-static inline int local_sid_setup_one(struct id *entry)
-{
- unsigned long sid;
- int ret = -1;
-
- sid = ++(__get_cpu_var(pcpu_last_used_sid));
- if (sid < NUM_TIDS) {
- __get_cpu_var(pcpu_sids).entry[sid] = entry;
- entry->val = sid;
- entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
- ret = sid;
- }
-
- /*
- * If sid == NUM_TIDS, we've run out of sids. We return -1, and
- * the caller will invalidate everything and start over.
- *
- * sid > NUM_TIDS indicates a race, which we disable preemption to
- * avoid.
- */
- WARN_ON(sid > NUM_TIDS);
-
- return ret;
-}
-
-/*
- * Check if given entry contain a valid shadow id mapping.
- * An ID mapping is considered valid only if
- * both vcpu and pcpu know this mapping.
- *
- * The caller must have preemption disabled, and keep it that way until
- * it has finished with the returned shadow id (either written into the
- * TLB or arch.shadow_pid, or discarded).
- */
-static inline int local_sid_lookup(struct id *entry)
-{
- if (entry && entry->val != 0 &&
- __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
- entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
- return entry->val;
- return -1;
-}
-
-/* Invalidate all id mappings on local core -- call with preempt disabled */
-static inline void local_sid_destroy_all(void)
-{
- __get_cpu_var(pcpu_last_used_sid) = 0;
- memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
-}
-
-static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL);
- return vcpu_e500->idt;
-}
-
-static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- kfree(vcpu_e500->idt);
-}
-
-/* Invalidate all mappings on vcpu */
-static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table));
-
- /* Update shadow pid when mappings are changed */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
-}
-
-/* Invalidate one ID mapping on vcpu */
-static inline void kvmppc_e500_id_table_reset_one(
- struct kvmppc_vcpu_e500 *vcpu_e500,
- int as, int pid, int pr)
-{
- struct vcpu_id_table *idt = vcpu_e500->idt;
-
- BUG_ON(as >= 2);
- BUG_ON(pid >= NUM_TIDS);
- BUG_ON(pr >= 2);
-
- idt->id[as][pid][pr].val = 0;
- idt->id[as][pid][pr].pentry = NULL;
-
- /* Update shadow pid when mappings are changed */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
-}
-
-/*
- * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
- * This function first lookup if a valid mapping exists,
- * if not, then creates a new one.
- *
- * The caller must have preemption disabled, and keep it that way until
- * it has finished with the returned shadow id (either written into the
- * TLB or arch.shadow_pid, or discarded).
- */
-static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
- unsigned int as, unsigned int gid,
- unsigned int pr, int avoid_recursion)
-{
- struct vcpu_id_table *idt = vcpu_e500->idt;
- int sid;
-
- BUG_ON(as >= 2);
- BUG_ON(gid >= NUM_TIDS);
- BUG_ON(pr >= 2);
-
- sid = local_sid_lookup(&idt->id[as][gid][pr]);
-
- while (sid <= 0) {
- /* No mapping yet */
- sid = local_sid_setup_one(&idt->id[as][gid][pr]);
- if (sid <= 0) {
- _tlbil_all();
- local_sid_destroy_all();
- }
-
- /* Update shadow pid when mappings are changed */
- if (!avoid_recursion)
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
- }
-
- return sid;
-}
-
-/* Map guest pid to shadow.
- * We use PID to keep shadow of current guest non-zero PID,
- * and use PID1 to keep shadow of guest zero PID.
- * So that guest tlbe with TID=0 can be accessed at any time */
-void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- preempt_disable();
- vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500,
- get_cur_as(&vcpu_e500->vcpu),
- get_cur_pid(&vcpu_e500->vcpu),
- get_cur_pr(&vcpu_e500->vcpu), 1);
- vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500,
- get_cur_as(&vcpu_e500->vcpu), 0,
- get_cur_pr(&vcpu_e500->vcpu), 1);
- preempt_enable();
-}
-
static inline unsigned int gtlb0_get_next_victim(
struct kvmppc_vcpu_e500 *vcpu_e500)
{
@@ -258,6 +66,7 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
/* Mask off reserved bits. */
mas3 &= MAS3_ATTRIB_MASK;
+#ifndef CONFIG_KVM_BOOKE_HV
if (!usermode) {
/* Guest is in supervisor mode,
* so we need to translate guest
@@ -265,8 +74,9 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
mas3 &= ~E500_TLB_USER_PERM_MASK;
mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
}
-
- return mas3 | E500_TLB_SUPER_PERM_MASK;
+ mas3 |= E500_TLB_SUPER_PERM_MASK;
+#endif
+ return mas3;
}
static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
@@ -292,7 +102,16 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_MAS8, stlbe->mas8);
+#endif
asm volatile("isync; tlbwe" : : : "memory");
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ /* Must clear mas8 for other host tlbwe's */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+#endif
local_irq_restore(flags);
trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
@@ -337,6 +156,7 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
}
}
+#ifdef CONFIG_KVM_E500V2
void kvmppc_map_magic(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
@@ -361,75 +181,41 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
preempt_enable();
}
-
-void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-
- /* Shadow PID may be expired on local core */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
-}
-
-void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
-{
-}
+#endif
static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
int tlbsel, int esel)
{
struct kvm_book3e_206_tlb_entry *gtlbe =
get_entry(vcpu_e500, tlbsel, esel);
- struct vcpu_id_table *idt = vcpu_e500->idt;
- unsigned int pr, tid, ts, pid;
- u32 val, eaddr;
- unsigned long flags;
-
- ts = get_tlb_ts(gtlbe);
- tid = get_tlb_tid(gtlbe);
-
- preempt_disable();
-
- /* One guest ID may be mapped to two shadow IDs */
- for (pr = 0; pr < 2; pr++) {
- /*
- * The shadow PID can have a valid mapping on at most one
- * host CPU. In the common case, it will be valid on this
- * CPU, in which case (for TLB0) we do a local invalidation
- * of the specific address.
- *
- * If the shadow PID is not valid on the current host CPU, or
- * if we're invalidating a TLB1 entry, we invalidate the
- * entire shadow PID.
- */
- if (tlbsel == 1 ||
- (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) {
- kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
- continue;
- }
- /*
- * The guest is invalidating a TLB0 entry which is in a PID
- * that has a valid shadow mapping on this host CPU. We
- * search host TLB0 to invalidate it's shadow TLB entry,
- * similar to __tlbil_va except that we need to look in AS1.
- */
- val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
- eaddr = get_tlb_eaddr(gtlbe);
+ if (tlbsel == 1 &&
+ vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) {
+ u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
+ int hw_tlb_indx;
+ unsigned long flags;
local_irq_save(flags);
-
- mtspr(SPRN_MAS6, val);
- asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
- val = mfspr(SPRN_MAS1);
- if (val & MAS1_VALID) {
- mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ while (tmp) {
+ hw_tlb_indx = __ilog2_u64(tmp & -tmp);
+ mtspr(SPRN_MAS0,
+ MAS0_TLBSEL(1) |
+ MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
+ mtspr(SPRN_MAS1, 0);
asm volatile("tlbwe");
+ vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
+ tmp &= tmp - 1;
}
-
+ mb();
+ vcpu_e500->g2h_tlb1_map[esel] = 0;
+ vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP;
local_irq_restore(flags);
+
+ return;
}
- preempt_enable();
+ /* Guest tlbe is backed by at most one host tlbe per shadow pid. */
+ kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
}
static int tlb0_set_base(gva_t addr, int sets, int ways)
@@ -475,6 +261,9 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
set_base = gtlb0_set_base(vcpu_e500, eaddr);
size = vcpu_e500->gtlb_params[0].ways;
} else {
+ if (eaddr < vcpu_e500->tlb1_min_eaddr ||
+ eaddr > vcpu_e500->tlb1_max_eaddr)
+ return -1;
set_base = 0;
}
@@ -530,6 +319,16 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
}
}
+static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ if (vcpu_e500->g2h_tlb1_map)
+ memset(vcpu_e500->g2h_tlb1_map,
+ sizeof(u64) * vcpu_e500->gtlb_params[1].entries, 0);
+ if (vcpu_e500->h2g_tlb1_rmap)
+ memset(vcpu_e500->h2g_tlb1_rmap,
+ sizeof(unsigned int) * host_tlb_params[1].entries, 0);
+}
+
static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
{
int tlbsel = 0;
@@ -547,7 +346,7 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
int stlbsel = 1;
int i;
- kvmppc_e500_id_table_reset_all(vcpu_e500);
+ kvmppc_e500_tlbil_all(vcpu_e500);
for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
struct tlbe_ref *ref =
@@ -562,19 +361,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
unsigned int eaddr, int as)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- unsigned int victim, pidsel, tsized;
+ unsigned int victim, tsized;
int tlbsel;
/* since we only have two TLBs, only lower bit is used. */
tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
- pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf;
tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
- | MAS1_TID(vcpu_e500->pid[pidsel])
+ | MAS1_TID(get_tlbmiss_tid(vcpu))
| MAS1_TSIZE(tsized);
vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
| (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
@@ -586,23 +384,26 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
/* TID must be supplied by the caller */
static inline void kvmppc_e500_setup_stlbe(
- struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_entry *gtlbe,
int tsize, struct tlbe_ref *ref, u64 gvaddr,
struct kvm_book3e_206_tlb_entry *stlbe)
{
pfn_t pfn = ref->pfn;
+ u32 pr = vcpu->arch.shared->msr & MSR_PR;
BUG_ON(!(ref->flags & E500_TLB_VALID));
- /* Force TS=1 IPROT=0 for all guest mappings. */
- stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID;
- stlbe->mas2 = (gvaddr & MAS2_EPN)
- | e500_shadow_mas2_attrib(gtlbe->mas2,
- vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
- | e500_shadow_mas3_attrib(gtlbe->mas7_3,
- vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
+ /* Force IPROT=0 for all guest mappings. */
+ stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
+ stlbe->mas2 = (gvaddr & MAS2_EPN) |
+ e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+ stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+ e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
+#endif
}
static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -736,7 +537,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
kvmppc_e500_ref_release(ref);
kvmppc_e500_ref_setup(ref, gtlbe, pfn);
- kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe);
+ kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
+ ref, gvaddr, stlbe);
}
/* XXX only map the one-one case, for now use TLB0 */
@@ -760,7 +562,7 @@ static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
/* XXX for both one-one and one-to-many , for now use TLB1 */
static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
- struct kvm_book3e_206_tlb_entry *stlbe)
+ struct kvm_book3e_206_tlb_entry *stlbe, int esel)
{
struct tlbe_ref *ref;
unsigned int victim;
@@ -773,15 +575,74 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
ref = &vcpu_e500->tlb_refs[1][victim];
kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
+ vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim;
+ vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
+ if (vcpu_e500->h2g_tlb1_rmap[victim]) {
+ unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim];
+ vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim);
+ }
+ vcpu_e500->h2g_tlb1_rmap[victim] = esel;
+
return victim;
}
-void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int size = vcpu_e500->gtlb_params[1].entries;
+ unsigned int offset;
+ gva_t eaddr;
+ int i;
+
+ vcpu_e500->tlb1_min_eaddr = ~0UL;
+ vcpu_e500->tlb1_max_eaddr = 0;
+ offset = vcpu_e500->gtlb_offset[1];
+
+ for (i = 0; i < size; i++) {
+ struct kvm_book3e_206_tlb_entry *tlbe =
+ &vcpu_e500->gtlb_arch[offset + i];
+
+ if (!get_tlb_v(tlbe))
+ continue;
+
+ eaddr = get_tlb_eaddr(tlbe);
+ vcpu_e500->tlb1_min_eaddr =
+ min(vcpu_e500->tlb1_min_eaddr, eaddr);
+
+ eaddr = get_tlb_end(tlbe);
+ vcpu_e500->tlb1_max_eaddr =
+ max(vcpu_e500->tlb1_max_eaddr, eaddr);
+ }
+}
+
+static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
{
+ unsigned long start, end, size;
+
+ size = get_tlb_bytes(gtlbe);
+ start = get_tlb_eaddr(gtlbe) & ~(size - 1);
+ end = start + size - 1;
+
+ return vcpu_e500->tlb1_min_eaddr == start ||
+ vcpu_e500->tlb1_max_eaddr == end;
+}
+
+/* This function is supposed to be called for a adding a new valid tlb entry */
+static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned long start, end, size;
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- /* Recalc shadow pid since MSR changes */
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+ if (!get_tlb_v(gtlbe))
+ return;
+
+ size = get_tlb_bytes(gtlbe);
+ start = get_tlb_eaddr(gtlbe) & ~(size - 1);
+ end = start + size - 1;
+
+ vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start);
+ vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end);
}
static inline int kvmppc_e500_gtlbe_invalidate(
@@ -794,6 +655,9 @@ static inline int kvmppc_e500_gtlbe_invalidate(
if (unlikely(get_tlb_iprot(gtlbe)))
return -1;
+ if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+
gtlbe->mas1 = 0;
return 0;
@@ -811,7 +675,7 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
/* Invalidate all vcpu id mappings */
- kvmppc_e500_id_table_reset_all(vcpu_e500);
+ kvmppc_e500_tlbil_all(vcpu_e500);
return EMULATE_DONE;
}
@@ -844,7 +708,59 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
}
/* Invalidate all vcpu id mappings */
- kvmppc_e500_id_table_reset_all(vcpu_e500);
+ kvmppc_e500_tlbil_all(vcpu_e500);
+
+ return EMULATE_DONE;
+}
+
+static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+ int pid, int rt)
+{
+ struct kvm_book3e_206_tlb_entry *tlbe;
+ int tid, esel;
+
+ /* invalidate all entries */
+ for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
+ tlbe = get_entry(vcpu_e500, tlbsel, esel);
+ tid = get_tlb_tid(tlbe);
+ if (rt == 0 || tid == pid) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+ }
+}
+
+static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
+ int ra, int rb)
+{
+ int tlbsel, esel;
+ gva_t ea;
+
+ ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
+ if (ra)
+ ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
+ if (esel >= 0) {
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ break;
+ }
+ }
+}
+
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int pid = get_cur_spid(vcpu);
+
+ if (rt == 0 || rt == 1) {
+ tlbilx_all(vcpu_e500, 0, pid, rt);
+ tlbilx_all(vcpu_e500, 1, pid, rt);
+ } else if (rt == 3) {
+ tlbilx_one(vcpu_e500, pid, ra, rb);
+ }
return EMULATE_DONE;
}
@@ -929,9 +845,7 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
int stid;
preempt_disable();
- stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
- get_tlb_tid(gtlbe),
- get_cur_pr(&vcpu_e500->vcpu), 0);
+ stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
stlbe->mas1 |= MAS1_TID(stid);
write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
@@ -941,16 +855,21 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct kvm_book3e_206_tlb_entry *gtlbe;
- int tlbsel, esel;
+ struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
+ int tlbsel, esel, stlbsel, sesel;
+ int recal = 0;
tlbsel = get_tlb_tlbsel(vcpu);
esel = get_tlb_esel(vcpu, tlbsel);
gtlbe = get_entry(vcpu_e500, tlbsel, esel);
- if (get_tlb_v(gtlbe))
+ if (get_tlb_v(gtlbe)) {
inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+ if ((tlbsel == 1) &&
+ kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+ recal = 1;
+ }
gtlbe->mas1 = vcpu->arch.shared->mas1;
gtlbe->mas2 = vcpu->arch.shared->mas2;
@@ -959,10 +878,20 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
gtlbe->mas2, gtlbe->mas7_3);
+ if (tlbsel == 1) {
+ /*
+ * If a valid tlb1 entry is overwritten then recalculate the
+ * min/max TLB1 map address range otherwise no need to look
+ * in tlb1 array.
+ */
+ if (recal)
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
+ else
+ kvmppc_set_tlb1map_range(vcpu, gtlbe);
+ }
+
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
- struct kvm_book3e_206_tlb_entry stlbe;
- int stlbsel, sesel;
u64 eaddr;
u64 raddr;
@@ -989,7 +918,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
* are mapped on the fly. */
stlbsel = 1;
sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
- raddr >> PAGE_SHIFT, gtlbe, &stlbe);
+ raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel);
break;
default:
@@ -1003,6 +932,48 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+ gva_t eaddr, unsigned int pid, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int esel, tlbsel;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
+ if (esel >= 0)
+ return index_of(tlbsel, esel);
+ }
+
+ return -1;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+ if (index < 0) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+
int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
@@ -1066,7 +1037,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
sesel = 0; /* unused */
priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
- kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
+ kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
&priv->ref, eaddr, &stlbe);
break;
@@ -1075,7 +1046,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
stlbsel = 1;
sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
- gtlbe, &stlbe);
+ gtlbe, &stlbe, esel);
break;
}
@@ -1087,52 +1058,13 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}
-int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
- gva_t eaddr, unsigned int pid, int as)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int esel, tlbsel;
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
- if (esel >= 0)
- return index_of(tlbsel, esel);
- }
-
- return -1;
-}
-
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-
- if (vcpu->arch.pid != pid) {
- vcpu_e500->pid[0] = vcpu->arch.pid = pid;
- kvmppc_e500_recalc_shadow_pid(vcpu_e500);
- }
-}
-
-void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- struct kvm_book3e_206_tlb_entry *tlbe;
-
- /* Insert large initial mapping for guest. */
- tlbe = get_entry(vcpu_e500, 1, 0);
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
- tlbe->mas2 = 0;
- tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
-
- /* 4K map for serial output. Used by kernel wrapper. */
- tlbe = get_entry(vcpu_e500, 1, 1);
- tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
- tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
- tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
-}
-
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
{
int i;
+ clear_tlb1_bitmap(vcpu_e500);
+ kfree(vcpu_e500->g2h_tlb1_map);
+
clear_tlb_refs(vcpu_e500);
kfree(vcpu_e500->gtlb_priv[0]);
kfree(vcpu_e500->gtlb_priv[1]);
@@ -1155,6 +1087,36 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
vcpu_e500->gtlb_arch = NULL;
}
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ sregs->u.e.mas0 = vcpu->arch.shared->mas0;
+ sregs->u.e.mas1 = vcpu->arch.shared->mas1;
+ sregs->u.e.mas2 = vcpu->arch.shared->mas2;
+ sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
+ sregs->u.e.mas4 = vcpu->arch.shared->mas4;
+ sregs->u.e.mas6 = vcpu->arch.shared->mas6;
+
+ sregs->u.e.mmucfg = vcpu->arch.mmucfg;
+ sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0];
+ sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1];
+ sregs->u.e.tlbcfg[2] = 0;
+ sregs->u.e.tlbcfg[3] = 0;
+}
+
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+ vcpu->arch.shared->mas0 = sregs->u.e.mas0;
+ vcpu->arch.shared->mas1 = sregs->u.e.mas1;
+ vcpu->arch.shared->mas2 = sregs->u.e.mas2;
+ vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
+ vcpu->arch.shared->mas4 = sregs->u.e.mas4;
+ vcpu->arch.shared->mas6 = sregs->u.e.mas6;
+ }
+
+ return 0;
+}
+
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg)
{
@@ -1163,6 +1125,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
char *virt;
struct page **pages;
struct tlbe_priv *privs[2] = {};
+ u64 *g2h_bitmap = NULL;
size_t array_len;
u32 sets;
int num_pages, ret, i;
@@ -1224,10 +1187,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
if (!privs[0] || !privs[1])
goto err_put_page;
+ g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
+ GFP_KERNEL);
+ if (!g2h_bitmap)
+ goto err_put_page;
+
free_gtlb(vcpu_e500);
vcpu_e500->gtlb_priv[0] = privs[0];
vcpu_e500->gtlb_priv[1] = privs[1];
+ vcpu_e500->g2h_tlb1_map = g2h_bitmap;
vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
(virt + (cfg->array & (PAGE_SIZE - 1)));
@@ -1238,14 +1207,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
- vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+ vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
if (params.tlb_sizes[0] <= 2048)
- vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
- vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+ vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
+ vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
- vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
- vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+ vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
+ vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
vcpu_e500->shared_tlb_pages = pages;
vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -1256,6 +1227,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
vcpu_e500->gtlb_params[1].sets = 1;
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
err_put_page:
@@ -1274,13 +1246,14 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
struct kvm_dirty_tlb *dirty)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
clear_tlb_refs(vcpu_e500);
return 0;
}
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
+ struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
@@ -1357,22 +1330,32 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->gtlb_priv[1])
goto err;
- if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
+ vcpu_e500->gtlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->g2h_tlb1_map)
+ goto err;
+
+ vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
+ host_tlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->h2g_tlb1_rmap)
goto err;
/* Init TLB configuration register */
- vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) &
+ vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries;
- vcpu_e500->tlb0cfg |=
+ vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
+ vcpu->arch.tlbcfg[0] |=
vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
- vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) &
+ vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
- vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries;
- vcpu_e500->tlb0cfg |=
+ vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
+ vcpu->arch.tlbcfg[1] |=
vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+ kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
err:
@@ -1385,8 +1368,7 @@ err:
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
{
free_gtlb(vcpu_e500);
- kvmppc_e500_id_table_free(vcpu_e500);
-
+ kfree(vcpu_e500->h2g_tlb1_rmap);
kfree(vcpu_e500->tlb_refs[0]);
kfree(vcpu_e500->tlb_refs[1]);
}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
deleted file mode 100644
index 5c6d2d7bf058..000000000000
--- a/arch/powerpc/kvm/e500_tlb.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Yu Liu, yu.liu@freescale.com
- *
- * Description:
- * This file is based on arch/powerpc/kvm/44x_tlb.h,
- * by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __KVM_E500_TLB_H__
-#define __KVM_E500_TLB_H__
-
-#include <linux/kvm_host.h>
-#include <asm/mmu-book3e.h>
-#include <asm/tlb.h>
-#include <asm/kvm_e500.h>
-
-/* This geometry is the legacy default -- can be overridden by userspace */
-#define KVM_E500_TLB0_WAY_SIZE 128
-#define KVM_E500_TLB0_WAY_NUM 2
-
-#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
-#define KVM_E500_TLB1_SIZE 16
-
-#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
-#define tlbsel_of(index) ((index) >> 16)
-#define esel_of(index) ((index) & 0xFFFF)
-
-#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
-#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
-#define MAS2_ATTRIB_MASK \
- (MAS2_X0 | MAS2_X1)
-#define MAS3_ATTRIB_MASK \
- (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
- | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
-
-extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong);
-extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
-extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
-extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
-extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
-extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
-extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
-extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
-extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
-
-/* TLB helper functions */
-static inline unsigned int
-get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 7) & 0x1f;
-}
-
-static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return tlbe->mas2 & 0xfffff000;
-}
-
-static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- unsigned int pgsize = get_tlb_size(tlbe);
- return 1ULL << 10 << pgsize;
-}
-
-static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- u64 bytes = get_tlb_bytes(tlbe);
- return get_tlb_eaddr(tlbe) + bytes - 1;
-}
-
-static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return tlbe->mas7_3 & ~0xfffULL;
-}
-
-static inline unsigned int
-get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 16) & 0xff;
-}
-
-static inline unsigned int
-get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 12) & 0x1;
-}
-
-static inline unsigned int
-get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 31) & 0x1;
-}
-
-static inline unsigned int
-get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- return (tlbe->mas1 >> 30) & 0x1;
-}
-
-static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.pid & 0xff;
-}
-
-static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
-{
- return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
-}
-
-static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
-{
- return !!(vcpu->arch.shared->msr & MSR_PR);
-}
-
-static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
-{
- return (vcpu->arch.shared->mas6 >> 16) & 0xff;
-}
-
-static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.shared->mas6 & 0x1;
-}
-
-static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
-{
- /*
- * Manual says that tlbsel has 2 bits wide.
- * Since we only have two TLBs, only lower bit is used.
- */
- return (vcpu->arch.shared->mas0 >> 28) & 0x1;
-}
-
-static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.shared->mas0 & 0xfff;
-}
-
-static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
-{
- return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
-}
-
-static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct kvm_book3e_206_tlb_entry *tlbe)
-{
- gpa_t gpa;
-
- if (!get_tlb_v(tlbe))
- return 0;
-
- /* Does it match current guest AS? */
- /* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
- return 0;
-
- gpa = get_tlb_raddr(tlbe);
- if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
- /* Mapping is not for RAM. */
- return 0;
-
- return 1;
-}
-
-#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
new file mode 100644
index 000000000000..fe6c1de6b701
--- /dev/null
+++ b/arch/powerpc/kvm/e500mc.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Varun Sethi, <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/e500.c,
+ * by Yu Liu <yu.liu@freescale.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+
+#include "booke.h"
+#include "e500.h"
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
+{
+ enum ppc_dbell dbell_type;
+ unsigned long tag;
+
+ switch (type) {
+ case INT_CLASS_NONCRIT:
+ dbell_type = PPC_G_DBELL;
+ break;
+ case INT_CLASS_CRIT:
+ dbell_type = PPC_G_DBELL_CRIT;
+ break;
+ case INT_CLASS_MC:
+ dbell_type = PPC_G_DBELL_MC;
+ break;
+ default:
+ WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type);
+ return;
+ }
+
+
+ tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
+ mb();
+ ppc_msgsnd(dbell_type, 0, tag);
+}
+
+/* gtlbe must not be mapped by more than one host tlb entry */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+ unsigned int tid, ts;
+ u32 val, eaddr, lpid;
+ unsigned long flags;
+
+ ts = get_tlb_ts(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+ lpid = vcpu_e500->vcpu.kvm->arch.lpid;
+
+ /* We search the host TLB to invalidate its shadow TLB entry */
+ val = (tid << 16) | ts;
+ eaddr = get_tlb_eaddr(gtlbe);
+
+ local_irq_save(flags);
+
+ mtspr(SPRN_MAS6, val);
+ mtspr(SPRN_MAS5, MAS5_SGS | lpid);
+
+ asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
+ val = mfspr(SPRN_MAS1);
+ if (val & MAS1_VALID) {
+ mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+ asm volatile("tlbwe");
+ }
+ mtspr(SPRN_MAS5, 0);
+ /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */
+ mtspr(SPRN_MAS8, 0);
+ isync();
+
+ local_irq_restore(flags);
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
+ asm volatile("tlbilxlpid");
+ mtspr(SPRN_MAS5, 0);
+ local_irq_restore(flags);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+ vcpu->arch.pid = pid;
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_booke_vcpu_load(vcpu, cpu);
+
+ mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
+ mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+ mtspr(SPRN_GPIR, vcpu->vcpu_id);
+ mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+ mtspr(SPRN_EPLC, vcpu->arch.eplc);
+ mtspr(SPRN_EPSC, vcpu->arch.epsc);
+
+ mtspr(SPRN_GIVPR, vcpu->arch.ivpr);
+ mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+ mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+ mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0);
+ mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1);
+ mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2);
+ mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3);
+
+ mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0);
+ mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1);
+
+ mtspr(SPRN_GEPR, vcpu->arch.epr);
+ mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
+ mtspr(SPRN_GESR, vcpu->arch.shared->esr);
+
+ if (vcpu->arch.oldpir != mfspr(SPRN_PIR))
+ kvmppc_e500_tlbil_all(vcpu_e500);
+
+ kvmppc_load_guest_fp(vcpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.eplc = mfspr(SPRN_EPLC);
+ vcpu->arch.epsc = mfspr(SPRN_EPSC);
+
+ vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0);
+ vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1);
+ vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2);
+ vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3);
+
+ vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0);
+ vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1);
+
+ vcpu->arch.epr = mfspr(SPRN_GEPR);
+ vcpu->arch.shared->dar = mfspr(SPRN_GDEAR);
+ vcpu->arch.shared->esr = mfspr(SPRN_GESR);
+
+ vcpu->arch.oldpir = mfspr(SPRN_PIR);
+
+ kvmppc_booke_vcpu_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0)
+ r = 0;
+ else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
+ SPRN_EPCR_DUVD;
+ vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
+ vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
+ vcpu->arch.epsc = vcpu->arch.eplc;
+
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+ vcpu_e500->svr = mfspr(SPRN_SVR);
+
+ vcpu->arch.cpu_type = KVM_CPU_E500MC;
+
+ return 0;
+}
+
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM |
+ KVM_SREGS_E_PC;
+ sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+ sregs->u.e.impl.fsl.features = 0;
+ sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+ sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+ sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+ kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+
+ sregs->u.e.ivor_high[3] =
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+ sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
+
+ kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int ret;
+
+ if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+ vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+ vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+ vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+ }
+
+ ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+ if (ret < 0)
+ return ret;
+
+ if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+ return 0;
+
+ if (sregs->u.e.features & KVM_SREGS_E_PM) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+ sregs->u.e.ivor_high[3];
+ }
+
+ if (sregs->u.e.features & KVM_SREGS_E_PC) {
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] =
+ sregs->u.e.ivor_high[4];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] =
+ sregs->u.e.ivor_high[5];
+ }
+
+ return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_e500) {
+ err = -ENOMEM;
+ goto out;
+ }
+ vcpu = &vcpu_e500->vcpu;
+
+ /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
+ vcpu->arch.oldpir = 0xffffffff;
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ err = kvmppc_e500_tlb_init(vcpu_e500);
+ if (err)
+ goto uninit_vcpu;
+
+ vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.shared)
+ goto uninit_tlb;
+
+ return vcpu;
+
+uninit_tlb:
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ free_page((unsigned long)vcpu->arch.shared);
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+ int lpid;
+
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ return lpid;
+
+ kvm->arch.lpid = lpid;
+ return 0;
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+ kvmppc_free_lpid(kvm->arch.lpid);
+}
+
+static int __init kvmppc_e500mc_init(void)
+{
+ int r;
+
+ r = kvmppc_booke_init();
+ if (r)
+ return r;
+
+ kvmppc_init_lpid(64);
+ kvmppc_claim_lpid(0); /* host */
+
+ return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+}
+
+static void __exit kvmppc_e500mc_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500mc_init);
+module_exit(kvmppc_e500mc_exit);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 968f40101883..f90e86dea7a2 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm_host.h>
+#include <linux/clockchips.h>
#include <asm/reg.h>
#include <asm/time.h>
@@ -35,7 +36,9 @@
#define OP_TRAP 3
#define OP_TRAP_64 2
+#define OP_31_XOP_TRAP 4
#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_TRAP_64 68
#define OP_31_XOP_LBZX 87
#define OP_31_XOP_STWX 151
#define OP_31_XOP_STBX 215
@@ -102,8 +105,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
*/
dec_time = vcpu->arch.dec;
- dec_time *= 1000;
- do_div(dec_time, tb_ticks_per_usec);
+ /*
+ * Guest timebase ticks at the same frequency as host decrementer.
+ * So use the host decrementer calculations for decrementer emulation.
+ */
+ dec_time = dec_time << decrementer_clockevent.shift;
+ do_div(dec_time, decrementer_clockevent.mult);
dec_nsec = do_div(dec_time, NSEC_PER_SEC);
hrtimer_start(&vcpu->arch.dec_timer,
ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
@@ -141,14 +148,13 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = kvmppc_get_last_inst(vcpu);
- u32 ea;
- int ra;
- int rb;
- int rs;
- int rt;
- int sprn;
+ int ra = get_ra(inst);
+ int rs = get_rs(inst);
+ int rt = get_rt(inst);
+ int sprn = get_sprn(inst);
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
+ ulong spr_val = 0;
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -170,173 +176,143 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 31:
switch (get_xop(inst)) {
+ case OP_31_XOP_TRAP:
+#ifdef CONFIG_64BIT
+ case OP_31_XOP_TRAP_64:
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
+#else
+ kvmppc_core_queue_program(vcpu,
+ vcpu->arch.shared->esr | ESR_PTR);
+#endif
+ advance = 0;
+ break;
case OP_31_XOP_LWZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
case OP_31_XOP_LBZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case OP_31_XOP_LBZUX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_STWX:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_31_XOP_STBX:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_31_XOP_STBUX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
- kvmppc_set_gpr(vcpu, rs, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_LHAX:
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
case OP_31_XOP_LHZX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
case OP_31_XOP_LHZUX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_MFSPR:
- sprn = get_sprn(inst);
- rt = get_rt(inst);
-
switch (sprn) {
case SPRN_SRR0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0);
+ spr_val = vcpu->arch.shared->srr0;
break;
case SPRN_SRR1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1);
+ spr_val = vcpu->arch.shared->srr1;
break;
case SPRN_PVR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
+ spr_val = vcpu->arch.pvr;
+ break;
case SPRN_PIR:
- kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
+ spr_val = vcpu->vcpu_id;
+ break;
case SPRN_MSSSR0:
- kvmppc_set_gpr(vcpu, rt, 0); break;
+ spr_val = 0;
+ break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
* In fact, we probably will never see these traps. */
case SPRN_TBWL:
- kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
+ spr_val = get_tb() >> 32;
+ break;
case SPRN_TBWU:
- kvmppc_set_gpr(vcpu, rt, get_tb()); break;
+ spr_val = get_tb();
+ break;
case SPRN_SPRG0:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0);
+ spr_val = vcpu->arch.shared->sprg0;
break;
case SPRN_SPRG1:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1);
+ spr_val = vcpu->arch.shared->sprg1;
break;
case SPRN_SPRG2:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2);
+ spr_val = vcpu->arch.shared->sprg2;
break;
case SPRN_SPRG3:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3);
+ spr_val = vcpu->arch.shared->sprg3;
break;
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
case SPRN_DEC:
- {
- kvmppc_set_gpr(vcpu, rt,
- kvmppc_get_dec(vcpu, get_tb()));
+ spr_val = kvmppc_get_dec(vcpu, get_tb());
break;
- }
default:
- emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
- if (emulated == EMULATE_FAIL) {
- printk("mfspr: unknown spr %x\n", sprn);
- kvmppc_set_gpr(vcpu, rt, 0);
+ emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
+ &spr_val);
+ if (unlikely(emulated == EMULATE_FAIL)) {
+ printk(KERN_INFO "mfspr: unknown spr "
+ "0x%x\n", sprn);
}
break;
}
+ kvmppc_set_gpr(vcpu, rt, spr_val);
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
break;
case OP_31_XOP_STHX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_31_XOP_STHUX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
- ea = kvmppc_get_gpr(vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(vcpu, ra);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
- kvmppc_set_gpr(vcpu, ra, ea);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_MTSPR:
- sprn = get_sprn(inst);
- rs = get_rs(inst);
+ spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SRR0:
- vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->srr0 = spr_val;
break;
case SPRN_SRR1:
- vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->srr1 = spr_val;
break;
/* XXX We need to context-switch the timebase for
@@ -347,27 +323,29 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_MSSSR0: break;
case SPRN_DEC:
- vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.dec = spr_val;
kvmppc_emulate_dec(vcpu);
break;
case SPRN_SPRG0:
- vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg0 = spr_val;
break;
case SPRN_SPRG1:
- vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg1 = spr_val;
break;
case SPRN_SPRG2:
- vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg2 = spr_val;
break;
case SPRN_SPRG3:
- vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs);
+ vcpu->arch.shared->sprg3 = spr_val;
break;
default:
- emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+ emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
+ spr_val);
if (emulated == EMULATE_FAIL)
- printk("mtspr: unknown spr %x\n", sprn);
+ printk(KERN_INFO "mtspr: unknown spr "
+ "0x%x\n", sprn);
break;
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
@@ -382,7 +360,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_LWBRX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
break;
@@ -390,25 +367,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_STWBRX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 0);
break;
case OP_31_XOP_LHBRX:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
break;
case OP_31_XOP_STHBRX:
- rs = get_rs(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
-
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 0);
@@ -421,99 +389,78 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_LWZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
case OP_LWZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LBZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case OP_LBZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STW:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_STWU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STB:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_STBU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LHZ:
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
case OP_LHZU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_LHA:
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
case OP_LHAU:
- ra = get_ra(inst);
- rt = get_rt(inst);
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STH:
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_STHU:
- ra = get_ra(inst);
- rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
default:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 00d7e345b3fe..1493c8de947b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -43,6 +43,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
v->requests;
}
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
int nr = kvmppc_get_gpr(vcpu, 11);
@@ -74,7 +79,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
}
case HC_VENDOR_KVM | KVM_HC_FEATURES:
r = HC_EV_SUCCESS;
-#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500)
+#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
/* XXX Missing magic page on 44x */
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
#endif
@@ -109,6 +114,11 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
goto out;
#endif
+#ifdef CONFIG_KVM_BOOKE_HV
+ if (!cpu_has_feature(CPU_FTR_EMB_HV))
+ goto out;
+#endif
+
r = true;
out:
@@ -225,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
-#ifdef CONFIG_KVM_E500
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
#endif
r = 1;
@@ -234,10 +244,12 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
#endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
r = 1;
break;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_CAP_PPC_SMT:
r = threads_per_core;
break;
@@ -267,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_CAP_PPC_GET_SMMU_INFO:
+ r = 1;
+ break;
+#endif
default:
r = 0;
break;
@@ -588,21 +605,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
return r;
}
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
- int me;
- int cpu = vcpu->cpu;
-
- me = get_cpu();
- if (waitqueue_active(vcpu->arch.wqp)) {
- wake_up_interruptible(vcpu->arch.wqp);
- vcpu->stat.halt_wakeup++;
- } else if (cpu != me && cpu != -1) {
- smp_send_reschedule(vcpu->cpu);
- }
- put_cpu();
-}
-
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET) {
@@ -611,6 +613,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
}
kvmppc_core_queue_external(vcpu, irq);
+
kvm_vcpu_kick(vcpu);
return 0;
@@ -633,7 +636,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
-#ifdef CONFIG_KVM_E500
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB: {
struct kvm_config_tlb cfg;
void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
@@ -710,7 +713,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
-#ifdef CONFIG_KVM_E500
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_DIRTY_TLB: {
struct kvm_dirty_tlb dirty;
r = -EFAULT;
@@ -720,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
#endif
-
default:
r = -EINVAL;
}
@@ -777,7 +779,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
struct kvm *kvm = filp->private_data;
@@ -788,7 +790,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
goto out;
}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_ALLOCATE_RMA: {
struct kvm *kvm = filp->private_data;
struct kvm_allocate_rma rma;
@@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#ifdef CONFIG_PPC_BOOK3S_64
+ case KVM_PPC_GET_SMMU_INFO: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_ppc_smmu_info info;
+
+ memset(&info, 0, sizeof(info));
+ r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
+ if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+ r = -EFAULT;
+ break;
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;
}
@@ -808,6 +824,40 @@ out:
return r;
}
+static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
+static unsigned long nr_lpids;
+
+long kvmppc_alloc_lpid(void)
+{
+ long lpid;
+
+ do {
+ lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
+ if (lpid >= nr_lpids) {
+ pr_err("%s: No LPIDs free\n", __func__);
+ return -ENOMEM;
+ }
+ } while (test_and_set_bit(lpid, lpid_inuse));
+
+ return lpid;
+}
+
+void kvmppc_claim_lpid(long lpid)
+{
+ set_bit(lpid, lpid_inuse);
+}
+
+void kvmppc_free_lpid(long lpid)
+{
+ clear_bit(lpid, lpid_inuse);
+}
+
+void kvmppc_init_lpid(unsigned long nr_lpids_param)
+{
+ nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
+ memset(lpid_inuse, 0, sizeof(lpid_inuse));
+}
+
int kvm_arch_init(void *opaque)
{
return 0;
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 8167d42a776f..bf191e72b2d8 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
case SIGNAL_EXITS:
vcpu->stat.signal_exits++;
break;
+ case DBELL_EXITS:
+ vcpu->stat.dbell_exits++;
+ break;
+ case GDBELL_EXITS:
+ vcpu->stat.gdbell_exits++;
+ break;
}
}
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index 96076676e224..bdcbe0f8dd7b 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -52,4 +52,9 @@ struct kvm_sync_regs {
__u32 acrs[16]; /* access registers */
__u64 crs[16]; /* control registers */
};
+
+#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 7343872890a2..dd17537b9a9d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -148,6 +148,7 @@ struct kvm_vcpu_stat {
u32 instruction_sigp_restart;
u32 diagnose_10;
u32 diagnose_44;
+ u32 diagnose_9c;
};
struct kvm_s390_io_info {
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 6964db226f83..a98832961035 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -149,6 +149,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
#endif
#endif /* __S390_KVM_PARA_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index fed7bee650a0..bf238c55740b 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -48,6 +48,7 @@ int sclp_cpu_deconfigure(u8 cpu);
void sclp_facilities_detect(void);
unsigned long long sclp_get_rnmax(void);
unsigned long long sclp_get_rzm(void);
+u8 sclp_get_fac85(void);
int sclp_sdias_blk_count(void);
int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
int sclp_chp_configure(struct chp_id chpid);
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index a353f0ea45c2..b23d9ac77dfc 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -47,9 +47,30 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
vcpu->stat.diagnose_44++;
- vcpu_put(vcpu);
- yield();
- vcpu_load(vcpu);
+ kvm_vcpu_on_spin(vcpu);
+ return 0;
+}
+
+static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tcpu;
+ int tid;
+ int i;
+
+ tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+ vcpu->stat.diagnose_9c++;
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+
+ if (tid == vcpu->vcpu_id)
+ return 0;
+
+ kvm_for_each_vcpu(i, tcpu, kvm)
+ if (tcpu->vcpu_id == tid) {
+ kvm_vcpu_yield_to(tcpu);
+ break;
+ }
+
return 0;
}
@@ -89,6 +110,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return diag_release_pages(vcpu);
case 0x44:
return __diag_time_slice_end(vcpu);
+ case 0x9c:
+ return __diag_time_slice_end_directed(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
default:
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 361456577c6f..979cbe55bf5e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -101,6 +101,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
}
static intercept_handler_t instruction_handlers[256] = {
+ [0x01] = kvm_s390_handle_01,
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 217ce44395a4..664766d0c83c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
+#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -74,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
+ { "diagnose_9c", VCPU_STAT(diagnose_9c) },
{ NULL }
};
@@ -133,8 +135,16 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_S390_UCONTROL:
#endif
case KVM_CAP_SYNC_REGS:
+ case KVM_CAP_ONE_REG:
r = 1;
break;
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_S390_COW:
+ r = sclp_get_fac85() & 0x2;
+ break;
default:
r = 0;
}
@@ -423,6 +433,71 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return 0;
}
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ /* kvm common code refers to this, but never calls it */
+ BUG();
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
+ struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_S390_TODPR:
+ r = put_user(vcpu->arch.sie_block->todpr,
+ (u32 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_EPOCHDIFF:
+ r = put_user(vcpu->arch.sie_block->epoch,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CPU_TIMER:
+ r = put_user(vcpu->arch.sie_block->cputm,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CLOCK_COMP:
+ r = put_user(vcpu->arch.sie_block->ckc,
+ (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
+ struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_S390_TODPR:
+ r = get_user(vcpu->arch.sie_block->todpr,
+ (u32 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_EPOCHDIFF:
+ r = get_user(vcpu->arch.sie_block->epoch,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CPU_TIMER:
+ r = get_user(vcpu->arch.sie_block->cputm,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CLOCK_COMP:
+ r = get_user(vcpu->arch.sie_block->ckc,
+ (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
kvm_s390_vcpu_initial_reset(vcpu);
@@ -753,6 +828,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_S390_INITIAL_RESET:
r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
break;
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
+ else
+ r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
+ break;
+ }
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_S390_UCAS_MAP: {
struct kvm_s390_ucas_mapping ucasmap;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ff28f9d1c9eb..2294377975e8 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -79,6 +79,7 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index e5a45dbd26ac..68a6b2ed16bf 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -380,3 +380,34 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+static int handle_sckpf(struct kvm_vcpu *vcpu)
+{
+ u32 value;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu,
+ PGM_PRIVILEGED_OPERATION);
+
+ if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
+ return kvm_s390_inject_program_int(vcpu,
+ PGM_SPECIFICATION);
+
+ value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff;
+ vcpu->arch.sie_block->todpr = value;
+
+ return 0;
+}
+
+static intercept_handler_t x01_handlers[256] = {
+ [0x07] = handle_sckpf,
+};
+
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (handler)
+ return handler(vcpu);
+ return -EOPNOTSUPP;
+}
diff --git a/arch/score/include/asm/kvm_para.h b/arch/score/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/score/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/sh/include/asm/kvm_para.h b/arch/sh/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/sh/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/sparc/include/asm/kvm_para.h b/arch/sparc/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/sparc/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/tile/include/asm/kvm_para.h b/arch/tile/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/tile/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/um/include/asm/kvm_para.h b/arch/um/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/um/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/unicore32/include/asm/kvm_para.h b/arch/unicore32/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/unicore32/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c222e1a1b12a..1ac46c22dd50 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -200,7 +200,7 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
unsigned int bytes;
union {
unsigned long orig_val;
@@ -213,12 +213,14 @@ struct operand {
unsigned seg;
} mem;
unsigned xmm;
+ unsigned mm;
} addr;
union {
unsigned long val;
u64 val64;
char valptr[sizeof(unsigned long) + 2];
sse128_t vec_val;
+ u64 mm_val;
};
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e5b97be12d2a..db7c1f2709a2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -173,6 +173,9 @@ enum {
#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff23ff
+/* apic attention bits */
+#define KVM_APIC_CHECK_VAPIC 0
+
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
@@ -238,8 +241,6 @@ struct kvm_mmu_page {
#endif
int write_flooding_count;
-
- struct rcu_head rcu;
};
struct kvm_pio_request {
@@ -338,6 +339,7 @@ struct kvm_vcpu_arch {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
+ unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
int sipi_vector;
@@ -537,8 +539,6 @@ struct kvm_arch {
u64 hv_guest_os_id;
u64 hv_hypercall;
- atomic_t reader_counter;
-
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
@@ -713,8 +713,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
- struct kvm_memory_slot *slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 183922e13de1..63ab1661d00e 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -95,6 +95,14 @@ struct kvm_vcpu_pv_apf_data {
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
+#ifdef CONFIG_KVM_CLOCK
+bool kvm_check_and_clear_guest_paused(void);
+#else
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+#endif /* CONFIG_KVMCLOCK */
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
@@ -173,14 +181,16 @@ static inline int kvm_para_available(void)
if (boot_cpu_data.cpuid_level < 0)
return 0; /* So we don't blow up on old processors */
- cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
- memcpy(signature + 0, &ebx, 4);
- memcpy(signature + 4, &ecx, 4);
- memcpy(signature + 8, &edx, 4);
- signature[12] = 0;
+ if (cpu_has_hypervisor) {
+ cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+ memcpy(signature + 0, &ebx, 4);
+ memcpy(signature + 4, &ecx, 4);
+ memcpy(signature + 8, &edx, 4);
+ signature[12] = 0;
- if (strcmp(signature, "KVMKVMKVM") == 0)
- return 1;
+ if (strcmp(signature, "KVMKVMKVM") == 0)
+ return 1;
+ }
return 0;
}
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 35f2d1948ada..6167fd798188 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -40,5 +40,6 @@ struct pvclock_wall_clock {
} __attribute__((__packed__));
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
+#define PVCLOCK_GUEST_STOPPED (1 << 1)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f8492da65bfc..086eb58c6e80 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -22,6 +22,7 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
+#include <linux/hardirq.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -114,6 +115,25 @@ static void kvm_get_preset_lpj(void)
preset_lpj = lpj;
}
+bool kvm_check_and_clear_guest_paused(void)
+{
+ bool ret = false;
+ struct pvclock_vcpu_time_info *src;
+
+ /*
+ * per_cpu() is safe here because this function is only called from
+ * timer functions where preemption is already disabled.
+ */
+ WARN_ON(!in_atomic());
+ src = &__get_cpu_var(hv_clock);
+ if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
+ __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+ ret = true;
+ }
+
+ return ret;
+}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1a7fe868f375..a28f338843ea 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
select TASKSTATS
select TASK_DELAY_ACCT
select PERF_EVENTS
+ select HAVE_KVM_MSI
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9fed5bedaad6..7df1c6d839fb 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -247,7 +247,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
- F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS);
+ F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+ F(BMI2) | F(ERMS) | F(RTM);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -397,7 +398,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case KVM_CPUID_SIGNATURE: {
char signature[12] = "KVMKVMKVM\0\0";
u32 *sigptr = (u32 *)signature;
- entry->eax = 0;
+ entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
entry->edx = sigptr[2];
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 83756223f8aa..f95d242ee9f7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -142,6 +142,10 @@
#define Src2FS (OpFS << Src2Shift)
#define Src2GS (OpGS << Src2Shift)
#define Src2Mask (OpMask << Src2Shift)
+#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
+#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
+#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
+#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
#define X2(x...) x, x
#define X3(x...) X2(x), x
@@ -557,6 +561,29 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
+/*
+ * x86 defines three classes of vector instructions: explicitly
+ * aligned, explicitly unaligned, and the rest, which change behaviour
+ * depending on whether they're AVX encoded or not.
+ *
+ * Also included is CMPXCHG16B which is not a vector instruction, yet it is
+ * subject to the same check.
+ */
+static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
+{
+ if (likely(size < 16))
+ return false;
+
+ if (ctxt->d & Aligned)
+ return true;
+ else if (ctxt->d & Unaligned)
+ return false;
+ else if (ctxt->d & Avx)
+ return false;
+ else
+ return true;
+}
+
static int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned size, bool write, bool fetch,
@@ -621,6 +648,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
}
if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
la &= (u32)-1;
+ if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
+ return emulate_gp(ctxt, 0);
*linear = la;
return X86EMUL_CONTINUE;
bad:
@@ -859,6 +888,40 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
ctxt->ops->put_fpu(ctxt);
}
+static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
+ case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
+ case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
+ case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
+ case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
+ case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
+ case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
+ case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
+ case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
+ case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
+ case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
+ case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
+ case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
+ case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
+ case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
struct operand *op)
{
@@ -875,6 +938,13 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
read_sse_reg(ctxt, &op->vec_val, reg);
return;
}
+ if (ctxt->d & Mmx) {
+ reg &= 7;
+ op->type = OP_MM;
+ op->bytes = 8;
+ op->addr.mm = reg;
+ return;
+ }
op->type = OP_REG;
if (ctxt->d & ByteOp) {
@@ -902,7 +972,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
}
- ctxt->modrm = insn_fetch(u8, ctxt);
ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
ctxt->modrm_rm |= (ctxt->modrm & 0x07);
@@ -920,6 +989,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
return rc;
}
+ if (ctxt->d & Mmx) {
+ op->type = OP_MM;
+ op->bytes = 8;
+ op->addr.xmm = ctxt->modrm_rm & 7;
+ return rc;
+ }
fetch_register_operand(op);
return rc;
}
@@ -1387,6 +1462,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
case OP_XMM:
write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
break;
+ case OP_MM:
+ write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm);
+ break;
case OP_NONE:
/* no writeback */
break;
@@ -2790,7 +2868,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
- ctxt->dst.val = ctxt->src.val;
+ memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
return X86EMUL_CONTINUE;
}
@@ -2870,12 +2948,6 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}
-static int em_movdqu(struct x86_emulate_ctxt *ctxt)
-{
- memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes);
- return X86EMUL_CONTINUE;
-}
-
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
int rc;
@@ -3061,35 +3133,13 @@ static int em_btc(struct x86_emulate_ctxt *ctxt)
static int em_bsf(struct x86_emulate_ctxt *ctxt)
{
- u8 zf;
-
- __asm__ ("bsf %2, %0; setz %1"
- : "=r"(ctxt->dst.val), "=q"(zf)
- : "r"(ctxt->src.val));
-
- ctxt->eflags &= ~X86_EFLAGS_ZF;
- if (zf) {
- ctxt->eflags |= X86_EFLAGS_ZF;
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- }
+ emulate_2op_SrcV_nobyte(ctxt, "bsf");
return X86EMUL_CONTINUE;
}
static int em_bsr(struct x86_emulate_ctxt *ctxt)
{
- u8 zf;
-
- __asm__ ("bsr %2, %0; setz %1"
- : "=r"(ctxt->dst.val), "=q"(zf)
- : "r"(ctxt->src.val));
-
- ctxt->eflags &= ~X86_EFLAGS_ZF;
- if (zf) {
- ctxt->eflags |= X86_EFLAGS_ZF;
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- }
+ emulate_2op_SrcV_nobyte(ctxt, "bsr");
return X86EMUL_CONTINUE;
}
@@ -3286,8 +3336,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
.check_perm = (_p) }
#define N D(0)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
-#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
-#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) }
+#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
+#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define II(_f, _e, _i) \
{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
@@ -3307,25 +3357,25 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
static struct opcode group7_rm1[] = {
- DI(SrcNone | ModRM | Priv, monitor),
- DI(SrcNone | ModRM | Priv, mwait),
+ DI(SrcNone | Priv, monitor),
+ DI(SrcNone | Priv, mwait),
N, N, N, N, N, N,
};
static struct opcode group7_rm3[] = {
- DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa),
- II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall),
- DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa),
- DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa),
- DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme),
- DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme),
- DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme),
- DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme),
+ DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
+ II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall),
+ DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
+ DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
+ DIP(SrcNone | Prot | Priv, stgi, check_svme),
+ DIP(SrcNone | Prot | Priv, clgi, check_svme),
+ DIP(SrcNone | Prot | Priv, skinit, check_svme),
+ DIP(SrcNone | Prot | Priv, invlpga, check_svme),
};
static struct opcode group7_rm7[] = {
N,
- DIP(SrcNone | ModRM, rdtscp, check_rdtsc),
+ DIP(SrcNone, rdtscp, check_rdtsc),
N, N, N, N, N, N,
};
@@ -3341,81 +3391,86 @@ static struct opcode group1[] = {
};
static struct opcode group1A[] = {
- I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N,
+ I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
};
static struct opcode group3[] = {
- I(DstMem | SrcImm | ModRM, em_test),
- I(DstMem | SrcImm | ModRM, em_test),
- I(DstMem | SrcNone | ModRM | Lock, em_not),
- I(DstMem | SrcNone | ModRM | Lock, em_neg),
- I(SrcMem | ModRM, em_mul_ex),
- I(SrcMem | ModRM, em_imul_ex),
- I(SrcMem | ModRM, em_div_ex),
- I(SrcMem | ModRM, em_idiv_ex),
+ I(DstMem | SrcImm, em_test),
+ I(DstMem | SrcImm, em_test),
+ I(DstMem | SrcNone | Lock, em_not),
+ I(DstMem | SrcNone | Lock, em_neg),
+ I(SrcMem, em_mul_ex),
+ I(SrcMem, em_imul_ex),
+ I(SrcMem, em_div_ex),
+ I(SrcMem, em_idiv_ex),
};
static struct opcode group4[] = {
- I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45),
- I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45),
+ I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
+ I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
N, N, N, N, N, N,
};
static struct opcode group5[] = {
- I(DstMem | SrcNone | ModRM | Lock, em_grp45),
- I(DstMem | SrcNone | ModRM | Lock, em_grp45),
- I(SrcMem | ModRM | Stack, em_grp45),
- I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
- I(SrcMem | ModRM | Stack, em_grp45),
- I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45),
- I(SrcMem | ModRM | Stack, em_grp45), N,
+ I(DstMem | SrcNone | Lock, em_grp45),
+ I(DstMem | SrcNone | Lock, em_grp45),
+ I(SrcMem | Stack, em_grp45),
+ I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
+ I(SrcMem | Stack, em_grp45),
+ I(SrcMemFAddr | ImplicitOps, em_grp45),
+ I(SrcMem | Stack, em_grp45), N,
};
static struct opcode group6[] = {
- DI(ModRM | Prot, sldt),
- DI(ModRM | Prot, str),
- DI(ModRM | Prot | Priv, lldt),
- DI(ModRM | Prot | Priv, ltr),
+ DI(Prot, sldt),
+ DI(Prot, str),
+ DI(Prot | Priv, lldt),
+ DI(Prot | Priv, ltr),
N, N, N, N,
};
static struct group_dual group7 = { {
- DI(ModRM | Mov | DstMem | Priv, sgdt),
- DI(ModRM | Mov | DstMem | Priv, sidt),
- II(ModRM | SrcMem | Priv, em_lgdt, lgdt),
- II(ModRM | SrcMem | Priv, em_lidt, lidt),
- II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
- II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw),
- II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
+ DI(Mov | DstMem | Priv, sgdt),
+ DI(Mov | DstMem | Priv, sidt),
+ II(SrcMem | Priv, em_lgdt, lgdt),
+ II(SrcMem | Priv, em_lidt, lidt),
+ II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
+ II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
+ II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
}, {
- I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall),
+ I(SrcNone | Priv | VendorSpecific, em_vmcall),
EXT(0, group7_rm1),
N, EXT(0, group7_rm3),
- II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
- II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7),
+ II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
+ II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
+ EXT(0, group7_rm7),
} };
static struct opcode group8[] = {
N, N, N, N,
- I(DstMem | SrcImmByte | ModRM, em_bt),
- I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts),
- I(DstMem | SrcImmByte | ModRM | Lock, em_btr),
- I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc),
+ I(DstMem | SrcImmByte, em_bt),
+ I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
+ I(DstMem | SrcImmByte | Lock, em_btr),
+ I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
static struct group_dual group9 = { {
- N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
+ N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
N, N, N, N, N, N, N, N,
} };
static struct opcode group11[] = {
- I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov),
+ I(DstMem | SrcImm | Mov | PageTable, em_mov),
X7(D(Undefined)),
};
static struct gprefix pfx_0f_6f_0f_7f = {
- N, N, N, I(Sse, em_movdqu),
+ I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
+};
+
+static struct gprefix pfx_vmovntpx = {
+ I(0, em_mov), N, N, N,
};
static struct opcode opcode_table[256] = {
@@ -3464,10 +3519,10 @@ static struct opcode opcode_table[256] = {
/* 0x70 - 0x7F */
X16(D(SrcImmByte)),
/* 0x80 - 0x87 */
- G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
- G(DstMem | SrcImm | ModRM | Group, group1),
- G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
- G(DstMem | SrcImmByte | ModRM | Group, group1),
+ G(ByteOp | DstMem | SrcImm, group1),
+ G(DstMem | SrcImm, group1),
+ G(ByteOp | DstMem | SrcImm | No64, group1),
+ G(DstMem | SrcImmByte, group1),
I2bv(DstMem | SrcReg | ModRM, em_test),
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
/* 0x88 - 0x8F */
@@ -3549,7 +3604,8 @@ static struct opcode twobyte_table[256] = {
IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
N, N, N, N,
- N, N, N, N, N, N, N, N,
+ N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+ N, N, N, N,
/* 0x30 - 0x3F */
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
@@ -3897,17 +3953,16 @@ done_prefixes:
}
ctxt->d = opcode.flags;
+ if (ctxt->d & ModRM)
+ ctxt->modrm = insn_fetch(u8, ctxt);
+
while (ctxt->d & GroupMask) {
switch (ctxt->d & GroupMask) {
case Group:
- ctxt->modrm = insn_fetch(u8, ctxt);
- --ctxt->_eip;
goffset = (ctxt->modrm >> 3) & 7;
opcode = opcode.u.group[goffset];
break;
case GroupDual:
- ctxt->modrm = insn_fetch(u8, ctxt);
- --ctxt->_eip;
goffset = (ctxt->modrm >> 3) & 7;
if ((ctxt->modrm >> 6) == 3)
opcode = opcode.u.gdual->mod3[goffset];
@@ -3960,6 +4015,8 @@ done_prefixes:
if (ctxt->d & Sse)
ctxt->op_bytes = 16;
+ else if (ctxt->d & Mmx)
+ ctxt->op_bytes = 8;
/* ModRM and SIB bytes. */
if (ctxt->d & ModRM) {
@@ -4030,6 +4087,35 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
return false;
}
+static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
+{
+ bool fault = false;
+
+ ctxt->ops->get_fpu(ctxt);
+ asm volatile("1: fwait \n\t"
+ "2: \n\t"
+ ".pushsection .fixup,\"ax\" \n\t"
+ "3: \n\t"
+ "movb $1, %[fault] \n\t"
+ "jmp 2b \n\t"
+ ".popsection \n\t"
+ _ASM_EXTABLE(1b, 3b)
+ : [fault]"+qm"(fault));
+ ctxt->ops->put_fpu(ctxt);
+
+ if (unlikely(fault))
+ return emulate_exception(ctxt, MF_VECTOR, 0, false);
+
+ return X86EMUL_CONTINUE;
+}
+
+static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
+ struct operand *op)
+{
+ if (op->type == OP_MM)
+ read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
+}
+
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
struct x86_emulate_ops *ops = ctxt->ops;
@@ -4054,18 +4140,31 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if ((ctxt->d & Sse)
- && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)
- || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+ if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
+ || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
rc = emulate_ud(ctxt);
goto done;
}
- if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+ if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
rc = emulate_nm(ctxt);
goto done;
}
+ if (ctxt->d & Mmx) {
+ rc = flush_pending_x87_faults(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ /*
+ * Now that we know the fpu is exception safe, we can fetch
+ * operands from it.
+ */
+ fetch_possible_mmx_operand(ctxt, &ctxt->src);
+ fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+ if (!(ctxt->d & Mov))
+ fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+ }
+
if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index d68f99df690c..adba28f88d1a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -34,7 +34,6 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
-#include <linux/workqueue.h>
#include "irq.h"
#include "i8254.h"
@@ -249,7 +248,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
/* in this case, we had multiple outstanding pit interrupts
* that we needed to inject. Reinject
*/
- queue_work(ps->pit->wq, &ps->pit->expired);
+ queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
ps->irq_ack = 1;
spin_unlock(&ps->inject_lock);
}
@@ -270,7 +269,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
static void destroy_pit_timer(struct kvm_pit *pit)
{
hrtimer_cancel(&pit->pit_state.pit_timer.timer);
- cancel_work_sync(&pit->expired);
+ flush_kthread_work(&pit->expired);
}
static bool kpit_is_periodic(struct kvm_timer *ktimer)
@@ -284,7 +283,7 @@ static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
-static void pit_do_work(struct work_struct *work)
+static void pit_do_work(struct kthread_work *work)
{
struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
struct kvm *kvm = pit->kvm;
@@ -328,7 +327,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
atomic_inc(&ktimer->pending);
- queue_work(pt->wq, &pt->expired);
+ queue_kthread_work(&pt->worker, &pt->expired);
}
if (ktimer->t_ops->is_periodic(ktimer)) {
@@ -353,7 +352,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
/* TODO The new value only affected after the retriggered */
hrtimer_cancel(&pt->timer);
- cancel_work_sync(&ps->pit->expired);
+ flush_kthread_work(&ps->pit->expired);
pt->period = interval;
ps->is_periodic = is_period;
@@ -669,6 +668,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
struct kvm_kpit_state *pit_state;
+ struct pid *pid;
+ pid_t pid_nr;
int ret;
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
@@ -685,14 +686,20 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
- pit->wq = create_singlethread_workqueue("kvm-pit-wq");
- if (!pit->wq) {
+ pid = get_pid(task_tgid(current));
+ pid_nr = pid_vnr(pid);
+ put_pid(pid);
+
+ init_kthread_worker(&pit->worker);
+ pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
+ "kvm-pit/%d", pid_nr);
+ if (IS_ERR(pit->worker_task)) {
mutex_unlock(&pit->pit_state.lock);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
}
- INIT_WORK(&pit->expired, pit_do_work);
+ init_kthread_work(&pit->expired, pit_do_work);
kvm->arch.vpit = pit;
pit->kvm = kvm;
@@ -736,7 +743,7 @@ fail:
kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
- destroy_workqueue(pit->wq);
+ kthread_stop(pit->worker_task);
kfree(pit);
return NULL;
}
@@ -756,10 +763,10 @@ void kvm_free_pit(struct kvm *kvm)
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
- cancel_work_sync(&kvm->arch.vpit->expired);
+ flush_kthread_work(&kvm->arch.vpit->expired);
+ kthread_stop(kvm->arch.vpit->worker_task);
kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
- destroy_workqueue(kvm->arch.vpit->wq);
kfree(kvm->arch.vpit);
}
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 51a97426e791..fdf40425ea1d 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -1,6 +1,8 @@
#ifndef __I8254_H
#define __I8254_H
+#include <linux/kthread.h>
+
#include "iodev.h"
struct kvm_kpit_channel_state {
@@ -39,8 +41,9 @@ struct kvm_pit {
struct kvm_kpit_state pit_state;
int irq_source_id;
struct kvm_irq_mask_notifier mask_notifier;
- struct workqueue_struct *wq;
- struct work_struct expired;
+ struct kthread_worker worker;
+ struct task_struct *worker_task;
+ struct kthread_work expired;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 858432287ab6..93c15743f1ee 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -92,6 +92,11 @@ static inline int apic_test_and_clear_vector(int vec, void *bitmap)
return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
+static inline int apic_test_vector(int vec, void *bitmap)
+{
+ return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline void apic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -480,7 +485,6 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
static void apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
- int trigger_mode;
/*
* Not every write EOI will has corresponding ISR,
* one example is when Kernel check timer on setup_IO_APIC
@@ -491,12 +495,15 @@ static void apic_set_eoi(struct kvm_lapic *apic)
apic_clear_vector(vector, apic->regs + APIC_ISR);
apic_update_ppr(apic);
- if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
- trigger_mode = IOAPIC_LEVEL_TRIG;
- else
- trigger_mode = IOAPIC_EDGE_TRIG;
- if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
+ if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+ kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+ int trigger_mode;
+ if (apic_test_vector(vector, apic->regs + APIC_TMR))
+ trigger_mode = IOAPIC_LEVEL_TRIG;
+ else
+ trigger_mode = IOAPIC_EDGE_TRIG;
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+ }
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
@@ -1081,6 +1088,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_update_ppr(apic);
vcpu->arch.apic_arb_prio = 0;
+ vcpu->arch.apic_attention = 0;
apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
@@ -1280,7 +1288,7 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
u32 data;
void *vapic;
- if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+ if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
@@ -1297,7 +1305,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic;
void *vapic;
- if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+ if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
apic = vcpu->arch.apic;
@@ -1317,10 +1325,11 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
vcpu->arch.apic->vapic_addr = vapic_addr;
+ if (vapic_addr)
+ __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
+ else
+ __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4cb164268846..72102e0ab7cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -135,8 +135,6 @@ module_param(dbg, bool, 0644);
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
| PT64_NX_MASK)
-#define PTE_LIST_EXT 4
-
#define ACC_EXEC_MASK 1
#define ACC_WRITE_MASK PT_WRITABLE_MASK
#define ACC_USER_MASK PT_USER_MASK
@@ -151,6 +149,9 @@ module_param(dbg, bool, 0644);
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+/* make pte_list_desc fit well in cache line */
+#define PTE_LIST_EXT 3
+
struct pte_list_desc {
u64 *sptes[PTE_LIST_EXT];
struct pte_list_desc *more;
@@ -550,19 +551,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
{
- rcu_read_lock();
- atomic_inc(&vcpu->kvm->arch.reader_counter);
-
- /* Increase the counter before walking shadow page table */
- smp_mb__after_atomic_inc();
+ /*
+ * Prevent page table teardown by making any free-er wait during
+ * kvm_flush_remote_tlbs() IPI to all active vcpus.
+ */
+ local_irq_disable();
+ vcpu->mode = READING_SHADOW_PAGE_TABLES;
+ /*
+ * Make sure a following spte read is not reordered ahead of the write
+ * to vcpu->mode.
+ */
+ smp_mb();
}
static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
- /* Decrease the counter after walking shadow page table finished */
- smp_mb__before_atomic_dec();
- atomic_dec(&vcpu->kvm->arch.reader_counter);
- rcu_read_unlock();
+ /*
+ * Make sure the write to vcpu->mode is not reordered in front of
+ * reads to sptes. If it does, kvm_commit_zap_page() can see us
+ * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
+ */
+ smp_mb();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ local_irq_enable();
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -841,32 +852,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
return count;
}
-static u64 *pte_list_next(unsigned long *pte_list, u64 *spte)
-{
- struct pte_list_desc *desc;
- u64 *prev_spte;
- int i;
-
- if (!*pte_list)
- return NULL;
- else if (!(*pte_list & 1)) {
- if (!spte)
- return (u64 *)*pte_list;
- return NULL;
- }
- desc = (struct pte_list_desc *)(*pte_list & ~1ul);
- prev_spte = NULL;
- while (desc) {
- for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
- if (prev_spte == spte)
- return desc->sptes[i];
- prev_spte = desc->sptes[i];
- }
- desc = desc->more;
- }
- return NULL;
-}
-
static void
pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
int i, struct pte_list_desc *prev_desc)
@@ -987,11 +972,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
return pte_list_add(vcpu, spte, rmapp);
}
-static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
-{
- return pte_list_next(rmapp, spte);
-}
-
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_mmu_page *sp;
@@ -1004,106 +984,201 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
pte_list_remove(spte, rmapp);
}
+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * rmap. All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+ /* private fields */
+ struct pte_list_desc *desc; /* holds the sptep if not NULL */
+ int pos; /* index of the sptep */
+};
+
+/*
+ * Iteration must be started by this function. This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the itererator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
+{
+ if (!rmap)
+ return NULL;
+
+ if (!(rmap & 1)) {
+ iter->desc = NULL;
+ return (u64 *)rmap;
+ }
+
+ iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
+ iter->pos = 0;
+ return iter->desc->sptes[iter->pos];
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+ if (iter->desc) {
+ if (iter->pos < PTE_LIST_EXT - 1) {
+ u64 *sptep;
+
+ ++iter->pos;
+ sptep = iter->desc->sptes[iter->pos];
+ if (sptep)
+ return sptep;
+ }
+
+ iter->desc = iter->desc->more;
+
+ if (iter->desc) {
+ iter->pos = 0;
+ /* desc->sptes[0] cannot be NULL */
+ return iter->desc->sptes[iter->pos];
+ }
+ }
+
+ return NULL;
+}
+
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
if (mmu_spte_clear_track_bits(sptep))
rmap_remove(kvm, sptep);
}
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
- struct kvm_memory_slot *slot)
+static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
{
- unsigned long *rmapp;
- u64 *spte;
- int i, write_protected = 0;
-
- rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- BUG_ON(!(*spte & PT_PRESENT_MASK));
- rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
- if (is_writable_pte(*spte)) {
- mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
- write_protected = 1;
+ u64 *sptep;
+ struct rmap_iterator iter;
+ int write_protected = 0;
+
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+ rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
+
+ if (!is_writable_pte(*sptep)) {
+ sptep = rmap_get_next(&iter);
+ continue;
}
- spte = rmap_next(rmapp, spte);
- }
- /* check for huge page mappings */
- for (i = PT_DIRECTORY_LEVEL;
- i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
- rmapp = __gfn_to_rmap(gfn, i, slot);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- BUG_ON(!(*spte & PT_PRESENT_MASK));
- BUG_ON(!is_large_pte(*spte));
- pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
- if (is_writable_pte(*spte)) {
- drop_spte(kvm, spte);
- --kvm->stat.lpages;
- spte = NULL;
- write_protected = 1;
- }
- spte = rmap_next(rmapp, spte);
+ if (level == PT_PAGE_TABLE_LEVEL) {
+ mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
+ sptep = rmap_get_next(&iter);
+ } else {
+ BUG_ON(!is_large_pte(*sptep));
+ drop_spte(kvm, sptep);
+ --kvm->stat.lpages;
+ sptep = rmap_get_first(*rmapp, &iter);
}
+
+ write_protected = 1;
}
return write_protected;
}
+/**
+ * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
+ * @kvm: kvm instance
+ * @slot: slot to protect
+ * @gfn_offset: start of the BITS_PER_LONG pages we care about
+ * @mask: indicates which pages we should protect
+ *
+ * Used when we do not need to care about huge page mappings: e.g. during dirty
+ * logging we do not have any such mappings.
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask)
+{
+ unsigned long *rmapp;
+
+ while (mask) {
+ rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
+ __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL);
+
+ /* clear the first set bit */
+ mask &= mask - 1;
+ }
+}
+
static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
struct kvm_memory_slot *slot;
+ unsigned long *rmapp;
+ int i;
+ int write_protected = 0;
slot = gfn_to_memslot(kvm, gfn);
- return kvm_mmu_rmap_write_protect(kvm, gfn, slot);
+
+ for (i = PT_PAGE_TABLE_LEVEL;
+ i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+ rmapp = __gfn_to_rmap(gfn, i, slot);
+ write_protected |= __rmap_write_protect(kvm, rmapp, i);
+ }
+
+ return write_protected;
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
int need_tlb_flush = 0;
- while ((spte = rmap_next(rmapp, NULL))) {
- BUG_ON(!(*spte & PT_PRESENT_MASK));
- rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
- drop_spte(kvm, spte);
+ while ((sptep = rmap_get_first(*rmapp, &iter))) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+ rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+
+ drop_spte(kvm, sptep);
need_tlb_flush = 1;
}
+
return need_tlb_flush;
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
+ u64 *sptep;
+ struct rmap_iterator iter;
int need_flush = 0;
- u64 *spte, new_spte;
+ u64 new_spte;
pte_t *ptep = (pte_t *)data;
pfn_t new_pfn;
WARN_ON(pte_huge(*ptep));
new_pfn = pte_pfn(*ptep);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- BUG_ON(!is_shadow_present_pte(*spte));
- rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+ BUG_ON(!is_shadow_present_pte(*sptep));
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+
need_flush = 1;
+
if (pte_write(*ptep)) {
- drop_spte(kvm, spte);
- spte = rmap_next(rmapp, NULL);
+ drop_spte(kvm, sptep);
+ sptep = rmap_get_first(*rmapp, &iter);
} else {
- new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+ new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
new_spte |= (u64)new_pfn << PAGE_SHIFT;
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~SPTE_HOST_WRITEABLE;
new_spte &= ~shadow_accessed_mask;
- mmu_spte_clear_track_bits(spte);
- mmu_spte_set(spte, new_spte);
- spte = rmap_next(rmapp, spte);
+
+ mmu_spte_clear_track_bits(sptep);
+ mmu_spte_set(sptep, new_spte);
+ sptep = rmap_get_next(&iter);
}
}
+
if (need_flush)
kvm_flush_remote_tlbs(kvm);
@@ -1162,7 +1237,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
int young = 0;
/*
@@ -1175,25 +1251,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
return kvm_unmap_rmapp(kvm, rmapp, data);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- int _young;
- u64 _spte = *spte;
- BUG_ON(!(_spte & PT_PRESENT_MASK));
- _young = _spte & PT_ACCESSED_MASK;
- if (_young) {
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+ sptep = rmap_get_next(&iter)) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+ if (*sptep & PT_ACCESSED_MASK) {
young = 1;
- clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+ clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
}
- spte = rmap_next(rmapp, spte);
}
+
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
int young = 0;
/*
@@ -1204,16 +1279,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
goto out;
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- u64 _spte = *spte;
- BUG_ON(!(_spte & PT_PRESENT_MASK));
- young = _spte & PT_ACCESSED_MASK;
- if (young) {
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+ sptep = rmap_get_next(&iter)) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+ if (*sptep & PT_ACCESSED_MASK) {
young = 1;
break;
}
- spte = rmap_next(rmapp, spte);
}
out:
return young;
@@ -1865,10 +1938,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- u64 *parent_pte;
+ u64 *sptep;
+ struct rmap_iterator iter;
- while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL)))
- drop_parent_pte(sp, parent_pte);
+ while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+ drop_parent_pte(sp, sptep);
}
static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -1925,30 +1999,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
return ret;
}
-static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
-{
- struct kvm_mmu_page *sp;
-
- list_for_each_entry(sp, invalid_list, link)
- kvm_mmu_isolate_page(sp);
-}
-
-static void free_pages_rcu(struct rcu_head *head)
-{
- struct kvm_mmu_page *next, *sp;
-
- sp = container_of(head, struct kvm_mmu_page, rcu);
- while (sp) {
- if (!list_empty(&sp->link))
- next = list_first_entry(&sp->link,
- struct kvm_mmu_page, link);
- else
- next = NULL;
- kvm_mmu_free_page(sp);
- sp = next;
- }
-}
-
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
{
@@ -1957,17 +2007,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if (list_empty(invalid_list))
return;
- kvm_flush_remote_tlbs(kvm);
-
- if (atomic_read(&kvm->arch.reader_counter)) {
- kvm_mmu_isolate_pages(invalid_list);
- sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
- list_del_init(invalid_list);
+ /*
+ * wmb: make sure everyone sees our modifications to the page tables
+ * rmb: make sure we see changes to vcpu->mode
+ */
+ smp_mb();
- trace_kvm_mmu_delay_free_pages(sp);
- call_rcu(&sp->rcu, free_pages_rcu);
- return;
- }
+ /*
+ * Wait for all vcpus to exit guest mode and/or lockless shadow
+ * page table walks.
+ */
+ kvm_flush_remote_tlbs(kvm);
do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
@@ -1975,7 +2025,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
kvm_mmu_isolate_page(sp);
kvm_mmu_free_page(sp);
} while (!list_empty(invalid_list));
-
}
/*
@@ -3554,7 +3603,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
* Skip write-flooding detected for the sp whose level is 1, because
* it can become unsync, then the guest page is not write-protected.
*/
- if (sp->role.level == 1)
+ if (sp->role.level == PT_PAGE_TABLE_LEVEL)
return false;
return ++sp->write_flooding_count >= 3;
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 715da5a19a5b..7d7d0b9e23eb 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -192,7 +192,8 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memory_slot *slot;
unsigned long *rmapp;
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
if (sp->role.direct || sp->unsync || sp->role.invalid)
return;
@@ -200,13 +201,12 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
slot = gfn_to_memslot(kvm, sp->gfn);
rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- if (is_writable_pte(*spte))
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+ sptep = rmap_get_next(&iter)) {
+ if (is_writable_pte(*sptep))
audit_printk(kvm, "shadow page has writable "
"mappings: gfn %llx role %x\n",
sp->gfn, sp->role.word);
- spte = rmap_next(rmapp, spte);
}
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index df5a70311be8..34f970937ef1 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -658,7 +658,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
{
int offset = 0;
- WARN_ON(sp->role.level != 1);
+ WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
if (PTTYPE == 32)
offset = sp->role.quadrant << PT64_LEVEL_BITS;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e334389e1c75..f75af406b268 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -22,6 +22,7 @@
#include "x86.h"
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
@@ -42,6 +43,12 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+static const struct x86_cpu_id svm_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_SVM),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
+
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
@@ -3240,6 +3247,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
mark_dirty(svm->vmcb, VMCB_INTR);
+ ++svm->vcpu.stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
* possible
@@ -3247,7 +3255,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
if (!irqchip_in_kernel(svm->vcpu.kvm) &&
kvm_run->request_interrupt_window &&
!kvm_cpu_has_interrupt(&svm->vcpu)) {
- ++svm->vcpu.stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4ff0ab9bc3c8..32eb58866292 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -27,6 +27,7 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
#include <linux/ftrace_event.h>
#include <linux/slab.h>
#include <linux/tboot.h>
@@ -51,6 +52,12 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+static const struct x86_cpu_id vmx_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_VMX),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
@@ -386,6 +393,9 @@ struct vcpu_vmx {
struct {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
+#ifdef CONFIG_X86_64
+ u16 ds_sel, es_sel;
+#endif
int gs_ldt_reload_needed;
int fs_reload_needed;
} host_state;
@@ -1411,6 +1421,11 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_X86_64
+ savesegment(ds, vmx->host_state.ds_sel);
+ savesegment(es, vmx->host_state.es_sel);
+#endif
+
+#ifdef CONFIG_X86_64
vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
#else
@@ -1450,6 +1465,19 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
}
if (vmx->host_state.fs_reload_needed)
loadsegment(fs, vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+ if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
+ loadsegment(ds, vmx->host_state.ds_sel);
+ loadsegment(es, vmx->host_state.es_sel);
+ }
+#else
+ /*
+ * The sysexit path does not restore ds/es, so we must set them to
+ * a reasonable value ourselves.
+ */
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#endif
reload_tss();
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
@@ -3633,8 +3661,18 @@ static void vmx_set_constant_host_state(void)
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
+#ifdef CONFIG_X86_64
+ /*
+ * Load null selectors, so we can avoid reloading them in
+ * __vmx_load_host_state(), in case userspace uses the null selectors
+ * too (the expected case).
+ */
+ vmcs_write16(HOST_DS_SELECTOR, 0);
+ vmcs_write16(HOST_ES_SELECTOR, 0);
+#else
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+#endif
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
@@ -6256,7 +6294,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
}
}
- asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->loaded_vmcs->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6343,7 +6380,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
return &vmx->vcpu;
free_vmcs:
- free_vmcs(vmx->loaded_vmcs->vmcs);
+ free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
uninit_vcpu:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 185a2b823a2d..be6d54929fa7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2147,6 +2147,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_PCI_2_3:
+ case KVM_CAP_KVMCLOCK_CTRL:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -2597,6 +2598,23 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
return r;
}
+/*
+ * kvm_set_guest_paused() indicates to the guest kernel that it has been
+ * stopped by the hypervisor. This function will be called from the host only.
+ * EINVAL is returned when the host attempts to set the flag for a guest that
+ * does not support pv clocks.
+ */
+static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
+{
+ struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
+ if (!vcpu->arch.time_page)
+ return -EINVAL;
+ src->flags |= PVCLOCK_GUEST_STOPPED;
+ mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2873,6 +2891,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = vcpu->arch.virtual_tsc_khz;
goto out;
}
+ case KVM_KVMCLOCK_CTRL: {
+ r = kvm_set_guest_paused(vcpu);
+ goto out;
+ }
default:
r = -EINVAL;
}
@@ -3045,57 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
}
/**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
*
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- * checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing data, we keep the following order for
+ * each bit:
*
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ * 1. Take a snapshot of the bit and clear it if needed.
+ * 2. Write protect the corresponding page.
+ * 3. Flush TLB's if needed.
+ * 4. Copy the snapshot to the userspace.
*
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem. That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
- */
-static void write_protect_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- unsigned long *dirty_bitmap,
- unsigned long nr_dirty_pages)
-{
- spin_lock(&kvm->mmu_lock);
-
- /* Not many dirty pages compared to # of shadow pages. */
- if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
- unsigned long gfn_offset;
-
- for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
- unsigned long gfn = memslot->base_gfn + gfn_offset;
-
- kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
- }
- kvm_flush_remote_tlbs(kvm);
- } else
- kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
- spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * Get (and clear) the dirty memory log for a memory slot.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
*/
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
int r;
struct kvm_memory_slot *memslot;
- unsigned long n, nr_dirty_pages;
+ unsigned long n, i;
+ unsigned long *dirty_bitmap;
+ unsigned long *dirty_bitmap_buffer;
+ bool is_dirty = false;
mutex_lock(&kvm->slots_lock);
@@ -3104,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
+
+ dirty_bitmap = memslot->dirty_bitmap;
r = -ENOENT;
- if (!memslot->dirty_bitmap)
+ if (!dirty_bitmap)
goto out;
n = kvm_dirty_bitmap_bytes(memslot);
- nr_dirty_pages = memslot->nr_dirty_pages;
- /* If nothing is dirty, don't bother messing with page tables. */
- if (nr_dirty_pages) {
- struct kvm_memslots *slots, *old_slots;
- unsigned long *dirty_bitmap, *dirty_bitmap_head;
+ dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+ memset(dirty_bitmap_buffer, 0, n);
- dirty_bitmap = memslot->dirty_bitmap;
- dirty_bitmap_head = memslot->dirty_bitmap_head;
- if (dirty_bitmap == dirty_bitmap_head)
- dirty_bitmap_head += n / sizeof(long);
- memset(dirty_bitmap_head, 0, n);
+ spin_lock(&kvm->mmu_lock);
- r = -ENOMEM;
- slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
- if (!slots)
- goto out;
+ for (i = 0; i < n / sizeof(long); i++) {
+ unsigned long mask;
+ gfn_t offset;
- memslot = id_to_memslot(slots, log->slot);
- memslot->nr_dirty_pages = 0;
- memslot->dirty_bitmap = dirty_bitmap_head;
- update_memslots(slots, NULL);
+ if (!dirty_bitmap[i])
+ continue;
- old_slots = kvm->memslots;
- rcu_assign_pointer(kvm->memslots, slots);
- synchronize_srcu_expedited(&kvm->srcu);
- kfree(old_slots);
+ is_dirty = true;
- write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+ mask = xchg(&dirty_bitmap[i], 0);
+ dirty_bitmap_buffer[i] = mask;
- r = -EFAULT;
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
- goto out;
- } else {
- r = -EFAULT;
- if (clear_user(log->dirty_bitmap, n))
- goto out;
+ offset = i * BITS_PER_LONG;
+ kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
}
+ if (is_dirty)
+ kvm_flush_remote_tlbs(kvm);
+
+ spin_unlock(&kvm->mmu_lock);
+
+ r = -EFAULT;
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+ goto out;
r = 0;
out:
@@ -3728,9 +3718,8 @@ struct read_write_emulator_ops {
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
- memcpy(val, vcpu->mmio_data, bytes);
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_phys_addr, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, *(u64 *)val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -3766,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- memcpy(vcpu->mmio_data, val, bytes);
- memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+ struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
+
+ memcpy(vcpu->run->mmio.data, frag->data, frag->len);
return X86EMUL_CONTINUE;
}
@@ -3794,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
gpa_t gpa;
int handled, ret;
bool write = ops->write;
-
- if (ops->read_write_prepare &&
- ops->read_write_prepare(vcpu, val, bytes))
- return X86EMUL_CONTINUE;
+ struct kvm_mmio_fragment *frag;
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
@@ -3823,15 +3810,19 @@ mmio:
bytes -= handled;
val += handled;
- vcpu->mmio_needed = 1;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
- vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->mmio_size = bytes;
- vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
- vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
- vcpu->mmio_index = 0;
+ while (bytes) {
+ unsigned now = min(bytes, 8U);
- return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+ frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+ frag->gpa = gpa;
+ frag->data = val;
+ frag->len = now;
+
+ gpa += now;
+ val += now;
+ bytes -= now;
+ }
+ return X86EMUL_CONTINUE;
}
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
@@ -3840,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
struct read_write_emulator_ops *ops)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ gpa_t gpa;
+ int rc;
+
+ if (ops->read_write_prepare &&
+ ops->read_write_prepare(vcpu, val, bytes))
+ return X86EMUL_CONTINUE;
+
+ vcpu->mmio_nr_fragments = 0;
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
- int rc, now;
+ int now;
now = -addr & ~PAGE_MASK;
rc = emulator_read_write_onepage(addr, val, now, exception,
@@ -3856,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
bytes -= now;
}
- return emulator_read_write_onepage(addr, val, bytes, exception,
- vcpu, ops);
+ rc = emulator_read_write_onepage(addr, val, bytes, exception,
+ vcpu, ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (!vcpu->mmio_nr_fragments)
+ return rc;
+
+ gpa = vcpu->mmio_fragments[0].gpa;
+
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_cur_fragment = 0;
+
+ vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+ vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.phys_addr = gpa;
+
+ return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
}
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
@@ -5263,10 +5279,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_deliver_pmi(vcpu);
}
- r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
- goto out;
-
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
inject_pending_event(vcpu);
@@ -5282,6 +5294,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
}
+ r = kvm_mmu_reload(vcpu);
+ if (unlikely(r)) {
+ kvm_x86_ops->cancel_injection(vcpu);
+ goto out;
+ }
+
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -5456,33 +5474,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return r;
}
+/*
+ * Implements the following, as a state machine:
+ *
+ * read:
+ * for each fragment
+ * write gpa, len
+ * exit
+ * copy data
+ * execute insn
+ *
+ * write:
+ * for each fragment
+ * write gpa, len
+ * copy data
+ * exit
+ */
static int complete_mmio(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
+ struct kvm_mmio_fragment *frag;
int r;
if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
return 1;
if (vcpu->mmio_needed) {
- vcpu->mmio_needed = 0;
+ /* Complete previous fragment */
+ frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
if (!vcpu->mmio_is_write)
- memcpy(vcpu->mmio_data + vcpu->mmio_index,
- run->mmio.data, 8);
- vcpu->mmio_index += 8;
- if (vcpu->mmio_index < vcpu->mmio_size) {
- run->exit_reason = KVM_EXIT_MMIO;
- run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
- memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
- run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
- run->mmio.is_write = vcpu->mmio_is_write;
- vcpu->mmio_needed = 1;
- return 0;
+ memcpy(frag->data, run->mmio.data, frag->len);
+ if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+ vcpu->mmio_needed = 0;
+ if (vcpu->mmio_is_write)
+ return 1;
+ vcpu->mmio_read_completed = 1;
+ goto done;
}
+ /* Initiate next fragment */
+ ++frag;
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = frag->gpa;
if (vcpu->mmio_is_write)
- return 1;
- vcpu->mmio_read_completed = 1;
+ memcpy(run->mmio.data, frag->data, frag->len);
+ run->mmio.len = frag->len;
+ run->mmio.is_write = vcpu->mmio_is_write;
+ return 0;
+
}
+done:
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6399,21 +6439,9 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
kvm_cpu_has_interrupt(vcpu));
}
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
- int me;
- int cpu = vcpu->cpu;
-
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
- ++vcpu->stat.halt_wakeup;
- }
-
- me = get_cpu();
- if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
- smp_send_reschedule(cpu);
- put_cpu();
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index cb80c293cdd8..3d1134ddb885 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -64,7 +64,7 @@ static inline int is_pse(struct kvm_vcpu *vcpu)
static inline int is_paging(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
+ return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}
static inline u32 bit(int bitno)
diff --git a/arch/xtensa/include/asm/kvm_para.h b/arch/xtensa/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/xtensa/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 36506366158d..766cb7b19b40 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -17,6 +17,7 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memory.h>
+#include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/chpid.h>
#include <asm/sclp.h>
@@ -38,7 +39,8 @@ struct read_info_sccb {
u64 facilities; /* 48-55 */
u8 _reserved2[84 - 56]; /* 56-83 */
u8 fac84; /* 84 */
- u8 _reserved3[91 - 85]; /* 85-90 */
+ u8 fac85; /* 85 */
+ u8 _reserved3[91 - 86]; /* 86-90 */
u8 flags; /* 91 */
u8 _reserved4[100 - 92]; /* 92-99 */
u32 rnsize2; /* 100-103 */
@@ -51,6 +53,7 @@ static int __initdata early_read_info_sccb_valid;
u64 sclp_facilities;
static u8 sclp_fac84;
+static u8 sclp_fac85;
static unsigned long long rzm;
static unsigned long long rnmax;
@@ -112,6 +115,7 @@ void __init sclp_facilities_detect(void)
sccb = &early_read_info_sccb;
sclp_facilities = sccb->facilities;
sclp_fac84 = sccb->fac84;
+ sclp_fac85 = sccb->fac85;
rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
rzm <<= 20;
@@ -127,6 +131,12 @@ unsigned long long sclp_get_rzm(void)
return rzm;
}
+u8 sclp_get_fac85(void)
+{
+ return sclp_fac85;
+}
+EXPORT_SYMBOL_GPL(sclp_get_fac85);
+
/*
* This function will be called after sclp_facilities_detect(), which gets
* called from early.c code. Therefore the sccb should have valid contents.
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h
new file mode 100644
index 000000000000..5cba37f9eae1
--- /dev/null
+++ b/include/asm-generic/kvm_para.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_GENERIC_KVM_PARA_H
+#define _ASM_GENERIC_KVM_PARA_H
+
+#ifdef __KERNEL__
+
+/*
+ * This function is used by architectures that support kvm to avoid issuing
+ * false soft lockup messages.
+ */
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+#endif /* _KERNEL__ */
+
+#endif
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 6c322a90b92f..09f2b3aa2da7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo {
__u8 pad[108];
};
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
+
+struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
+#define KVM_PPC_1T_SEGMENTS 0x00000002
+
+struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u32 pad;
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
#define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -589,6 +613,10 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_S390_UCONTROL 73
#define KVM_CAP_SYNC_REGS 74
#define KVM_CAP_PCI_2_3 75
+#define KVM_CAP_KVMCLOCK_CTRL 76
+#define KVM_CAP_SIGNAL_MSI 77
+#define KVM_CAP_PPC_GET_SMMU_INFO 78
+#define KVM_CAP_S390_COW 79
#ifdef KVM_CAP_IRQ_ROUTING
@@ -714,6 +742,14 @@ struct kvm_one_reg {
__u64 addr;
};
+struct kvm_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 flags;
+ __u8 pad[16];
+};
+
/*
* ioctls for VM fds
*/
@@ -788,6 +824,10 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PCI_2_3 */
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
struct kvm_assigned_pci_dev)
+/* Available with KVM_CAP_SIGNAL_MSI */
+#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
+/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
+#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
/*
* ioctls for vcpu fds
@@ -859,6 +899,8 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_ONE_REG */
#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg)
#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg)
+/* VM is being stopped by host */
+#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 72cbf08d45fb..c4464356b35b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,20 @@
#endif
/*
+ * If we support unaligned MMIO, at most one fragment will be split into two:
+ */
+#ifdef KVM_UNALIGNED_MMIO
+# define KVM_EXTRA_MMIO_FRAGMENTS 1
+#else
+# define KVM_EXTRA_MMIO_FRAGMENTS 0
+#endif
+
+#define KVM_USER_MMIO_SIZE 8
+
+#define KVM_MAX_MMIO_FRAGMENTS \
+ (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
+
+/*
* vcpu->requests bit members
*/
#define KVM_REQ_TLB_FLUSH 0
@@ -68,10 +82,11 @@ struct kvm_io_range {
struct kvm_io_device *dev;
};
+#define NR_IOBUS_DEVS 1000
+
struct kvm_io_bus {
int dev_count;
-#define NR_IOBUS_DEVS 300
- struct kvm_io_range range[NR_IOBUS_DEVS];
+ struct kvm_io_range range[];
};
enum kvm_bus {
@@ -113,7 +128,18 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
- EXITING_GUEST_MODE
+ EXITING_GUEST_MODE,
+ READING_SHADOW_PAGE_TABLES,
+};
+
+/*
+ * Sometimes a large or cross-page mmio needs to be broken up into separate
+ * exits for userspace servicing.
+ */
+struct kvm_mmio_fragment {
+ gpa_t gpa;
+ void *data;
+ unsigned len;
};
struct kvm_vcpu {
@@ -143,10 +169,9 @@ struct kvm_vcpu {
int mmio_needed;
int mmio_read_completed;
int mmio_is_write;
- int mmio_size;
- int mmio_index;
- unsigned char mmio_data[KVM_MMIO_SIZE];
- gpa_t mmio_phys_addr;
+ int mmio_cur_fragment;
+ int mmio_nr_fragments;
+ struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
#endif
#ifdef CONFIG_KVM_ASYNC_PF
@@ -178,8 +203,6 @@ struct kvm_memory_slot {
unsigned long flags;
unsigned long *rmap;
unsigned long *dirty_bitmap;
- unsigned long *dirty_bitmap_head;
- unsigned long nr_dirty_pages;
struct kvm_arch_memory_slot arch;
unsigned long userspace_addr;
int user_alloc;
@@ -438,6 +461,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
@@ -506,6 +531,7 @@ int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
void kvm_free_physmem(struct kvm *kvm);
@@ -521,6 +547,15 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif
+static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+{
+#ifdef __KVM_HAVE_ARCH_WQP
+ return vcpu->arch.wqp;
+#else
+ return &vcpu->wq;
+#endif
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
@@ -769,6 +804,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned flags);
void kvm_free_irq_routing(struct kvm *kvm);
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
#else
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index df30ee08bdd4..e5e1d85b8c7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,6 +24,7 @@
#include <linux/sysctl.h>
#include <asm/irq_regs.h>
+#include <linux/kvm_para.h>
#include <linux/perf_event.h>
int watchdog_enabled = 1;
@@ -280,6 +281,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
+
+ /* Clear the guest paused flag on watchdog reset */
+ kvm_check_and_clear_guest_paused();
__touch_watchdog();
return HRTIMER_RESTART;
}
@@ -292,6 +296,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
+ /*
+ * If a virtual machine is stopped by the host it can look to
+ * the watchdog like a soft lockup, check to see if the host
+ * stopped the vm before we issue the warning
+ */
+ if (kvm_check_and_clear_guest_paused())
+ return HRTIMER_RESTART;
+
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index f63ccb0a5982..28694f4a9139 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -18,3 +18,6 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
+
+config HAVE_KVM_MSI
+ bool
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index dcaf272c26c0..26fd54dc459e 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -254,13 +254,17 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
}
}
+bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ smp_rmb();
+ return test_bit(vector, ioapic->handled_vectors);
+}
+
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
- smp_rmb();
- if (!test_bit(vector, ioapic->handled_vectors))
- return;
spin_lock(&ioapic->lock);
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
spin_unlock(&ioapic->lock);
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0b190c34ccc3..32872a09b63f 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -71,6 +71,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
+bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9f614b4e365f..a6a0365475ed 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -138,6 +138,20 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
}
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+ struct kvm_kernel_irq_routing_entry route;
+
+ if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+ return -EINVAL;
+
+ route.msi.address_lo = msi->address_lo;
+ route.msi.address_hi = msi->address_hi;
+ route.msi.data = msi->data;
+
+ return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+}
+
/*
* Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9739b533ca2e..7e140683ff14 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -522,12 +522,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
return;
if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
- vfree(memslot->dirty_bitmap_head);
+ vfree(memslot->dirty_bitmap);
else
- kfree(memslot->dirty_bitmap_head);
+ kfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL;
- memslot->dirty_bitmap_head = NULL;
}
/*
@@ -611,8 +610,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
/*
* Allocation size is twice as large as the actual dirty bitmap size.
- * This makes it possible to do double buffering: see x86's
- * kvm_vm_ioctl_get_dirty_log().
+ * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed.
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
@@ -627,8 +625,6 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
if (!memslot->dirty_bitmap)
return -ENOMEM;
- memslot->dirty_bitmap_head = memslot->dirty_bitmap;
- memslot->nr_dirty_pages = 0;
#endif /* !CONFIG_S390 */
return 0;
}
@@ -1477,8 +1473,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
- if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
- memslot->nr_dirty_pages++;
+ /* TODO: introduce set_bit_le() and use it */
+ test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
}
}
@@ -1515,6 +1511,30 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
finish_wait(&vcpu->wq, &wait);
}
+#ifndef CONFIG_S390
+/*
+ * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int me;
+ int cpu = vcpu->cpu;
+ wait_queue_head_t *wqp;
+
+ wqp = kvm_arch_vcpu_wq(vcpu);
+ if (waitqueue_active(wqp)) {
+ wake_up_interruptible(wqp);
+ ++vcpu->stat.halt_wakeup;
+ }
+
+ me = get_cpu();
+ if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+ if (kvm_arch_vcpu_should_kick(vcpu))
+ smp_send_reschedule(cpu);
+ put_cpu();
+}
+#endif /* !CONFIG_S390 */
+
void kvm_resched(struct kvm_vcpu *vcpu)
{
if (!need_resched())
@@ -1523,6 +1543,31 @@ void kvm_resched(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_resched);
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+{
+ struct pid *pid;
+ struct task_struct *task = NULL;
+
+ rcu_read_lock();
+ pid = rcu_dereference(target->pid);
+ if (pid)
+ task = get_pid_task(target->pid, PIDTYPE_PID);
+ rcu_read_unlock();
+ if (!task)
+ return false;
+ if (task->flags & PF_VCPU) {
+ put_task_struct(task);
+ return false;
+ }
+ if (yield_to(task, 1)) {
+ put_task_struct(task);
+ return true;
+ }
+ put_task_struct(task);
+ return false;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
struct kvm *kvm = me->kvm;
@@ -1541,8 +1586,6 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
*/
for (pass = 0; pass < 2 && !yielded; pass++) {
kvm_for_each_vcpu(i, vcpu, kvm) {
- struct task_struct *task = NULL;
- struct pid *pid;
if (!pass && i < last_boosted_vcpu) {
i = last_boosted_vcpu;
continue;
@@ -1552,24 +1595,11 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (waitqueue_active(&vcpu->wq))
continue;
- rcu_read_lock();
- pid = rcu_dereference(vcpu->pid);
- if (pid)
- task = get_pid_task(vcpu->pid, PIDTYPE_PID);
- rcu_read_unlock();
- if (!task)
- continue;
- if (task->flags & PF_VCPU) {
- put_task_struct(task);
- continue;
- }
- if (yield_to(task, 1)) {
- put_task_struct(task);
+ if (kvm_vcpu_yield_to(vcpu)) {
kvm->last_boosted_vcpu = i;
yielded = 1;
break;
}
- put_task_struct(task);
}
}
}
@@ -2040,6 +2070,17 @@ static long kvm_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->lock);
break;
#endif
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_SIGNAL_MSI: {
+ struct kvm_msi msi;
+
+ r = -EFAULT;
+ if (copy_from_user(&msi, argp, sizeof msi))
+ goto out;
+ r = kvm_send_userspace_msi(kvm, &msi);
+ break;
+ }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@@ -2168,6 +2209,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
case KVM_CAP_SET_BOOT_CPU_ID:
#endif
case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_CAP_SIGNAL_MSI:
+#endif
return 1;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
case KVM_CAP_IRQ_ROUTING:
@@ -2394,9 +2438,6 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
gpa_t addr, int len)
{
- if (bus->dev_count == NR_IOBUS_DEVS)
- return -ENOSPC;
-
bus->range[bus->dev_count++] = (struct kvm_io_range) {
.addr = addr,
.len = len,
@@ -2496,12 +2537,15 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- if (bus->dev_count > NR_IOBUS_DEVS-1)
+ if (bus->dev_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
- new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL);
+ new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+ sizeof(struct kvm_io_range)), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
+ memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
+ sizeof(struct kvm_io_range)));
kvm_io_bus_insert_dev(new_bus, dev, addr, len);
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
@@ -2518,27 +2562,25 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
-
- new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
-
r = -ENOENT;
- for (i = 0; i < new_bus->dev_count; i++)
- if (new_bus->range[i].dev == dev) {
+ for (i = 0; i < bus->dev_count; i++)
+ if (bus->range[i].dev == dev) {
r = 0;
- new_bus->dev_count--;
- new_bus->range[i] = new_bus->range[new_bus->dev_count];
- sort(new_bus->range, new_bus->dev_count,
- sizeof(struct kvm_io_range),
- kvm_io_bus_sort_cmp, NULL);
break;
}
- if (r) {
- kfree(new_bus);
+ if (r)
return r;
- }
+
+ new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+ sizeof(struct kvm_io_range)), GFP_KERNEL);
+ if (!new_bus)
+ return -ENOMEM;
+
+ memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+ new_bus->dev_count--;
+ memcpy(new_bus->range + i, bus->range + i + 1,
+ (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);