From 24294b9a3fbe00289c039fb3e80087be66b8c415 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 17 May 2011 18:26:00 -0500 Subject: KVM: PPC: fix partial application of "exit timing in ticks" When http://www.spinics.net/lists/kvm-ppc/msg02664.html was applied to produce commit b51e7aa7ed6d8d134d02df78300ab0f91cfff4d2, the removal of the conversion in add_exit_timing was left out. Signed-off-by: Stuart Yoder Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/timing.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 319177df9587..07b6110a4bb7 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -56,15 +56,6 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) { u64 old; - do_div(duration, tb_ticks_per_usec); - if (unlikely(duration > 0xFFFFFFFF)) { - printk(KERN_ERR"%s - duration too big -> overflow" - " duration %lld type %d exit #%d\n", - __func__, duration, type, - vcpu->arch.timing_count_type[type]); - return; - } - mutex_lock(&vcpu->arch.exit_timing_lock); vcpu->arch.timing_count_type[type]++; -- cgit v1.2.3 From a22a2daccfa3ade5cdd9ef1e8a05cf1e6ffca42b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 7 Jun 2011 20:45:34 +0200 Subject: KVM: PPC: Resolve real-mode handlers through function exports Up until now, Book3S KVM had variables stored in the kernel that a kernel module or the kvm code in the kernel could read from to figure out where some real mode helper functions are located. This is all unnecessary. The high bits of the EA get ignore in real mode, so we can just use the pointer as is. Also, it's a lot easier on relocations when we use the normal way of resolving the address to a function, instead of jumping through hoops. This patch fixes compilation with CONFIG_RELOCATABLE=y. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 4 ++-- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/book3s_exports.c | 4 ++-- arch/powerpc/kvm/book3s_rmhandlers.S | 8 -------- 4 files changed, 6 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index d62e703f1214..70c409b1d931 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -133,8 +133,8 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -extern ulong kvmppc_trampoline_lowmem; -extern ulong kvmppc_trampoline_enter; +extern void kvmppc_handler_lowmem_trampoline(void); +extern void kvmppc_handler_trampoline_enter(void); extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 0f95b5cce033..73fdab8a567f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1342,8 +1342,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu_book3s->slb_nr = 64; /* remember where some real-mode handlers are */ - vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; - vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; + vcpu->arch.trampoline_lowmem = (ulong)kvmppc_handler_lowmem_trampoline; + vcpu->arch.trampoline_enter = (ulong)kvmppc_handler_trampoline_enter; vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index 1dd5a1ddfd0d..f94fd9a4c69a 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -20,8 +20,8 @@ #include #include -EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter); -EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem); +EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter); +EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline); EXPORT_SYMBOL_GPL(kvmppc_rmcall); EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 1a1b34487e71..5f5d07475c91 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -251,12 +251,4 @@ define_load_up(altivec) define_load_up(vsx) #endif -.global kvmppc_trampoline_lowmem -kvmppc_trampoline_lowmem: - PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START - -.global kvmppc_trampoline_enter -kvmppc_trampoline_enter: - PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START - #include "book3s_segment.S" -- cgit v1.2.3 From 685659ee70db0bac47ffd619c726cf600e504fd7 Mon Sep 17 00:00:00 2001 From: yu liu Date: Tue, 14 Jun 2011 18:34:25 -0500 Subject: powerpc/e500: Save SPEFCSR in flush_spe_to_thread() giveup_spe() saves the SPE state which is protected by MSR[SPE]. However, modifying SPEFSCR does not trap when MSR[SPE]=0. And since SPEFSCR is already saved/restored in _switch(), not all the callers want to save SPEFSCR again. Thus, saving SPEFSCR should not belong to giveup_spe(). This patch moves SPEFSCR saving to flush_spe_to_thread(), and cleans up the caller that needs to save SPEFSCR accordingly. Signed-off-by: Liu Yu Acked-by: Kumar Gala Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/head_fsl_booke.S | 2 -- arch/powerpc/kernel/process.c | 1 + arch/powerpc/kernel/traps.c | 5 +---- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 5ecf54cfa7d4..aede4f87b682 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -792,8 +792,6 @@ _GLOBAL(giveup_spe) evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ li r4,THREAD_ACC evstddx evr6, r4, r3 /* save off accumulator */ - mfspr r6,SPRN_SPEFSCR - stw r6,THREAD_SPEFSCR(r3) /* save spefscr register value */ beq 1f lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) lis r3,MSR_SPE@h diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 91e52df3d81d..60ac2a9251db 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -213,6 +213,7 @@ void flush_spe_to_thread(struct task_struct *tsk) #ifdef CONFIG_SMP BUG_ON(tsk != current); #endif + tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); giveup_spe(tsk); } preempt_enable(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1a0141426cda..f19d9777d3c1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1387,10 +1387,7 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = 0; int err; - preempt_disable(); - if (regs->msr & MSR_SPE) - giveup_spe(current); - preempt_enable(); + flush_spe_to_thread(current); spefscr = current->thread.spefscr; fpexc_mode = current->thread.fpexc_mode; -- cgit v1.2.3 From c51584d52e3878aa9b2bb98cdfb87173e7acf560 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:27 -0500 Subject: powerpc/e500: SPE register saving: take arbitrary struct offset Previously, these macros hardcoded THREAD_EVR0 as the base of the save area, relative to the base register passed. This base offset is now passed as a separate macro parameter, allowing reuse with other SPE save areas, such as used by KVM. Acked-by: Kumar Gala Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/ppc_asm.h | 28 ++++++++++++++++------------ arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 1b422381fc16..368f72f79808 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -150,18 +150,22 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define REST_16VSRSU(n,b,base) REST_8VSRSU(n,b,base); REST_8VSRSU(n+8,b,base) #define REST_32VSRSU(n,b,base) REST_16VSRSU(n,b,base); REST_16VSRSU(n+16,b,base) -#define SAVE_EVR(n,s,base) evmergehi s,s,n; stw s,THREAD_EVR0+4*(n)(base) -#define SAVE_2EVRS(n,s,base) SAVE_EVR(n,s,base); SAVE_EVR(n+1,s,base) -#define SAVE_4EVRS(n,s,base) SAVE_2EVRS(n,s,base); SAVE_2EVRS(n+2,s,base) -#define SAVE_8EVRS(n,s,base) SAVE_4EVRS(n,s,base); SAVE_4EVRS(n+4,s,base) -#define SAVE_16EVRS(n,s,base) SAVE_8EVRS(n,s,base); SAVE_8EVRS(n+8,s,base) -#define SAVE_32EVRS(n,s,base) SAVE_16EVRS(n,s,base); SAVE_16EVRS(n+16,s,base) -#define REST_EVR(n,s,base) lwz s,THREAD_EVR0+4*(n)(base); evmergelo n,s,n -#define REST_2EVRS(n,s,base) REST_EVR(n,s,base); REST_EVR(n+1,s,base) -#define REST_4EVRS(n,s,base) REST_2EVRS(n,s,base); REST_2EVRS(n+2,s,base) -#define REST_8EVRS(n,s,base) REST_4EVRS(n,s,base); REST_4EVRS(n+4,s,base) -#define REST_16EVRS(n,s,base) REST_8EVRS(n,s,base); REST_8EVRS(n+8,s,base) -#define REST_32EVRS(n,s,base) REST_16EVRS(n,s,base); REST_16EVRS(n+16,s,base) +/* + * b = base register for addressing, o = base offset from register of 1st EVR + * n = first EVR, s = scratch + */ +#define SAVE_EVR(n,s,b,o) evmergehi s,s,n; stw s,o+4*(n)(b) +#define SAVE_2EVRS(n,s,b,o) SAVE_EVR(n,s,b,o); SAVE_EVR(n+1,s,b,o) +#define SAVE_4EVRS(n,s,b,o) SAVE_2EVRS(n,s,b,o); SAVE_2EVRS(n+2,s,b,o) +#define SAVE_8EVRS(n,s,b,o) SAVE_4EVRS(n,s,b,o); SAVE_4EVRS(n+4,s,b,o) +#define SAVE_16EVRS(n,s,b,o) SAVE_8EVRS(n,s,b,o); SAVE_8EVRS(n+8,s,b,o) +#define SAVE_32EVRS(n,s,b,o) SAVE_16EVRS(n,s,b,o); SAVE_16EVRS(n+16,s,b,o) +#define REST_EVR(n,s,b,o) lwz s,o+4*(n)(b); evmergelo n,s,n +#define REST_2EVRS(n,s,b,o) REST_EVR(n,s,b,o); REST_EVR(n+1,s,b,o) +#define REST_4EVRS(n,s,b,o) REST_2EVRS(n,s,b,o); REST_2EVRS(n+2,s,b,o) +#define REST_8EVRS(n,s,b,o) REST_4EVRS(n,s,b,o); REST_4EVRS(n+4,s,b,o) +#define REST_16EVRS(n,s,b,o) REST_8EVRS(n,s,b,o); REST_8EVRS(n+8,s,b,o) +#define REST_32EVRS(n,s,b,o) REST_16EVRS(n,s,b,o); REST_16EVRS(n+16,s,b,o) /* Macros to adjust thread priority for hardware multithreading */ #define HMT_VERY_LOW or 31,31,31 # very low priority diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index aede4f87b682..fe37dd0dfd17 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -656,7 +656,7 @@ load_up_spe: cmpi 0,r4,0 beq 1f addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ - SAVE_32EVRS(0,r10,r4) + SAVE_32EVRS(0,r10,r4,THREAD_EVR0) evxor evr10, evr10, evr10 /* clear out evr10 */ evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ li r5,THREAD_ACC @@ -676,7 +676,7 @@ load_up_spe: stw r4,THREAD_USED_SPE(r5) evlddx evr4,r10,r5 evmra evr4,evr4 - REST_32EVRS(0,r10,r5) + REST_32EVRS(0,r10,r5,THREAD_EVR0) #ifndef CONFIG_SMP subi r4,r5,THREAD stw r4,last_task_used_spe@l(r3) @@ -787,7 +787,7 @@ _GLOBAL(giveup_spe) addi r3,r3,THREAD /* want THREAD of task */ lwz r5,PT_REGS(r3) cmpi 0,r5,0 - SAVE_32EVRS(0, r4, r3) + SAVE_32EVRS(0, r4, r3, THREAD_EVR0) evxor evr6, evr6, evr6 /* clear out evr6 */ evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ li r4,THREAD_ACC -- cgit v1.2.3 From ecee273fc48f7f48f0c2f074335c43aaa790c308 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:29 -0500 Subject: KVM: PPC: booke: use shadow_msr Keep the guest MSR and the guest-mode true MSR separate, rather than modifying the guest MSR on each guest entry to produce a true MSR. Any bits which should be modified based on guest MSR must be explicitly propagated from vcpu->arch.shared->msr to vcpu->arch.shadow_msr in kvmppc_set_msr(). While we're modifying the guest entry code, reorder a few instructions to bury some load latencies. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/booke.c | 1 + arch/powerpc/kvm/booke_interrupts.S | 17 ++++++----------- 4 files changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 186f150b9b89..12cb1807e8d7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -219,12 +219,12 @@ struct kvm_vcpu_arch { #endif #ifdef CONFIG_PPC_BOOK3S - ulong shadow_msr; ulong hflags; ulong guest_owned_ext; #endif u32 vrsave; /* also USPRG0 */ u32 mmucr; + ulong shadow_msr; ulong sprg4; ulong sprg5; ulong sprg6; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 36e1c8a29be8..25de8e4808a4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -404,12 +404,12 @@ int main(void) DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); - DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8462b3a1c1c7..05cedb5f8210 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -514,6 +514,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.pc = 0; vcpu->arch.shared->msr = 0; + vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ vcpu->arch.shadow_pid = 1; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index b58ccae95904..55410cc45ad7 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -24,8 +24,6 @@ #include #include -#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS) - #define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) /* The host stack layout: */ @@ -405,20 +403,17 @@ lightweight_exit: /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) + lwz r5, VCPU_CR(r4) + lwz r6, VCPU_PC(r4) + lwz r7, VCPU_SHADOW_MSR(r4) mtctr r3 - lwz r3, VCPU_CR(r4) - mtcr r3 + mtcr r5 + mtsrr0 r6 + mtsrr1 r7 lwz r5, VCPU_GPR(r5)(r4) lwz r6, VCPU_GPR(r6)(r4) lwz r7, VCPU_GPR(r7)(r4) lwz r8, VCPU_GPR(r8)(r4) - lwz r3, VCPU_PC(r4) - mtsrr0 r3 - lwz r3, VCPU_SHARED(r4) - lwz r3, (VCPU_SHARED_MSR + 4)(r3) - oris r3, r3, KVMPPC_MSR_MASK@h - ori r3, r3, KVMPPC_MSR_MASK@l - mtsrr1 r3 /* Clear any debug events which occurred since we disabled MSR[DE]. * XXX This gives us a 3-instruction window in which a breakpoint -- cgit v1.2.3 From 4cd35f675ba41a99a477e28a6add4a66833325f2 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:31 -0500 Subject: KVM: PPC: e500: Save/restore SPE state This is done lazily. The SPE save will be done only if the guest has used SPE since the last preemption or heavyweight exit. Restore will be done only on demand, when enabling MSR_SPE in the shadow MSR, in response to an SPE fault or mtmsr emulation. For SPEFSCR, Linux already switches it on context switch (non-lazily), so the only remaining bit is to save it between qemu and the guest. Signed-off-by: Liu Yu Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 6 +++ arch/powerpc/include/asm/reg_booke.h | 1 + arch/powerpc/kernel/asm-offsets.c | 7 +++ arch/powerpc/kvm/booke.c | 84 +++++++++++++++++++++++++++++++++++- arch/powerpc/kvm/booke.h | 22 ++++------ arch/powerpc/kvm/booke_interrupts.S | 38 ++++++++++++++++ arch/powerpc/kvm/e500.c | 7 ++- 7 files changed, 148 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 12cb1807e8d7..c4ce1054b866 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -195,6 +195,12 @@ struct kvm_vcpu_arch { u64 fpr[32]; u64 fpscr; +#ifdef CONFIG_SPE + ulong evr[32]; + ulong spefscr; + ulong host_spefscr; + u64 acc; +#endif #ifdef CONFIG_ALTIVEC vector128 vr[32]; vector128 vscr; diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 0f0ad9fa01c1..9ec0b39f9ddc 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -318,6 +318,7 @@ #define ESR_ILK 0x00100000 /* Instr. Cache Locking */ #define ESR_PUO 0x00040000 /* Unimplemented Operation exception */ #define ESR_BO 0x00020000 /* Byte Ordering */ +#define ESR_SPV 0x00000080 /* Signal Processing operation */ /* Bit definitions related to the DBCR0. */ #if defined(CONFIG_40x) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 25de8e4808a4..ecd2b3ad7ff6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -497,6 +497,13 @@ int main(void) DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); #endif +#if defined(CONFIG_KVM) && defined(CONFIG_SPE) + DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0])); + DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc)); + DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr)); + DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); +#endif + #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu)); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 05cedb5f8210..0ecbecb2f7cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -13,6 +13,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 + * Copyright 2010-2011 Freescale Semiconductor, Inc. * * Authors: Hollis Blanchard * Christian Ehrhardt @@ -78,6 +79,57 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) } } +#ifdef CONFIG_SPE +void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + enable_kernel_spe(); + kvmppc_save_guest_spe(vcpu); + vcpu->arch.shadow_msr &= ~MSR_SPE; + preempt_enable(); +} + +static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + enable_kernel_spe(); + kvmppc_load_guest_spe(vcpu); + vcpu->arch.shadow_msr |= MSR_SPE; + preempt_enable(); +} + +static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.shared->msr & MSR_SPE) { + if (!(vcpu->arch.shadow_msr & MSR_SPE)) + kvmppc_vcpu_enable_spe(vcpu); + } else if (vcpu->arch.shadow_msr & MSR_SPE) { + kvmppc_vcpu_disable_spe(vcpu); + } +} +#else +static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) +{ +} +#endif + +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.shared->msr = new_msr; + + if (vcpu->arch.shared->msr & MSR_WE) { + kvm_vcpu_block(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); + }; + + kvmppc_vcpu_sync_spe(vcpu); +} + static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) { @@ -344,10 +396,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; - case BOOKE_INTERRUPT_SPE_UNAVAIL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL); +#ifdef CONFIG_SPE + case BOOKE_INTERRUPT_SPE_UNAVAIL: { + if (vcpu->arch.shared->msr & MSR_SPE) + kvmppc_vcpu_enable_spe(vcpu); + else + kvmppc_booke_queue_irqprio(vcpu, + BOOKE_IRQPRIO_SPE_UNAVAIL); r = RESUME_GUEST; break; + } case BOOKE_INTERRUPT_SPE_FP_DATA: kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA); @@ -358,6 +416,28 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); r = RESUME_GUEST; break; +#else + case BOOKE_INTERRUPT_SPE_UNAVAIL: + /* + * Guest wants SPE, but host kernel doesn't support it. Send + * an "unimplemented operation" program check to the guest. + */ + kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV); + r = RESUME_GUEST; + break; + + /* + * These really should never happen without CONFIG_SPE, + * as we should never enable the real MSR[SPE] in the guest. + */ + case BOOKE_INTERRUPT_SPE_FP_DATA: + case BOOKE_INTERRUPT_SPE_FP_ROUND: + printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n", + __func__, exit_nr, vcpu->arch.pc); + run->hw.hardware_exit_reason = exit_nr; + r = RESUME_HOST; + break; +#endif case BOOKE_INTERRUPT_DATA_STORAGE: kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear, diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 492bb7030358..0fa1732ddcb7 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -52,24 +52,18 @@ extern unsigned long kvmppc_booke_handlers; -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. */ -static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) -{ - if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - - vcpu->arch.shared->msr = new_msr; - - if (vcpu->arch.shared->msr & MSR_WE) { - kvm_vcpu_block(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); - }; -} +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); +/* low-level asm code to transfer guest state */ +void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu); +void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu); + +/* high-level function, manages flags, host state */ +void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); + #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 55410cc45ad7..8cb3dfe29f75 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -13,6 +13,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. * * Authors: Hollis Blanchard */ @@ -239,6 +240,14 @@ _GLOBAL(kvmppc_resume_host) heavyweight_exit: /* Not returning to guest. */ +#ifdef CONFIG_SPE + /* save guest SPEFSCR and load host SPEFSCR */ + mfspr r9, SPRN_SPEFSCR + stw r9, VCPU_SPEFSCR(r4) + lwz r9, VCPU_HOST_SPEFSCR(r4) + mtspr SPRN_SPEFSCR, r9 +#endif + /* We already saved guest volatile register state; now save the * non-volatiles. */ stw r15, VCPU_GPR(r15)(r4) @@ -340,6 +349,14 @@ _GLOBAL(__kvmppc_vcpu_run) lwz r30, VCPU_GPR(r30)(r4) lwz r31, VCPU_GPR(r31)(r4) +#ifdef CONFIG_SPE + /* save host SPEFSCR and load guest SPEFSCR */ + mfspr r3, SPRN_SPEFSCR + stw r3, VCPU_HOST_SPEFSCR(r4) + lwz r3, VCPU_SPEFSCR(r4) + mtspr SPRN_SPEFSCR, r3 +#endif + lightweight_exit: stw r2, HOST_R2(r1) @@ -425,3 +442,24 @@ lightweight_exit: lwz r3, VCPU_GPR(r3)(r4) lwz r4, VCPU_GPR(r4)(r4) rfi + +#ifdef CONFIG_SPE +_GLOBAL(kvmppc_save_guest_spe) + cmpi 0,r3,0 + beqlr- + SAVE_32EVRS(0, r4, r3, VCPU_EVR) + evxor evr6, evr6, evr6 + evmwumiaa evr6, evr6, evr6 + li r4,VCPU_ACC + evstddx evr6, r4, r3 /* save acc */ + blr + +_GLOBAL(kvmppc_load_guest_spe) + cmpi 0,r3,0 + beqlr- + li r4,VCPU_ACC + evlddx evr6,r4,r3 + evmra evr6,evr6 /* load acc */ + REST_32EVRS(0, r4, r3, VCPU_EVR) + blr +#endif diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 318dbc61ba44..797a7447c268 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, * @@ -41,6 +41,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_e500_tlb_put(vcpu); + +#ifdef CONFIG_SPE + if (vcpu->arch.shadow_msr & MSR_SPE) + kvmppc_vcpu_disable_spe(vcpu); +#endif } int kvmppc_core_check_processor_compat(void) -- cgit v1.2.3 From 6fc4d1eb911cced6bc103f2b36497397e99e8c43 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:34 -0500 Subject: KVM: PPC: e500: Disable preloading TLB1 in tlb_load(). Since TLB1 loading doesn't check the shadow TLB before allocating another entry, you can get duplicates. Once shadow PIDs are enabled in a later patch, we won't need to invalidate the TLB on every switch, so this optimization won't be needed anyway. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index b18fe353397d..e0ab2160932a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -144,24 +144,6 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) { - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int i; - unsigned register mas0; - - /* Load all valid TLB1 entries to reduce guest tlb miss fault */ - local_irq_disable(); - mas0 = mfspr(SPRN_MAS0); - for (i = 0; i < tlb1_max_shadow_size(); i++) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; - - if (get_tlb_v(stlbe)) { - mtspr(SPRN_MAS0, MAS0_TLBSEL(1) - | MAS0_ESEL(to_htlb1_esel(i))); - __write_host_tlbe(stlbe); - } - } - mtspr(SPRN_MAS0, mas0); - local_irq_enable(); } void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 0ef309956cecbaf6d96c31371bf393c296886fa6 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:35 -0500 Subject: KVM: PPC: e500: don't use MAS0 as intermediate storage. This avoids races. It also means that we use the shadow TLB way, rather than the hardware hint -- if this is a problem, we could do a tlbsx before inserting a TLB0 entry. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index e0ab2160932a..f1b37e0de86c 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -112,13 +112,18 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) /* * writing shadow tlb entry to host TLB */ -static inline void __write_host_tlbe(struct tlbe *stlbe) +static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0) { + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS0, mas0); mtspr(SPRN_MAS1, stlbe->mas1); mtspr(SPRN_MAS2, stlbe->mas2); mtspr(SPRN_MAS3, stlbe->mas3); mtspr(SPRN_MAS7, stlbe->mas7); - __asm__ __volatile__ ("tlbwe\n" : : ); + asm volatile("isync; tlbwe" : : : "memory"); + local_irq_restore(flags); } static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -126,20 +131,15 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, { struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - local_irq_disable(); if (tlbsel == 0) { - __write_host_tlbe(stlbe); + __write_host_tlbe(stlbe, + MAS0_TLBSEL(0) | + MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1))); } else { - unsigned register mas0; - - mas0 = mfspr(SPRN_MAS0); - - mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel))); - __write_host_tlbe(stlbe); - - mtspr(SPRN_MAS0, mas0); + __write_host_tlbe(stlbe, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(esel))); } - local_irq_enable(); } void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) -- cgit v1.2.3 From 59c1f4e35c3db6c7ea5a04503a43bcbeb98977df Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:37 -0500 Subject: KVM: PPC: e500: Eliminate shadow_pages[], and use pfns instead. This is in line with what other architectures do, and will allow us to map things other than ordinary, unreserved kernel pages -- such as dedicated devices, or large contiguous reserved regions. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_e500.h | 2 -- arch/powerpc/kvm/e500_tlb.c | 56 +++++++++++++------------------------ 2 files changed, 19 insertions(+), 39 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7a2a565f88c4..f181ad1d6045 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -34,8 +34,6 @@ struct kvmppc_vcpu_e500 { struct tlbe *guest_tlb[E500_TLB_NUM]; /* TLB that's actually used when the guest is running. */ struct tlbe *shadow_tlb[E500_TLB_NUM]; - /* Pages which are referenced in the shadow TLB. */ - struct page **shadow_pages[E500_TLB_NUM]; unsigned int guest_tlb_size[E500_TLB_NUM]; unsigned int shadow_tlb_size[E500_TLB_NUM]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index f1b37e0de86c..0291c3cf5055 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -188,17 +188,16 @@ static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - struct page *page = vcpu_e500->shadow_pages[tlbsel][esel]; + unsigned long pfn; - if (page) { - vcpu_e500->shadow_pages[tlbsel][esel] = NULL; + pfn = stlbe->mas3 >> PAGE_SHIFT; + pfn |= stlbe->mas7 << (32 - PAGE_SHIFT); - if (get_tlb_v(stlbe)) { - if (tlbe_is_writable(stlbe)) - kvm_release_page_dirty(page); - else - kvm_release_page_clean(page); - } + if (get_tlb_v(stlbe)) { + if (tlbe_is_writable(stlbe)) + kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); } } @@ -271,37 +270,36 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) { - struct page *new_page; struct tlbe *stlbe; - hpa_t hpaddr; + unsigned long pfn; stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - /* Get reference to new page. */ - new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); - if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", + /* + * Translate guest physical to true physical, acquiring + * a page reference if it is normal, non-reserved memory. + */ + pfn = gfn_to_pfn(vcpu_e500->vcpu.kvm, gfn); + if (is_error_pfn(pfn)) { + printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); - kvm_release_page_clean(new_page); + kvm_release_pfn_clean(pfn); return; } - hpaddr = page_to_phys(new_page); /* Drop reference to old page. */ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); - vcpu_e500->shadow_pages[tlbsel][esel] = new_page; - /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas3 = (hpaddr & MAS3_RPN) + stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) | e500_shadow_mas3_attrib(gtlbe->mas3, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; + stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN; trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7); @@ -712,16 +710,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (vcpu_e500->shadow_tlb[1] == NULL) goto err_out_guest1; - vcpu_e500->shadow_pages[0] = (struct page **) - kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->shadow_pages[0] == NULL) - goto err_out_shadow1; - - vcpu_e500->shadow_pages[1] = (struct page **) - kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL); - if (vcpu_e500->shadow_pages[1] == NULL) - goto err_out_page0; - /* Init TLB configuration register */ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0]; @@ -730,10 +718,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) return 0; -err_out_page0: - kfree(vcpu_e500->shadow_pages[0]); -err_out_shadow1: - kfree(vcpu_e500->shadow_tlb[1]); err_out_guest1: kfree(vcpu_e500->guest_tlb[1]); err_out_shadow0: @@ -746,8 +730,6 @@ err_out: void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { - kfree(vcpu_e500->shadow_pages[1]); - kfree(vcpu_e500->shadow_pages[0]); kfree(vcpu_e500->shadow_tlb[1]); kfree(vcpu_e500->guest_tlb[1]); kfree(vcpu_e500->shadow_tlb[0]); -- cgit v1.2.3 From 9973d54eeafcd1c3a2e89f0f59280c4c1e03e73b Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:39 -0500 Subject: KVM: PPC: e500: Support large page mappings of PFNMAP vmas. This allows large pages to be used on guest mappings backed by things like /dev/mem, resulting in a significant speedup when guest memory is mapped this way (it's useful for directly-assigned MMIO, too). This is not a substitute for hugetlbfs integration, but is useful for configurations where devices are directly assigned on chips without an IOMMU -- in these cases, we need guest physical and true physical to match, and be contiguous, so static reservation and mapping via /dev/mem is the most straightforward way to set things up. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 103 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 0291c3cf5055..7f808c52e64a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -270,28 +270,113 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) { + struct kvm_memory_slot *slot; struct tlbe *stlbe; - unsigned long pfn; + unsigned long pfn, hva; + int pfnmap = 0; + int tsize = BOOK3E_PAGESZ_4K; stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; /* * Translate guest physical to true physical, acquiring * a page reference if it is normal, non-reserved memory. + * + * gfn_to_memslot() must succeed because otherwise we wouldn't + * have gotten this far. Eventually we should just pass the slot + * pointer through from the first lookup. */ - pfn = gfn_to_pfn(vcpu_e500->vcpu.kvm, gfn); - if (is_error_pfn(pfn)) { - printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", - (long)gfn); - kvm_release_pfn_clean(pfn); - return; + slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); + hva = gfn_to_hva_memslot(slot, gfn); + + if (tlbsel == 1) { + struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); + + vma = find_vma(current->mm, hva); + if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_PFNMAP)) { + /* + * This VMA is a physically contiguous region (e.g. + * /dev/mem) that bypasses normal Linux page + * management. Find the overlap between the + * vma and the memslot. + */ + + unsigned long start, end; + unsigned long slot_start, slot_end; + + pfnmap = 1; + + start = vma->vm_pgoff; + end = start + + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + + pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. + */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end, tsize_pages; + tsize_pages = 1 << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } + + up_read(¤t->mm->mmap_sem); + } + + if (likely(!pfnmap)) { + pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); + if (is_error_pfn(pfn)) { + printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", + (long)gfn); + kvm_release_pfn_clean(pfn); + return; + } } /* Drop reference to old page. */ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); - /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K) + /* Force TS=1 IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, -- cgit v1.2.3 From a4cd8b23ac5786943202c0174c717956947db43c Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:41 -0500 Subject: KVM: PPC: e500: enable magic page This is a shared page used for paravirtualization. It is always present in the guest kernel's effective address space at the address indicated by the hypercall that enables it. The physical address specified by the hypercall is not used, as e500 does not have real mode. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/ppc-pv.txt | 8 +++++--- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/booke.c | 11 +++++++++++ arch/powerpc/kvm/e500_tlb.c | 22 +++++++++++++++++++++- arch/powerpc/kvm/powerpc.c | 3 ++- 5 files changed, 40 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt index 3ab969c59046..2b7ce190cde4 100644 --- a/Documentation/virtual/kvm/ppc-pv.txt +++ b/Documentation/virtual/kvm/ppc-pv.txt @@ -68,9 +68,11 @@ page that contains parts of supervisor visible register state. The guest can map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE. With this hypercall issued the guest always gets the magic page mapped at the -desired location in effective and physical address space. For now, we always -map the page to -4096. This way we can access it using absolute load and store -functions. The following instruction reads the first field of the magic page: +desired location. The first parameter indicates the effective address when the +MMU is enabled. The second parameter indicates the address in real mode, if +applicable to the target. For now, we always map the page to -4096. This way we +can access it using absolute load and store functions. The following +instruction reads the first field of the magic page: ld rX, -4096(0) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9345238edecf..c662f140283a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -109,6 +109,7 @@ extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); +extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); /* * Cuts out inst bits with ordering according to spec. diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 0ecbecb2f7cc..4538956daecf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -472,6 +472,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa_t gpaddr; gfn_t gfn; +#ifdef CONFIG_KVM_E500 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + kvmppc_map_magic(vcpu); + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); + r = RESUME_GUEST; + + break; + } +#endif + /* Check the guest TLB. */ gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); if (gtlb_index < 0) { diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 7f808c52e64a..c09e642ee537 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -76,7 +76,8 @@ static inline unsigned int tlb0_get_next_victim( static inline unsigned int tlb1_max_shadow_size(void) { - return tlb1_entry_num - tlbcam_index; + /* reserve one entry for magic page */ + return tlb1_entry_num - tlbcam_index - 1; } static inline int tlbe_is_writable(struct tlbe *tlbe) @@ -142,6 +143,25 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, } } +void kvmppc_map_magic(struct kvm_vcpu *vcpu) +{ + struct tlbe magic; + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + + magic.mas1 = MAS1_VALID | MAS1_TS | + MAS1_TSIZE(BOOK3E_PAGESZ_4K); + magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; + magic.mas3 = (pfn << PAGE_SHIFT) | + MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; + magic.mas7 = pfn >> (32 - PAGE_SHIFT); + + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); +} + void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 616dd516ca1f..24e2b64b6a48 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -73,7 +73,8 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } case HC_VENDOR_KVM | KVM_HC_FEATURES: r = HC_EV_SUCCESS; -#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */ +#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500) + /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif -- cgit v1.2.3 From 08b7fa92b9250eab0f493f7721977e781a887b3d Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Tue, 14 Jun 2011 18:34:59 -0500 Subject: KVM: PPC: e500: Stop keeping shadow TLB Instead of a fully separate set of TLB entries, keep just the pfn and dirty status. Signed-off-by: Liu Yu Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_e500.h | 20 ++- arch/powerpc/kvm/e500_tlb.c | 317 ++++++++++++++++-------------------- 2 files changed, 154 insertions(+), 183 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index f181ad1d6045..4a6d77a8b8a0 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -29,15 +29,23 @@ struct tlbe{ u32 mas7; }; +#define E500_TLB_VALID 1 +#define E500_TLB_DIRTY 2 + +struct tlbe_priv { + pfn_t pfn; + unsigned int flags; /* E500_TLB_* */ +}; + struct kvmppc_vcpu_e500 { /* Unmodified copy of the guest's TLB. */ - struct tlbe *guest_tlb[E500_TLB_NUM]; - /* TLB that's actually used when the guest is running. */ - struct tlbe *shadow_tlb[E500_TLB_NUM]; + struct tlbe *gtlb_arch[E500_TLB_NUM]; + + /* KVM internal information associated with each guest TLB entry */ + struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; - unsigned int guest_tlb_size[E500_TLB_NUM]; - unsigned int shadow_tlb_size[E500_TLB_NUM]; - unsigned int guest_tlb_nv[E500_TLB_NUM]; + unsigned int gtlb_size[E500_TLB_NUM]; + unsigned int gtlb_nv[E500_TLB_NUM]; u32 host_pid[E500_PID_NUM]; u32 pid[E500_PID_NUM]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c09e642ee537..9d1e28d443c4 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -41,25 +41,14 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) for (tlbsel = 0; tlbsel < 2; tlbsel++) { printk("Guest TLB%d:\n", tlbsel); - for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { - tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; + for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { + tlbe = &vcpu_e500->gtlb_arch[tlbsel][i]; if (tlbe->mas1 & MAS1_VALID) printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", tlbsel, i, tlbe->mas1, tlbe->mas2, tlbe->mas3, tlbe->mas7); } } - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - printk("Shadow TLB%d:\n", tlbsel); - for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) { - tlbe = &vcpu_e500->shadow_tlb[tlbsel][i]; - if (tlbe->mas1 & MAS1_VALID) - printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n", - tlbsel, i, tlbe->mas1, tlbe->mas2, - tlbe->mas3, tlbe->mas7); - } - } } static inline unsigned int tlb0_get_next_victim( @@ -67,9 +56,9 @@ static inline unsigned int tlb0_get_next_victim( { unsigned int victim; - victim = vcpu_e500->guest_tlb_nv[0]++; - if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) - vcpu_e500->guest_tlb_nv[0] = 0; + victim = vcpu_e500->gtlb_nv[0]++; + if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) + vcpu_e500->gtlb_nv[0] = 0; return victim; } @@ -128,10 +117,8 @@ static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0) } static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) + int tlbsel, int esel, struct tlbe *stlbe) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - if (tlbsel == 0) { __write_host_tlbe(stlbe, MAS0_TLBSEL(0) | @@ -141,6 +128,8 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel))); } + trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, + stlbe->mas3, stlbe->mas7); } void kvmppc_map_magic(struct kvm_vcpu *vcpu) @@ -178,8 +167,8 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, int i; /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { - struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; + for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { + struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -204,60 +193,33 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, return -1; } -static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) +static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv, + struct tlbe *gtlbe, + pfn_t pfn) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; - unsigned long pfn; - - pfn = stlbe->mas3 >> PAGE_SHIFT; - pfn |= stlbe->mas7 << (32 - PAGE_SHIFT); + priv->pfn = pfn; + priv->flags = E500_TLB_VALID; - if (get_tlb_v(stlbe)) { - if (tlbe_is_writable(stlbe)) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); - } + if (tlbe_is_writable(gtlbe)) + priv->flags |= E500_TLB_DIRTY; } -static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) +static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + if (priv->flags & E500_TLB_VALID) { + if (priv->flags & E500_TLB_DIRTY) + kvm_release_pfn_dirty(priv->pfn); + else + kvm_release_pfn_clean(priv->pfn); - kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); - stlbe->mas1 = 0; - trace_kvm_stlb_inval(index_of(tlbsel, esel)); + priv->flags = 0; + } } static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - gva_t eaddr, gva_t eend, u32 tid) + int esel) { - unsigned int pid = tid & 0xff; - unsigned int i; - - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) { - struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; - unsigned int tid; - - if (!get_tlb_v(stlbe)) - continue; - - if (eend < get_tlb_eaddr(stlbe)) - continue; - - if (eaddr > get_tlb_end(stlbe)) - continue; - - tid = get_tlb_tid(stlbe); - if (tid && (tid != pid)) - continue; - - kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); - write_host_tlbe(vcpu_e500, 1, i); - } + mtspr(SPRN_MMUCSR0, MMUCSR0_TLB1FI); } static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, @@ -274,7 +236,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, tsized = (vcpu_e500->mas4 >> 7) & 0x1f; vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) - | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) | MAS1_TID(vcpu_e500->pid[pidsel]) | MAS1_TSIZE(tsized); @@ -287,16 +249,35 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, vcpu_e500->mas7 = 0; } +static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct tlbe *gtlbe, int tsize, + struct tlbe_priv *priv, + u64 gvaddr, struct tlbe *stlbe) +{ + pfn_t pfn = priv->pfn; + + /* Force TS=1 IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) + | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) + | e500_shadow_mas2_attrib(gtlbe->mas2, + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); + stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) + | e500_shadow_mas3_attrib(gtlbe->mas3, + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); + stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN; +} + + static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) + u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel, + struct tlbe *stlbe) { struct kvm_memory_slot *slot; - struct tlbe *stlbe; unsigned long pfn, hva; int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; - - stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + struct tlbe_priv *priv; /* * Translate guest physical to true physical, acquiring @@ -392,35 +373,25 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } } - /* Drop reference to old page. */ - kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); - - /* Force TS=1 IPROT=0 for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(tsize) - | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) - | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) - | e500_shadow_mas3_attrib(gtlbe->mas3, - vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN; + /* Drop old priv and setup new one. */ + priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; + kvmppc_e500_priv_release(priv); + kvmppc_e500_priv_setup(priv, gtlbe, pfn); - trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe); } /* XXX only map the one-one case, for now use TLB0 */ -static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) +static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, + int esel, struct tlbe *stlbe) { struct tlbe *gtlbe; - gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + gtlbe = &vcpu_e500->gtlb_arch[0][esel]; kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), get_tlb_raddr(gtlbe) >> PAGE_SHIFT, - gtlbe, tlbsel, esel); + gtlbe, 0, esel, stlbe); return esel; } @@ -429,16 +400,16 @@ static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500, * the shadow TLB. */ /* XXX for both one-one and one-to-many , for now use TLB1 */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe) + u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe) { unsigned int victim; - victim = vcpu_e500->guest_tlb_nv[1]++; + victim = vcpu_e500->gtlb_nv[1]++; - if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size())) - vcpu_e500->guest_tlb_nv[1] = 0; + if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size())) + vcpu_e500->gtlb_nv[1] = 0; - kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim); + kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe); return victim; } @@ -449,33 +420,19 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) { if (usermode) { - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < tlb1_max_shadow_size(); i++) - kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); - _tlbil_all(); } } -static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) +static inline int kvmppc_e500_gtlbe_invalidate( + struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) { - struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; if (unlikely(get_tlb_iprot(gtlbe))) return -1; - if (tlbsel == 1) { - kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe), - get_tlb_end(gtlbe), - get_tlb_tid(gtlbe)); - } else { - kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); - } - gtlbe->mas1 = 0; return 0; @@ -486,10 +443,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) int esel; if (value & MMUCSR0_TLB0FI) - for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); if (value & MMUCSR0_TLB1FI) - for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); _tlbil_all(); @@ -513,7 +470,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) if (ia) { /* invalidate all entries */ - for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++) + for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } else { ea &= 0xfffff000; @@ -537,9 +494,9 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); - gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; vcpu_e500->mas0 &= ~MAS0_NV(~0); - vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; vcpu_e500->mas3 = gtlbe->mas3; @@ -562,14 +519,14 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) for (tlbsel = 0; tlbsel < 2; tlbsel++) { esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); if (esel >= 0) { - gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; break; } } if (gtlbe) { vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) - | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; vcpu_e500->mas3 = gtlbe->mas3; @@ -582,7 +539,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) - | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); @@ -599,23 +556,16 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - u64 eaddr; - u64 raddr; - u32 tid; struct tlbe *gtlbe; - int tlbsel, esel, stlbsel, sesel; + int tlbsel, esel; tlbsel = get_tlb_tlbsel(vcpu_e500); esel = get_tlb_esel(vcpu_e500, tlbsel); - gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; - if (get_tlb_v(gtlbe) && tlbsel == 1) { - eaddr = get_tlb_eaddr(gtlbe); - tid = get_tlb_tid(gtlbe); - kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr, - get_tlb_end(gtlbe), tid); - } + if (get_tlb_v(gtlbe) && tlbsel == 1) + kvmppc_e500_tlb1_invalidate(vcpu_e500, esel); gtlbe->mas1 = vcpu_e500->mas1; gtlbe->mas2 = vcpu_e500->mas2; @@ -627,6 +577,11 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { + struct tlbe stlbe; + int stlbsel, sesel; + u64 eaddr; + u64 raddr; + switch (tlbsel) { case 0: /* TLB0 */ @@ -634,7 +589,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); stlbsel = 0; - sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel); + sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); break; @@ -649,13 +604,13 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) * are mapped on the fly. */ stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe); + raddr >> PAGE_SHIFT, gtlbe, &stlbe); break; default: BUG(); } - write_host_tlbe(vcpu_e500, stlbsel, sesel); + write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); } kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); @@ -695,7 +650,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); struct tlbe *gtlbe = - &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)]; + &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)]; u64 pgmask = get_tlb_bytes(gtlbe) - 1; return get_tlb_raddr(gtlbe) | (eaddr & pgmask); @@ -703,38 +658,36 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int tlbsel, i; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) - for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) - kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i); - - /* discard all guest mapping */ - _tlbil_all(); } void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, unsigned int index) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe_priv *priv; + struct tlbe *gtlbe, stlbe; int tlbsel = tlbsel_of(index); int esel = esel_of(index); int stlbsel, sesel; + gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + switch (tlbsel) { case 0: stlbsel = 0; sesel = esel; + priv = &vcpu_e500->gtlb_priv[stlbsel][sesel]; + + kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, + priv, eaddr, &stlbe); break; case 1: { gfn_t gfn = gpaddr >> PAGE_SHIFT; - struct tlbe *gtlbe - = &vcpu_e500->guest_tlb[tlbsel][esel]; stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe); + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, + gtlbe, &stlbe); break; } @@ -742,7 +695,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, BUG(); break; } - write_host_tlbe(vcpu_e500, stlbsel, sesel); + + write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); } int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, @@ -773,14 +727,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) struct tlbe *tlbe; /* Insert large initial mapping for guest. */ - tlbe = &vcpu_e500->guest_tlb[1][0]; + tlbe = &vcpu_e500->gtlb_arch[1][0]; tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); tlbe->mas2 = 0; tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; tlbe->mas7 = 0; /* 4K map for serial output. Used by kernel wrapper. */ - tlbe = &vcpu_e500->guest_tlb[1][1]; + tlbe = &vcpu_e500->gtlb_arch[1][1]; tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; @@ -791,52 +745,61 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF; - vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE; - vcpu_e500->guest_tlb[0] = + vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE; + vcpu_e500->gtlb_arch[0] = kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->guest_tlb[0] == NULL) + if (vcpu_e500->gtlb_arch[0] == NULL) goto err_out; - vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE; - vcpu_e500->shadow_tlb[0] = - kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); - if (vcpu_e500->shadow_tlb[0] == NULL) - goto err_out_guest0; - - vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE; - vcpu_e500->guest_tlb[1] = + vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE; + vcpu_e500->gtlb_arch[1] = kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); - if (vcpu_e500->guest_tlb[1] == NULL) - goto err_out_shadow0; + if (vcpu_e500->gtlb_arch[1] == NULL) + goto err_out_guest0; - vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num; - vcpu_e500->shadow_tlb[1] = - kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL); - if (vcpu_e500->shadow_tlb[1] == NULL) + vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *) + kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->gtlb_priv[0] == NULL) goto err_out_guest1; + vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *) + kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL); + + if (vcpu_e500->gtlb_priv[1] == NULL) + goto err_out_priv0; /* Init TLB configuration register */ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; - vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0]; + vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0]; vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; - vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1]; + vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1]; return 0; +err_out_priv0: + kfree(vcpu_e500->gtlb_priv[0]); err_out_guest1: - kfree(vcpu_e500->guest_tlb[1]); -err_out_shadow0: - kfree(vcpu_e500->shadow_tlb[0]); + kfree(vcpu_e500->gtlb_arch[1]); err_out_guest0: - kfree(vcpu_e500->guest_tlb[0]); + kfree(vcpu_e500->gtlb_arch[0]); err_out: return -1; } void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { - kfree(vcpu_e500->shadow_tlb[1]); - kfree(vcpu_e500->guest_tlb[1]); - kfree(vcpu_e500->shadow_tlb[0]); - kfree(vcpu_e500->guest_tlb[0]); + int stlbsel, i; + + /* release all privs */ + for (stlbsel = 0; stlbsel < 2; stlbsel++) + for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) { + struct tlbe_priv *priv = + &vcpu_e500->gtlb_priv[stlbsel][i]; + kvmppc_e500_priv_release(priv); + } + + /* discard all guest mapping */ + _tlbil_all(); + + kfree(vcpu_e500->gtlb_arch[1]); + kfree(vcpu_e500->gtlb_arch[0]); } -- cgit v1.2.3 From dd9ebf1f94354b010f2ac7a98bf69168636cb08e Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Tue, 14 Jun 2011 18:35:14 -0500 Subject: KVM: PPC: e500: Add shadow PID support Dynamically assign host PIDs to guest PIDs, splitting each guest PID into multiple host (shadow) PIDs based on kernel/user and MSR[IS/DS]. Use both PID0 and PID1 so that the shadow PIDs for the right mode can be selected, that correspond both to guest TID = zero and guest TID = guest PID. This allows us to significantly reduce the frequency of needing to invalidate the entire TLB. When the guest mode or PID changes, we just update the host PID0/PID1. And since the allocation of shadow PIDs is global, multiple guests can share the TLB without conflict. Note that KVM does not yet support the guest setting PID1 or PID2 to a value other than zero. This will need to be fixed for nested KVM to work. Until then, we enforce the requirement for guest PID1/PID2 to stay zero by failing the emulation if the guest tries to set them to something else. Signed-off-by: Liu Yu Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_e500.h | 8 +- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/44x_tlb.c | 4 +- arch/powerpc/kvm/booke.c | 11 +- arch/powerpc/kvm/booke.h | 1 + arch/powerpc/kvm/booke_interrupts.S | 11 ++ arch/powerpc/kvm/e500_emulate.c | 4 + arch/powerpc/kvm/e500_tlb.c | 312 +++++++++++++++++++++++++++++++++--- arch/powerpc/kvm/e500_tlb.h | 13 +- 10 files changed, 334 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 4a6d77a8b8a0..adbfca9dd100 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, * @@ -37,6 +37,8 @@ struct tlbe_priv { unsigned int flags; /* E500_TLB_* */ }; +struct vcpu_id_table; + struct kvmppc_vcpu_e500 { /* Unmodified copy of the guest's TLB. */ struct tlbe *gtlb_arch[E500_TLB_NUM]; @@ -59,6 +61,10 @@ struct kvmppc_vcpu_e500 { u32 mas5; u32 mas6; u32 mas7; + + /* vcpu id table */ + struct vcpu_id_table *idt; + u32 l1csr0; u32 l1csr1; u32 hid0; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c4ce1054b866..6e05b2d13683 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,6 +255,7 @@ struct kvm_vcpu_arch { u32 pvr; u32 shadow_pid; + u32 shadow_pid1; u32 pid; u32 swap_pid; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ecd2b3ad7ff6..faf846131f45 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -402,6 +402,7 @@ int main(void) DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 5f3cff83e089..33aa715dab28 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -387,8 +387,10 @@ static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, } } -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { + int usermode = vcpu->arch.shared->msr & MSR_PR; + vcpu->arch.shadow_pid = !usermode; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4538956daecf..9f2e4a5e1c4d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -113,15 +113,18 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. */ +/* + * Helper function for "full" MSR writes. No need to call this if only + * EE/CE/ME/DE/RI are changing. + */ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { - if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + u32 old_msr = vcpu->arch.shared->msr; vcpu->arch.shared->msr = new_msr; + kvmppc_mmu_msr_notify(vcpu, old_msr); + if (vcpu->arch.shared->msr & MSR_WE) { kvm_vcpu_block(vcpu); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 0fa1732ddcb7..8e1fe33d64e5 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -53,6 +53,7 @@ extern unsigned long kvmppc_booke_handlers; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 8cb3dfe29f75..42f2fb1f66e9 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -191,6 +191,12 @@ _GLOBAL(kvmppc_resume_host) lwz r3, VCPU_HOST_PID(r4) mtspr SPRN_PID, r3 +#ifdef CONFIG_FSL_BOOKE + /* we cheat and know that Linux doesn't use PID1 which is always 0 */ + lis r3, 0 + mtspr SPRN_PID1, r3 +#endif + /* Restore host IVPR before re-enabling interrupts. We cheat and know * that Linux IVPR is always 0xc0000000. */ lis r3, 0xc000 @@ -365,6 +371,11 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 +#ifdef CONFIG_FSL_BOOKE + lwz r3, VCPU_SHADOW_PID1(r4) + mtspr SPRN_PID1, r3 +#endif + #ifdef CONFIG_44x iccci 0, 0 /* XXX hack */ #endif diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 69cd665a0caf..d48ae396f41e 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -81,8 +81,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: + if (spr_val != 0) + return EMULATE_FAIL; vcpu_e500->pid[1] = spr_val; break; case SPRN_PID2: + if (spr_val != 0) + return EMULATE_FAIL; vcpu_e500->pid[2] = spr_val; break; case SPRN_MAS0: vcpu_e500->mas0 = spr_val; break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 9d1e28d443c4..ea394571bbb6 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -28,8 +28,196 @@ #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) +struct id { + unsigned long val; + struct id **pentry; +}; + +#define NUM_TIDS 256 + +/* + * This table provide mappings from: + * (guestAS,guestTID,guestPR) --> ID of physical cpu + * guestAS [0..1] + * guestTID [0..255] + * guestPR [0..1] + * ID [1..255] + * Each vcpu keeps one vcpu_id_table. + */ +struct vcpu_id_table { + struct id id[2][NUM_TIDS][2]; +}; + +/* + * This table provide reversed mappings of vcpu_id_table: + * ID --> address of vcpu_id_table item. + * Each physical core has one pcpu_id_table. + */ +struct pcpu_id_table { + struct id *entry[NUM_TIDS]; +}; + +static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); + +/* This variable keeps last used shadow ID on local core. + * The valid range of shadow ID is [1..255] */ +static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); + static unsigned int tlb1_entry_num; +/* + * Allocate a free shadow id and setup a valid sid mapping in given entry. + * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_setup_one(struct id *entry) +{ + unsigned long sid; + int ret = -1; + + sid = ++(__get_cpu_var(pcpu_last_used_sid)); + if (sid < NUM_TIDS) { + __get_cpu_var(pcpu_sids).entry[sid] = entry; + entry->val = sid; + entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + ret = sid; + } + + /* + * If sid == NUM_TIDS, we've run out of sids. We return -1, and + * the caller will invalidate everything and start over. + * + * sid > NUM_TIDS indicates a race, which we disable preemption to + * avoid. + */ + WARN_ON(sid > NUM_TIDS); + + return ret; +} + +/* + * Check if given entry contain a valid shadow id mapping. + * An ID mapping is considered valid only if + * both vcpu and pcpu know this mapping. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_lookup(struct id *entry) +{ + if (entry && entry->val != 0 && + __get_cpu_var(pcpu_sids).entry[entry->val] == entry && + entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + return entry->val; + return -1; +} + +/* Invalidate all id mappings on local core */ +static inline void local_sid_destroy_all(void) +{ + preempt_disable(); + __get_cpu_var(pcpu_last_used_sid) = 0; + memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); + preempt_enable(); +} + +static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL); + return vcpu_e500->idt; +} + +static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->idt); +} + +/* Invalidate all mappings on vcpu */ +static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table)); + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* Invalidate one ID mapping on vcpu */ +static inline void kvmppc_e500_id_table_reset_one( + struct kvmppc_vcpu_e500 *vcpu_e500, + int as, int pid, int pr) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + + BUG_ON(as >= 2); + BUG_ON(pid >= NUM_TIDS); + BUG_ON(pr >= 2); + + idt->id[as][pid][pr].val = 0; + idt->id[as][pid][pr].pentry = NULL; + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* + * Map guest (vcpu,AS,ID,PR) to physical core shadow id. + * This function first lookup if a valid mapping exists, + * if not, then creates a new one. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, + unsigned int as, unsigned int gid, + unsigned int pr, int avoid_recursion) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + int sid; + + BUG_ON(as >= 2); + BUG_ON(gid >= NUM_TIDS); + BUG_ON(pr >= 2); + + sid = local_sid_lookup(&idt->id[as][gid][pr]); + + while (sid <= 0) { + /* No mapping yet */ + sid = local_sid_setup_one(&idt->id[as][gid][pr]); + if (sid <= 0) { + _tlbil_all(); + local_sid_destroy_all(); + } + + /* Update shadow pid when mappings are changed */ + if (!avoid_recursion) + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } + + return sid; +} + +/* Map guest pid to shadow. + * We use PID to keep shadow of current guest non-zero PID, + * and use PID1 to keep shadow of guest zero PID. + * So that guest tlbe with TID=0 can be accessed at any time */ +void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + preempt_disable(); + vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), + get_cur_pid(&vcpu_e500->vcpu), + get_cur_pr(&vcpu_e500->vcpu), 1); + vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), 0, + get_cur_pr(&vcpu_e500->vcpu), 1); + preempt_enable(); +} + void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -134,14 +322,19 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, void kvmppc_map_magic(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); struct tlbe magic; ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + unsigned int stid; pfn_t pfn; pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; get_page(pfn_to_page(pfn)); - magic.mas1 = MAS1_VALID | MAS1_TS | + preempt_disable(); + stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); + + magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | MAS1_TSIZE(BOOK3E_PAGESZ_4K); magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; magic.mas3 = (pfn << PAGE_SHIFT) | @@ -149,15 +342,76 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) magic.mas7 = pfn >> (32 - PAGE_SHIFT); __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + preempt_enable(); } void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) { + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + /* Shadow PID may be expired on local core */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); } void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) { - _tlbil_all(); +} + +static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + struct vcpu_id_table *idt = vcpu_e500->idt; + unsigned int pr, tid, ts, pid; + u32 val, eaddr; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + + preempt_disable(); + + /* One guest ID may be mapped to two shadow IDs */ + for (pr = 0; pr < 2; pr++) { + /* + * The shadow PID can have a valid mapping on at most one + * host CPU. In the common case, it will be valid on this + * CPU, in which case (for TLB0) we do a local invalidation + * of the specific address. + * + * If the shadow PID is not valid on the current host CPU, or + * if we're invalidating a TLB1 entry, we invalidate the + * entire shadow PID. + */ + if (tlbsel == 1 || + (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) { + kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr); + continue; + } + + /* + * The guest is invalidating a TLB0 entry which is in a PID + * that has a valid shadow mapping on this host CPU. We + * search host TLB0 to invalidate it's shadow TLB entry, + * similar to __tlbil_va except that we need to look in AS1. + */ + val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + + local_irq_restore(flags); + } + + preempt_enable(); } /* Search the guest TLB for a matching entry. */ @@ -216,12 +470,6 @@ static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv) } } -static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, - int esel) -{ - mtspr(SPRN_MMUCSR0, MMUCSR0_TLB1FI); -} - static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { @@ -255,10 +503,15 @@ static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, struct tlbe *stlbe) { pfn_t pfn = priv->pfn; + unsigned int stid; + + stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), + get_tlb_tid(gtlbe), + get_cur_pr(&vcpu_e500->vcpu), 0); /* Force TS=1 IPROT=0 for all guest mappings. */ stlbe->mas1 = MAS1_TSIZE(tsize) - | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; + | MAS1_TID(stid) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, vcpu_e500->vcpu.arch.shared->msr & MSR_PR); @@ -414,14 +667,12 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, return victim; } -/* Invalidate all guest kernel mappings when enter usermode, - * so that when they fault back in they will get the - * proper permission bits. */ -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { - if (usermode) { - _tlbil_all(); - } + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + /* Recalc shadow pid since MSR changes */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); } static inline int kvmppc_e500_gtlbe_invalidate( @@ -449,7 +700,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); - _tlbil_all(); + /* Invalidate all vcpu id mappings */ + kvmppc_e500_id_table_reset_all(vcpu_e500); return EMULATE_DONE; } @@ -480,7 +732,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } - _tlbil_all(); + /* Invalidate all vcpu id mappings */ + kvmppc_e500_id_table_reset_all(vcpu_e500); return EMULATE_DONE; } @@ -564,8 +817,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; - if (get_tlb_v(gtlbe) && tlbsel == 1) - kvmppc_e500_tlb1_invalidate(vcpu_e500, esel); + if (get_tlb_v(gtlbe)) + kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); gtlbe->mas1 = vcpu_e500->mas1; gtlbe->mas2 = vcpu_e500->mas2; @@ -582,6 +835,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) u64 eaddr; u64 raddr; + preempt_disable(); switch (tlbsel) { case 0: /* TLB0 */ @@ -611,6 +865,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) BUG(); } write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); + preempt_enable(); } kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); @@ -672,6 +927,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; + preempt_disable(); switch (tlbsel) { case 0: stlbsel = 0; @@ -697,6 +953,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); + preempt_enable(); } int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, @@ -718,8 +975,10 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = pid; + if (vcpu->arch.pid != pid) { + vcpu_e500->pid[0] = vcpu->arch.pid = pid; + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } } void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) @@ -767,6 +1026,9 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (vcpu_e500->gtlb_priv[1] == NULL) goto err_out_priv0; + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + goto err_out_priv1; + /* Init TLB configuration register */ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0]; @@ -775,6 +1037,8 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) return 0; +err_out_priv1: + kfree(vcpu_e500->gtlb_priv[1]); err_out_priv0: kfree(vcpu_e500->gtlb_priv[0]); err_out_guest1: @@ -797,9 +1061,7 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) kvmppc_e500_priv_release(priv); } - /* discard all guest mapping */ - _tlbil_all(); - + kvmppc_e500_id_table_free(vcpu_e500); kfree(vcpu_e500->gtlb_arch[1]); kfree(vcpu_e500->gtlb_arch[0]); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index 458946b4775d..59b88e99a235 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com * @@ -55,6 +55,7 @@ extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); /* TLB helper functions */ static inline unsigned int get_tlb_size(const struct tlbe *tlbe) @@ -110,6 +111,16 @@ static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) return vcpu->arch.pid & 0xff; } +static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS)); +} + +static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & MSR_PR); +} + static inline unsigned int get_cur_spid( const struct kvmppc_vcpu_e500 *vcpu_e500) { -- cgit v1.2.3 From 1aee47a0276f75a371e13a936a48f64eb5d3ec1b Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:35:20 -0500 Subject: KVM: PPC: e500: Don't search over the entire TLB0. Only look in the 4 entries that could possibly contain the entry we're looking for. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index ea394571bbb6..13c432ea2fa8 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -418,11 +418,21 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t eaddr, int tlbsel, unsigned int pid, int as) { + int size = vcpu_e500->gtlb_size[tlbsel]; + int set_base; int i; - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { - struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][i]; + if (tlbsel == 0) { + int mask = size / KVM_E500_TLB0_WAY_NUM - 1; + set_base = (eaddr >> PAGE_SHIFT) & mask; + set_base *= KVM_E500_TLB0_WAY_NUM; + size = KVM_E500_TLB0_WAY_NUM; + } else { + set_base = 0; + } + + for (i = 0; i < size; i++) { + struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -441,7 +451,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, if (get_tlb_ts(tlbe) != as && as != -1) continue; - return i; + return set_base + i; } return -1; -- cgit v1.2.3 From 149dbdb1859be46a063a5b1b0aa99a5f999b7632 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:16:42 +0000 Subject: KVM: PPC: Fix machine checks on 32-bit Book3S Commit 69acc0d3ba ("KVM: PPC: Resolve real-mode handlers through function exports") resulted in vcpu->arch.trampoline_lowmem and vcpu->arch.trampoline_enter ending up with kernel virtual addresses rather than physical addresses. This is OK on 64-bit Book3S machines, which ignore the top 4 bits of the effective address in real mode, but on 32-bit Book3S machines, accessing these addresses in real mode causes machine check interrupts, as the hardware uses the whole effective address as the physical address in real mode. This fixes the problem by using __pa() to convert these addresses to physical addresses. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 73fdab8a567f..83500fb62c83 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1342,8 +1343,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu_book3s->slb_nr = 64; /* remember where some real-mode handlers are */ - vcpu->arch.trampoline_lowmem = (ulong)kvmppc_handler_lowmem_trampoline; - vcpu->arch.trampoline_enter = (ulong)kvmppc_handler_trampoline_enter; + vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline); + vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter); vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; -- cgit v1.2.3 From c4befc58a0cc5a8cc5b4a7234d67b6b16dec4e70 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:17:33 +0000 Subject: KVM: PPC: Move fields between struct kvm_vcpu_arch and kvmppc_vcpu_book3s This moves the slb field, which represents the state of the emulated SLB, from the kvmppc_vcpu_book3s struct to the kvm_vcpu_arch, and the hpte_hash_[v]pte[_long] fields from kvm_vcpu_arch to kvmppc_vcpu_book3s. This is in accord with the principle that the kvm_vcpu_arch struct represents the state of the emulated CPU, and the kvmppc_vcpu_book3s struct holds the auxiliary data structures used in the emulation. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 35 +++++++++-------- arch/powerpc/include/asm/kvm_host.h | 34 ++++++++--------- arch/powerpc/kvm/book3s.c | 9 +++-- arch/powerpc/kvm/book3s_64_mmu.c | 54 ++++++++++++-------------- arch/powerpc/kvm/book3s_mmu_hpte.c | 71 +++++++++++++++++++++-------------- arch/powerpc/kvm/trace.h | 2 +- 6 files changed, 107 insertions(+), 98 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 70c409b1d931..f7b2bafe7047 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -24,20 +24,6 @@ #include #include -struct kvmppc_slb { - u64 esid; - u64 vsid; - u64 orige; - u64 origv; - bool valid : 1; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool large : 1; /* PTEs are 16MB */ - bool tb : 1; /* 1TB segment */ - bool class : 1; -}; - struct kvmppc_bat { u64 raw; u32 bepi; @@ -67,11 +53,22 @@ struct kvmppc_sid_map { #define VSID_POOL_SIZE (SID_CONTEXTS * 16) #endif +struct hpte_cache { + struct hlist_node list_pte; + struct hlist_node list_pte_long; + struct hlist_node list_vpte; + struct hlist_node list_vpte_long; + struct rcu_head rcu_head; + u64 host_va; + u64 pfn; + ulong slot; + struct kvmppc_pte pte; +}; + struct kvmppc_vcpu_book3s { struct kvm_vcpu vcpu; struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; struct kvmppc_sid_map sid_map[SID_MAP_NUM]; - struct kvmppc_slb slb[64]; struct { u64 esid; u64 vsid; @@ -81,7 +78,6 @@ struct kvmppc_vcpu_book3s { struct kvmppc_bat dbat[8]; u64 hid[6]; u64 gqr[8]; - int slb_nr; u64 sdr1; u64 hior; u64 msr_mask; @@ -94,6 +90,13 @@ struct kvmppc_vcpu_book3s { #endif int context_id[SID_CONTEXTS]; ulong prog_flags; /* flags to inject when giving a 700 trap */ + + struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; + struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; + struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; + struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; + int hpte_cache_count; + spinlock_t mmu_lock; }; #define CONTEXT_HOST 0 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6e05b2d13683..069eb9fc6c41 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -163,16 +163,18 @@ struct kvmppc_mmu { bool (*is_dcbz32)(struct kvm_vcpu *vcpu); }; -struct hpte_cache { - struct hlist_node list_pte; - struct hlist_node list_pte_long; - struct hlist_node list_vpte; - struct hlist_node list_vpte_long; - struct rcu_head rcu_head; - u64 host_va; - u64 pfn; - ulong slot; - struct kvmppc_pte pte; +struct kvmppc_slb { + u64 esid; + u64 vsid; + u64 orige; + u64 origv; + bool valid : 1; + bool Ks : 1; + bool Kp : 1; + bool nx : 1; + bool large : 1; /* PTEs are 16MB */ + bool tb : 1; /* 1TB segment */ + bool class : 1; }; struct kvm_vcpu_arch { @@ -187,6 +189,9 @@ struct kvm_vcpu_arch { ulong highmem_handler; ulong rmcall; ulong host_paca_phys; + struct kvmppc_slb slb[64]; + int slb_max; /* # valid entries in slb[] */ + int slb_nr; /* total number of entries in SLB */ struct kvmppc_mmu mmu; #endif @@ -305,15 +310,6 @@ struct kvm_vcpu_arch { struct kvm_vcpu_arch_shared *shared; unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ - -#ifdef CONFIG_PPC_BOOK3S - struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; - struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; - struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; - struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; - int hpte_cache_count; - spinlock_t mmu_lock; -#endif }; #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 83500fb62c83..5d0babefe913 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -17,7 +17,6 @@ #include #include #include -#include "trace.h" #include #include @@ -34,6 +33,8 @@ #include #include +#include "trace.h" + #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ @@ -1191,8 +1192,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { for (i = 0; i < 64; i++) { - sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i; - sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv; + sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i; + sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; } } else { for (i = 0; i < 16; i++) @@ -1340,7 +1341,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.pvr = 0x84202; #endif kvmppc_set_pvr(vcpu, vcpu->arch.pvr); - vcpu_book3s->slb_nr = 64; + vcpu->arch.slb_nr = 64; /* remember where some real-mode handlers are */ vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline); diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index d7889ef3211e..c6d3e194b6b4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -41,36 +41,36 @@ static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) } static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( - struct kvmppc_vcpu_book3s *vcpu_book3s, + struct kvm_vcpu *vcpu, gva_t eaddr) { int i; u64 esid = GET_ESID(eaddr); u64 esid_1t = GET_ESID_1T(eaddr); - for (i = 0; i < vcpu_book3s->slb_nr; i++) { + for (i = 0; i < vcpu->arch.slb_nr; i++) { u64 cmp_esid = esid; - if (!vcpu_book3s->slb[i].valid) + if (!vcpu->arch.slb[i].valid) continue; - if (vcpu_book3s->slb[i].tb) + if (vcpu->arch.slb[i].tb) cmp_esid = esid_1t; - if (vcpu_book3s->slb[i].esid == cmp_esid) - return &vcpu_book3s->slb[i]; + if (vcpu->arch.slb[i].esid == cmp_esid) + return &vcpu->arch.slb[i]; } dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n", eaddr, esid, esid_1t); - for (i = 0; i < vcpu_book3s->slb_nr; i++) { - if (vcpu_book3s->slb[i].vsid) + for (i = 0; i < vcpu->arch.slb_nr; i++) { + if (vcpu->arch.slb[i].vsid) dprintk(" %d: %c%c%c %llx %llx\n", i, - vcpu_book3s->slb[i].valid ? 'v' : ' ', - vcpu_book3s->slb[i].large ? 'l' : ' ', - vcpu_book3s->slb[i].tb ? 't' : ' ', - vcpu_book3s->slb[i].esid, - vcpu_book3s->slb[i].vsid); + vcpu->arch.slb[i].valid ? 'v' : ' ', + vcpu->arch.slb[i].large ? 'l' : ' ', + vcpu->arch.slb[i].tb ? 't' : ' ', + vcpu->arch.slb[i].esid, + vcpu->arch.slb[i].vsid); } return NULL; @@ -81,7 +81,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, { struct kvmppc_slb *slb; - slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), eaddr); + slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); if (!slb) return 0; @@ -180,7 +180,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return 0; } - slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr); + slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); if (!slbe) goto no_seg_found; @@ -320,10 +320,10 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) esid_1t = GET_ESID_1T(rb); slb_nr = rb & 0xfff; - if (slb_nr > vcpu_book3s->slb_nr) + if (slb_nr > vcpu->arch.slb_nr) return; - slbe = &vcpu_book3s->slb[slb_nr]; + slbe = &vcpu->arch.slb[slb_nr]; slbe->large = (rs & SLB_VSID_L) ? 1 : 0; slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; @@ -344,38 +344,35 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) { - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_slb *slbe; - if (slb_nr > vcpu_book3s->slb_nr) + if (slb_nr > vcpu->arch.slb_nr) return 0; - slbe = &vcpu_book3s->slb[slb_nr]; + slbe = &vcpu->arch.slb[slb_nr]; return slbe->orige; } static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr) { - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_slb *slbe; - if (slb_nr > vcpu_book3s->slb_nr) + if (slb_nr > vcpu->arch.slb_nr) return 0; - slbe = &vcpu_book3s->slb[slb_nr]; + slbe = &vcpu->arch.slb[slb_nr]; return slbe->origv; } static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) { - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_slb *slbe; dprintk("KVM MMU: slbie(0x%llx)\n", ea); - slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, ea); + slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); if (!slbe) return; @@ -389,13 +386,12 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) { - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); int i; dprintk("KVM MMU: slbia()\n"); - for (i = 1; i < vcpu_book3s->slb_nr; i++) - vcpu_book3s->slb[i].valid = false; + for (i = 1; i < vcpu->arch.slb_nr; i++) + vcpu->arch.slb[i].valid = false; if (vcpu->arch.shared->msr & MSR_IR) { kvmppc_mmu_flush_segments(vcpu); @@ -464,7 +460,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, ulong mp_ea = vcpu->arch.magic_page_ea; if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { - slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); + slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); if (slb) gvsid = slb->vsid; } diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 79751d8dd131..41cb0017e757 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,7 +21,6 @@ #include #include #include -#include "trace.h" #include #include @@ -29,6 +28,8 @@ #include #include +#include "trace.h" + #define PTE_SIZE 12 static struct kmem_cache *hpte_cache; @@ -58,30 +59,31 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { u64 index; + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); trace_kvm_book3s_mmu_map(pte); - spin_lock(&vcpu->arch.mmu_lock); + spin_lock(&vcpu3s->mmu_lock); /* Add to ePTE list */ index = kvmppc_mmu_hash_pte(pte->pte.eaddr); - hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + hlist_add_head_rcu(&pte->list_pte, &vcpu3s->hpte_hash_pte[index]); /* Add to ePTE_long list */ index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr); hlist_add_head_rcu(&pte->list_pte_long, - &vcpu->arch.hpte_hash_pte_long[index]); + &vcpu3s->hpte_hash_pte_long[index]); /* Add to vPTE list */ index = kvmppc_mmu_hash_vpte(pte->pte.vpage); - hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); + hlist_add_head_rcu(&pte->list_vpte, &vcpu3s->hpte_hash_vpte[index]); /* Add to vPTE_long list */ index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); hlist_add_head_rcu(&pte->list_vpte_long, - &vcpu->arch.hpte_hash_vpte_long[index]); + &vcpu3s->hpte_hash_vpte_long[index]); - spin_unlock(&vcpu->arch.mmu_lock); + spin_unlock(&vcpu3s->mmu_lock); } static void free_pte_rcu(struct rcu_head *head) @@ -92,16 +94,18 @@ static void free_pte_rcu(struct rcu_head *head) static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + trace_kvm_book3s_mmu_invalidate(pte); /* Different for 32 and 64 bit */ kvmppc_mmu_invalidate_pte(vcpu, pte); - spin_lock(&vcpu->arch.mmu_lock); + spin_lock(&vcpu3s->mmu_lock); /* pte already invalidated in between? */ if (hlist_unhashed(&pte->list_pte)) { - spin_unlock(&vcpu->arch.mmu_lock); + spin_unlock(&vcpu3s->mmu_lock); return; } @@ -115,14 +119,15 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) else kvm_release_pfn_clean(pte->pfn); - spin_unlock(&vcpu->arch.mmu_lock); + spin_unlock(&vcpu3s->mmu_lock); - vcpu->arch.hpte_cache_count--; + vcpu3s->hpte_cache_count--; call_rcu(&pte->rcu_head, free_pte_rcu); } static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hpte_cache *pte; struct hlist_node *node; int i; @@ -130,7 +135,7 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) rcu_read_lock(); for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { - struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; + struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) invalidate_pte(vcpu, pte); @@ -141,12 +146,13 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ - list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; + list = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; rcu_read_lock(); @@ -160,12 +166,13 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ - list = &vcpu->arch.hpte_hash_pte_long[ + list = &vcpu3s->hpte_hash_pte_long[ kvmppc_mmu_hash_pte_long(guest_ea)]; rcu_read_lock(); @@ -203,12 +210,13 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) /* Flush with mask 0xfffffffff */ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xfffffffffULL; - list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; + list = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; rcu_read_lock(); @@ -223,12 +231,13 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) /* Flush with mask 0xffffff000 */ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xffffff000ULL; - list = &vcpu->arch.hpte_hash_vpte_long[ + list = &vcpu3s->hpte_hash_vpte_long[ kvmppc_mmu_hash_vpte_long(guest_vp)]; rcu_read_lock(); @@ -261,6 +270,7 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_node *node; struct hpte_cache *pte; int i; @@ -270,7 +280,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) rcu_read_lock(); for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { - struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; + struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.raddr >= pa_start) && @@ -283,12 +293,13 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hpte_cache *pte; pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); - vcpu->arch.hpte_cache_count++; + vcpu3s->hpte_cache_count++; - if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM) + if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) kvmppc_mmu_pte_flush_all(vcpu); return pte; @@ -309,17 +320,19 @@ static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len) int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + /* init hpte lookup hashes */ - kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, - ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); - kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long, - ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long)); - kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, - ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); - kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, - ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); - - spin_lock_init(&vcpu->arch.mmu_lock); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte, + ARRAY_SIZE(vcpu3s->hpte_hash_pte)); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long, + ARRAY_SIZE(vcpu3s->hpte_hash_pte_long)); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte, + ARRAY_SIZE(vcpu3s->hpte_hash_vpte)); + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long, + ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); + + spin_lock_init(&vcpu3s->mmu_lock); return 0; } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 3aca1b042b8c..d62a14b2cd0f 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -252,7 +252,7 @@ TRACE_EVENT(kvm_book3s_mmu_flush, ), TP_fast_assign( - __entry->count = vcpu->arch.hpte_cache_count; + __entry->count = to_book3s(vcpu)->hpte_cache_count; __entry->p1 = p1; __entry->p2 = p2; __entry->type = type; -- cgit v1.2.3 From f05ed4d56e9cff1c46d2b3049ba0c72e7e29392f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:17:58 +0000 Subject: KVM: PPC: Split out code from book3s.c into book3s_pr.c In preparation for adding code to enable KVM to use hypervisor mode on 64-bit Book 3S processors, this splits book3s.c into two files, book3s.c and book3s_pr.c, where book3s_pr.c contains the code that is specific to running the guest in problem state (user mode) and book3s.c contains code which should apply to all Book 3S processors. In doing this, we abstract some details, namely the interrupt offset, updating the interrupt pending flag, and detecting if the guest is in a critical section. These are all things that will be different when we use hypervisor mode. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 35 ++ arch/powerpc/kvm/Makefile | 2 + arch/powerpc/kvm/book3s.c | 995 +------------------------------- arch/powerpc/kvm/book3s_pr.c | 1009 +++++++++++++++++++++++++++++++++ 4 files changed, 1053 insertions(+), 988 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_pr.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f7b2bafe7047..480fff6090db 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -113,6 +113,7 @@ extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask) extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); +extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr); extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); @@ -150,6 +151,20 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu); } +static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) +{ + return to_book3s(vcpu)->hior; +} + +static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, + unsigned long pending_now, unsigned long old_pending) +{ + if (pending_now) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; +} + static inline ulong dsisr(void) { ulong r; @@ -247,6 +262,26 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return to_svcpu(vcpu)->fault_dar; } +static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) +{ + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + + return crit; +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4d6863823f69..bf9854fa7f63 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -43,6 +43,7 @@ kvm-book3s_64-objs := \ fpu.o \ book3s_paired_singles.o \ book3s.o \ + book3s_pr.o \ book3s_emulate.o \ book3s_interrupts.o \ book3s_mmu_hpte.o \ @@ -56,6 +57,7 @@ kvm-book3s_32-objs := \ fpu.o \ book3s_paired_singles.o \ book3s.o \ + book3s_pr.o \ book3s_emulate.o \ book3s_interrupts.o \ book3s_mmu_hpte.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5d0babefe913..163e3e13c87c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -38,17 +38,6 @@ #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ -/* #define DEBUG_EXT */ - -static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, - ulong msr); - -/* Some compatibility defines */ -#ifdef CONFIG_PPC_BOOK3S_32 -#define MSR_USER32 MSR_USER -#define MSR_USER64 MSR_USER -#define HW_PAGE_SIZE PAGE_SIZE -#endif struct kvm_stats_debugfs_item debugfs_entries[] = { { "exits", VCPU_STAT(sum_exits) }, @@ -79,100 +68,11 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); - memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, - sizeof(get_paca()->shadow_vcpu)); - to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; -#endif - -#ifdef CONFIG_PPC_BOOK3S_32 - current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; -#endif -} - -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); - memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, - sizeof(get_paca()->shadow_vcpu)); - to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; -#endif - - kvmppc_giveup_ext(vcpu, MSR_FP); - kvmppc_giveup_ext(vcpu, MSR_VEC); - kvmppc_giveup_ext(vcpu, MSR_VSX); -} - -static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) -{ - ulong smsr = vcpu->arch.shared->msr; - - /* Guest MSR values */ - smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; - /* Process MSR values */ - smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; - /* External providers the guest reserved */ - smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); - /* 64-bit Process MSR values */ -#ifdef CONFIG_PPC_BOOK3S_64 - smsr |= MSR_ISF | MSR_HV; -#endif - vcpu->arch.shadow_msr = smsr; -} - -void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) -{ - ulong old_msr = vcpu->arch.shared->msr; - -#ifdef EXIT_DEBUG - printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); -#endif - - msr &= to_book3s(vcpu)->msr_mask; - vcpu->arch.shared->msr = msr; - kvmppc_recalc_shadow_msr(vcpu); - - if (msr & MSR_POW) { - if (!vcpu->arch.pending_exceptions) { - kvm_vcpu_block(vcpu); - vcpu->stat.halt_wakeup++; - - /* Unset POW bit after we woke up */ - msr &= ~MSR_POW; - vcpu->arch.shared->msr = msr; - } - } - - if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != - (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { - kvmppc_mmu_flush_segments(vcpu); - kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); - - /* Preload magic page segment when in kernel mode */ - if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { - struct kvm_vcpu_arch *a = &vcpu->arch; - - if (msr & MSR_DR) - kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); - else - kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); - } - } - - /* Preload FPU if it's enabled */ - if (vcpu->arch.shared->msr & MSR_FP) - kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); -} - void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; - kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); + kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -206,11 +106,13 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) { + unsigned long old_pending = vcpu->arch.pending_exceptions; + clear_bit(kvmppc_book3s_vec2irqprio(vec), &vcpu->arch.pending_exceptions); - if (!vcpu->arch.pending_exceptions) - vcpu->arch.shared->int_pending = 0; + kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions, + old_pending); } void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) @@ -269,20 +171,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) int deliver = 1; int vec = 0; ulong flags = 0ULL; - ulong crit_raw = vcpu->arch.shared->critical; - ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); - bool crit; - - /* Truncate crit indicators in 32 bit mode */ - if (!(vcpu->arch.shared->msr & MSR_SF)) { - crit_raw &= 0xffffffff; - crit_r1 &= 0xffffffff; - } - - /* Critical section when crit == r1 */ - crit = (crit_raw == crit_r1); - /* ... and we're in supervisor mode */ - crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + bool crit = kvmppc_critical_section(vcpu); switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: @@ -394,64 +283,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) } /* Tell the guest about our interrupt status */ - if (*pending) - vcpu->arch.shared->int_pending = 1; - else if (old_pending) - vcpu->arch.shared->int_pending = 0; -} - -void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) -{ - u32 host_pvr; - - vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; - vcpu->arch.pvr = pvr; -#ifdef CONFIG_PPC_BOOK3S_64 - if ((pvr >= 0x330000) && (pvr < 0x70330000)) { - kvmppc_mmu_book3s_64_init(vcpu); - to_book3s(vcpu)->hior = 0xfff00000; - to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; - } else -#endif - { - kvmppc_mmu_book3s_32_init(vcpu); - to_book3s(vcpu)->hior = 0; - to_book3s(vcpu)->msr_mask = 0xffffffffULL; - } - - /* If we are in hypervisor level on 970, we can tell the CPU to - * treat DCBZ as 32 bytes store */ - vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; - if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && - !strcmp(cur_cpu_spec->platform, "ppc970")) - vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; - - /* Cell performs badly if MSR_FEx are set. So let's hope nobody - really needs them in a VM on Cell and force disable them. */ - if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) - to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); - -#ifdef CONFIG_PPC_BOOK3S_32 - /* 32 bit Book3S always has 32 byte dcbz */ - vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; -#endif - - /* On some CPUs we can execute paired single operations natively */ - asm ( "mfpvr %0" : "=r"(host_pvr)); - switch (host_pvr) { - case 0x00080200: /* lonestar 2.0 */ - case 0x00088202: /* lonestar 2.2 */ - case 0x70000100: /* gekko 1.0 */ - case 0x00080100: /* gekko 2.0 */ - case 0x00083203: /* gekko 2.3a */ - case 0x00083213: /* gekko 2.3b */ - case 0x00083204: /* gekko 2.4 */ - case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ - case 0x00087200: /* broadway */ - vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; - /* Enable HID2.PSE - in case we need it later */ - mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); - } + kvmppc_update_int_pending(vcpu, *pending, old_pending); } pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -473,44 +305,6 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) return gfn_to_pfn(vcpu->kvm, gfn); } -/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To - * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to - * emulate 32 bytes dcbz length. - * - * The Book3s_64 inventors also realized this case and implemented a special bit - * in the HID5 register, which is a hypervisor ressource. Thus we can't use it. - * - * My approach here is to patch the dcbz instruction on executing pages. - */ -static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - struct page *hpage; - u64 hpage_offset; - u32 *page; - int i; - - hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (is_error_page(hpage)) { - kvm_release_page_clean(hpage); - return; - } - - hpage_offset = pte->raddr & ~PAGE_MASK; - hpage_offset &= ~0xFFFULL; - hpage_offset /= 4; - - get_page(hpage); - page = kmap_atomic(hpage, KM_USER0); - - /* patch dcbz into reserved instruction, so we trap */ - for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) - if ((page[i] & 0xff0007ff) == INS_DCBZ) - page[i] &= 0xfffffff7; - - kunmap_atomic(page, KM_USER0); - put_page(hpage); -} - static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, struct kvmppc_pte *pte) { @@ -608,519 +402,6 @@ mmio: return EMULATE_DO_MMIO; } -static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) -{ - ulong mp_pa = vcpu->arch.magic_page_pa; - - if (unlikely(mp_pa) && - unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { - return 1; - } - - return kvm_is_visible_gfn(vcpu->kvm, gfn); -} - -int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, - ulong eaddr, int vec) -{ - bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); - int r = RESUME_GUEST; - int relocated; - int page_found = 0; - struct kvmppc_pte pte; - bool is_mmio = false; - bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; - bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; - u64 vsid; - - relocated = data ? dr : ir; - - /* Resolve real address if translation turned on */ - if (relocated) { - page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); - } else { - pte.may_execute = true; - pte.may_read = true; - pte.may_write = true; - pte.raddr = eaddr & KVM_PAM; - pte.eaddr = eaddr; - pte.vpage = eaddr >> 12; - } - - switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { - case 0: - pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); - break; - case MSR_DR: - case MSR_IR: - vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); - - if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) - pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); - else - pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); - pte.vpage |= vsid; - - if (vsid == -1) - page_found = -EINVAL; - break; - } - - if (vcpu->arch.mmu.is_dcbz32(vcpu) && - (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { - /* - * If we do the dcbz hack, we have to NX on every execution, - * so we can patch the executing code. This renders our guest - * NX-less. - */ - pte.may_execute = !data; - } - - if (page_found == -ENOENT) { - /* Page not found in guest PTE entries */ - vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); - vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; - vcpu->arch.shared->msr |= - (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); - kvmppc_book3s_queue_irqprio(vcpu, vec); - } else if (page_found == -EPERM) { - /* Storage protection */ - vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); - vcpu->arch.shared->dsisr = - to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; - vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; - vcpu->arch.shared->msr |= - (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); - kvmppc_book3s_queue_irqprio(vcpu, vec); - } else if (page_found == -EINVAL) { - /* Page not found in guest SLB */ - vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); - kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); - } else if (!is_mmio && - kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { - /* The guest's PTE is not mapped yet. Map on the host */ - kvmppc_mmu_map_page(vcpu, &pte); - if (data) - vcpu->stat.sp_storage++; - else if (vcpu->arch.mmu.is_dcbz32(vcpu) && - (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) - kvmppc_patch_dcbz(vcpu, &pte); - } else { - /* MMIO */ - vcpu->stat.mmio_exits++; - vcpu->arch.paddr_accessed = pte.raddr; - r = kvmppc_emulate_mmio(run, vcpu); - if ( r == RESUME_HOST_NV ) - r = RESUME_HOST; - } - - return r; -} - -static inline int get_fpr_index(int i) -{ -#ifdef CONFIG_VSX - i *= 2; -#endif - return i; -} - -/* Give up external provider (FPU, Altivec, VSX) */ -void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) -{ - struct thread_struct *t = ¤t->thread; - u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX - u64 *vcpu_vsx = vcpu->arch.vsr; -#endif - u64 *thread_fpr = (u64*)t->fpr; - int i; - - if (!(vcpu->arch.guest_owned_ext & msr)) - return; - -#ifdef DEBUG_EXT - printk(KERN_INFO "Giving up ext 0x%lx\n", msr); -#endif - - switch (msr) { - case MSR_FP: - giveup_fpu(current); - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) - vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; - - vcpu->arch.fpscr = t->fpscr.val; - break; - case MSR_VEC: -#ifdef CONFIG_ALTIVEC - giveup_altivec(current); - memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); - vcpu->arch.vscr = t->vscr; -#endif - break; - case MSR_VSX: -#ifdef CONFIG_VSX - __giveup_vsx(current); - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) - vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; -#endif - break; - default: - BUG(); - } - - vcpu->arch.guest_owned_ext &= ~msr; - current->thread.regs->msr &= ~msr; - kvmppc_recalc_shadow_msr(vcpu); -} - -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret == -ENOENT) { - ulong msr = vcpu->arch.shared->msr; - - msr = kvmppc_set_field(msr, 33, 33, 1); - msr = kvmppc_set_field(msr, 34, 36, 0); - vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? */ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - -/* Handle external providers (FPU, Altivec, VSX) */ -static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, - ulong msr) -{ - struct thread_struct *t = ¤t->thread; - u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX - u64 *vcpu_vsx = vcpu->arch.vsr; -#endif - u64 *thread_fpr = (u64*)t->fpr; - int i; - - /* When we have paired singles, we emulate in software */ - if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) - return RESUME_GUEST; - - if (!(vcpu->arch.shared->msr & msr)) { - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - return RESUME_GUEST; - } - - /* We already own the ext */ - if (vcpu->arch.guest_owned_ext & msr) { - return RESUME_GUEST; - } - -#ifdef DEBUG_EXT - printk(KERN_INFO "Loading up ext 0x%lx\n", msr); -#endif - - current->thread.regs->msr |= msr; - - switch (msr) { - case MSR_FP: - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) - thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; - - t->fpscr.val = vcpu->arch.fpscr; - t->fpexc_mode = 0; - kvmppc_load_up_fpu(); - break; - case MSR_VEC: -#ifdef CONFIG_ALTIVEC - memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); - t->vscr = vcpu->arch.vscr; - t->vrsave = -1; - kvmppc_load_up_altivec(); -#endif - break; - case MSR_VSX: -#ifdef CONFIG_VSX - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) - thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; - kvmppc_load_up_vsx(); -#endif - break; - default: - BUG(); - } - - vcpu->arch.guest_owned_ext |= msr; - - kvmppc_recalc_shadow_msr(vcpu); - - return RESUME_GUEST; -} - -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) -{ - int r = RESUME_HOST; - - vcpu->stat.sum_exits++; - - run->exit_reason = KVM_EXIT_UNKNOWN; - run->ready_for_interrupt_injection = 1; - - trace_kvm_book3s_exit(exit_nr, vcpu); - kvm_resched(vcpu); - switch (exit_nr) { - case BOOK3S_INTERRUPT_INST_STORAGE: - vcpu->stat.pf_instruc++; - -#ifdef CONFIG_PPC_BOOK3S_32 - /* We set segments as unused segments when invalidating them. So - * treat the respective fault as segment fault. */ - if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] - == SR_INVALID) { - kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); - r = RESUME_GUEST; - break; - } -#endif - - /* only care about PTEG not found errors, but leave NX alone */ - if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { - r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); - vcpu->stat.sp_instruc++; - } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && - (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { - /* - * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, - * so we can't use the NX bit inside the guest. Let's cross our fingers, - * that no guest that needs the dcbz hack does NX. - */ - kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); - r = RESUME_GUEST; - } else { - vcpu->arch.shared->msr |= - to_svcpu(vcpu)->shadow_srr1 & 0x58000000; - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; - } - break; - case BOOK3S_INTERRUPT_DATA_STORAGE: - { - ulong dar = kvmppc_get_fault_dar(vcpu); - vcpu->stat.pf_storage++; - -#ifdef CONFIG_PPC_BOOK3S_32 - /* We set segments as unused segments when invalidating them. So - * treat the respective fault as segment fault. */ - if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { - kvmppc_mmu_map_segment(vcpu, dar); - r = RESUME_GUEST; - break; - } -#endif - - /* The only case we need to handle is missing shadow PTEs */ - if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { - r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); - } else { - vcpu->arch.shared->dar = dar; - vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; - } - break; - } - case BOOK3S_INTERRUPT_DATA_SEGMENT: - if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { - vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); - kvmppc_book3s_queue_irqprio(vcpu, - BOOK3S_INTERRUPT_DATA_SEGMENT); - } - r = RESUME_GUEST; - break; - case BOOK3S_INTERRUPT_INST_SEGMENT: - if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { - kvmppc_book3s_queue_irqprio(vcpu, - BOOK3S_INTERRUPT_INST_SEGMENT); - } - r = RESUME_GUEST; - break; - /* We're good on these - the host merely wanted to get our attention */ - case BOOK3S_INTERRUPT_DECREMENTER: - vcpu->stat.dec_exits++; - r = RESUME_GUEST; - break; - case BOOK3S_INTERRUPT_EXTERNAL: - vcpu->stat.ext_intr_exits++; - r = RESUME_GUEST; - break; - case BOOK3S_INTERRUPT_PERFMON: - r = RESUME_GUEST; - break; - case BOOK3S_INTERRUPT_PROGRAM: - { - enum emulation_result er; - ulong flags; - -program_interrupt: - flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; - - if (vcpu->arch.shared->msr & MSR_PR) { -#ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); -#endif - if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != - (INS_DCBZ & 0xfffffff7)) { - kvmppc_core_queue_program(vcpu, flags); - r = RESUME_GUEST; - break; - } - } - - vcpu->stat.emulated_inst_exits++; - er = kvmppc_emulate_instruction(run, vcpu); - switch (er) { - case EMULATE_DONE: - r = RESUME_GUEST_NV; - break; - case EMULATE_AGAIN: - r = RESUME_GUEST; - break; - case EMULATE_FAIL: - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); - kvmppc_core_queue_program(vcpu, flags); - r = RESUME_GUEST; - break; - case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; - r = RESUME_HOST_NV; - break; - default: - BUG(); - } - break; - } - case BOOK3S_INTERRUPT_SYSCALL: - if (vcpu->arch.osi_enabled && - (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && - (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { - /* MOL hypercalls */ - u64 *gprs = run->osi.gprs; - int i; - - run->exit_reason = KVM_EXIT_OSI; - for (i = 0; i < 32; i++) - gprs[i] = kvmppc_get_gpr(vcpu, i); - vcpu->arch.osi_needed = 1; - r = RESUME_HOST_NV; - } else if (!(vcpu->arch.shared->msr & MSR_PR) && - (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { - /* KVM PV hypercalls */ - kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); - r = RESUME_GUEST; - } else { - /* Guest syscalls */ - vcpu->stat.syscall_exits++; - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; - } - break; - case BOOK3S_INTERRUPT_FP_UNAVAIL: - case BOOK3S_INTERRUPT_ALTIVEC: - case BOOK3S_INTERRUPT_VSX: - { - int ext_msr = 0; - - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; - } - - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); - break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; - break; - default: - /* nothing to worry about - go again */ - break; - } - break; - } - case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, - kvmppc_get_last_inst(vcpu)); - vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, - kvmppc_get_last_inst(vcpu)); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - } - r = RESUME_GUEST; - break; - case BOOK3S_INTERRUPT_MACHINE_CHECK: - case BOOK3S_INTERRUPT_TRACE: - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; - break; - default: - /* Ugh - bork here! What did we get? */ - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); - r = RESUME_HOST; - BUG(); - break; - } - - - if (!(r & RESUME_HOST)) { - /* To avoid clobbering exit_reason, only check for signals if - * we aren't already exiting to userspace for some other - * reason. */ - if (signal_pending(current)) { -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM: Going back to host\n"); -#endif - vcpu->stat.signal_exits++; - run->exit_reason = KVM_EXIT_INTR; - r = -EINTR; - } else { - /* In case an interrupt came in that was triggered - * from userspace (like DEC), we need to check what - * to inject now! */ - kvmppc_core_deliver_interrupts(vcpu); - } - } - - trace_kvm_book3s_reenter(r, vcpu); - - return r; -} - int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { return 0; @@ -1181,69 +462,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); - int i; - - sregs->pvr = vcpu->arch.pvr; - - sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; - if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { - for (i = 0; i < 64; i++) { - sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i; - sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; - } - } else { - for (i = 0; i < 16; i++) - sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; - - for (i = 0; i < 8; i++) { - sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; - sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; - } - } - - return 0; -} - -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); - int i; - - kvmppc_set_pvr(vcpu, sregs->pvr); - - vcpu3s->sdr1 = sregs->u.s.sdr1; - if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { - for (i = 0; i < 64; i++) { - vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, - sregs->u.s.ppc64.slb[i].slbe); - } - } else { - for (i = 0; i < 16; i++) { - vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); - } - for (i = 0; i < 8; i++) { - kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false, - (u32)sregs->u.s.ppc32.ibat[i]); - kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true, - (u32)(sregs->u.s.ppc32.ibat[i] >> 32)); - kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false, - (u32)sregs->u.s.ppc32.dbat[i]); - kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true, - (u32)(sregs->u.s.ppc32.dbat[i] >> 32)); - } - } - - /* Flush the MMU after messing with the segments */ - kvmppc_mmu_pte_flush(vcpu, 0, 0); - - return 0; -} - int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; @@ -1298,202 +516,3 @@ out: mutex_unlock(&kvm->slots_lock); return r; } - -int kvmppc_core_check_processor_compat(void) -{ - return 0; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) -{ - struct kvmppc_vcpu_book3s *vcpu_book3s; - struct kvm_vcpu *vcpu; - int err = -ENOMEM; - unsigned long p; - - vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); - if (!vcpu_book3s) - goto out; - - vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) - kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); - if (!vcpu_book3s->shadow_vcpu) - goto free_vcpu; - - vcpu = &vcpu_book3s->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_shadow_vcpu; - - p = __get_free_page(GFP_KERNEL|__GFP_ZERO); - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); - if (!p) - goto uninit_vcpu; - - vcpu->arch.host_retip = kvm_return_point; - vcpu->arch.host_msr = mfmsr(); -#ifdef CONFIG_PPC_BOOK3S_64 - /* default to book3s_64 (970fx) */ - vcpu->arch.pvr = 0x3C0301; -#else - /* default to book3s_32 (750) */ - vcpu->arch.pvr = 0x84202; -#endif - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); - vcpu->arch.slb_nr = 64; - - /* remember where some real-mode handlers are */ - vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline); - vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter); - vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; -#ifdef CONFIG_PPC_BOOK3S_64 - vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; -#else - vcpu->arch.rmcall = (ulong)kvmppc_rmcall; -#endif - - vcpu->arch.shadow_msr = MSR_USER64; - - err = kvmppc_mmu_init(vcpu); - if (err < 0) - goto uninit_vcpu; - - return vcpu; - -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_shadow_vcpu: - kfree(vcpu_book3s->shadow_vcpu); -free_vcpu: - vfree(vcpu_book3s); -out: - return ERR_PTR(err); -} - -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - - free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); - kvm_vcpu_uninit(vcpu); - kfree(vcpu_book3s->shadow_vcpu); - vfree(vcpu_book3s); -} - -extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) -{ - int ret; - double fpr[32][TS_FPRWIDTH]; - unsigned int fpscr; - int fpexc_mode; -#ifdef CONFIG_ALTIVEC - vector128 vr[32]; - vector128 vscr; - unsigned long uninitialized_var(vrsave); - int used_vr; -#endif -#ifdef CONFIG_VSX - int used_vsr; -#endif - ulong ext_msr; - - /* No need to go into the guest when all we do is going out */ - if (signal_pending(current)) { - kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - - /* Save FPU state in stack */ - if (current->thread.regs->msr & MSR_FP) - giveup_fpu(current); - memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); - fpscr = current->thread.fpscr.val; - fpexc_mode = current->thread.fpexc_mode; - -#ifdef CONFIG_ALTIVEC - /* Save Altivec state in stack */ - used_vr = current->thread.used_vr; - if (used_vr) { - if (current->thread.regs->msr & MSR_VEC) - giveup_altivec(current); - memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); - vscr = current->thread.vscr; - vrsave = current->thread.vrsave; - } -#endif - -#ifdef CONFIG_VSX - /* Save VSX state in stack */ - used_vsr = current->thread.used_vsr; - if (used_vsr && (current->thread.regs->msr & MSR_VSX)) - __giveup_vsx(current); -#endif - - /* Remember the MSR with disabled extensions */ - ext_msr = current->thread.regs->msr; - - /* XXX we get called with irq disabled - change that! */ - local_irq_enable(); - - /* Preload FPU if it's enabled */ - if (vcpu->arch.shared->msr & MSR_FP) - kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - - ret = __kvmppc_vcpu_entry(kvm_run, vcpu); - - local_irq_disable(); - - current->thread.regs->msr = ext_msr; - - /* Make sure we save the guest FPU/Altivec/VSX state */ - kvmppc_giveup_ext(vcpu, MSR_FP); - kvmppc_giveup_ext(vcpu, MSR_VEC); - kvmppc_giveup_ext(vcpu, MSR_VSX); - - /* Restore FPU state from stack */ - memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); - current->thread.fpscr.val = fpscr; - current->thread.fpexc_mode = fpexc_mode; - -#ifdef CONFIG_ALTIVEC - /* Restore Altivec state from stack */ - if (used_vr && current->thread.used_vr) { - memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); - current->thread.vscr = vscr; - current->thread.vrsave = vrsave; - } - current->thread.used_vr = used_vr; -#endif - -#ifdef CONFIG_VSX - current->thread.used_vsr = used_vsr; -#endif - - return ret; -} - -static int kvmppc_book3s_init(void) -{ - int r; - - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, - THIS_MODULE); - - if (r) - return r; - - r = kvmppc_mmu_hpte_sysinit(); - - return r; -} - -static void kvmppc_book3s_exit(void) -{ - kvmppc_mmu_hpte_sysexit(); - kvm_exit(); -} - -module_init(kvmppc_book3s_init); -module_exit(kvmppc_book3s_exit); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c new file mode 100644 index 000000000000..fcdc97eb9411 --- /dev/null +++ b/arch/powerpc/kvm/book3s_pr.c @@ -0,0 +1,1009 @@ +/* + * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Alexander Graf + * Kevin Wolf + * Paul Mackerras + * + * Description: + * Functions relating to running KVM on Book 3S processors where + * we don't have access to hypervisor mode, and we run the guest + * in problem state (user mode). + * + * This file is derived from arch/powerpc/kvm/44x.c, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +/* #define EXIT_DEBUG */ +/* #define DEBUG_EXT */ + +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr); + +/* Some compatibility defines */ +#ifdef CONFIG_PPC_BOOK3S_32 +#define MSR_USER32 MSR_USER +#define MSR_USER64 MSR_USER +#define HW_PAGE_SIZE PAGE_SIZE +#endif + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); + memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, + sizeof(get_paca()->shadow_vcpu)); + to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 + current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; +#endif +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); + memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, + sizeof(get_paca()->shadow_vcpu)); + to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; +#endif + + kvmppc_giveup_ext(vcpu, MSR_FP); + kvmppc_giveup_ext(vcpu, MSR_VEC); + kvmppc_giveup_ext(vcpu, MSR_VSX); +} + +static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) +{ + ulong smsr = vcpu->arch.shared->msr; + + /* Guest MSR values */ + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; + /* Process MSR values */ + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; + /* External providers the guest reserved */ + smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); + /* 64-bit Process MSR values */ +#ifdef CONFIG_PPC_BOOK3S_64 + smsr |= MSR_ISF | MSR_HV; +#endif + vcpu->arch.shadow_msr = smsr; +} + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +{ + ulong old_msr = vcpu->arch.shared->msr; + +#ifdef EXIT_DEBUG + printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); +#endif + + msr &= to_book3s(vcpu)->msr_mask; + vcpu->arch.shared->msr = msr; + kvmppc_recalc_shadow_msr(vcpu); + + if (msr & MSR_POW) { + if (!vcpu->arch.pending_exceptions) { + kvm_vcpu_block(vcpu); + vcpu->stat.halt_wakeup++; + + /* Unset POW bit after we woke up */ + msr &= ~MSR_POW; + vcpu->arch.shared->msr = msr; + } + } + + if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != + (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { + kvmppc_mmu_flush_segments(vcpu); + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + + /* Preload magic page segment when in kernel mode */ + if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { + struct kvm_vcpu_arch *a = &vcpu->arch; + + if (msr & MSR_DR) + kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); + else + kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); + } + } + + /* Preload FPU if it's enabled */ + if (vcpu->arch.shared->msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); +} + +void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +{ + u32 host_pvr; + + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; + vcpu->arch.pvr = pvr; +#ifdef CONFIG_PPC_BOOK3S_64 + if ((pvr >= 0x330000) && (pvr < 0x70330000)) { + kvmppc_mmu_book3s_64_init(vcpu); + to_book3s(vcpu)->hior = 0xfff00000; + to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; + } else +#endif + { + kvmppc_mmu_book3s_32_init(vcpu); + to_book3s(vcpu)->hior = 0; + to_book3s(vcpu)->msr_mask = 0xffffffffULL; + } + + /* If we are in hypervisor level on 970, we can tell the CPU to + * treat DCBZ as 32 bytes store */ + vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; + if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && + !strcmp(cur_cpu_spec->platform, "ppc970")) + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; + + /* Cell performs badly if MSR_FEx are set. So let's hope nobody + really needs them in a VM on Cell and force disable them. */ + if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) + to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); + +#ifdef CONFIG_PPC_BOOK3S_32 + /* 32 bit Book3S always has 32 byte dcbz */ + vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; +#endif + + /* On some CPUs we can execute paired single operations natively */ + asm ( "mfpvr %0" : "=r"(host_pvr)); + switch (host_pvr) { + case 0x00080200: /* lonestar 2.0 */ + case 0x00088202: /* lonestar 2.2 */ + case 0x70000100: /* gekko 1.0 */ + case 0x00080100: /* gekko 2.0 */ + case 0x00083203: /* gekko 2.3a */ + case 0x00083213: /* gekko 2.3b */ + case 0x00083204: /* gekko 2.4 */ + case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ + case 0x00087200: /* broadway */ + vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; + /* Enable HID2.PSE - in case we need it later */ + mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); + } +} + +/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To + * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to + * emulate 32 bytes dcbz length. + * + * The Book3s_64 inventors also realized this case and implemented a special bit + * in the HID5 register, which is a hypervisor ressource. Thus we can't use it. + * + * My approach here is to patch the dcbz instruction on executing pages. + */ +static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + struct page *hpage; + u64 hpage_offset; + u32 *page; + int i; + + hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (is_error_page(hpage)) { + kvm_release_page_clean(hpage); + return; + } + + hpage_offset = pte->raddr & ~PAGE_MASK; + hpage_offset &= ~0xFFFULL; + hpage_offset /= 4; + + get_page(hpage); + page = kmap_atomic(hpage, KM_USER0); + + /* patch dcbz into reserved instruction, so we trap */ + for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) + if ((page[i] & 0xff0007ff) == INS_DCBZ) + page[i] &= 0xfffffff7; + + kunmap_atomic(page, KM_USER0); + put_page(hpage); +} + +static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + ulong mp_pa = vcpu->arch.magic_page_pa; + + if (unlikely(mp_pa) && + unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + return 1; + } + + return kvm_is_visible_gfn(vcpu->kvm, gfn); +} + +int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, + ulong eaddr, int vec) +{ + bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); + int r = RESUME_GUEST; + int relocated; + int page_found = 0; + struct kvmppc_pte pte; + bool is_mmio = false; + bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; + u64 vsid; + + relocated = data ? dr : ir; + + /* Resolve real address if translation turned on */ + if (relocated) { + page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); + } else { + pte.may_execute = true; + pte.may_read = true; + pte.may_write = true; + pte.raddr = eaddr & KVM_PAM; + pte.eaddr = eaddr; + pte.vpage = eaddr >> 12; + } + + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { + case 0: + pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); + break; + case MSR_DR: + case MSR_IR: + vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); + + if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) + pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); + else + pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); + pte.vpage |= vsid; + + if (vsid == -1) + page_found = -EINVAL; + break; + } + + if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { + /* + * If we do the dcbz hack, we have to NX on every execution, + * so we can patch the executing code. This renders our guest + * NX-less. + */ + pte.may_execute = !data; + } + + if (page_found == -ENOENT) { + /* Page not found in guest PTE entries */ + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + kvmppc_book3s_queue_irqprio(vcpu, vec); + } else if (page_found == -EPERM) { + /* Storage protection */ + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dsisr = + to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + kvmppc_book3s_queue_irqprio(vcpu, vec); + } else if (page_found == -EINVAL) { + /* Page not found in guest SLB */ + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); + } else if (!is_mmio && + kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + /* The guest's PTE is not mapped yet. Map on the host */ + kvmppc_mmu_map_page(vcpu, &pte); + if (data) + vcpu->stat.sp_storage++; + else if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) + kvmppc_patch_dcbz(vcpu, &pte); + } else { + /* MMIO */ + vcpu->stat.mmio_exits++; + vcpu->arch.paddr_accessed = pte.raddr; + r = kvmppc_emulate_mmio(run, vcpu); + if ( r == RESUME_HOST_NV ) + r = RESUME_HOST; + } + + return r; +} + +static inline int get_fpr_index(int i) +{ +#ifdef CONFIG_VSX + i *= 2; +#endif + return i; +} + +/* Give up external provider (FPU, Altivec, VSX) */ +void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) +{ + struct thread_struct *t = ¤t->thread; + u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX + u64 *vcpu_vsx = vcpu->arch.vsr; +#endif + u64 *thread_fpr = (u64*)t->fpr; + int i; + + if (!(vcpu->arch.guest_owned_ext & msr)) + return; + +#ifdef DEBUG_EXT + printk(KERN_INFO "Giving up ext 0x%lx\n", msr); +#endif + + switch (msr) { + case MSR_FP: + giveup_fpu(current); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) + vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; + + vcpu->arch.fpscr = t->fpscr.val; + break; + case MSR_VEC: +#ifdef CONFIG_ALTIVEC + giveup_altivec(current); + memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); + vcpu->arch.vscr = t->vscr; +#endif + break; + case MSR_VSX: +#ifdef CONFIG_VSX + __giveup_vsx(current); + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) + vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; +#endif + break; + default: + BUG(); + } + + vcpu->arch.guest_owned_ext &= ~msr; + current->thread.regs->msr &= ~msr; + kvmppc_recalc_shadow_msr(vcpu); +} + +static int kvmppc_read_inst(struct kvm_vcpu *vcpu) +{ + ulong srr0 = kvmppc_get_pc(vcpu); + u32 last_inst = kvmppc_get_last_inst(vcpu); + int ret; + + ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); + if (ret == -ENOENT) { + ulong msr = vcpu->arch.shared->msr; + + msr = kvmppc_set_field(msr, 33, 33, 1); + msr = kvmppc_set_field(msr, 34, 36, 0); + vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); + return EMULATE_AGAIN; + } + + return EMULATE_DONE; +} + +static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) +{ + + /* Need to do paired single emulation? */ + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) + return EMULATE_DONE; + + /* Read out the instruction */ + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) + /* Need to emulate */ + return EMULATE_FAIL; + + return EMULATE_AGAIN; +} + +/* Handle external providers (FPU, Altivec, VSX) */ +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr) +{ + struct thread_struct *t = ¤t->thread; + u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX + u64 *vcpu_vsx = vcpu->arch.vsr; +#endif + u64 *thread_fpr = (u64*)t->fpr; + int i; + + /* When we have paired singles, we emulate in software */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) + return RESUME_GUEST; + + if (!(vcpu->arch.shared->msr & msr)) { + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + return RESUME_GUEST; + } + + /* We already own the ext */ + if (vcpu->arch.guest_owned_ext & msr) { + return RESUME_GUEST; + } + +#ifdef DEBUG_EXT + printk(KERN_INFO "Loading up ext 0x%lx\n", msr); +#endif + + current->thread.regs->msr |= msr; + + switch (msr) { + case MSR_FP: + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) + thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; + + t->fpscr.val = vcpu->arch.fpscr; + t->fpexc_mode = 0; + kvmppc_load_up_fpu(); + break; + case MSR_VEC: +#ifdef CONFIG_ALTIVEC + memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); + t->vscr = vcpu->arch.vscr; + t->vrsave = -1; + kvmppc_load_up_altivec(); +#endif + break; + case MSR_VSX: +#ifdef CONFIG_VSX + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) + thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; + kvmppc_load_up_vsx(); +#endif + break; + default: + BUG(); + } + + vcpu->arch.guest_owned_ext |= msr; + + kvmppc_recalc_shadow_msr(vcpu); + + return RESUME_GUEST; +} + +int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + int r = RESUME_HOST; + + vcpu->stat.sum_exits++; + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + trace_kvm_book3s_exit(exit_nr, vcpu); + kvm_resched(vcpu); + switch (exit_nr) { + case BOOK3S_INTERRUPT_INST_STORAGE: + vcpu->stat.pf_instruc++; + +#ifdef CONFIG_PPC_BOOK3S_32 + /* We set segments as unused segments when invalidating them. So + * treat the respective fault as segment fault. */ + if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] + == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + r = RESUME_GUEST; + break; + } +#endif + + /* only care about PTEG not found errors, but leave NX alone */ + if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { + r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); + vcpu->stat.sp_instruc++; + } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { + /* + * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, + * so we can't use the NX bit inside the guest. Let's cross our fingers, + * that no guest that needs the dcbz hack does NX. + */ + kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); + r = RESUME_GUEST; + } else { + vcpu->arch.shared->msr |= + to_svcpu(vcpu)->shadow_srr1 & 0x58000000; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } + break; + case BOOK3S_INTERRUPT_DATA_STORAGE: + { + ulong dar = kvmppc_get_fault_dar(vcpu); + vcpu->stat.pf_storage++; + +#ifdef CONFIG_PPC_BOOK3S_32 + /* We set segments as unused segments when invalidating them. So + * treat the respective fault as segment fault. */ + if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { + kvmppc_mmu_map_segment(vcpu, dar); + r = RESUME_GUEST; + break; + } +#endif + + /* The only case we need to handle is missing shadow PTEs */ + if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { + r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); + } else { + vcpu->arch.shared->dar = dar; + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } + break; + } + case BOOK3S_INTERRUPT_DATA_SEGMENT: + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_DATA_SEGMENT); + } + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_INST_SEGMENT: + if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_INST_SEGMENT); + } + r = RESUME_GUEST; + break; + /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_DECREMENTER: + vcpu->stat.dec_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_EXTERNAL: + vcpu->stat.ext_intr_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PERFMON: + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PROGRAM: + { + enum emulation_result er; + ulong flags; + +program_interrupt: + flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; + + if (vcpu->arch.shared->msr & MSR_PR) { +#ifdef EXIT_DEBUG + printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); +#endif + if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + (INS_DCBZ & 0xfffffff7)) { + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + } + } + + vcpu->stat.emulated_inst_exits++; + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + r = RESUME_GUEST_NV; + break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; + case EMULATE_FAIL: + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST_NV; + break; + default: + BUG(); + } + break; + } + case BOOK3S_INTERRUPT_SYSCALL: + if (vcpu->arch.osi_enabled && + (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && + (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + /* MOL hypercalls */ + u64 *gprs = run->osi.gprs; + int i; + + run->exit_reason = KVM_EXIT_OSI; + for (i = 0; i < 32; i++) + gprs[i] = kvmppc_get_gpr(vcpu, i); + vcpu->arch.osi_needed = 1; + r = RESUME_HOST_NV; + } else if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; + } else { + /* Guest syscalls */ + vcpu->stat.syscall_exits++; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } + break; + case BOOK3S_INTERRUPT_FP_UNAVAIL: + case BOOK3S_INTERRUPT_ALTIVEC: + case BOOK3S_INTERRUPT_VSX: + { + int ext_msr = 0; + + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; + case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; + case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + } + + switch (kvmppc_check_ext(vcpu, exit_nr)) { + case EMULATE_DONE: + /* everything ok - let's enable the ext */ + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + break; + case EMULATE_FAIL: + /* we need to emulate this instruction */ + goto program_interrupt; + break; + default: + /* nothing to worry about - go again */ + break; + } + break; + } + case BOOK3S_INTERRUPT_ALIGNMENT: + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { + vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, + kvmppc_get_last_inst(vcpu)); + vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, + kvmppc_get_last_inst(vcpu)); + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + } + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_MACHINE_CHECK: + case BOOK3S_INTERRUPT_TRACE: + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + break; + default: + /* Ugh - bork here! What did we get? */ + printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", + exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); + r = RESUME_HOST; + BUG(); + break; + } + + + if (!(r & RESUME_HOST)) { + /* To avoid clobbering exit_reason, only check for signals if + * we aren't already exiting to userspace for some other + * reason. */ + if (signal_pending(current)) { +#ifdef EXIT_DEBUG + printk(KERN_EMERG "KVM: Going back to host\n"); +#endif + vcpu->stat.signal_exits++; + run->exit_reason = KVM_EXIT_INTR; + r = -EINTR; + } else { + /* In case an interrupt came in that was triggered + * from userspace (like DEC), we need to check what + * to inject now! */ + kvmppc_core_deliver_interrupts(vcpu); + } + } + + trace_kvm_book3s_reenter(r, vcpu); + + return r; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int i; + + sregs->pvr = vcpu->arch.pvr; + + sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; + if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { + for (i = 0; i < 64; i++) { + sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i; + sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; + } + } else { + for (i = 0; i < 16; i++) + sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; + + for (i = 0; i < 8; i++) { + sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; + sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; + } + } + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int i; + + kvmppc_set_pvr(vcpu, sregs->pvr); + + vcpu3s->sdr1 = sregs->u.s.sdr1; + if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { + for (i = 0; i < 64; i++) { + vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, + sregs->u.s.ppc64.slb[i].slbe); + } + } else { + for (i = 0; i < 16; i++) { + vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); + } + for (i = 0; i < 8; i++) { + kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false, + (u32)sregs->u.s.ppc32.ibat[i]); + kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true, + (u32)(sregs->u.s.ppc32.ibat[i] >> 32)); + kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false, + (u32)sregs->u.s.ppc32.dbat[i]); + kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true, + (u32)(sregs->u.s.ppc32.dbat[i] >> 32)); + } + } + + /* Flush the MMU after messing with the segments */ + kvmppc_mmu_pte_flush(vcpu, 0, 0); + + return 0; +} + +int kvmppc_core_check_processor_compat(void) +{ + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s; + struct kvm_vcpu *vcpu; + int err = -ENOMEM; + unsigned long p; + + vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); + if (!vcpu_book3s) + goto out; + + vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) + kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); + if (!vcpu_book3s->shadow_vcpu) + goto free_vcpu; + + vcpu = &vcpu_book3s->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_shadow_vcpu; + + p = __get_free_page(GFP_KERNEL|__GFP_ZERO); + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); + if (!p) + goto uninit_vcpu; + + vcpu->arch.host_retip = kvm_return_point; + vcpu->arch.host_msr = mfmsr(); +#ifdef CONFIG_PPC_BOOK3S_64 + /* default to book3s_64 (970fx) */ + vcpu->arch.pvr = 0x3C0301; +#else + /* default to book3s_32 (750) */ + vcpu->arch.pvr = 0x84202; +#endif + kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + vcpu->arch.slb_nr = 64; + + /* remember where some real-mode handlers are */ + vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline); + vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter); + vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; +#ifdef CONFIG_PPC_BOOK3S_64 + vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; +#else + vcpu->arch.rmcall = (ulong)kvmppc_rmcall; +#endif + + vcpu->arch.shadow_msr = MSR_USER64; + + err = kvmppc_mmu_init(vcpu); + if (err < 0) + goto uninit_vcpu; + + return vcpu; + +uninit_vcpu: + kvm_vcpu_uninit(vcpu); +free_shadow_vcpu: + kfree(vcpu_book3s->shadow_vcpu); +free_vcpu: + vfree(vcpu_book3s); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + + free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); + kvm_vcpu_uninit(vcpu); + kfree(vcpu_book3s->shadow_vcpu); + vfree(vcpu_book3s); +} + +extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int ret; + double fpr[32][TS_FPRWIDTH]; + unsigned int fpscr; + int fpexc_mode; +#ifdef CONFIG_ALTIVEC + vector128 vr[32]; + vector128 vscr; + unsigned long uninitialized_var(vrsave); + int used_vr; +#endif +#ifdef CONFIG_VSX + int used_vsr; +#endif + ulong ext_msr; + + /* No need to go into the guest when all we do is going out */ + if (signal_pending(current)) { + kvm_run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + + /* Save FPU state in stack */ + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); + fpscr = current->thread.fpscr.val; + fpexc_mode = current->thread.fpexc_mode; + +#ifdef CONFIG_ALTIVEC + /* Save Altivec state in stack */ + used_vr = current->thread.used_vr; + if (used_vr) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); + vscr = current->thread.vscr; + vrsave = current->thread.vrsave; + } +#endif + +#ifdef CONFIG_VSX + /* Save VSX state in stack */ + used_vsr = current->thread.used_vsr; + if (used_vsr && (current->thread.regs->msr & MSR_VSX)) + __giveup_vsx(current); +#endif + + /* Remember the MSR with disabled extensions */ + ext_msr = current->thread.regs->msr; + + /* XXX we get called with irq disabled - change that! */ + local_irq_enable(); + + /* Preload FPU if it's enabled */ + if (vcpu->arch.shared->msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); + + ret = __kvmppc_vcpu_entry(kvm_run, vcpu); + + local_irq_disable(); + + current->thread.regs->msr = ext_msr; + + /* Make sure we save the guest FPU/Altivec/VSX state */ + kvmppc_giveup_ext(vcpu, MSR_FP); + kvmppc_giveup_ext(vcpu, MSR_VEC); + kvmppc_giveup_ext(vcpu, MSR_VSX); + + /* Restore FPU state from stack */ + memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); + current->thread.fpscr.val = fpscr; + current->thread.fpexc_mode = fpexc_mode; + +#ifdef CONFIG_ALTIVEC + /* Restore Altivec state from stack */ + if (used_vr && current->thread.used_vr) { + memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); + current->thread.vscr = vscr; + current->thread.vrsave = vrsave; + } + current->thread.used_vr = used_vr; +#endif + +#ifdef CONFIG_VSX + current->thread.used_vsr = used_vsr; +#endif + + return ret; +} + +static int kvmppc_book3s_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, + THIS_MODULE); + + if (r) + return r; + + r = kvmppc_mmu_hpte_sysinit(); + + return r; +} + +static void kvmppc_book3s_exit(void) +{ + kvmppc_mmu_hpte_sysexit(); + kvm_exit(); +} + +module_init(kvmppc_book3s_init); +module_exit(kvmppc_book3s_exit); -- cgit v1.2.3 From b01c8b54a1a271c0fc4243845927fe1d250767a3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:18:26 +0000 Subject: powerpc, KVM: Rework KVM checks in first-level interrupt handlers Instead of branching out-of-line with the DO_KVM macro to check if we are in a KVM guest at the time of an interrupt, this moves the KVM check inline in the first-level interrupt handlers. This speeds up the non-KVM case and makes sure that none of the interrupt handlers are missing the check. Because the first-level interrupt handlers are now larger, some things had to be move out of line in exceptions-64s.S. This all necessitated some minor changes to the interrupt entry code in KVM. This also streamlines the book3s_32 KVM test. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/exception-64s.h | 121 ++++++++++++------ arch/powerpc/kernel/exceptions-64s.S | 189 ++++++++++++++++++----------- arch/powerpc/kvm/book3s_rmhandlers.S | 78 ++++++------ arch/powerpc/kvm/book3s_segment.S | 7 ++ arch/powerpc/platforms/iseries/exception.S | 2 +- arch/powerpc/platforms/iseries/exception.h | 4 +- 6 files changed, 247 insertions(+), 154 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index f5dfe3411f64..b6a3a443fbde 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -61,19 +61,22 @@ #define EXC_HV H #define EXC_STD -#define EXCEPTION_PROLOG_1(area) \ +#define __EXCEPTION_PROLOG_1(area, extra, vec) \ GET_PACA(r13); \ std r9,area+EX_R9(r13); /* save r9 - r12 */ \ std r10,area+EX_R10(r13); \ - std r11,area+EX_R11(r13); \ - std r12,area+EX_R12(r13); \ BEGIN_FTR_SECTION_NESTED(66); \ mfspr r10,SPRN_CFAR; \ std r10,area+EX_CFAR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ - GET_SCRATCH0(r9); \ - std r9,area+EX_R13(r13); \ - mfcr r9 + mfcr r9; \ + extra(vec); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + GET_SCRATCH0(r10); \ + std r10,area+EX_R13(r13) +#define EXCEPTION_PROLOG_1(area, extra, vec) \ + __EXCEPTION_PROLOG_1(area, extra, vec) #define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ ld r12,PACAKBASE(r13); /* get high part of &label */ \ @@ -85,13 +88,54 @@ mtspr SPRN_##h##SRR1,r10; \ h##rfid; \ b . /* prevent speculative execution */ -#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ +#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ __EXCEPTION_PROLOG_PSERIES_1(label, h) -#define EXCEPTION_PROLOG_PSERIES(area, label, h) \ - EXCEPTION_PROLOG_1(area); \ +#define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); +#define __KVMTEST(n) \ + lbz r10,PACA_KVM_SVCPU+SVCPU_IN_GUEST(r13); \ + cmpwi r10,0; \ + bne do_kvm_##n + +#define __KVM_HANDLER(area, h, n) \ +do_kvm_##n: \ + ld r10,area+EX_R10(r13); \ + stw r9,PACA_KVM_SVCPU+SVCPU_SCRATCH1(r13); \ + ld r9,area+EX_R9(r13); \ + std r12,PACA_KVM_SVCPU+SVCPU_SCRATCH0(r13); \ + li r12,n; \ + b kvmppc_interrupt + +#define __KVM_HANDLER_SKIP(area, h, n) \ +do_kvm_##n: \ + cmpwi r10,KVM_GUEST_MODE_SKIP; \ + ld r10,area+EX_R10(r13); \ + beq 89f; \ + stw r9,PACA_KVM_SVCPU+SVCPU_SCRATCH1(r13); \ + ld r9,area+EX_R9(r13); \ + std r12,PACA_KVM_SVCPU+SVCPU_SCRATCH0(r13); \ + li r12,n; \ + b kvmppc_interrupt; \ +89: mtocrf 0x80,r9; \ + ld r9,area+EX_R9(r13); \ + b kvmppc_skip_##h##interrupt + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#define KVMTEST(n) __KVMTEST(n) +#define KVM_HANDLER(area, h, n) __KVM_HANDLER(area, h, n) +#define KVM_HANDLER_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) + +#else +#define KVMTEST(n) +#define KVM_HANDLER(area, h, n) +#define KVM_HANDLER_SKIP(area, h, n) +#endif + +#define NOTEST(n) + /* * The common exception prolog is used for all except a few exceptions * such as a segment miss on a kernel address. We have to be prepared @@ -164,57 +208,54 @@ .globl label##_pSeries; \ label##_pSeries: \ HMT_MEDIUM; \ - DO_KVM vec; \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ + EXC_STD, KVMTEST, vec) #define STD_EXCEPTION_HV(loc, vec, label) \ . = loc; \ .globl label##_hv; \ label##_hv: \ HMT_MEDIUM; \ - DO_KVM vec; \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV) + SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ + EXC_HV, KVMTEST, vec) -#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h) \ - HMT_MEDIUM; \ - DO_KVM vec; \ - SET_SCRATCH0(r13); /* save r13 */ \ - GET_PACA(r13); \ - std r9,PACA_EXGEN+EX_R9(r13); /* save r9, r10 */ \ - std r10,PACA_EXGEN+EX_R10(r13); \ +#define __SOFTEN_TEST(h) \ lbz r10,PACASOFTIRQEN(r13); \ - mfcr r9; \ cmpwi r10,0; \ - beq masked_##h##interrupt; \ - GET_SCRATCH0(r10); \ - std r10,PACA_EXGEN+EX_R13(r13); \ - std r11,PACA_EXGEN+EX_R11(r13); \ - std r12,PACA_EXGEN+EX_R12(r13); \ - ld r12,PACAKBASE(r13); /* get high part of &label */ \ - ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label##_common) \ - mtspr SPRN_##h##SRR0,r12; \ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ - b . /* prevent speculative execution */ -#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, h) + beq masked_##h##interrupt +#define _SOFTEN_TEST(h) __SOFTEN_TEST(h) + +#define SOFTEN_TEST(vec) \ + KVMTEST(vec); \ + _SOFTEN_TEST(EXC_STD) + +#define SOFTEN_TEST_HV(vec) \ + KVMTEST(vec); \ + _SOFTEN_TEST(EXC_HV) + +#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ + HMT_MEDIUM; \ + SET_SCRATCH0(r13); /* save r13 */ \ + __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label##_common, h); +#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ + __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) #define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ . = loc; \ .globl label##_pSeries; \ label##_pSeries: \ - _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD) + _MASKABLE_EXCEPTION_PSERIES(vec, label, \ + EXC_STD, SOFTEN_TEST) #define MASKABLE_EXCEPTION_HV(loc, vec, label) \ . = loc; \ .globl label##_hv; \ label##_hv: \ - _MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV) + _MASKABLE_EXCEPTION_PSERIES(vec, label, \ + EXC_HV, SOFTEN_TEST_HV) #ifdef CONFIG_PPC_ISERIES #define DISABLE_INTS \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a85f4874cba7..e76472cbf3b5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -40,7 +40,6 @@ __start_interrupts: .globl system_reset_pSeries; system_reset_pSeries: HMT_MEDIUM; - DO_KVM 0x100; SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -65,67 +64,45 @@ BEGIN_FTR_SECTION beq . END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) #endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) . = 0x200 -_machine_check_pSeries: - HMT_MEDIUM - DO_KVM 0x200 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) +machine_check_pSeries_1: + /* This is moved out of line as it can be patched by FW, but + * some code path might still want to branch into the original + * vector + */ + b machine_check_pSeries . = 0x300 .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM - DO_KVM 0x300 SET_SCRATCH0(r13) +#ifndef CONFIG_POWER4_ONLY BEGIN_FTR_SECTION - GET_PACA(r13) - std r9,PACA_EXSLB+EX_R9(r13) - std r10,PACA_EXSLB+EX_R10(r13) - mfspr r10,SPRN_DAR - mfspr r9,SPRN_DSISR - srdi r10,r10,60 - rlwimi r10,r9,16,0x20 - mfcr r9 - cmpwi r10,0x2c - beq do_stab_bolted_pSeries - ld r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXGEN+EX_R11(r13) - ld r11,PACA_EXSLB+EX_R9(r13) - std r12,PACA_EXGEN+EX_R12(r13) - GET_SCRATCH0(r12) - std r10,PACA_EXGEN+EX_R10(r13) - std r11,PACA_EXGEN+EX_R9(r13) - std r12,PACA_EXGEN+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) -FTR_SECTION_ELSE - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) + b data_access_check_stab +data_access_not_stab: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) +#endif + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, + KVMTEST, 0x300) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM - DO_KVM 0x380 SET_SCRATCH0(r13) - GET_PACA(r13) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR - std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ - mfcr r9 #ifdef __DISABLED__ /* Keep that around for when we re-implement dynamic VSIDs */ cmpdi r3,0 bge slb_miss_user_pseries #endif /* __DISABLED__ */ - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - mfspr r12,SPRN_SRR1 /* and SRR1 */ + mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE b .slb_miss_realmode #else @@ -147,24 +124,16 @@ data_access_slb_pSeries: .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM - DO_KVM 0x480 SET_SCRATCH0(r13) - GET_PACA(r13) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ - mfcr r9 #ifdef __DISABLED__ /* Keep that around for when we re-implement dynamic VSIDs */ cmpdi r3,0 bge slb_miss_user_pseries #endif /* __DISABLED__ */ - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - mfspr r12,SPRN_SRR1 /* and SRR1 */ + mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE b .slb_miss_realmode #else @@ -184,26 +153,46 @@ instruction_access_slb_pSeries: hardware_interrupt_pSeries: hardware_interrupt_hv: BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, + EXC_STD, SOFTEN_TEST) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) FTR_SECTION_ELSE - _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, + EXC_HV, SOFTEN_TEST_HV) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) - MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer) STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM - DO_KVM 0xc00 +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9,PACA_EXGEN+EX_R9(r13) + std r10,PACA_EXGEN+EX_R10(r13) + mfcr r9 + KVMTEST(0xc00) + GET_SCRATCH0(r13) +#endif BEGIN_FTR_SECTION cmpdi r0,0x1ebe beq- 1f @@ -220,6 +209,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) rfid b . /* prevent speculative execution */ + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) + /* Fast LE/BE switch system call */ 1: mfspr r12,SPRN_SRR1 xori r12,r12,MSR_LE @@ -228,6 +219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b . STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch * out of line to handle them @@ -262,30 +254,93 @@ vsx_unavailable_pSeries_1: #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) #endif /* CONFIG_CBE_RAS */ + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) #endif /* CONFIG_CBE_RAS */ + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) #endif /* CONFIG_CBE_RAS */ . = 0x3000 /*** Out of line interrupts support ***/ + /* moved from 0x200 */ +machine_check_pSeries: + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, + EXC_STD, KVMTEST, 0x200) + KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) + +#ifndef CONFIG_POWER4_ONLY + /* moved from 0x300 */ +data_access_check_stab: + GET_PACA(r13) + std r9,PACA_EXSLB+EX_R9(r13) + std r10,PACA_EXSLB+EX_R10(r13) + mfspr r10,SPRN_DAR + mfspr r9,SPRN_DSISR + srdi r10,r10,60 + rlwimi r10,r9,16,0x20 +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + lbz r9,PACA_KVM_SVCPU+SVCPU_IN_GUEST(r13) + rlwimi r10,r9,8,0x300 +#endif + mfcr r9 + cmpwi r10,0x2c + beq do_stab_bolted_pSeries + mtcrf 0x80,r9 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + b data_access_not_stab +do_stab_bolted_pSeries: + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + GET_SCRATCH0(r10) + std r10,PACA_EXSLB+EX_R13(r13) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) +#endif /* CONFIG_POWER4_ONLY */ + + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) + KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) + + .align 7 /* moved from 0xe00 */ - STD_EXCEPTION_HV(., 0xe00, h_data_storage) - STD_EXCEPTION_HV(., 0xe20, h_instr_storage) - STD_EXCEPTION_HV(., 0xe40, emulation_assist) - STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */ + STD_EXCEPTION_HV(., 0xe02, h_data_storage) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) + STD_EXCEPTION_HV(., 0xe22, h_instr_storage) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) + STD_EXCEPTION_HV(., 0xe42, emulation_assist) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) + STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */ + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) /* moved from 0xf00 */ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -317,14 +372,6 @@ masked_Hinterrupt: hrfid b . - .align 7 -do_stab_bolted_pSeries: - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) - #ifdef CONFIG_PPC_PSERIES /* * Vectors for the FWNMI option. Share common code. @@ -334,14 +381,8 @@ do_stab_bolted_pSeries: system_reset_fwnmi: HMT_MEDIUM SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) - - .globl machine_check_fwnmi - .align 7 -machine_check_fwnmi: - HMT_MEDIUM - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) #endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 5f5d07475c91..dd03689fc609 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -40,37 +40,42 @@ #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) #define FUNC(name) GLUE(.,name) -#elif defined(CONFIG_PPC_BOOK3S_32) +kvmppc_skip_interrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_SRR0 + addi r13, r13, 4 + mtspr SPRN_SRR0, r13 + GET_SCRATCH0(r13) + rfid + b . + +kvmppc_skip_Hinterrupt: + /* + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. + */ + mfspr r13, SPRN_HSRR0 + addi r13, r13, 4 + mtspr SPRN_HSRR0, r13 + GET_SCRATCH0(r13) + hrfid + b . -#define LOAD_SHADOW_VCPU(reg) \ - mfspr reg, SPRN_SPRG_THREAD; \ - lwz reg, THREAD_KVM_SVCPU(reg); \ - /* PPC32 can have a NULL pointer - let's check for that */ \ - mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \ - mfcr r12; \ - cmpwi reg, 0; \ - bne 1f; \ - mfspr reg, SPRN_SPRG_SCRATCH0; \ - mtcr r12; \ - mfspr r12, SPRN_SPRG_SCRATCH1; \ - b kvmppc_resume_\intno; \ -1:; \ - mtcr r12; \ - mfspr r12, SPRN_SPRG_SCRATCH1; \ - tophys(reg, reg) +#elif defined(CONFIG_PPC_BOOK3S_32) #define SHADOW_VCPU_OFF 0 #define MSR_NOIRQ MSR_KERNEL #define FUNC(name) name -#endif - .macro INTERRUPT_TRAMPOLINE intno .global kvmppc_trampoline_\intno kvmppc_trampoline_\intno: - SET_SCRATCH0(r13) /* Save r13 */ + mtspr SPRN_SPRG_SCRATCH0, r13 /* Save r13 */ /* * First thing to do is to find out if we're coming @@ -78,19 +83,28 @@ kvmppc_trampoline_\intno: * * To distinguish, we check a magic byte in the PACA/current */ - LOAD_SHADOW_VCPU(r13) - PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) + mfspr r13, SPRN_SPRG_THREAD + lwz r13, THREAD_KVM_SVCPU(r13) + /* PPC32 can have a NULL pointer - let's check for that */ + mtspr SPRN_SPRG_SCRATCH1, r12 /* Save r12 */ mfcr r12 + cmpwi r13, 0 + bne 1f +2: mtcr r12 + mfspr r12, SPRN_SPRG_SCRATCH1 + mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ + b kvmppc_resume_\intno /* Get back original handler */ + +1: tophys(r13, r13) stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) + mfspr r12, SPRN_SPRG_SCRATCH1 + stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) cmpwi r12, KVM_GUEST_MODE_NONE bne ..kvmppc_handler_hasmagic_\intno /* No KVM guest? Then jump back to the Linux handler! */ lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) - mtcr r12 - PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - GET_SCRATCH0(r13) /* r13 = original r13 */ - b kvmppc_resume_\intno /* Get back original handler */ + b 2b /* Now we know we're handling a KVM guest */ ..kvmppc_handler_hasmagic_\intno: @@ -112,9 +126,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL -#ifdef CONFIG_PPC_BOOK3S_64 -INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL_HV -#endif INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_FP_UNAVAIL @@ -124,14 +135,6 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC -/* Those are only available on 64 bit machines */ - -#ifdef CONFIG_PPC_BOOK3S_64 -INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT -INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT -INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX -#endif - /* * Bring us back to the faulting code, but skip the * faulting instruction. @@ -163,6 +166,7 @@ kvmppc_handler_skip_ins: /* And get back into the code */ RFI +#endif /* * This trampoline brings us back to a real mode handler diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 451264274b8c..4a623eb28a53 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -125,6 +125,9 @@ kvmppc_handler_trampoline_enter_end: .global kvmppc_handler_trampoline_exit kvmppc_handler_trampoline_exit: +.global kvmppc_interrupt +kvmppc_interrupt: + /* Register usage at this point: * * SPRG_SCRATCH0 = guest R13 @@ -155,12 +158,16 @@ kvmppc_handler_trampoline_exit: PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) /* Save guest PC and MSR */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION andi. r0,r12,0x2 beq 1f mfspr r3,SPRN_HSRR0 mfspr r4,SPRN_HSRR1 andi. r12,r12,0x3ffd b 2f +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +#endif 1: mfsrr0 r3 mfsrr1 r4 2: diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S index 29c02f36b32f..f519ee17ff7d 100644 --- a/arch/powerpc/platforms/iseries/exception.S +++ b/arch/powerpc/platforms/iseries/exception.S @@ -167,7 +167,7 @@ BEGIN_FTR_SECTION std r12,PACA_EXGEN+EX_R13(r13) EXCEPTION_PROLOG_ISERIES_1 FTR_SECTION_ELSE - EXCEPTION_PROLOG_1(PACA_EXGEN) + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0) EXCEPTION_PROLOG_ISERIES_1 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) b data_access_common diff --git a/arch/powerpc/platforms/iseries/exception.h b/arch/powerpc/platforms/iseries/exception.h index bae3fba5ad8e..50271b550a99 100644 --- a/arch/powerpc/platforms/iseries/exception.h +++ b/arch/powerpc/platforms/iseries/exception.h @@ -39,7 +39,7 @@ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - EXCEPTION_PROLOG_1(area); \ + EXCEPTION_PROLOG_1(area, NOTEST, 0); \ EXCEPTION_PROLOG_ISERIES_1; \ b label##_common @@ -48,7 +48,7 @@ label##_iSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRN_SPRG_SCRATCH0,r13; /* save r13 */ \ - EXCEPTION_PROLOG_1(PACA_EXGEN); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0); \ lbz r10,PACASOFTIRQEN(r13); \ cmpwi 0,r10,0; \ beq- label##_iSeries_masked; \ -- cgit v1.2.3 From 3cf658b605393d793ea52416c2306b86fbde9d9a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:18:52 +0000 Subject: KVM: PPC: Deliver program interrupts right away instead of queueing them Doing so means that we don't have to save the flags anywhere and gets rid of the last reference to to_book3s(vcpu) in arch/powerpc/kvm/book3s.c. Doing so is OK because a program interrupt won't be generated at the same time as any other synchronous interrupt. If a program interrupt and an asynchronous interrupt (external or decrementer) are generated at the same time, the program interrupt will be delivered, which is correct because it has a higher priority, and then the asynchronous interrupt will be masked. We don't ever generate system reset or machine check interrupts to the guest, but if we did, then we would need to make sure they got delivered rather than the program interrupt. The current code would be wrong in this situation anyway since it would deliver the program interrupt as well as the reset/machine check interrupt. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 163e3e13c87c..f68a34d16035 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -129,8 +129,8 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { - to_book3s(vcpu)->prog_flags = flags; - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); } void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) @@ -170,7 +170,6 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; int vec = 0; - ulong flags = 0ULL; bool crit = kvmppc_critical_section(vcpu); switch (priority) { @@ -206,7 +205,6 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) break; case BOOK3S_IRQPRIO_PROGRAM: vec = BOOK3S_INTERRUPT_PROGRAM; - flags = to_book3s(vcpu)->prog_flags; break; case BOOK3S_IRQPRIO_VSX: vec = BOOK3S_INTERRUPT_VSX; @@ -237,7 +235,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) #endif if (deliver) - kvmppc_inject_interrupt(vcpu, vec, flags); + kvmppc_inject_interrupt(vcpu, vec, 0); return deliver; } -- cgit v1.2.3 From f9e0554deca54a42fb2cf7f68c05a4a37461c205 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:19:22 +0000 Subject: KVM: PPC: Pass init/destroy vm and prepare/commit memory region ops down This arranges for the top-level arch/powerpc/kvm/powerpc.c file to pass down some of the calls it gets to the lower-level subarchitecture specific code. The lower-level implementations (in booke.c and book3s.c) are no-ops. The coming book3s_hv.c will need this. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 7 +++++++ arch/powerpc/kvm/book3s_pr.c | 20 ++++++++++++++++++++ arch/powerpc/kvm/booke.c | 20 ++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 9 ++++++--- 4 files changed, 53 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c662f140283a..9b6f3f99c5eb 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -111,6 +111,13 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); +extern int kvmppc_core_init_vm(struct kvm *kvm); +extern void kvmppc_core_destroy_vm(struct kvm *kvm); +extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem); +extern void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index fcdc97eb9411..72b20b8749fa 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -984,6 +984,26 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return ret; } +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + return 0; +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + static int kvmppc_book3s_init(void) { int r; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9f2e4a5e1c4d..9066325b23b2 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -865,6 +865,26 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + return 0; +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + int __init kvmppc_booke_init(void) { unsigned long ivor[16]; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 24e2b64b6a48..0c80e159c138 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -148,7 +148,7 @@ void kvm_arch_check_processor_compat(void *rtn) int kvm_arch_init_vm(struct kvm *kvm) { - return 0; + return kvmppc_core_init_vm(kvm); } void kvm_arch_destroy_vm(struct kvm *kvm) @@ -164,6 +164,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; atomic_set(&kvm->online_vcpus, 0); + + kvmppc_core_destroy_vm(kvm); + mutex_unlock(&kvm->lock); } @@ -212,7 +215,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - return 0; + return kvmppc_core_prepare_memory_region(kvm, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, @@ -220,7 +223,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc) { - return; + kvmppc_core_commit_memory_region(kvm, mem); } -- cgit v1.2.3 From df6909e5d52f67be01862c5cb453e509aee661f1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:19:50 +0000 Subject: KVM: PPC: Move guest enter/exit down into subarch-specific code Instead of doing the kvm_guest_enter/exit() and local_irq_dis/enable() calls in powerpc.c, this moves them down into the subarch-specific book3s_pr.c and booke.c. This eliminates an extra local_irq_enable() call in book3s_pr.c, and will be needed for when we do SMT4 guest support in the book3s hypervisor mode code. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_interrupts.S | 2 +- arch/powerpc/kvm/book3s_pr.c | 12 ++++++------ arch/powerpc/kvm/booke.c | 13 +++++++++++++ arch/powerpc/kvm/powerpc.c | 6 +----- 5 files changed, 22 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9b6f3f99c5eb..48b7ab76de2d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -42,6 +42,7 @@ enum emulation_result { EMULATE_AGAIN, /* something went wrong. go again */ }; +extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern char kvmppc_handlers_start[]; extern unsigned long kvmppc_handler_len; diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 2f0bc928b08a..8c5e0e160107 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -85,7 +85,7 @@ * r3: kvm_run pointer * r4: vcpu pointer */ -_GLOBAL(__kvmppc_vcpu_entry) +_GLOBAL(__kvmppc_vcpu_run) kvm_start_entry: /* Write correct stack frame */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 72b20b8749fa..0c0d3f274437 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -891,8 +891,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) vfree(vcpu_book3s); } -extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; double fpr[32][TS_FPRWIDTH]; @@ -944,14 +943,15 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* Remember the MSR with disabled extensions */ ext_msr = current->thread.regs->msr; - /* XXX we get called with irq disabled - change that! */ - local_irq_enable(); - /* Preload FPU if it's enabled */ if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - ret = __kvmppc_vcpu_entry(kvm_run, vcpu); + kvm_guest_enter(); + + ret = __kvmppc_vcpu_run(kvm_run, vcpu); + + kvm_guest_exit(); local_irq_disable(); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9066325b23b2..ee45fa01220e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -312,6 +312,19 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) vcpu->arch.shared->int_pending = 0; } +int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int ret; + + local_irq_disable(); + kvm_guest_enter(); + ret = __kvmppc_vcpu_run(kvm_run, vcpu); + kvm_guest_exit(); + local_irq_enable(); + + return ret; +} + /** * kvmppc_handle_exit * diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0c80e159c138..026036efcde0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -500,11 +500,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvmppc_core_deliver_interrupts(vcpu); - local_irq_disable(); - kvm_guest_enter(); - r = __kvmppc_vcpu_run(run, vcpu); - kvm_guest_exit(); - local_irq_enable(); + r = kvmppc_vcpu_run(run, vcpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); -- cgit v1.2.3 From 923c53caea446d246949c94703be83e68f251af7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:20:24 +0000 Subject: powerpc: Set up LPCR for running guest partitions In hypervisor mode, the LPCR controls several aspects of guest partitions, including virtual partition memory mode, and also controls whether the hypervisor decrementer interrupts are enabled. This sets up LPCR at boot time so that guest partitions will use a virtual real memory area (VRMA) composed of 16MB large pages, and hypervisor decrementer interrupts are disabled. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 4 ++++ arch/powerpc/kernel/cpu_setup_power7.S | 18 +++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5cae0dd176c..d879a6b91635 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -232,10 +232,12 @@ #define LPCR_VPM0 (1ul << (63-0)) #define LPCR_VPM1 (1ul << (63-1)) #define LPCR_ISL (1ul << (63-2)) +#define LPCR_VC_SH (63-2) #define LPCR_DPFD_SH (63-11) #define LPCR_VRMA_L (1ul << (63-12)) #define LPCR_VRMA_LP0 (1ul << (63-15)) #define LPCR_VRMA_LP1 (1ul << (63-16)) +#define LPCR_VRMASD_SH (63-16) #define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */ #define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ #define LPCR_PECE 0x00007000 /* powersave exit cause enable */ @@ -243,8 +245,10 @@ #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ #define LPCR_MER 0x00000800 /* Mediated External Exception */ +#define LPCR_LPES 0x0000000c #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ +#define LPCR_LPES_SH 2 #define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ #define SPRN_LPID 0x13F /* Logical Partition Identifier */ diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index 4f9a93fcfe07..2ef6749688e9 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -61,19 +61,23 @@ __init_LPCR: * LPES = 0b01 (HSRR0/1 used for 0x500) * PECE = 0b111 * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) * * Other bits untouched for now */ mfspr r3,SPRN_LPCR - ori r3,r3,(LPCR_LPES0|LPCR_LPES1) - xori r3,r3, LPCR_LPES0 + li r5,1 + rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,7 - sldi r5,r5,LPCR_DPFD_SH - andc r3,r3,r5 li r5,4 - sldi r5,r5,LPCR_DPFD_SH - or r3,r3,r5 + rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 + clrrdi r3,r3,1 /* clear HDICE */ + li r5,4 + rldimi r3,r5, LPCR_VC_SH, 0 + li r5,0x10 + rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 mtspr SPRN_LPCR,r3 isync blr -- cgit v1.2.3 From 3c42bf8a717cb636e0ed2ed77194669e2ac3ed56 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:20:58 +0000 Subject: KVM: PPC: Split host-state fields out of kvmppc_book3s_shadow_vcpu There are several fields in struct kvmppc_book3s_shadow_vcpu that temporarily store bits of host state while a guest is running, rather than anything relating to the particular guest or vcpu. This splits them out into a new kvmppc_host_state structure and modifies the definitions in asm-offsets.c to suit. On 32-bit, we have a kvmppc_host_state structure inside the kvmppc_book3s_shadow_vcpu since the assembly code needs to be able to get to them both with one pointer. On 64-bit they are separate fields in the PACA. This means that on 64-bit we don't need to copy the kvmppc_host_state in and out on vcpu load/unload, and in future will mean that the book3s_hv code doesn't need a shadow_vcpu struct in the PACA at all. That does mean that we have to be careful not to rely on any values persisting in the hstate field of the paca across any point where we could block or get preempted. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/exception-64s.h | 10 ++-- arch/powerpc/include/asm/kvm_book3s_asm.h | 27 ++++++--- arch/powerpc/include/asm/paca.h | 1 + arch/powerpc/kernel/asm-offsets.c | 94 +++++++++++++++---------------- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kvm/book3s_interrupts.S | 19 ++----- arch/powerpc/kvm/book3s_rmhandlers.S | 18 +++--- arch/powerpc/kvm/book3s_segment.S | 76 +++++++++++++------------ 8 files changed, 127 insertions(+), 120 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b6a3a443fbde..296c9b66c04a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -96,16 +96,16 @@ EXCEPTION_PROLOG_PSERIES_1(label, h); #define __KVMTEST(n) \ - lbz r10,PACA_KVM_SVCPU+SVCPU_IN_GUEST(r13); \ + lbz r10,HSTATE_IN_GUEST(r13); \ cmpwi r10,0; \ bne do_kvm_##n #define __KVM_HANDLER(area, h, n) \ do_kvm_##n: \ ld r10,area+EX_R10(r13); \ - stw r9,PACA_KVM_SVCPU+SVCPU_SCRATCH1(r13); \ + stw r9,HSTATE_SCRATCH1(r13); \ ld r9,area+EX_R9(r13); \ - std r12,PACA_KVM_SVCPU+SVCPU_SCRATCH0(r13); \ + std r12,HSTATE_SCRATCH0(r13); \ li r12,n; \ b kvmppc_interrupt @@ -114,9 +114,9 @@ do_kvm_##n: \ cmpwi r10,KVM_GUEST_MODE_SKIP; \ ld r10,area+EX_R10(r13); \ beq 89f; \ - stw r9,PACA_KVM_SVCPU+SVCPU_SCRATCH1(r13); \ + stw r9,HSTATE_SCRATCH1(r13); \ ld r9,area+EX_R9(r13); \ - std r12,PACA_KVM_SVCPU+SVCPU_SCRATCH0(r13); \ + std r12,HSTATE_SCRATCH0(r13); \ li r12,n; \ b kvmppc_interrupt; \ 89: mtocrf 0x80,r9; \ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index d5a8a3861635..312617529864 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -60,6 +60,22 @@ kvmppc_resume_\intno: #else /*__ASSEMBLY__ */ +/* + * This struct goes in the PACA on 64-bit processors. It is used + * to store host state that needs to be saved when we enter a guest + * and restored when we exit, but isn't specific to any particular + * guest or vcpu. It also has some scratch fields used by the guest + * exit code. + */ +struct kvmppc_host_state { + ulong host_r1; + ulong host_r2; + ulong vmhandler; + ulong scratch0; + ulong scratch1; + u8 in_guest; +}; + struct kvmppc_book3s_shadow_vcpu { ulong gpr[14]; u32 cr; @@ -73,17 +89,12 @@ struct kvmppc_book3s_shadow_vcpu { ulong shadow_srr1; ulong fault_dar; - ulong host_r1; - ulong host_r2; - ulong handler; - ulong scratch0; - ulong scratch1; - ulong vmhandler; - u8 in_guest; - #ifdef CONFIG_PPC_BOOK3S_32 u32 sr[16]; /* Guest SRs */ + + struct kvmppc_host_state hstate; #endif + #ifdef CONFIG_PPC_BOOK3S_64 u8 slb_max; /* highest used guest slb entry */ struct { diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 74126765106a..58f4a18ef60c 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -149,6 +149,7 @@ struct paca_struct { #ifdef CONFIG_KVM_BOOK3S_HANDLER /* We use this to store guest state in */ struct kvmppc_book3s_shadow_vcpu shadow_vcpu; + struct kvmppc_host_state kvm_hstate; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index faf846131f45..dabfb7346f36 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -198,11 +198,6 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); - DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); - DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); -#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -416,49 +411,54 @@ int main(void) DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); - DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - - offsetof(struct kvmppc_vcpu_book3s, vcpu)); - DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); - DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); - DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); - DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); - DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); - DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); - DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); - DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); - DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); - DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); - DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); - DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); - DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); - DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); - DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); - DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); - DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); - DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); - DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); - DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); - DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); - DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, - vmhandler)); - DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, - scratch0)); - DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu, - scratch1)); - DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, - in_guest)); - DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, - fault_dsisr)); - DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, - fault_dar)); - DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, - last_inst)); - DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, - shadow_srr1)); + +#ifdef CONFIG_PPC_BOOK3S_64 +# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) +# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f)) +#else /* 32-bit */ +# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f)) +# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f)) +#endif + + SVCPU_FIELD(SVCPU_CR, cr); + SVCPU_FIELD(SVCPU_XER, xer); + SVCPU_FIELD(SVCPU_CTR, ctr); + SVCPU_FIELD(SVCPU_LR, lr); + SVCPU_FIELD(SVCPU_PC, pc); + SVCPU_FIELD(SVCPU_R0, gpr[0]); + SVCPU_FIELD(SVCPU_R1, gpr[1]); + SVCPU_FIELD(SVCPU_R2, gpr[2]); + SVCPU_FIELD(SVCPU_R3, gpr[3]); + SVCPU_FIELD(SVCPU_R4, gpr[4]); + SVCPU_FIELD(SVCPU_R5, gpr[5]); + SVCPU_FIELD(SVCPU_R6, gpr[6]); + SVCPU_FIELD(SVCPU_R7, gpr[7]); + SVCPU_FIELD(SVCPU_R8, gpr[8]); + SVCPU_FIELD(SVCPU_R9, gpr[9]); + SVCPU_FIELD(SVCPU_R10, gpr[10]); + SVCPU_FIELD(SVCPU_R11, gpr[11]); + SVCPU_FIELD(SVCPU_R12, gpr[12]); + SVCPU_FIELD(SVCPU_R13, gpr[13]); + SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr); + SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar); + SVCPU_FIELD(SVCPU_LAST_INST, last_inst); + SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1); #ifdef CONFIG_PPC_BOOK3S_32 - DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); + SVCPU_FIELD(SVCPU_SR, sr); #endif -#else +#ifdef CONFIG_PPC64 + SVCPU_FIELD(SVCPU_SLB, slb); + SVCPU_FIELD(SVCPU_SLB_MAX, slb_max); +#endif + + HSTATE_FIELD(HSTATE_HOST_R1, host_r1); + HSTATE_FIELD(HSTATE_HOST_R2, host_r2); + HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); + HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); + HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); + +#else /* CONFIG_PPC_BOOK3S */ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); @@ -468,7 +468,7 @@ int main(void) DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ -#endif +#endif /* CONFIG_KVM */ #ifdef CONFIG_KVM_GUEST DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e76472cbf3b5..6da00550afea 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -298,7 +298,7 @@ data_access_check_stab: srdi r10,r10,60 rlwimi r10,r9,16,0x20 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER - lbz r9,PACA_KVM_SVCPU+SVCPU_IN_GUEST(r13) + lbz r9,HSTATE_IN_GUEST(r13) rlwimi r10,r9,8,0x300 #endif mfcr r9 diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 8c5e0e160107..c54b0e30cf3f 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -29,8 +29,7 @@ #define ULONG_SIZE 8 #define FUNC(name) GLUE(.,name) -#define GET_SHADOW_VCPU(reg) \ - addi reg, r13, PACA_KVM_SVCPU +#define GET_SHADOW_VCPU_R13 #define DISABLE_INTERRUPTS \ mfmsr r0; \ @@ -43,8 +42,8 @@ #define ULONG_SIZE 4 #define FUNC(name) name -#define GET_SHADOW_VCPU(reg) \ - lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) +#define GET_SHADOW_VCPU_R13 \ + lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2) #define DISABLE_INTERRUPTS \ mfmsr r0; \ @@ -107,17 +106,11 @@ kvm_start_entry: /* Load non-volatile guest state from the vcpu */ VCPU_LOAD_NVGPRS(r4) - GET_SHADOW_VCPU(r5) - - /* Save R1/R2 in the PACA */ - PPC_STL r1, SVCPU_HOST_R1(r5) - PPC_STL r2, SVCPU_HOST_R2(r5) +kvm_start_lightweight: - /* XXX swap in/out on load? */ + GET_SHADOW_VCPU_R13 PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) - PPC_STL r3, SVCPU_VMHANDLER(r5) - -kvm_start_lightweight: + PPC_STL r3, HSTATE_VMHANDLER(r13) PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index dd03689fc609..c1f877c4a884 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,6 @@ #if defined(CONFIG_PPC_BOOK3S_64) #define LOAD_SHADOW_VCPU(reg) GET_PACA(reg) -#define SHADOW_VCPU_OFF PACA_KVM_SVCPU #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) #define FUNC(name) GLUE(.,name) @@ -66,7 +65,6 @@ kvmppc_skip_Hinterrupt: #elif defined(CONFIG_PPC_BOOK3S_32) -#define SHADOW_VCPU_OFF 0 #define MSR_NOIRQ MSR_KERNEL #define FUNC(name) name @@ -96,14 +94,14 @@ kvmppc_trampoline_\intno: b kvmppc_resume_\intno /* Get back original handler */ 1: tophys(r13, r13) - stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) + stw r12, HSTATE_SCRATCH1(r13) mfspr r12, SPRN_SPRG_SCRATCH1 - stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) + stw r12, HSTATE_SCRATCH0(r13) + lbz r12, HSTATE_IN_GUEST(r13) cmpwi r12, KVM_GUEST_MODE_NONE bne ..kvmppc_handler_hasmagic_\intno /* No KVM guest? Then jump back to the Linux handler! */ - lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) + lwz r12, HSTATE_SCRATCH1(r13) b 2b /* Now we know we're handling a KVM guest */ @@ -146,8 +144,8 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC * * R12 = free * R13 = Shadow VCPU (PACA) - * SVCPU.SCRATCH0 = guest R12 - * SVCPU.SCRATCH1 = guest CR + * HSTATE.SCRATCH0 = guest R12 + * HSTATE.SCRATCH1 = guest CR * SPRG_SCRATCH0 = guest R13 * */ @@ -159,9 +157,9 @@ kvmppc_handler_skip_ins: mtsrr0 r12 /* Clean up all state */ - lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) + lwz r12, HSTATE_SCRATCH1(r13) mtcr r12 - PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) + PPC_LL r12, HSTATE_SCRATCH0(r13) GET_SCRATCH0(r13) /* And get back into the code */ diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 4a623eb28a53..1cc25e8c0cf1 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -22,7 +22,7 @@ #if defined(CONFIG_PPC_BOOK3S_64) #define GET_SHADOW_VCPU(reg) \ - addi reg, r13, PACA_KVM_SVCPU + mr reg, r13 #elif defined(CONFIG_PPC_BOOK3S_32) @@ -71,6 +71,10 @@ kvmppc_handler_trampoline_enter: /* r3 = shadow vcpu */ GET_SHADOW_VCPU(r3) + /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ + PPC_STL r1, HSTATE_HOST_R1(r3) + PPC_STL r2, HSTATE_HOST_R2(r3) + /* Move SRR0 and SRR1 into the respective regs */ PPC_LL r9, SVCPU_PC(r3) mtsrr0 r9 @@ -78,7 +82,7 @@ kvmppc_handler_trampoline_enter: /* Activate guest mode, so faults get handled by KVM */ li r11, KVM_GUEST_MODE_GUEST - stb r11, SVCPU_IN_GUEST(r3) + stb r11, HSTATE_IN_GUEST(r3) /* Switch to guest segment. This is subarch specific. */ LOAD_GUEST_SEGMENTS @@ -132,30 +136,30 @@ kvmppc_interrupt: * * SPRG_SCRATCH0 = guest R13 * R12 = exit handler id - * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] - * SVCPU.SCRATCH0 = guest R12 - * SVCPU.SCRATCH1 = guest CR + * R13 = shadow vcpu (32-bit) or PACA (64-bit) + * HSTATE.SCRATCH0 = guest R12 + * HSTATE.SCRATCH1 = guest CR * */ /* Save registers */ - PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13) - PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13) - PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13) - PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13) - PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13) - PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13) - PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13) - PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13) - PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13) - PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13) - PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13) - PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13) + PPC_STL r0, SVCPU_R0(r13) + PPC_STL r1, SVCPU_R1(r13) + PPC_STL r2, SVCPU_R2(r13) + PPC_STL r3, SVCPU_R3(r13) + PPC_STL r4, SVCPU_R4(r13) + PPC_STL r5, SVCPU_R5(r13) + PPC_STL r6, SVCPU_R6(r13) + PPC_STL r7, SVCPU_R7(r13) + PPC_STL r8, SVCPU_R8(r13) + PPC_STL r9, SVCPU_R9(r13) + PPC_STL r10, SVCPU_R10(r13) + PPC_STL r11, SVCPU_R11(r13) /* Restore R1/R2 so we can handle faults */ - PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13) - PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) + PPC_LL r1, HSTATE_HOST_R1(r13) + PPC_LL r2, HSTATE_HOST_R2(r13) /* Save guest PC and MSR */ #ifdef CONFIG_PPC64 @@ -171,17 +175,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) 1: mfsrr0 r3 mfsrr1 r4 2: - PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) - PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) + PPC_STL r3, SVCPU_PC(r13) + PPC_STL r4, SVCPU_SHADOW_SRR1(r13) /* Get scratch'ed off registers */ GET_SCRATCH0(r9) - PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) - lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) + PPC_LL r8, HSTATE_SCRATCH0(r13) + lwz r7, HSTATE_SCRATCH1(r13) - PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13) - PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13) - stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13) + PPC_STL r9, SVCPU_R13(r13) + PPC_STL r8, SVCPU_R12(r13) + stw r7, SVCPU_CR(r13) /* Save more register state */ @@ -191,11 +195,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) mfctr r8 mflr r9 - stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13) - PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13) - stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13) - PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13) - PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13) + stw r5, SVCPU_XER(r13) + PPC_STL r6, SVCPU_FAULT_DAR(r13) + stw r7, SVCPU_FAULT_DSISR(r13) + PPC_STL r8, SVCPU_CTR(r13) + PPC_STL r9, SVCPU_LR(r13) /* * In order for us to easily get the last instruction, @@ -225,7 +229,7 @@ ld_last_inst: /* Set guest mode to 'jump over instruction' so if lwz faults * we'll just continue at the next IP. */ li r9, KVM_GUEST_MODE_SKIP - stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) + stb r9, HSTATE_IN_GUEST(r13) /* 1) enable paging for data */ mfmsr r9 @@ -239,13 +243,13 @@ ld_last_inst: sync #endif - stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13) + stw r0, SVCPU_LAST_INST(r13) no_ld_last_inst: /* Unset guest mode */ li r9, KVM_GUEST_MODE_NONE - stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) + stb r9, HSTATE_IN_GUEST(r13) /* Switch back to host MMU */ LOAD_HOST_SEGMENTS @@ -255,7 +259,7 @@ no_ld_last_inst: * R1 = host R1 * R2 = host R2 * R12 = exit handler id - * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] + * R13 = shadow vcpu (32-bit) or PACA (64-bit) * SVCPU.* = guest * * */ @@ -265,7 +269,7 @@ no_ld_last_inst: ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ mtsrr1 r7 /* Load highmem handler address */ - PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13) + PPC_LL r8, HSTATE_VMHANDLER(r13) mtsrr0 r8 RFI -- cgit v1.2.3 From de56a948b9182fbcf92cb8212f114de096c2d574 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:21:34 +0000 Subject: KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 17 + arch/powerpc/include/asm/exception-64s.h | 19 +- arch/powerpc/include/asm/kvm_asm.h | 4 + arch/powerpc/include/asm/kvm_book3s.h | 137 ++++- arch/powerpc/include/asm/kvm_book3s_64.h | 2 + arch/powerpc/include/asm/kvm_book3s_asm.h | 12 + arch/powerpc/include/asm/kvm_booke.h | 4 + arch/powerpc/include/asm/kvm_host.h | 53 +- arch/powerpc/include/asm/kvm_ppc.h | 6 + arch/powerpc/include/asm/mmu-hash64.h | 10 +- arch/powerpc/include/asm/paca.h | 2 + arch/powerpc/include/asm/reg.h | 4 + arch/powerpc/kernel/asm-offsets.c | 79 +++ arch/powerpc/kernel/exceptions-64s.S | 60 +-- arch/powerpc/kernel/process.c | 3 + arch/powerpc/kernel/setup-common.c | 3 + arch/powerpc/kernel/smp.c | 1 + arch/powerpc/kvm/Kconfig | 37 +- arch/powerpc/kvm/Makefile | 17 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 258 ++++++++++ arch/powerpc/kvm/book3s_exports.c | 5 + arch/powerpc/kvm/book3s_hv.c | 445 +++++++++++++++++ arch/powerpc/kvm/book3s_hv_interrupts.S | 136 +++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 806 ++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_segment.S | 34 +- arch/powerpc/kvm/powerpc.c | 22 +- arch/powerpc/kvm/trace.h | 2 +- include/linux/kvm.h | 6 + 28 files changed, 2103 insertions(+), 81 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_64_mmu_hv.c create mode 100644 arch/powerpc/kvm/book3s_hv.c create mode 100644 arch/powerpc/kvm/book3s_hv_interrupts.S create mode 100644 arch/powerpc/kvm/book3s_hv_rmhandlers.S (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b2511360b8f5..e8875fef3eb8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1532,6 +1532,23 @@ Userspace can now handle the hypercall and when it's done modify the gprs as necessary. Upon guest entry all guest GPRs will then be replaced by the values in this struct. + /* KVM_EXIT_PAPR_HCALL */ + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; + +This is used on 64-bit PowerPC when emulating a pSeries partition, +e.g. with the 'pseries' machine type in qemu. It occurs when the +guest does a hypercall using the 'sc 1' instruction. The 'nr' field +contains the hypercall number (from the guest R3), and 'args' contains +the arguments (from the guest R4 - R12). Userspace should put the +return code in 'ret' and any extra returned values in args[]. +The possible hypercalls are defined in the Power Architecture Platform +Requirements (PAPR) document available from www.power.org (free +developer registration required to access it). + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 296c9b66c04a..69435da8f2ba 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -134,6 +134,17 @@ do_kvm_##n: \ #define KVM_HANDLER_SKIP(area, h, n) #endif +#ifdef CONFIG_KVM_BOOK3S_PR +#define KVMTEST_PR(n) __KVMTEST(n) +#define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) +#define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) + +#else +#define KVMTEST_PR(n) +#define KVM_HANDLER_PR(area, h, n) +#define KVM_HANDLER_PR_SKIP(area, h, n) +#endif + #define NOTEST(n) /* @@ -210,7 +221,7 @@ label##_pSeries: \ HMT_MEDIUM; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST, vec) + EXC_STD, KVMTEST_PR, vec) #define STD_EXCEPTION_HV(loc, vec, label) \ . = loc; \ @@ -227,8 +238,8 @@ label##_hv: \ beq masked_##h##interrupt #define _SOFTEN_TEST(h) __SOFTEN_TEST(h) -#define SOFTEN_TEST(vec) \ - KVMTEST(vec); \ +#define SOFTEN_TEST_PR(vec) \ + KVMTEST_PR(vec); \ _SOFTEN_TEST(EXC_STD) #define SOFTEN_TEST_HV(vec) \ @@ -248,7 +259,7 @@ label##_hv: \ .globl label##_pSeries; \ label##_pSeries: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_TEST) + EXC_STD, SOFTEN_TEST_PR) #define MASKABLE_EXCEPTION_HV(loc, vec, label) \ . = loc; \ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 0951b17f4eb5..7b1f0e0fc653 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -64,8 +64,12 @@ #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 #define BOOK3S_INTERRUPT_DECREMENTER 0x900 +#define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980 #define BOOK3S_INTERRUPT_SYSCALL 0xc00 #define BOOK3S_INTERRUPT_TRACE 0xd00 +#define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00 +#define BOOK3S_INTERRUPT_H_INST_STORAGE 0xe20 +#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_VSX 0xf40 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 480fff6090db..5537c45d626c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -116,6 +116,7 @@ extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr); extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); @@ -127,10 +128,12 @@ extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern int kvmppc_mmu_hpte_sysinit(void); extern void kvmppc_mmu_hpte_sysexit(void); +extern int kvmppc_mmu_hv_init(void); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); +extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); @@ -140,6 +143,7 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); extern void kvmppc_handler_lowmem_trampoline(void); extern void kvmppc_handler_trampoline_enter(void); extern void kvmppc_rmcall(ulong srr0, ulong srr1); +extern void kvmppc_hv_entry_trampoline(void); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); extern void kvmppc_load_up_vsx(void); @@ -151,6 +155,19 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu); } +extern void kvm_return_point(void); + +/* Also add subarch specific defines */ + +#ifdef CONFIG_KVM_BOOK3S_32_HANDLER +#include +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include +#endif + +#ifdef CONFIG_KVM_BOOK3S_PR + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { return to_book3s(vcpu)->hior; @@ -165,16 +182,6 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, vcpu->arch.shared->int_pending = 0; } -static inline ulong dsisr(void) -{ - ulong r; - asm ( "mfdsisr %0 " : "=r" (r) ); - return r; -} - -extern void kvm_return_point(void); -static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu); - static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { if ( num < 14 ) { @@ -281,6 +288,108 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) return crit; } +#else /* CONFIG_KVM_BOOK3S_PR */ + +static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, + unsigned long pending_now, unsigned long old_pending) +{ + /* Recalculate LPCR:MER based on the presence of + * a pending external interrupt + */ + if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &pending_now) || + test_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &pending_now)) + vcpu->arch.lpcr |= LPCR_MER; + else + vcpu->arch.lpcr &= ~((u64)LPCR_MER); +} + +static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) +{ + vcpu->arch.gpr[num] = val; +} + +static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) +{ + return vcpu->arch.gpr[num]; +} + +static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) +{ + vcpu->arch.cr = val; +} + +static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.cr; +} + +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) +{ + vcpu->arch.xer = val; +} + +static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.xer; +} + +static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) +{ + vcpu->arch.ctr = val; +} + +static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.ctr; +} + +static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) +{ + vcpu->arch.lr = val; +} + +static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.lr; +} + +static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) +{ + vcpu->arch.pc = val; +} + +static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pc; +} + +static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) +{ + ulong pc = kvmppc_get_pc(vcpu); + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); + + return vcpu->arch.last_inst; +} + +static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault_dar; +} + +static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) +{ + return false; +} +#endif /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ @@ -289,12 +398,4 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) #define INS_DCBZ 0x7c0007ec -/* Also add subarch specific defines */ - -#ifdef CONFIG_PPC_BOOK3S_32 -#include -#else -#include -#endif - #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 4cadd612d575..5f73388ea0af 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -20,9 +20,11 @@ #ifndef __ASM_KVM_BOOK3S_64_H__ #define __ASM_KVM_BOOK3S_64_H__ +#ifdef CONFIG_KVM_BOOK3S_PR static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) { return &get_paca()->shadow_vcpu; } +#endif #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 312617529864..b7b039532fbc 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -70,10 +70,22 @@ kvmppc_resume_\intno: struct kvmppc_host_state { ulong host_r1; ulong host_r2; + ulong host_msr; ulong vmhandler; ulong scratch0; ulong scratch1; u8 in_guest; + +#ifdef CONFIG_KVM_BOOK3S_64_HV + struct kvm_vcpu *kvm_vcpu; + u64 dabr; + u64 host_mmcr[3]; + u32 host_pmc[6]; + u64 host_purr; + u64 host_spurr; + u64 host_dscr; + u64 dec_expires; +#endif }; struct kvmppc_book3s_shadow_vcpu { diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 9c9ba3d59b1b..a90e09188777 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -93,4 +93,8 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dear; } +static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->msr; +} #endif /* __ASM_KVM_BOOKE_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 069eb9fc6c41..4a3f790d5fc4 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -33,7 +33,9 @@ /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#endif /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 @@ -133,7 +135,26 @@ struct kvmppc_exit_timing { }; }; +struct kvmppc_pginfo { + unsigned long pfn; + atomic_t refcnt; +}; + struct kvm_arch { +#ifdef CONFIG_KVM_BOOK3S_64_HV + unsigned long hpt_virt; + unsigned long ram_npages; + unsigned long ram_psize; + unsigned long ram_porder; + struct kvmppc_pginfo *ram_pginfo; + unsigned int lpid; + unsigned int host_lpid; + unsigned long host_lpcr; + unsigned long sdr1; + unsigned long host_sdr1; + int tlbie_lock; + unsigned short last_vcpu[NR_CPUS]; +#endif /* CONFIG_KVM_BOOK3S_64_HV */ }; struct kvmppc_pte { @@ -190,7 +211,7 @@ struct kvm_vcpu_arch { ulong rmcall; ulong host_paca_phys; struct kvmppc_slb slb[64]; - int slb_max; /* # valid entries in slb[] */ + int slb_max; /* 1 + index of last valid entry in slb[] */ int slb_nr; /* total number of entries in SLB */ struct kvmppc_mmu mmu; #endif @@ -212,7 +233,7 @@ struct kvm_vcpu_arch { #endif #ifdef CONFIG_VSX - u64 vsr[32]; + u64 vsr[64]; #endif #ifdef CONFIG_PPC_BOOK3S @@ -220,18 +241,24 @@ struct kvm_vcpu_arch { u32 qpr[32]; #endif -#ifdef CONFIG_BOOKE ulong pc; ulong ctr; ulong lr; ulong xer; u32 cr; -#endif #ifdef CONFIG_PPC_BOOK3S ulong hflags; ulong guest_owned_ext; + ulong purr; + ulong spurr; + ulong lpcr; + ulong dscr; + ulong amr; + ulong uamor; + u32 ctrl; + ulong dabr; #endif u32 vrsave; /* also USPRG0 */ u32 mmucr; @@ -270,6 +297,9 @@ struct kvm_vcpu_arch { u32 dbcr1; u32 dbsr; + u64 mmcr[3]; + u32 pmc[6]; + #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; struct kvmppc_exit_timing timing_exit; @@ -284,8 +314,12 @@ struct kvm_vcpu_arch { struct dentry *debugfs_exit_timing; #endif +#ifdef CONFIG_PPC_BOOK3S + ulong fault_dar; + u32 fault_dsisr; +#endif + #ifdef CONFIG_BOOKE - u32 last_inst; ulong fault_dear; ulong fault_esr; ulong queued_dear; @@ -300,16 +334,25 @@ struct kvm_vcpu_arch { u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; + u8 hcall_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ struct hrtimer dec_timer; struct tasklet_struct tasklet; u64 dec_jiffies; + u64 dec_expires; unsigned long pending_exceptions; + u16 last_cpu; + u32 last_inst; + int trap; struct kvm_vcpu_arch_shared *shared; unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ + +#ifdef CONFIG_KVM_BOOK3S_64_HV + struct kvm_vcpu_arch_shared shregs; +#endif }; #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 48b7ab76de2d..0dafd53c30ed 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,6 +112,12 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); +extern long kvmppc_alloc_hpt(struct kvm *kvm); +extern void kvmppc_free_hpt(struct kvm *kvm); +extern long kvmppc_prepare_vrma(struct kvm *kvm, + struct kvm_userspace_memory_region *mem); +extern void kvmppc_map_vrma(struct kvm *kvm, + struct kvm_userspace_memory_region *mem); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index d865bd909c7d..b445e0af4c2b 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -90,13 +90,19 @@ extern char initial_stab[]; #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) +#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_RPN_SHIFT 12 -#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000) -#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff) +#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) +#define HPTE_R_G ASM_CONST(0x0000000000000008) +#define HPTE_R_M ASM_CONST(0x0000000000000010) +#define HPTE_R_I ASM_CONST(0x0000000000000020) +#define HPTE_R_W ASM_CONST(0x0000000000000040) +#define HPTE_R_WIMG ASM_CONST(0x0000000000000078) #define HPTE_R_C ASM_CONST(0x0000000000000080) #define HPTE_R_R ASM_CONST(0x0000000000000100) +#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 58f4a18ef60c..a6da12859959 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -147,8 +147,10 @@ struct paca_struct { struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ #ifdef CONFIG_KVM_BOOK3S_HANDLER +#ifdef CONFIG_KVM_BOOK3S_PR /* We use this to store guest state in */ struct kvmppc_book3s_shadow_vcpu shadow_vcpu; +#endif struct kvmppc_host_state kvm_hstate; #endif }; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d879a6b91635..36a611b398c5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -189,6 +189,9 @@ #define SPRN_CTR 0x009 /* Count Register */ #define SPRN_DSCR 0x11 #define SPRN_CFAR 0x1c /* Come From Address Register */ +#define SPRN_AMR 0x1d /* Authority Mask Register */ +#define SPRN_UAMOR 0x9d /* User Authority Mask Override Register */ +#define SPRN_AMOR 0x15d /* Authority Mask Override Register */ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 @@ -252,6 +255,7 @@ #define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ #define SPRN_LPID 0x13F /* Logical Partition Identifier */ +#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hardware m? error recovery */ #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index dabfb7346f36..936267462cae 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -187,6 +187,7 @@ int main(void) DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ @@ -392,6 +393,29 @@ int main(void) DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); + DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); + DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); +#ifdef CONFIG_ALTIVEC + DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); + DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); +#endif +#ifdef CONFIG_VSX + DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); +#endif + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); +#ifdef CONFIG_KVM_BOOK3S_64_HV + DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); + DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); + DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); + DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0)); + DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1)); + DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); + DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); +#endif DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); @@ -403,17 +427,60 @@ int main(void) DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); /* book3s */ +#ifdef CONFIG_KVM_BOOK3S_64_HV + DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); + DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); + DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); + DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); + DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); + DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); + DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); + DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); + DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); +#endif #ifdef CONFIG_PPC_BOOK3S + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); + DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); + DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); + DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); + DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); + DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); + DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); + DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); + DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); + DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); + DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr)); + DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); + DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); + DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); + DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); + DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu)); + DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); + DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); + DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - + offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); + DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); + DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_PR # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) +#else +# define SVCPU_FIELD(x, f) +#endif # define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f)) #else /* 32-bit */ # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f)) @@ -453,11 +520,23 @@ int main(void) HSTATE_FIELD(HSTATE_HOST_R1, host_r1); HSTATE_FIELD(HSTATE_HOST_R2, host_r2); + HSTATE_FIELD(HSTATE_HOST_MSR, host_msr); HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); +#ifdef CONFIG_KVM_BOOK3S_64_HV + HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); + HSTATE_FIELD(HSTATE_MMCR, host_mmcr); + HSTATE_FIELD(HSTATE_PMC, host_pmc); + HSTATE_FIELD(HSTATE_PURR, host_purr); + HSTATE_FIELD(HSTATE_SPURR, host_spurr); + HSTATE_FIELD(HSTATE_DSCR, host_dscr); + HSTATE_FIELD(HSTATE_DABR, dabr); + HSTATE_FIELD(HSTATE_DECEXP, dec_expires); +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + #else /* CONFIG_PPC_BOOK3S */ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6da00550afea..163c041cec24 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -87,14 +87,14 @@ data_access_not_stab: END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) #endif EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, - KVMTEST, 0x300) + KVMTEST_PR, 0x300) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM SET_SCRATCH0(r13) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR #ifdef __DISABLED__ @@ -125,7 +125,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM SET_SCRATCH0(r13) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ #ifdef __DISABLED__ @@ -153,32 +153,32 @@ instruction_access_slb_pSeries: hardware_interrupt_pSeries: hardware_interrupt_hv: BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) - ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, + EXC_STD, SOFTEN_TEST_PR) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700) STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800) MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer) STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 .globl system_call_pSeries @@ -219,7 +219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b . STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch * out of line to handle them @@ -254,23 +254,23 @@ vsx_unavailable_pSeries_1: #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_HV, 0x1202) #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300) #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_HV, 0x1602) #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700) #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_HV, 0x1802) #endif /* CONFIG_CBE_RAS */ . = 0x3000 @@ -297,7 +297,7 @@ data_access_check_stab: mfspr r9,SPRN_DSISR srdi r10,r10,60 rlwimi r10,r9,16,0x20 -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#ifdef CONFIG_KVM_BOOK3S_PR lbz r9,HSTATE_IN_GUEST(r13) rlwimi r10,r9,8,0x300 #endif @@ -316,11 +316,11 @@ do_stab_bolted_pSeries: EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) #endif /* CONFIG_POWER4_ONLY */ - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) - KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) - KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300) + KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) .align 7 @@ -336,11 +336,11 @@ do_stab_bolted_pSeries: /* moved from 0xf00 */ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00) STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -417,7 +417,11 @@ slb_miss_user_pseries: /* KVM's trampoline code needs to be close to the interrupt handlers */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#ifdef CONFIG_KVM_BOOK3S_PR #include "../kvm/book3s_rmhandlers.S" +#else +#include "../kvm/book3s_hv_rmhandlers.S" +#endif #endif .align 7 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 60ac2a9251db..ec2d0edeb134 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -96,6 +96,7 @@ void flush_fp_to_thread(struct task_struct *tsk) preempt_enable(); } } +EXPORT_SYMBOL_GPL(flush_fp_to_thread); void enable_kernel_fp(void) { @@ -145,6 +146,7 @@ void flush_altivec_to_thread(struct task_struct *tsk) preempt_enable(); } } +EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -186,6 +188,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) preempt_enable(); } } +EXPORT_SYMBOL_GPL(flush_vsx_to_thread); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 79fca2651b65..22051ef04bd9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -375,6 +375,9 @@ void __init check_for_initrd(void) int threads_per_core, threads_shift; cpumask_t threads_core_mask; +EXPORT_SYMBOL_GPL(threads_per_core); +EXPORT_SYMBOL_GPL(threads_shift); +EXPORT_SYMBOL_GPL(threads_core_mask); static void __init cpu_init_thread_core_maps(int tpc) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ebc6700b98d..09a85a9045d6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -243,6 +243,7 @@ void smp_send_reschedule(int cpu) if (likely(smp_ops)) smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +EXPORT_SYMBOL_GPL(smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index b7baff78f90c..5d9b78ebbaa6 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -20,7 +20,6 @@ config KVM bool select PREEMPT_NOTIFIERS select ANON_INODES - select KVM_MMIO config KVM_BOOK3S_HANDLER bool @@ -28,16 +27,22 @@ config KVM_BOOK3S_HANDLER config KVM_BOOK3S_32_HANDLER bool select KVM_BOOK3S_HANDLER + select KVM_MMIO config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER +config KVM_BOOK3S_PR + bool + select KVM_MMIO + config KVM_BOOK3S_32 tristate "KVM support for PowerPC book3s_32 processors" depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT select KVM select KVM_BOOK3S_32_HANDLER + select KVM_BOOK3S_PR ---help--- Support running unmodified book3s_32 guest kernels in virtual machines on book3s_32 host processors. @@ -50,8 +55,8 @@ config KVM_BOOK3S_32 config KVM_BOOK3S_64 tristate "KVM support for PowerPC book3s_64 processors" depends on EXPERIMENTAL && PPC_BOOK3S_64 - select KVM select KVM_BOOK3S_64_HANDLER + select KVM ---help--- Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. @@ -61,10 +66,37 @@ config KVM_BOOK3S_64 If unsure, say N. +config KVM_BOOK3S_64_HV + bool "KVM support for POWER7 using hypervisor mode in host" + depends on KVM_BOOK3S_64 + ---help--- + Support running unmodified book3s_64 guest kernels in + virtual machines on POWER7 processors that have hypervisor + mode available to the host. + + If you say Y here, KVM will use the hardware virtualization + facilities of POWER7 (and later) processors, meaning that + guest operating systems will run at full hardware speed + using supervisor and user modes. However, this also means + that KVM is not usable under PowerVM (pHyp), is only usable + on POWER7 (or later) processors, and can only emulate + POWER5+, POWER6 and POWER7 processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_BOOK3S_64_PR + def_bool y + depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV + select KVM_BOOK3S_PR + config KVM_440 bool "KVM support for PowerPC 440 processors" depends on EXPERIMENTAL && 44x select KVM + select KVM_MMIO ---help--- Support running unmodified 440 guest kernels in virtual machines on 440 host processors. @@ -89,6 +121,7 @@ config KVM_E500 bool "KVM support for PowerPC E500 processors" depends on EXPERIMENTAL && E500 select KVM + select KVM_MMIO ---help--- Support running unmodified E500 guest kernels in virtual machines on E500 host processors. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index bf9854fa7f63..8a435a6da665 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -38,11 +38,10 @@ kvm-e500-objs := \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) -kvm-book3s_64-objs := \ - $(common-objs-y) \ +kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ + ../../../virt/kvm/coalesced_mmio.o \ fpu.o \ book3s_paired_singles.o \ - book3s.o \ book3s_pr.o \ book3s_emulate.o \ book3s_interrupts.o \ @@ -50,6 +49,18 @@ kvm-book3s_64-objs := \ book3s_64_mmu_host.o \ book3s_64_mmu.o \ book3s_32_mmu.o + +kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ + book3s_hv.o \ + book3s_hv_interrupts.o \ + book3s_64_mmu_hv.o + +kvm-book3s_64-objs := \ + ../../../virt/kvm/kvm_main.o \ + powerpc.o \ + emulate.o \ + book3s.o \ + $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) kvm-book3s_32-objs := \ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c new file mode 100644 index 000000000000..4a4fbec61a17 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -0,0 +1,258 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2010 Paul Mackerras, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* For now use fixed-size 16MB page table */ +#define HPT_ORDER 24 +#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ +#define HPT_HASH_MASK (HPT_NPTEG - 1) + +/* Pages in the VRMA are 16MB pages */ +#define VRMA_PAGE_ORDER 24 +#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ + +#define NR_LPIDS (LPID_RSVD + 1) +unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)]; + +long kvmppc_alloc_hpt(struct kvm *kvm) +{ + unsigned long hpt; + unsigned long lpid; + + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN, + HPT_ORDER - PAGE_SHIFT); + if (!hpt) { + pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); + return -ENOMEM; + } + kvm->arch.hpt_virt = hpt; + + do { + lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); + if (lpid >= NR_LPIDS) { + pr_err("kvm_alloc_hpt: No LPIDs free\n"); + free_pages(hpt, HPT_ORDER - PAGE_SHIFT); + return -ENOMEM; + } + } while (test_and_set_bit(lpid, lpid_inuse)); + + kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); + kvm->arch.lpid = lpid; + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = mfspr(SPRN_LPCR); + + pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); + return 0; +} + +void kvmppc_free_hpt(struct kvm *kvm) +{ + unsigned long i; + struct kvmppc_pginfo *pginfo; + + clear_bit(kvm->arch.lpid, lpid_inuse); + free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); + + if (kvm->arch.ram_pginfo) { + pginfo = kvm->arch.ram_pginfo; + kvm->arch.ram_pginfo = NULL; + for (i = 0; i < kvm->arch.ram_npages; ++i) + put_page(pfn_to_page(pginfo[i].pfn)); + kfree(pginfo); + } +} + +static unsigned long user_page_size(unsigned long addr) +{ + struct vm_area_struct *vma; + unsigned long size = PAGE_SIZE; + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, addr); + if (vma) + size = vma_kernel_pagesize(vma); + up_read(¤t->mm->mmap_sem); + return size; +} + +static pfn_t hva_to_pfn(unsigned long addr) +{ + struct page *page[1]; + int npages; + + might_sleep(); + + npages = get_user_pages_fast(addr, 1, 1, page); + + if (unlikely(npages != 1)) + return 0; + + return page_to_pfn(page[0]); +} + +long kvmppc_prepare_vrma(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + unsigned long psize, porder; + unsigned long i, npages; + struct kvmppc_pginfo *pginfo; + pfn_t pfn; + unsigned long hva; + + /* First see what page size we have */ + psize = user_page_size(mem->userspace_addr); + /* For now, only allow 16MB pages */ + if (psize != 1ul << VRMA_PAGE_ORDER || (mem->memory_size & (psize - 1))) { + pr_err("bad psize=%lx memory_size=%llx @ %llx\n", + psize, mem->memory_size, mem->userspace_addr); + return -EINVAL; + } + porder = __ilog2(psize); + + npages = mem->memory_size >> porder; + pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo), GFP_KERNEL); + if (!pginfo) { + pr_err("kvmppc_prepare_vrma: couldn't alloc %lu bytes\n", + npages * sizeof(struct kvmppc_pginfo)); + return -ENOMEM; + } + + for (i = 0; i < npages; ++i) { + hva = mem->userspace_addr + (i << porder); + if (user_page_size(hva) != psize) + goto err; + pfn = hva_to_pfn(hva); + if (pfn == 0) { + pr_err("oops, no pfn for hva %lx\n", hva); + goto err; + } + if (pfn & ((1ul << (porder - PAGE_SHIFT)) - 1)) { + pr_err("oops, unaligned pfn %llx\n", pfn); + put_page(pfn_to_page(pfn)); + goto err; + } + pginfo[i].pfn = pfn; + } + + kvm->arch.ram_npages = npages; + kvm->arch.ram_psize = psize; + kvm->arch.ram_porder = porder; + kvm->arch.ram_pginfo = pginfo; + + return 0; + + err: + kfree(pginfo); + return -EINVAL; +} + +void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) +{ + unsigned long i; + unsigned long npages = kvm->arch.ram_npages; + unsigned long pfn; + unsigned long *hpte; + unsigned long hash; + struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo; + + if (!pginfo) + return; + + /* VRMA can't be > 1TB */ + if (npages > 1ul << (40 - kvm->arch.ram_porder)) + npages = 1ul << (40 - kvm->arch.ram_porder); + /* Can't use more than 1 HPTE per HPTEG */ + if (npages > HPT_NPTEG) + npages = HPT_NPTEG; + + for (i = 0; i < npages; ++i) { + pfn = pginfo[i].pfn; + /* can't use hpt_hash since va > 64 bits */ + hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; + /* + * We assume that the hash table is empty and no + * vcpus are using it at this stage. Since we create + * at most one HPTE per HPTEG, we just assume entry 7 + * is available and use it. + */ + hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7)); + hpte += 7 * 2; + /* HPTE low word - RPN, protection, etc. */ + hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C | + HPTE_R_M | PP_RWXX; + wmb(); + hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | + HPTE_V_LARGE | HPTE_V_VALID; + } +} + +int kvmppc_mmu_hv_init(void) +{ + if (!cpu_has_feature(CPU_FTR_HVMODE_206)) + return -EINVAL; + memset(lpid_inuse, 0, sizeof(lpid_inuse)); + set_bit(mfspr(SPRN_LPID), lpid_inuse); + set_bit(LPID_RSVD, lpid_inuse); + + return 0; +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ +} + +static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) +{ + kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); +} + +static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *gpte, bool data) +{ + return -ENOENT; +} + +void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) +{ + struct kvmppc_mmu *mmu = &vcpu->arch.mmu; + + vcpu->arch.slb_nr = 32; /* Assume POWER7 for now */ + + mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; + mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; +} diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index f94fd9a4c69a..88c8f26add02 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -20,6 +20,9 @@ #include #include +#ifdef CONFIG_KVM_BOOK3S_64_HV +EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); +#else EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter); EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline); EXPORT_SYMBOL_GPL(kvmppc_rmcall); @@ -30,3 +33,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); #ifdef CONFIG_VSX EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx); #endif +#endif + diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c new file mode 100644 index 000000000000..60b7300568c8 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv.c @@ -0,0 +1,445 @@ +/* + * Copyright 2011 Paul Mackerras, IBM Corp. + * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. + * + * Authors: + * Paul Mackerras + * Alexander Graf + * Kevin Wolf + * + * Description: KVM functions specific to running on Book 3S + * processors in hypervisor mode (specifically POWER7 and later). + * + * This file is derived from arch/powerpc/kvm/book3s.c, + * by Alexander Graf . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* #define EXIT_DEBUG */ +/* #define EXIT_DEBUG_SIMPLE */ +/* #define EXIT_DEBUG_INT */ + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + local_paca->kvm_hstate.kvm_vcpu = vcpu; +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) +{ + u64 now; + unsigned long dec_nsec; + + now = get_tb(); + if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu)) + kvmppc_core_queue_dec(vcpu); + if (vcpu->arch.pending_exceptions) + return; + if (vcpu->arch.dec_expires != ~(u64)0) { + dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC / + tb_ticks_per_sec; + hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), + HRTIMER_MODE_REL); + } + + kvm_vcpu_block(vcpu); + vcpu->stat.halt_wakeup++; + + if (vcpu->arch.dec_expires != ~(u64)0) + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); +} + +void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) +{ + vcpu->arch.shregs.msr = msr; +} + +void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) +{ + vcpu->arch.pvr = pvr; +} + +void kvmppc_dump_regs(struct kvm_vcpu *vcpu) +{ + int r; + + pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id); + pr_err("pc = %.16lx msr = %.16llx trap = %x\n", + vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap); + for (r = 0; r < 16; ++r) + pr_err("r%2d = %.16lx r%d = %.16lx\n", + r, kvmppc_get_gpr(vcpu, r), + r+16, kvmppc_get_gpr(vcpu, r+16)); + pr_err("ctr = %.16lx lr = %.16lx\n", + vcpu->arch.ctr, vcpu->arch.lr); + pr_err("srr0 = %.16llx srr1 = %.16llx\n", + vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1); + pr_err("sprg0 = %.16llx sprg1 = %.16llx\n", + vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1); + pr_err("sprg2 = %.16llx sprg3 = %.16llx\n", + vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3); + pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n", + vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr); + pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar); + pr_err("fault dar = %.16lx dsisr = %.8x\n", + vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + pr_err("SLB (%d entries):\n", vcpu->arch.slb_max); + for (r = 0; r < vcpu->arch.slb_max; ++r) + pr_err(" ESID = %.16llx VSID = %.16llx\n", + vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); + pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", + vcpu->arch.lpcr, vcpu->kvm->arch.sdr1, + vcpu->arch.last_inst); +} + +static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + struct task_struct *tsk) +{ + int r = RESUME_HOST; + + vcpu->stat.sum_exits++; + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + switch (vcpu->arch.trap) { + /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_HV_DECREMENTER: + vcpu->stat.dec_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_EXTERNAL: + vcpu->stat.ext_intr_exits++; + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PERFMON: + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_PROGRAM: + { + ulong flags; + /* + * Normally program interrupts are delivered directly + * to the guest by the hardware, but we can get here + * as a result of a hypervisor emulation interrupt + * (e40) getting turned into a 700 by BML RTAS. + */ + flags = vcpu->arch.shregs.msr & 0x1f0000ull; + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + } + case BOOK3S_INTERRUPT_SYSCALL: + { + /* hcall - punt to userspace */ + int i; + + if (vcpu->arch.shregs.msr & MSR_PR) { + /* sc 1 from userspace - reflect to guest syscall */ + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL); + r = RESUME_GUEST; + break; + } + run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3); + for (i = 0; i < 9; ++i) + run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i); + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + r = RESUME_HOST; + break; + } + /* + * We get these next two if the guest does a bad real-mode access, + * as we have enabled VRMA (virtualized real mode area) mode in the + * LPCR. We just generate an appropriate DSI/ISI to the guest. + */ + case BOOK3S_INTERRUPT_H_DATA_STORAGE: + vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr; + vcpu->arch.shregs.dar = vcpu->arch.fault_dar; + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0); + r = RESUME_GUEST; + break; + case BOOK3S_INTERRUPT_H_INST_STORAGE: + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, + 0x08000000); + r = RESUME_GUEST; + break; + /* + * This occurs if the guest executes an illegal instruction. + * We just generate a program interrupt to the guest, since + * we don't emulate any guest instructions at this stage. + */ + case BOOK3S_INTERRUPT_H_EMUL_ASSIST: + kvmppc_core_queue_program(vcpu, 0x80000); + r = RESUME_GUEST; + break; + default: + kvmppc_dump_regs(vcpu); + printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", + vcpu->arch.trap, kvmppc_get_pc(vcpu), + vcpu->arch.shregs.msr); + r = RESUME_HOST; + BUG(); + break; + } + + + if (!(r & RESUME_HOST)) { + /* To avoid clobbering exit_reason, only check for signals if + * we aren't already exiting to userspace for some other + * reason. */ + if (signal_pending(tsk)) { + vcpu->stat.signal_exits++; + run->exit_reason = KVM_EXIT_INTR; + r = -EINTR; + } else { + kvmppc_core_deliver_interrupts(vcpu); + } + } + + return r; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int i; + + sregs->pvr = vcpu->arch.pvr; + + memset(sregs, 0, sizeof(struct kvm_sregs)); + for (i = 0; i < vcpu->arch.slb_max; i++) { + sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; + sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; + } + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int i, j; + + kvmppc_set_pvr(vcpu, sregs->pvr); + + j = 0; + for (i = 0; i < vcpu->arch.slb_nr; i++) { + if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) { + vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe; + vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv; + ++j; + } + } + vcpu->arch.slb_max = j; + + return 0; +} + +int kvmppc_core_check_processor_compat(void) +{ + if (cpu_has_feature(CPU_FTR_HVMODE_206)) + return 0; + return -EIO; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvm_vcpu *vcpu; + int err = -ENOMEM; + unsigned long lpcr; + + vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + if (!vcpu) + goto out; + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + vcpu->arch.shared = &vcpu->arch.shregs; + vcpu->arch.last_cpu = -1; + vcpu->arch.mmcr[0] = MMCR0_FC; + vcpu->arch.ctrl = CTRL_RUNLATCH; + /* default to host PVR, since we can't spoof it */ + vcpu->arch.pvr = mfspr(SPRN_PVR); + kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + + lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES); + lpcr |= LPCR_VPM0 | LPCR_VRMA_L | (4UL << LPCR_DPFD_SH) | LPCR_HDICE; + vcpu->arch.lpcr = lpcr; + + kvmppc_mmu_book3s_hv_init(vcpu); + + return vcpu; + +free_vcpu: + kfree(vcpu); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); + kfree(vcpu); +} + +extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); + +int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u64 now; + + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_vsx_to_thread(current); + preempt_disable(); + + /* + * Make sure we are running on thread 0, and that + * secondary threads are offline. + * XXX we should also block attempts to bring any + * secondary threads online. + */ + if (threads_per_core > 1) { + int cpu = smp_processor_id(); + int thr = cpu_thread_in_core(cpu); + + if (thr) + goto out; + while (++thr < threads_per_core) + if (cpu_online(cpu + thr)) + goto out; + } + + kvm_guest_enter(); + + __kvmppc_vcore_entry(NULL, vcpu); + + kvm_guest_exit(); + + preempt_enable(); + kvm_resched(vcpu); + + now = get_tb(); + /* cancel pending dec exception if dec is positive */ + if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) + kvmppc_core_dequeue_dec(vcpu); + + return kvmppc_handle_exit(run, vcpu, current); + + out: + preempt_enable(); + return -EBUSY; +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + if (mem->guest_phys_addr == 0 && mem->memory_size != 0) + return kvmppc_prepare_vrma(kvm, mem); + return 0; +} + +void kvmppc_core_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) +{ + if (mem->guest_phys_addr == 0 && mem->memory_size != 0) + kvmppc_map_vrma(kvm, mem); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + long r; + + /* Allocate hashed page table */ + r = kvmppc_alloc_hpt(kvm); + + return r; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + kvmppc_free_hpt(kvm); +} + +/* These are stubs for now */ +void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) +{ +} + +/* We don't need to emulate any privileged instructions or dcbz */ +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + return EMULATE_FAIL; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + return EMULATE_FAIL; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + return EMULATE_FAIL; +} + +static int kvmppc_book3s_hv_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + + if (r) + return r; + + r = kvmppc_mmu_hv_init(); + + return r; +} + +static void kvmppc_book3s_hv_exit(void) +{ + kvm_exit(); +} + +module_init(kvmppc_book3s_hv_init); +module_exit(kvmppc_book3s_hv_exit); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S new file mode 100644 index 000000000000..532afaf19841 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -0,0 +1,136 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2011 Paul Mackerras, IBM Corp. + * + * Derived from book3s_interrupts.S, which is: + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf + */ + +#include +#include +#include +#include +#include +#include +#include + +/***************************************************************************** + * * + * Guest entry / exit code that is in kernel module memory (vmalloc) * + * * + ****************************************************************************/ + +/* Registers: + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcore_entry) + + /* Write correct stack frame */ + mflr r0 + std r0,PPC_LR_STKOFF(r1) + + /* Save host state to the stack */ + stdu r1, -SWITCH_FRAME_SIZE(r1) + + /* Save non-volatile registers (r14 - r31) */ + SAVE_NVGPRS(r1) + + /* Save host DSCR */ + mfspr r3, SPRN_DSCR + std r3, HSTATE_DSCR(r13) + + /* Save host DABR */ + mfspr r3, SPRN_DABR + std r3, HSTATE_DABR(r13) + + /* Hard-disable interrupts */ + mfmsr r10 + std r10, HSTATE_HOST_MSR(r13) + rldicl r10,r10,48,1 + rotldi r10,r10,16 + mtmsrd r10,1 + + /* Save host PMU registers and load guest PMU registers */ + /* R4 is live here (vcpu pointer) but not r3 or r5 */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mfspr r7, SPRN_MMCR0 /* save MMCR0 */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ + isync + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ + lbz r5, LPPACA_PMCINUSE(r3) + cmpwi r5, 0 + beq 31f /* skip if not */ + mfspr r5, SPRN_MMCR1 + mfspr r6, SPRN_MMCRA + std r7, HSTATE_MMCR(r13) + std r5, HSTATE_MMCR + 8(r13) + std r6, HSTATE_MMCR + 16(r13) + mfspr r3, SPRN_PMC1 + mfspr r5, SPRN_PMC2 + mfspr r6, SPRN_PMC3 + mfspr r7, SPRN_PMC4 + mfspr r8, SPRN_PMC5 + mfspr r9, SPRN_PMC6 + stw r3, HSTATE_PMC(r13) + stw r5, HSTATE_PMC + 4(r13) + stw r6, HSTATE_PMC + 8(r13) + stw r7, HSTATE_PMC + 12(r13) + stw r8, HSTATE_PMC + 16(r13) + stw r9, HSTATE_PMC + 20(r13) +31: + + /* + * Put whatever is in the decrementer into the + * hypervisor decrementer. + */ + mfspr r8,SPRN_DEC + mftb r7 + mtspr SPRN_HDEC,r8 + extsw r8,r8 + add r8,r8,r7 + std r8,HSTATE_DECEXP(r13) + + /* Jump to partition switch code */ + bl .kvmppc_hv_entry_trampoline + nop + +/* + * We return here in virtual mode after the guest exits + * with something that we can't handle in real mode. + * Interrupts are enabled again at this point. + */ + +.global kvmppc_handler_highmem +kvmppc_handler_highmem: + + /* + * Register usage at this point: + * + * R1 = host R1 + * R2 = host R2 + * R12 = exit handler id + * R13 = PACA + */ + + /* Restore non-volatile host registers (r14 - r31) */ + REST_NVGPRS(r1) + + addi r1, r1, SWITCH_FRAME_SIZE + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S new file mode 100644 index 000000000000..9af264840b98 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -0,0 +1,806 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright 2011 Paul Mackerras, IBM Corp. + * + * Derived from book3s_rmhandlers.S and other files, which are: + * + * Copyright SUSE Linux Products GmbH 2009 + * + * Authors: Alexander Graf + */ + +#include +#include +#include +#include +#include +#include + +/***************************************************************************** + * * + * Real Mode handlers that need to be in the linear mapping * + * * + ****************************************************************************/ + +#define SHADOW_VCPU_OFF PACA_KVM_SVCPU + + .globl kvmppc_skip_interrupt +kvmppc_skip_interrupt: + mfspr r13,SPRN_SRR0 + addi r13,r13,4 + mtspr SPRN_SRR0,r13 + GET_SCRATCH0(r13) + rfid + b . + + .globl kvmppc_skip_Hinterrupt +kvmppc_skip_Hinterrupt: + mfspr r13,SPRN_HSRR0 + addi r13,r13,4 + mtspr SPRN_HSRR0,r13 + GET_SCRATCH0(r13) + hrfid + b . + +/* + * Call kvmppc_handler_trampoline_enter in real mode. + * Must be called with interrupts hard-disabled. + * + * Input Registers: + * + * LR = return address to continue at after eventually re-enabling MMU + */ +_GLOBAL(kvmppc_hv_entry_trampoline) + mfmsr r10 + LOAD_REG_ADDR(r5, kvmppc_hv_entry) + li r0,MSR_RI + andc r0,r10,r0 + li r6,MSR_IR | MSR_DR + andc r6,r10,r6 + mtmsrd r0,1 /* clear RI in MSR */ + mtsrr0 r5 + mtsrr1 r6 + RFI + +#define ULONG_SIZE 8 +#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) + +/****************************************************************************** + * * + * Entry code * + * * + *****************************************************************************/ + +.global kvmppc_hv_entry +kvmppc_hv_entry: + + /* Required state: + * + * R4 = vcpu pointer + * MSR = ~IR|DR + * R13 = PACA + * R1 = host R1 + * all other volatile GPRS = free + */ + mflr r0 + std r0, HSTATE_VMHANDLER(r13) + + ld r14, VCPU_GPR(r14)(r4) + ld r15, VCPU_GPR(r15)(r4) + ld r16, VCPU_GPR(r16)(r4) + ld r17, VCPU_GPR(r17)(r4) + ld r18, VCPU_GPR(r18)(r4) + ld r19, VCPU_GPR(r19)(r4) + ld r20, VCPU_GPR(r20)(r4) + ld r21, VCPU_GPR(r21)(r4) + ld r22, VCPU_GPR(r22)(r4) + ld r23, VCPU_GPR(r23)(r4) + ld r24, VCPU_GPR(r24)(r4) + ld r25, VCPU_GPR(r25)(r4) + ld r26, VCPU_GPR(r26)(r4) + ld r27, VCPU_GPR(r27)(r4) + ld r28, VCPU_GPR(r28)(r4) + ld r29, VCPU_GPR(r29)(r4) + ld r30, VCPU_GPR(r30)(r4) + ld r31, VCPU_GPR(r31)(r4) + + /* Load guest PMU registers */ + /* R4 is live here (vcpu pointer) */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + isync + lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ + lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ + lwz r6, VCPU_PMC + 8(r4) + lwz r7, VCPU_PMC + 12(r4) + lwz r8, VCPU_PMC + 16(r4) + lwz r9, VCPU_PMC + 20(r4) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r5 + mtspr SPRN_PMC3, r6 + mtspr SPRN_PMC4, r7 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 + ld r3, VCPU_MMCR(r4) + ld r5, VCPU_MMCR + 8(r4) + ld r6, VCPU_MMCR + 16(r4) + mtspr SPRN_MMCR1, r5 + mtspr SPRN_MMCRA, r6 + mtspr SPRN_MMCR0, r3 + isync + + /* Load up FP, VMX and VSX registers */ + bl kvmppc_load_fp + + /* Switch DSCR to guest value */ + ld r5, VCPU_DSCR(r4) + mtspr SPRN_DSCR, r5 + + /* + * Set the decrementer to the guest decrementer. + */ + ld r8,VCPU_DEC_EXPIRES(r4) + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + stw r3,VCPU_DEC(r4) + + ld r5, VCPU_SPRG0(r4) + ld r6, VCPU_SPRG1(r4) + ld r7, VCPU_SPRG2(r4) + ld r8, VCPU_SPRG3(r4) + mtspr SPRN_SPRG0, r5 + mtspr SPRN_SPRG1, r6 + mtspr SPRN_SPRG2, r7 + mtspr SPRN_SPRG3, r8 + + /* Save R1 in the PACA */ + std r1, HSTATE_HOST_R1(r13) + + /* Load up DAR and DSISR */ + ld r5, VCPU_DAR(r4) + lwz r6, VCPU_DSISR(r4) + mtspr SPRN_DAR, r5 + mtspr SPRN_DSISR, r6 + + /* Set partition DABR */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 + + /* Restore AMR and UAMOR, set AMOR to all 1s */ + ld r5,VCPU_AMR(r4) + ld r6,VCPU_UAMOR(r4) + li r7,-1 + mtspr SPRN_AMR,r5 + mtspr SPRN_UAMOR,r6 + mtspr SPRN_AMOR,r7 + + /* Clear out SLB */ + li r6,0 + slbmte r6,r6 + slbia + ptesync + + /* Switch to guest partition. */ + ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + ld r6,KVM_SDR1(r9) + lwz r7,KVM_LPID(r9) + li r0,LPID_RSVD /* switch to reserved LPID */ + mtspr SPRN_LPID,r0 + ptesync + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_LPID,r7 + isync + ld r8,VCPU_LPCR(r4) + mtspr SPRN_LPCR,r8 + isync + + /* Check if HDEC expires soon */ + mfspr r3,SPRN_HDEC + cmpwi r3,10 + li r12,BOOK3S_INTERRUPT_HV_DECREMENTER + mr r9,r4 + blt hdec_soon + + /* + * Invalidate the TLB if we could possibly have stale TLB + * entries for this partition on this core due to the use + * of tlbiel. + */ + ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + lwz r5,VCPU_VCPUID(r4) + lhz r6,PACAPACAINDEX(r13) + lhz r8,VCPU_LAST_CPU(r4) + sldi r7,r6,1 /* see if this is the same vcpu */ + add r7,r7,r9 /* as last ran on this pcpu */ + lhz r0,KVM_LAST_VCPU(r7) + cmpw r6,r8 /* on the same cpu core as last time? */ + bne 3f + cmpw r0,r5 /* same vcpu as this core last ran? */ + beq 1f +3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */ + sth r5,KVM_LAST_VCPU(r7) + li r6,128 + mtctr r6 + li r7,0x800 /* IS field = 0b10 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: + + /* Save purr/spurr */ + mfspr r5,SPRN_PURR + mfspr r6,SPRN_SPURR + std r5,HSTATE_PURR(r13) + std r6,HSTATE_SPURR(r13) + ld r7,VCPU_PURR(r4) + ld r8,VCPU_SPURR(r4) + mtspr SPRN_PURR,r7 + mtspr SPRN_SPURR,r8 + + /* Load up guest SLB entries */ + lwz r5,VCPU_SLB_MAX(r4) + cmpwi r5,0 + beq 9f + mtctr r5 + addi r6,r4,VCPU_SLB +1: ld r8,VCPU_SLB_E(r6) + ld r9,VCPU_SLB_V(r6) + slbmte r9,r8 + addi r6,r6,VCPU_SLB_SIZE + bdnz 1b +9: + + /* Restore state of CTRL run bit; assume 1 on entry */ + lwz r5,VCPU_CTRL(r4) + andi. r5,r5,1 + bne 4f + mfspr r6,SPRN_CTRLF + clrrdi r6,r6,1 + mtspr SPRN_CTRLT,r6 +4: + ld r6, VCPU_CTR(r4) + lwz r7, VCPU_XER(r4) + + mtctr r6 + mtxer r7 + + /* Move SRR0 and SRR1 into the respective regs */ + ld r6, VCPU_SRR0(r4) + ld r7, VCPU_SRR1(r4) + mtspr SPRN_SRR0, r6 + mtspr SPRN_SRR1, r7 + + ld r10, VCPU_PC(r4) + + ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */ + rldicl r11, r11, 63 - MSR_HV_LG, 1 + rotldi r11, r11, 1 + MSR_HV_LG + ori r11, r11, MSR_ME + +fast_guest_return: + mtspr SPRN_HSRR0,r10 + mtspr SPRN_HSRR1,r11 + + /* Activate guest mode, so faults get handled by KVM */ + li r9, KVM_GUEST_MODE_GUEST + stb r9, HSTATE_IN_GUEST(r13) + + /* Enter guest */ + + ld r5, VCPU_LR(r4) + lwz r6, VCPU_CR(r4) + mtlr r5 + mtcr r6 + + ld r0, VCPU_GPR(r0)(r4) + ld r1, VCPU_GPR(r1)(r4) + ld r2, VCPU_GPR(r2)(r4) + ld r3, VCPU_GPR(r3)(r4) + ld r5, VCPU_GPR(r5)(r4) + ld r6, VCPU_GPR(r6)(r4) + ld r7, VCPU_GPR(r7)(r4) + ld r8, VCPU_GPR(r8)(r4) + ld r9, VCPU_GPR(r9)(r4) + ld r10, VCPU_GPR(r10)(r4) + ld r11, VCPU_GPR(r11)(r4) + ld r12, VCPU_GPR(r12)(r4) + ld r13, VCPU_GPR(r13)(r4) + + ld r4, VCPU_GPR(r4)(r4) + + hrfid + b . + +/****************************************************************************** + * * + * Exit code * + * * + *****************************************************************************/ + +/* + * We come here from the first-level interrupt handlers. + */ + .globl kvmppc_interrupt +kvmppc_interrupt: + /* + * Register contents: + * R12 = interrupt vector + * R13 = PACA + * guest CR, R12 saved in shadow VCPU SCRATCH1/0 + * guest R13 saved in SPRN_SCRATCH0 + */ + /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ + std r9, HSTATE_HOST_R2(r13) + ld r9, HSTATE_KVM_VCPU(r13) + + /* Save registers */ + + std r0, VCPU_GPR(r0)(r9) + std r1, VCPU_GPR(r1)(r9) + std r2, VCPU_GPR(r2)(r9) + std r3, VCPU_GPR(r3)(r9) + std r4, VCPU_GPR(r4)(r9) + std r5, VCPU_GPR(r5)(r9) + std r6, VCPU_GPR(r6)(r9) + std r7, VCPU_GPR(r7)(r9) + std r8, VCPU_GPR(r8)(r9) + ld r0, HSTATE_HOST_R2(r13) + std r0, VCPU_GPR(r9)(r9) + std r10, VCPU_GPR(r10)(r9) + std r11, VCPU_GPR(r11)(r9) + ld r3, HSTATE_SCRATCH0(r13) + lwz r4, HSTATE_SCRATCH1(r13) + std r3, VCPU_GPR(r12)(r9) + stw r4, VCPU_CR(r9) + + /* Restore R1/R2 so we can handle faults */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + mfspr r10, SPRN_SRR0 + mfspr r11, SPRN_SRR1 + std r10, VCPU_SRR0(r9) + std r11, VCPU_SRR1(r9) + andi. r0, r12, 2 /* need to read HSRR0/1? */ + beq 1f + mfspr r10, SPRN_HSRR0 + mfspr r11, SPRN_HSRR1 + clrrdi r12, r12, 2 +1: std r10, VCPU_PC(r9) + std r11, VCPU_MSR(r9) + + GET_SCRATCH0(r3) + mflr r4 + std r3, VCPU_GPR(r13)(r9) + std r4, VCPU_LR(r9) + + /* Unset guest mode */ + li r0, KVM_GUEST_MODE_NONE + stb r0, HSTATE_IN_GUEST(r13) + + stw r12,VCPU_TRAP(r9) + + /* See if this is a leftover HDEC interrupt */ + cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER + bne 2f + mfspr r3,SPRN_HDEC + cmpwi r3,0 + bge ignore_hdec +2: + + /* Check for mediated interrupts (could be done earlier really ...) */ + cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL + bne+ 1f + ld r5,VCPU_LPCR(r9) + andi. r0,r11,MSR_EE + beq 1f + andi. r0,r5,LPCR_MER + bne bounce_ext_interrupt +1: + + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + + /* Save HEIR (HV emulation assist reg) in last_inst + if this is an HEI (HV emulation interrupt, e40) */ + li r3,-1 + cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST + bne 11f + mfspr r3,SPRN_HEIR +11: stw r3,VCPU_LAST_INST(r9) + + /* Save more register state */ + mfxer r5 + mfdar r6 + mfdsisr r7 + mfctr r8 + + stw r5, VCPU_XER(r9) + std r6, VCPU_DAR(r9) + stw r7, VCPU_DSISR(r9) + std r8, VCPU_CTR(r9) + /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */ + cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE + beq 6f +7: std r6, VCPU_FAULT_DAR(r9) + stw r7, VCPU_FAULT_DSISR(r9) + + /* Save guest CTRL register, set runlatch to 1 */ + mfspr r6,SPRN_CTRLF + stw r6,VCPU_CTRL(r9) + andi. r0,r6,1 + bne 4f + ori r6,r6,1 + mtspr SPRN_CTRLT,r6 +4: + /* Read the guest SLB and save it away */ + lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ + mtctr r0 + li r6,0 + addi r7,r9,VCPU_SLB + li r5,0 +1: slbmfee r8,r6 + andis. r0,r8,SLB_ESID_V@h + beq 2f + add r8,r8,r6 /* put index in */ + slbmfev r3,r6 + std r8,VCPU_SLB_E(r7) + std r3,VCPU_SLB_V(r7) + addi r7,r7,VCPU_SLB_SIZE + addi r5,r5,1 +2: addi r6,r6,1 + bdnz 1b + stw r5,VCPU_SLB_MAX(r9) + + /* + * Save the guest PURR/SPURR + */ + mfspr r5,SPRN_PURR + mfspr r6,SPRN_SPURR + ld r7,VCPU_PURR(r9) + ld r8,VCPU_SPURR(r9) + std r5,VCPU_PURR(r9) + std r6,VCPU_SPURR(r9) + subf r5,r7,r5 + subf r6,r8,r6 + + /* + * Restore host PURR/SPURR and add guest times + * so that the time in the guest gets accounted. + */ + ld r3,HSTATE_PURR(r13) + ld r4,HSTATE_SPURR(r13) + add r3,r3,r5 + add r4,r4,r6 + mtspr SPRN_PURR,r3 + mtspr SPRN_SPURR,r4 + + /* Clear out SLB */ + li r5,0 + slbmte r5,r5 + slbia + ptesync + +hdec_soon: + /* Switch back to host partition */ + ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + ld r6,KVM_HOST_SDR1(r4) + lwz r7,KVM_HOST_LPID(r4) + li r8,LPID_RSVD /* switch to reserved LPID */ + mtspr SPRN_LPID,r8 + ptesync + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_LPID,r7 + isync + lis r8,0x7fff /* MAX_INT@h */ + mtspr SPRN_HDEC,r8 + + ld r8,KVM_HOST_LPCR(r4) + mtspr SPRN_LPCR,r8 + isync + + /* load host SLB entries */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + ld r5,SLBSHADOW_SAVEAREA(r8) + ld r6,SLBSHADOW_SAVEAREA+8(r8) + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + /* Save and reset AMR and UAMOR before turning on the MMU */ + mfspr r5,SPRN_AMR + mfspr r6,SPRN_UAMOR + std r5,VCPU_AMR(r9) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 + + /* Restore host DABR and DABRX */ + ld r5,HSTATE_DABR(r13) + li r6,7 + mtspr SPRN_DABR,r5 + mtspr SPRN_DABRX,r6 + + /* Switch DSCR back to host value */ + mfspr r8, SPRN_DSCR + ld r7, HSTATE_DSCR(r13) + std r8, VCPU_DSCR(r7) + mtspr SPRN_DSCR, r7 + + /* Save non-volatile GPRs */ + std r14, VCPU_GPR(r14)(r9) + std r15, VCPU_GPR(r15)(r9) + std r16, VCPU_GPR(r16)(r9) + std r17, VCPU_GPR(r17)(r9) + std r18, VCPU_GPR(r18)(r9) + std r19, VCPU_GPR(r19)(r9) + std r20, VCPU_GPR(r20)(r9) + std r21, VCPU_GPR(r21)(r9) + std r22, VCPU_GPR(r22)(r9) + std r23, VCPU_GPR(r23)(r9) + std r24, VCPU_GPR(r24)(r9) + std r25, VCPU_GPR(r25)(r9) + std r26, VCPU_GPR(r26)(r9) + std r27, VCPU_GPR(r27)(r9) + std r28, VCPU_GPR(r28)(r9) + std r29, VCPU_GPR(r29)(r9) + std r30, VCPU_GPR(r30)(r9) + std r31, VCPU_GPR(r31)(r9) + + /* Save SPRGs */ + mfspr r3, SPRN_SPRG0 + mfspr r4, SPRN_SPRG1 + mfspr r5, SPRN_SPRG2 + mfspr r6, SPRN_SPRG3 + std r3, VCPU_SPRG0(r9) + std r4, VCPU_SPRG1(r9) + std r5, VCPU_SPRG2(r9) + std r6, VCPU_SPRG3(r9) + + /* Save PMU registers */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mfspr r4, SPRN_MMCR0 /* save MMCR0 */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + isync + mfspr r5, SPRN_MMCR1 + mfspr r6, SPRN_MMCRA + std r4, VCPU_MMCR(r9) + std r5, VCPU_MMCR + 8(r9) + std r6, VCPU_MMCR + 16(r9) + mfspr r3, SPRN_PMC1 + mfspr r4, SPRN_PMC2 + mfspr r5, SPRN_PMC3 + mfspr r6, SPRN_PMC4 + mfspr r7, SPRN_PMC5 + mfspr r8, SPRN_PMC6 + stw r3, VCPU_PMC(r9) + stw r4, VCPU_PMC + 4(r9) + stw r5, VCPU_PMC + 8(r9) + stw r6, VCPU_PMC + 12(r9) + stw r7, VCPU_PMC + 16(r9) + stw r8, VCPU_PMC + 20(r9) +22: + /* save FP state */ + mr r3, r9 + bl .kvmppc_save_fp + + /* + * Reload DEC. HDEC interrupts were disabled when + * we reloaded the host's LPCR value. + */ + ld r3, HSTATE_DECEXP(r13) + mftb r4 + subf r4, r4, r3 + mtspr SPRN_DEC, r4 + + /* Reload the host's PMU registers */ + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ + lbz r4, LPPACA_PMCINUSE(r3) + cmpwi r4, 0 + beq 23f /* skip if not */ + lwz r3, HSTATE_PMC(r13) + lwz r4, HSTATE_PMC + 4(r13) + lwz r5, HSTATE_PMC + 8(r13) + lwz r6, HSTATE_PMC + 12(r13) + lwz r8, HSTATE_PMC + 16(r13) + lwz r9, HSTATE_PMC + 20(r13) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r4 + mtspr SPRN_PMC3, r5 + mtspr SPRN_PMC4, r6 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 + ld r3, HSTATE_MMCR(r13) + ld r4, HSTATE_MMCR + 8(r13) + ld r5, HSTATE_MMCR + 16(r13) + mtspr SPRN_MMCR1, r4 + mtspr SPRN_MMCRA, r5 + mtspr SPRN_MMCR0, r3 + isync +23: + /* + * For external and machine check interrupts, we need + * to call the Linux handler to process the interrupt. + * We do that by jumping to the interrupt vector address + * which we have in r12. The [h]rfid at the end of the + * handler will return to the book3s_hv_interrupts.S code. + * For other interrupts we do the rfid to get back + * to the book3s_interrupts.S code here. + */ + ld r8, HSTATE_VMHANDLER(r13) + ld r7, HSTATE_HOST_MSR(r13) + + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + beq 11f + cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK + + /* RFI into the highmem handler, or branch to interrupt handler */ + mfmsr r6 + mtctr r12 + li r0, MSR_RI + andc r6, r6, r0 + mtmsrd r6, 1 /* Clear RI in MSR */ + mtsrr0 r8 + mtsrr1 r7 + beqctr + RFI + +11: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + ba 0x500 + +6: mfspr r6,SPRN_HDAR + mfspr r7,SPRN_HDSISR + b 7b + +ignore_hdec: + mr r4,r9 + b fast_guest_return + +bounce_ext_interrupt: + mr r4,r9 + mtspr SPRN_SRR0,r10 + mtspr SPRN_SRR1,r11 + li r10,BOOK3S_INTERRUPT_EXTERNAL + LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME); + b fast_guest_return + +/* + * Save away FP, VMX and VSX registers. + * r3 = vcpu pointer +*/ +_GLOBAL(kvmppc_save_fp) + mfmsr r9 + ori r8,r9,MSR_FP +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r8,r8,MSR_VEC@h +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r8,r8,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + mtmsrd r8 + isync +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + reg = 0 + .rept 32 + li r6,reg*16+VCPU_VSRS + stxvd2x reg,r6,r3 + reg = reg + 1 + .endr +FTR_SECTION_ELSE +#endif + reg = 0 + .rept 32 + stfd reg,reg*8+VCPU_FPRS(r3) + reg = reg + 1 + .endr +#ifdef CONFIG_VSX +ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) +#endif + mffs fr0 + stfd fr0,VCPU_FPSCR(r3) + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + reg = 0 + .rept 32 + li r6,reg*16+VCPU_VRS + stvx reg,r6,r3 + reg = reg + 1 + .endr + mfvscr vr0 + li r6,VCPU_VSCR + stvx vr0,r6,r3 +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + mfspr r6,SPRN_VRSAVE + stw r6,VCPU_VRSAVE(r3) + mtmsrd r9 + isync + blr + +/* + * Load up FP, VMX and VSX registers + * r4 = vcpu pointer + */ + .globl kvmppc_load_fp +kvmppc_load_fp: + mfmsr r9 + ori r8,r9,MSR_FP +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + oris r8,r8,MSR_VEC@h +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r8,r8,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + mtmsrd r8 + isync + lfd fr0,VCPU_FPSCR(r4) + MTFSF_L(fr0) +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + reg = 0 + .rept 32 + li r7,reg*16+VCPU_VSRS + lxvd2x reg,r7,r4 + reg = reg + 1 + .endr +FTR_SECTION_ELSE +#endif + reg = 0 + .rept 32 + lfd reg,reg*8+VCPU_FPRS(r4) + reg = reg + 1 + .endr +#ifdef CONFIG_VSX +ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) +#endif + +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + li r7,VCPU_VSCR + lvx vr0,r7,r4 + mtvscr vr0 + reg = 0 + .rept 32 + li r7,reg*16+VCPU_VRS + lvx reg,r7,r4 + reg = reg + 1 + .endr +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + lwz r7,VCPU_VRSAVE(r4) + mtspr SPRN_VRSAVE,r7 + blr diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 1cc25e8c0cf1..134501691ad0 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -89,29 +89,29 @@ kvmppc_handler_trampoline_enter: /* Enter guest */ - PPC_LL r4, (SVCPU_CTR)(r3) - PPC_LL r5, (SVCPU_LR)(r3) - lwz r6, (SVCPU_CR)(r3) - lwz r7, (SVCPU_XER)(r3) + PPC_LL r4, SVCPU_CTR(r3) + PPC_LL r5, SVCPU_LR(r3) + lwz r6, SVCPU_CR(r3) + lwz r7, SVCPU_XER(r3) mtctr r4 mtlr r5 mtcr r6 mtxer r7 - PPC_LL r0, (SVCPU_R0)(r3) - PPC_LL r1, (SVCPU_R1)(r3) - PPC_LL r2, (SVCPU_R2)(r3) - PPC_LL r4, (SVCPU_R4)(r3) - PPC_LL r5, (SVCPU_R5)(r3) - PPC_LL r6, (SVCPU_R6)(r3) - PPC_LL r7, (SVCPU_R7)(r3) - PPC_LL r8, (SVCPU_R8)(r3) - PPC_LL r9, (SVCPU_R9)(r3) - PPC_LL r10, (SVCPU_R10)(r3) - PPC_LL r11, (SVCPU_R11)(r3) - PPC_LL r12, (SVCPU_R12)(r3) - PPC_LL r13, (SVCPU_R13)(r3) + PPC_LL r0, SVCPU_R0(r3) + PPC_LL r1, SVCPU_R1(r3) + PPC_LL r2, SVCPU_R2(r3) + PPC_LL r4, SVCPU_R4(r3) + PPC_LL r5, SVCPU_R5(r3) + PPC_LL r6, SVCPU_R6(r3) + PPC_LL r7, SVCPU_R7(r3) + PPC_LL r8, SVCPU_R8(r3) + PPC_LL r9, SVCPU_R9(r3) + PPC_LL r10, SVCPU_R10(r3) + PPC_LL r11, SVCPU_R11(r3) + PPC_LL r12, SVCPU_R12(r3) + PPC_LL r13, SVCPU_R13(r3) PPC_LL r3, (SVCPU_R3)(r3) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 026036efcde0..3a4f379ee70f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,8 +38,12 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { +#ifndef CONFIG_KVM_BOOK3S_64_HV return !(v->arch.shared->msr & MSR_WE) || !!(v->arch.pending_exceptions); +#else + return 1; +#endif } int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) @@ -184,10 +188,13 @@ int kvm_dev_ioctl_check_extension(long ext) #else case KVM_CAP_PPC_SEGSTATE: #endif - case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + r = 1; + break; +#ifndef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: r = 1; @@ -195,6 +202,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; +#endif default: r = 0; break; @@ -291,6 +299,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; + vcpu->arch.dec_expires = ~(u64)0; #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); @@ -317,6 +326,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); #endif kvmppc_core_vcpu_load(vcpu, cpu); + vcpu->cpu = smp_processor_id(); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -325,6 +335,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #ifdef CONFIG_BOOKE vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); #endif + vcpu->cpu = -1; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -496,6 +507,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) for (i = 0; i < 32; i++) kvmppc_set_gpr(vcpu, i, gprs[i]); vcpu->arch.osi_needed = 0; + } else if (vcpu->arch.hcall_needed) { + int i; + + kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret); + for (i = 0; i < 9; ++i) + kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); + vcpu->arch.hcall_needed = 0; } kvmppc_core_deliver_interrupts(vcpu); @@ -518,6 +536,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); vcpu->stat.halt_wakeup++; + } else if (vcpu->cpu != -1) { + smp_send_reschedule(vcpu->cpu); } return 0; diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index d62a14b2cd0f..b135d3d397db 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -103,7 +103,7 @@ TRACE_EVENT(kvm_gtlb_write, * Book3S trace points * *************************************************************************/ -#ifdef CONFIG_PPC_BOOK3S +#ifdef CONFIG_KVM_BOOK3S_PR TRACE_EVENT(kvm_book3s_exit, TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 9c9ca7c7b491..a156294fc22a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -161,6 +161,7 @@ struct kvm_pit_config { #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 +#define KVM_EXIT_PAPR_HCALL 19 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -264,6 +265,11 @@ struct kvm_run { struct { __u64 gprs[32]; } osi; + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From a8606e20e41a8149456bafdf76ad29d47672027c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:22:05 +0000 Subject: KVM: PPC: Handle some PAPR hcalls in the kernel This adds the infrastructure for handling PAPR hcalls in the kernel, either early in the guest exit path while we are still in real mode, or later once the MMU has been turned back on and we are in the full kernel context. The advantage of handling hcalls in real mode if possible is that we avoid two partition switches -- and this will become more important when we support SMT4 guests, since a partition switch means we have to pull all of the threads in the core out of the guest. The disadvantage is that we can only access the kernel linear mapping, not anything vmalloced or ioremapped, since the MMU is off. This also adds code to handle the following hcalls in real mode: H_ENTER Add an HPTE to the hashed page table H_REMOVE Remove an HPTE from the hashed page table H_READ Read HPTEs from the hashed page table H_PROTECT Change the protection bits in an HPTE H_BULK_REMOVE Remove up to 4 HPTEs from the hashed page table H_SET_DABR Set the data address breakpoint register Plus code to handle the following hcalls in the kernel: H_CEDE Idle the vcpu until an interrupt or H_PROD hcall arrives H_PROD Wake up a ceded vcpu H_REGISTER_VPA Register a virtual processor area (VPA) The code that runs in real mode has to be in the base kernel, not in the module, if KVM is compiled as a module. The real-mode code can only access the kernel linear mapping, not vmalloc or ioremap space. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/hvcall.h | 5 + arch/powerpc/include/asm/kvm_host.h | 11 + arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kvm/Makefile | 8 +- arch/powerpc/kvm/book3s_hv.c | 170 ++++++++++++++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 368 ++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 158 +++++++++++++- arch/powerpc/kvm/powerpc.c | 2 +- 9 files changed, 718 insertions(+), 7 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_hv_rm_mmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index fd8201dddd4b..1c324ff55ea8 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -29,6 +29,10 @@ #define H_LONG_BUSY_ORDER_100_SEC 9905 /* Long busy, hint that 100sec \ is a good time to retry */ #define H_LONG_BUSY_END_RANGE 9905 /* End of long busy range */ + +/* Internal value used in book3s_hv kvm support; not returned to guests */ +#define H_TOO_HARD 9999 + #define H_HARDWARE -1 /* Hardware error */ #define H_FUNCTION -2 /* Function not supported */ #define H_PRIVILEGE -3 /* Caller not privileged */ @@ -100,6 +104,7 @@ #define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE #define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */ #define H_ANDCOND (1UL<<(63-33)) +#define H_LOCAL (1UL<<(63-35)) #define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ #define H_ICACHE_SYNCHRONIZE (1UL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages */ #define H_COALESCE_CAND (1UL<<(63-42)) /* page is a good candidate for coalescing */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a3f790d5fc4..6ebf1721680a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -59,6 +59,10 @@ struct kvm; struct kvm_run; struct kvm_vcpu; +struct lppaca; +struct slb_shadow; +struct dtl; + struct kvm_vm_stat { u32 remote_tlb_flush; }; @@ -344,7 +348,14 @@ struct kvm_vcpu_arch { u64 dec_expires; unsigned long pending_exceptions; u16 last_cpu; + u8 ceded; + u8 prodded; u32 last_inst; + + struct lppaca *vpa; + struct slb_shadow *slb_shadow; + struct dtl *dtl; + struct dtl *dtl_end; int trap; struct kvm_vcpu_arch_shared *shared; unsigned long magic_page_pa; /* phys addr to map the magic page to */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0dafd53c30ed..2afe92e6f625 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -118,6 +118,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); +extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 936267462cae..c70d106bf1a4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -189,6 +189,7 @@ int main(void) DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count)); DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); @@ -459,6 +460,7 @@ int main(void) DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 8a435a6da665..2ecffc0dc1bb 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -54,14 +54,17 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv.o \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ + book3s_hv_rm_mmu.o -kvm-book3s_64-objs := \ +kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ powerpc.o \ emulate.o \ book3s.o \ $(kvm-book3s_64-objs-y) -kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) + +kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) kvm-book3s_32-objs := \ $(common-objs-y) \ @@ -83,3 +86,4 @@ obj-$(CONFIG_KVM_E500) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o +obj-y += $(kvm-book3s_64-builtin-objs-y) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 60b7300568c8..af862c30b70e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -124,6 +124,158 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu) vcpu->arch.last_inst); } +struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) +{ + int r; + struct kvm_vcpu *v, *ret = NULL; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(r, v, kvm) { + if (v->vcpu_id == id) { + ret = v; + break; + } + } + mutex_unlock(&kvm->lock); + return ret; +} + +static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) +{ + vpa->shared_proc = 1; + vpa->yield_count = 1; +} + +static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, + unsigned long flags, + unsigned long vcpuid, unsigned long vpa) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long pg_index, ra, len; + unsigned long pg_offset; + void *va; + struct kvm_vcpu *tvcpu; + + tvcpu = kvmppc_find_vcpu(kvm, vcpuid); + if (!tvcpu) + return H_PARAMETER; + + flags >>= 63 - 18; + flags &= 7; + if (flags == 0 || flags == 4) + return H_PARAMETER; + if (flags < 4) { + if (vpa & 0x7f) + return H_PARAMETER; + /* registering new area; convert logical addr to real */ + pg_index = vpa >> kvm->arch.ram_porder; + pg_offset = vpa & (kvm->arch.ram_psize - 1); + if (pg_index >= kvm->arch.ram_npages) + return H_PARAMETER; + if (kvm->arch.ram_pginfo[pg_index].pfn == 0) + return H_PARAMETER; + ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT; + ra |= pg_offset; + va = __va(ra); + if (flags <= 1) + len = *(unsigned short *)(va + 4); + else + len = *(unsigned int *)(va + 4); + if (pg_offset + len > kvm->arch.ram_psize) + return H_PARAMETER; + switch (flags) { + case 1: /* register VPA */ + if (len < 640) + return H_PARAMETER; + tvcpu->arch.vpa = va; + init_vpa(vcpu, va); + break; + case 2: /* register DTL */ + if (len < 48) + return H_PARAMETER; + if (!tvcpu->arch.vpa) + return H_RESOURCE; + len -= len % 48; + tvcpu->arch.dtl = va; + tvcpu->arch.dtl_end = va + len; + break; + case 3: /* register SLB shadow buffer */ + if (len < 8) + return H_PARAMETER; + if (!tvcpu->arch.vpa) + return H_RESOURCE; + tvcpu->arch.slb_shadow = va; + len = (len - 16) / 16; + tvcpu->arch.slb_shadow = va; + break; + } + } else { + switch (flags) { + case 5: /* unregister VPA */ + if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) + return H_RESOURCE; + tvcpu->arch.vpa = NULL; + break; + case 6: /* unregister DTL */ + tvcpu->arch.dtl = NULL; + break; + case 7: /* unregister SLB shadow buffer */ + tvcpu->arch.slb_shadow = NULL; + break; + } + } + return H_SUCCESS; +} + +int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) +{ + unsigned long req = kvmppc_get_gpr(vcpu, 3); + unsigned long target, ret = H_SUCCESS; + struct kvm_vcpu *tvcpu; + + switch (req) { + case H_CEDE: + vcpu->arch.shregs.msr |= MSR_EE; + vcpu->arch.ceded = 1; + smp_mb(); + if (!vcpu->arch.prodded) + kvmppc_vcpu_block(vcpu); + else + vcpu->arch.prodded = 0; + smp_mb(); + vcpu->arch.ceded = 0; + break; + case H_PROD: + target = kvmppc_get_gpr(vcpu, 4); + tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); + if (!tvcpu) { + ret = H_PARAMETER; + break; + } + tvcpu->arch.prodded = 1; + smp_mb(); + if (vcpu->arch.ceded) { + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } + } + break; + case H_CONFER: + break; + case H_REGISTER_VPA: + ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6)); + break; + default: + return RESUME_HOST; + } + kvmppc_set_gpr(vcpu, 3, ret); + vcpu->arch.hcall_needed = 0; + return RESUME_GUEST; +} + static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -318,7 +470,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu) { u64 now; @@ -370,6 +522,22 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return -EBUSY; } +int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int r; + + do { + r = kvmppc_run_vcpu(run, vcpu); + + if (run->exit_reason == KVM_EXIT_PAPR_HCALL && + !(vcpu->arch.shregs.msr & MSR_PR)) { + r = kvmppc_pseries_do_hcall(vcpu); + kvmppc_core_deliver_interrupts(vcpu); + } + } while (r == RESUME_GUEST); + return r; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c new file mode 100644 index 000000000000..edb0aae901a3 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -0,0 +1,368 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright 2010-2011 Paul Mackerras, IBM Corp. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* For now use fixed-size 16MB page table */ +#define HPT_ORDER 24 +#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ +#define HPT_HASH_MASK (HPT_NPTEG - 1) + +#define HPTE_V_HVLOCK 0x40UL + +static inline long lock_hpte(unsigned long *hpte, unsigned long bits) +{ + unsigned long tmp, old; + + asm volatile(" ldarx %0,0,%2\n" + " and. %1,%0,%3\n" + " bne 2f\n" + " ori %0,%0,%4\n" + " stdcx. %0,0,%2\n" + " beq+ 2f\n" + " li %1,%3\n" + "2: isync" + : "=&r" (tmp), "=&r" (old) + : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) + : "cc", "memory"); + return old == 0; +} + +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel) +{ + unsigned long porder; + struct kvm *kvm = vcpu->kvm; + unsigned long i, lpn, pa; + unsigned long *hpte; + + /* only handle 4k, 64k and 16M pages for now */ + porder = 12; + if (pteh & HPTE_V_LARGE) { + if ((ptel & 0xf000) == 0x1000) { + /* 64k page */ + porder = 16; + } else if ((ptel & 0xff000) == 0) { + /* 16M page */ + porder = 24; + /* lowest AVA bit must be 0 for 16M pages */ + if (pteh & 0x80) + return H_PARAMETER; + } else + return H_PARAMETER; + } + lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder; + if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder) + return H_PARAMETER; + pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT; + if (!pa) + return H_PARAMETER; + /* Check WIMG */ + if ((ptel & HPTE_R_WIMG) != HPTE_R_M && + (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M)) + return H_PARAMETER; + pteh &= ~0x60UL; + ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize); + ptel |= pa; + if (pte_index >= (HPT_NPTEG << 3)) + return H_PARAMETER; + if (likely((flags & H_EXACT) == 0)) { + pte_index &= ~7UL; + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + for (i = 0; ; ++i) { + if (i == 8) + return H_PTEG_FULL; + if ((*hpte & HPTE_V_VALID) == 0 && + lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) + break; + hpte += 2; + } + } else { + i = 0; + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) + return H_PTEG_FULL; + } + hpte[1] = ptel; + eieio(); + hpte[0] = pteh; + asm volatile("ptesync" : : : "memory"); + atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt); + vcpu->arch.gpr[4] = pte_index + i; + return H_SUCCESS; +} + +static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, + unsigned long pte_index) +{ + unsigned long rb, va_low; + + rb = (v & ~0x7fUL) << 16; /* AVA field */ + va_low = pte_index >> 3; + if (v & HPTE_V_SECONDARY) + va_low = ~va_low; + /* xor vsid from AVA */ + if (!(v & HPTE_V_1TB_SEG)) + va_low ^= v >> 12; + else + va_low ^= v >> 24; + va_low &= 0x7ff; + if (v & HPTE_V_LARGE) { + rb |= 1; /* L field */ + if (r & 0xff000) { + /* non-16MB large page, must be 64k */ + /* (masks depend on page size) */ + rb |= 0x1000; /* page encoding in LP field */ + rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ + rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ + } + } else { + /* 4kB page */ + rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ + } + rb |= (v >> 54) & 0x300; /* B field */ + return rb; +} + +#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) + +static inline int try_lock_tlbie(unsigned int *lock) +{ + unsigned int tmp, old; + unsigned int token = LOCK_TOKEN; + + asm volatile("1:lwarx %1,0,%2\n" + " cmpwi cr0,%1,0\n" + " bne 2f\n" + " stwcx. %3,0,%2\n" + " bne- 1b\n" + " isync\n" + "2:" + : "=&r" (tmp), "=&r" (old) + : "r" (lock), "r" (token) + : "cc", "memory"); + return old == 0; +} + +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hpte; + unsigned long v, r, rb; + + if (pte_index >= (HPT_NPTEG << 3)) + return H_PARAMETER; + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if ((hpte[0] & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { + hpte[0] &= ~HPTE_V_HVLOCK; + return H_NOT_FOUND; + } + if (atomic_read(&kvm->online_vcpus) == 1) + flags |= H_LOCAL; + vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK; + vcpu->arch.gpr[5] = r = hpte[1]; + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = 0; + if (!(flags & H_LOCAL)) { + while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + asm volatile("ptesync" : : : "memory"); + asm volatile("tlbiel %0" : : "r" (rb)); + asm volatile("ptesync" : : : "memory"); + } + return H_SUCCESS; +} + +long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *args = &vcpu->arch.gpr[4]; + unsigned long *hp, tlbrb[4]; + long int i, found; + long int n_inval = 0; + unsigned long flags, req, pte_index; + long int local = 0; + long int ret = H_SUCCESS; + + if (atomic_read(&kvm->online_vcpus) == 1) + local = 1; + for (i = 0; i < 4; ++i) { + pte_index = args[i * 2]; + flags = pte_index >> 56; + pte_index &= ((1ul << 56) - 1); + req = flags >> 6; + flags &= 3; + if (req == 3) + break; + if (req != 1 || flags == 3 || + pte_index >= (HPT_NPTEG << 3)) { + /* parameter error */ + args[i * 2] = ((0xa0 | flags) << 56) + pte_index; + ret = H_PARAMETER; + break; + } + hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!lock_hpte(hp, HPTE_V_HVLOCK)) + cpu_relax(); + found = 0; + if (hp[0] & HPTE_V_VALID) { + switch (flags & 3) { + case 0: /* absolute */ + found = 1; + break; + case 1: /* andcond */ + if (!(hp[0] & args[i * 2 + 1])) + found = 1; + break; + case 2: /* AVPN */ + if ((hp[0] & ~0x7fUL) == args[i * 2 + 1]) + found = 1; + break; + } + } + if (!found) { + hp[0] &= ~HPTE_V_HVLOCK; + args[i * 2] = ((0x90 | flags) << 56) + pte_index; + continue; + } + /* insert R and C bits from PTE */ + flags |= (hp[1] >> 5) & 0x0c; + args[i * 2] = ((0x80 | flags) << 56) + pte_index; + tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index); + hp[0] = 0; + } + if (n_inval == 0) + return ret; + + if (!local) { + while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < n_inval; ++i) + asm volatile(PPC_TLBIE(%1,%0) + : : "r" (tlbrb[i]), "r" (kvm->arch.lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + asm volatile("ptesync" : : : "memory"); + for (i = 0; i < n_inval; ++i) + asm volatile("tlbiel %0" : : "r" (tlbrb[i])); + asm volatile("ptesync" : : : "memory"); + } + return ret; +} + +long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hpte; + unsigned long v, r, rb; + + if (pte_index >= (HPT_NPTEG << 3)) + return H_PARAMETER; + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + if ((hpte[0] & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { + hpte[0] &= ~HPTE_V_HVLOCK; + return H_NOT_FOUND; + } + if (atomic_read(&kvm->online_vcpus) == 1) + flags |= H_LOCAL; + v = hpte[0]; + r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | + HPTE_R_KEY_HI | HPTE_R_KEY_LO); + r |= (flags << 55) & HPTE_R_PP0; + r |= (flags << 48) & HPTE_R_KEY_HI; + r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = v & ~HPTE_V_VALID; + if (!(flags & H_LOCAL)) { + while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) + cpu_relax(); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" + : : "r" (rb), "r" (kvm->arch.lpid)); + asm volatile("ptesync" : : : "memory"); + kvm->arch.tlbie_lock = 0; + } else { + asm volatile("ptesync" : : : "memory"); + asm volatile("tlbiel %0" : : "r" (rb)); + asm volatile("ptesync" : : : "memory"); + } + hpte[1] = r; + eieio(); + hpte[0] = v & ~HPTE_V_HVLOCK; + asm volatile("ptesync" : : : "memory"); + return H_SUCCESS; +} + +static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr) +{ + long int i; + unsigned long offset, rpn; + + offset = realaddr & (kvm->arch.ram_psize - 1); + rpn = (realaddr - offset) >> PAGE_SHIFT; + for (i = 0; i < kvm->arch.ram_npages; ++i) + if (rpn == kvm->arch.ram_pginfo[i].pfn) + return (i << PAGE_SHIFT) + offset; + return HPTE_R_RPN; /* all 1s in the RPN field */ +} + +long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long *hpte, r; + int i, n = 1; + + if (pte_index >= (HPT_NPTEG << 3)) + return H_PARAMETER; + if (flags & H_READ_4) { + pte_index &= ~3; + n = 4; + } + for (i = 0; i < n; ++i, ++pte_index) { + hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + r = hpte[1]; + if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID)) + r = reverse_xlate(kvm, r & HPTE_R_RPN) | + (r & ~HPTE_R_RPN); + vcpu->arch.gpr[4 + i * 2] = hpte[0]; + vcpu->arch.gpr[5 + i * 2] = r; + } + return H_SUCCESS; +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9af264840b98..319ff63b1f31 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -166,6 +166,14 @@ kvmppc_hv_entry: /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) + /* Increment yield count if they have a VPA */ + ld r3, VCPU_VPA(r4) + cmpdi r3, 0 + beq 25f + lwz r5, LPPACA_YIELDCOUNT(r3) + addi r5, r5, 1 + stw r5, LPPACA_YIELDCOUNT(r3) +25: /* Load up DAR and DSISR */ ld r5, VCPU_DAR(r4) lwz r6, VCPU_DSISR(r4) @@ -401,6 +409,10 @@ kvmppc_interrupt: cmpwi r3,0 bge ignore_hdec 2: + /* See if this is something we can handle in real mode */ + cmpwi r12,BOOK3S_INTERRUPT_SYSCALL + beq hcall_try_real_mode +hcall_real_cont: /* Check for mediated interrupts (could be done earlier really ...) */ cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL @@ -579,13 +591,28 @@ hdec_soon: std r5, VCPU_SPRG2(r9) std r6, VCPU_SPRG3(r9) - /* Save PMU registers */ + /* Increment yield count if they have a VPA */ + ld r8, VCPU_VPA(r9) /* do they have a VPA? */ + cmpdi r8, 0 + beq 25f + lwz r3, LPPACA_YIELDCOUNT(r8) + addi r3, r3, 1 + stw r3, LPPACA_YIELDCOUNT(r8) +25: + /* Save PMU registers if requested */ + /* r8 and cr0.eq are live here */ li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ isync - mfspr r5, SPRN_MMCR1 + beq 21f /* if no VPA, save PMU stuff anyway */ + lbz r7, LPPACA_PMCINUSE(r8) + cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */ + bne 21f + std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ + b 22f +21: mfspr r5, SPRN_MMCR1 mfspr r6, SPRN_MMCRA std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) @@ -676,6 +703,125 @@ hdec_soon: mfspr r7,SPRN_HDSISR b 7b +/* + * Try to handle an hcall in real mode. + * Returns to the guest if we handle it, or continues on up to + * the kernel if we can't (i.e. if we don't have a handler for + * it, or if the handler returns H_TOO_HARD). + */ + .globl hcall_try_real_mode +hcall_try_real_mode: + ld r3,VCPU_GPR(r3)(r9) + andi. r0,r11,MSR_PR + bne hcall_real_cont + clrrdi r3,r3,2 + cmpldi r3,hcall_real_table_end - hcall_real_table + bge hcall_real_cont + LOAD_REG_ADDR(r4, hcall_real_table) + lwzx r3,r3,r4 + cmpwi r3,0 + beq hcall_real_cont + add r3,r3,r4 + mtctr r3 + mr r3,r9 /* get vcpu pointer */ + ld r4,VCPU_GPR(r4)(r9) + bctrl + cmpdi r3,H_TOO_HARD + beq hcall_real_fallback + ld r4,HSTATE_KVM_VCPU(r13) + std r3,VCPU_GPR(r3)(r4) + ld r10,VCPU_PC(r4) + ld r11,VCPU_MSR(r4) + b fast_guest_return + + /* We've attempted a real mode hcall, but it's punted it back + * to userspace. We need to restore some clobbered volatiles + * before resuming the pass-it-to-qemu path */ +hcall_real_fallback: + li r12,BOOK3S_INTERRUPT_SYSCALL + ld r9, HSTATE_KVM_VCPU(r13) + ld r11, VCPU_MSR(r9) + + b hcall_real_cont + + .globl hcall_real_table +hcall_real_table: + .long 0 /* 0 - unused */ + .long .kvmppc_h_remove - hcall_real_table + .long .kvmppc_h_enter - hcall_real_table + .long .kvmppc_h_read - hcall_real_table + .long 0 /* 0x10 - H_CLEAR_MOD */ + .long 0 /* 0x14 - H_CLEAR_REF */ + .long .kvmppc_h_protect - hcall_real_table + .long 0 /* 0x1c - H_GET_TCE */ + .long 0 /* 0x20 - H_SET_TCE */ + .long 0 /* 0x24 - H_SET_SPRG0 */ + .long .kvmppc_h_set_dabr - hcall_real_table + .long 0 /* 0x2c */ + .long 0 /* 0x30 */ + .long 0 /* 0x34 */ + .long 0 /* 0x38 */ + .long 0 /* 0x3c */ + .long 0 /* 0x40 */ + .long 0 /* 0x44 */ + .long 0 /* 0x48 */ + .long 0 /* 0x4c */ + .long 0 /* 0x50 */ + .long 0 /* 0x54 */ + .long 0 /* 0x58 */ + .long 0 /* 0x5c */ + .long 0 /* 0x60 */ + .long 0 /* 0x64 */ + .long 0 /* 0x68 */ + .long 0 /* 0x6c */ + .long 0 /* 0x70 */ + .long 0 /* 0x74 */ + .long 0 /* 0x78 */ + .long 0 /* 0x7c */ + .long 0 /* 0x80 */ + .long 0 /* 0x84 */ + .long 0 /* 0x88 */ + .long 0 /* 0x8c */ + .long 0 /* 0x90 */ + .long 0 /* 0x94 */ + .long 0 /* 0x98 */ + .long 0 /* 0x9c */ + .long 0 /* 0xa0 */ + .long 0 /* 0xa4 */ + .long 0 /* 0xa8 */ + .long 0 /* 0xac */ + .long 0 /* 0xb0 */ + .long 0 /* 0xb4 */ + .long 0 /* 0xb8 */ + .long 0 /* 0xbc */ + .long 0 /* 0xc0 */ + .long 0 /* 0xc4 */ + .long 0 /* 0xc8 */ + .long 0 /* 0xcc */ + .long 0 /* 0xd0 */ + .long 0 /* 0xd4 */ + .long 0 /* 0xd8 */ + .long 0 /* 0xdc */ + .long 0 /* 0xe0 */ + .long 0 /* 0xe4 */ + .long 0 /* 0xe8 */ + .long 0 /* 0xec */ + .long 0 /* 0xf0 */ + .long 0 /* 0xf4 */ + .long 0 /* 0xf8 */ + .long 0 /* 0xfc */ + .long 0 /* 0x100 */ + .long 0 /* 0x104 */ + .long 0 /* 0x108 */ + .long 0 /* 0x10c */ + .long 0 /* 0x110 */ + .long 0 /* 0x114 */ + .long 0 /* 0x118 */ + .long 0 /* 0x11c */ + .long 0 /* 0x120 */ + .long .kvmppc_h_bulk_remove - hcall_real_table +hcall_real_table_end: + ignore_hdec: mr r4,r9 b fast_guest_return @@ -688,10 +834,16 @@ bounce_ext_interrupt: LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME); b fast_guest_return +_GLOBAL(kvmppc_h_set_dabr) + std r4,VCPU_DABR(r3) + mtspr SPRN_DABR,r4 + li r3,0 + blr + /* * Save away FP, VMX and VSX registers. * r3 = vcpu pointer -*/ + */ _GLOBAL(kvmppc_save_fp) mfmsr r9 ori r8,r9,MSR_FP diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3a4f379ee70f..6fc9ee499b61 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -42,7 +42,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !(v->arch.shared->msr & MSR_WE) || !!(v->arch.pending_exceptions); #else - return 1; + return !(v->arch.ceded) || !!(v->arch.pending_exceptions); #endif } -- cgit v1.2.3 From 54738c097163c3f01e67ccc85462b78d4d4f495f Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 29 Jun 2011 00:22:41 +0000 Subject: KVM: PPC: Accelerate H_PUT_TCE by implementing it in real mode This improves I/O performance for guests using the PAPR paravirtualization interface by making the H_PUT_TCE hcall faster, by implementing it in real mode. H_PUT_TCE is used for updating virtual IOMMU tables, and is used both for virtual I/O and for real I/O in the PAPR interface. Since this moves the IOMMU tables into the kernel, we define a new KVM_CREATE_SPAPR_TCE ioctl to allow qemu to create the tables. The ioctl returns a file descriptor which can be used to mmap the newly created table. The qemu driver models use them in the same way as userspace managed tables, but they can be updated directly by the guest with a real-mode H_PUT_TCE implementation, reducing the number of host/guest context switches during guest IO. There are certain circumstances where it is useful for userland qemu to write to the TCE table even if the kernel H_PUT_TCE path is used most of the time. Specifically, allowing this will avoid awkwardness when we need to reset the table. More importantly, we will in the future need to write the table in order to restore its state after a checkpoint resume or migration. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 35 ++++++++++ arch/powerpc/include/asm/kvm.h | 9 +++ arch/powerpc/include/asm/kvm_book3s_64.h | 2 + arch/powerpc/include/asm/kvm_host.h | 9 +++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/kvm/Makefile | 3 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 73 +++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 116 ++++++++++++++++++++++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/kvm/powerpc.c | 18 +++++ include/linux/kvm.h | 2 + 11 files changed, 268 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_64_vio_hv.c (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e8875fef3eb8..a1d344d5ff4c 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1350,6 +1350,41 @@ The following flags are defined: If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. +4.62 KVM_CREATE_SPAPR_TCE + +Capability: KVM_CAP_SPAPR_TCE +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_create_spapr_tce (in) +Returns: file descriptor for manipulating the created TCE table + +This creates a virtual TCE (translation control entry) table, which +is an IOMMU for PAPR-style virtual I/O. It is used to translate +logical addresses used in virtual I/O into guest physical addresses, +and provides a scatter/gather capability for PAPR virtual I/O. + +/* for KVM_CAP_SPAPR_TCE */ +struct kvm_create_spapr_tce { + __u64 liobn; + __u32 window_size; +}; + +The liobn field gives the logical IO bus number for which to create a +TCE table. The window_size field specifies the size of the DMA window +which this TCE table will translate - the table will contain one 64 +bit TCE entry for every 4kiB of the DMA window. + +When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE +table has been created using this ioctl(), the kernel will handle it +in real mode, updating the TCE table. H_PUT_TCE calls for other +liobns will cause a vm exit and must be handled by userspace. + +The return value is a file descriptor which can be passed to mmap(2) +to map the created TCE table into userspace. This lets userspace read +the entries written by kernel-handled H_PUT_TCE calls, and also lets +userspace update the TCE table directly which is useful in some +circumstances. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index d2ca5ed3877b..c3ec990daf45 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -22,6 +22,9 @@ #include +/* Select powerpc specific features in */ +#define __KVM_HAVE_SPAPR_TCE + struct kvm_regs { __u64 pc; __u64 cr; @@ -272,4 +275,10 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_UNSET -2U #define KVM_INTERRUPT_SET_LEVEL -3U +/* for KVM_CAP_SPAPR_TCE */ +struct kvm_create_spapr_tce { + __u64 liobn; + __u32 window_size; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 5f73388ea0af..e43fe42b9875 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -27,4 +27,6 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) } #endif +#define SPAPR_TCE_SHIFT 12 + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6ebf1721680a..5616e39a7fa4 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -144,6 +144,14 @@ struct kvmppc_pginfo { atomic_t refcnt; }; +struct kvmppc_spapr_tce_table { + struct list_head list; + struct kvm *kvm; + u64 liobn; + u32 window_size; + struct page *pages[0]; +}; + struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; @@ -157,6 +165,7 @@ struct kvm_arch { unsigned long sdr1; unsigned long host_sdr1; int tlbie_lock; + struct list_head spapr_tce_tables; unsigned short last_vcpu[NR_CPUS]; #endif /* CONFIG_KVM_BOOK3S_64_HV */ }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2afe92e6f625..99f6fcf4cf88 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -119,6 +119,8 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); +extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 2ecffc0dc1bb..1de3d54901d4 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -55,7 +55,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ - book3s_hv_rm_mmu.o + book3s_hv_rm_mmu.o \ + book3s_64_vio_hv.o kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c new file mode 100644 index 000000000000..ea0f8c537c28 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -0,0 +1,73 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2010 Paul Mackerras, IBM Corp. + * Copyright 2011 David Gibson, IBM Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) + +long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce) +{ + struct kvm *kvm = vcpu->kvm; + struct kvmppc_spapr_tce_table *stt; + + /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ + /* liobn, ioba, tce); */ + + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == liobn) { + unsigned long idx = ioba >> SPAPR_TCE_SHIFT; + struct page *page; + u64 *tbl; + + /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */ + /* liobn, stt, stt->window_size); */ + if (ioba >= stt->window_size) + return H_PARAMETER; + + page = stt->pages[idx / TCES_PER_PAGE]; + tbl = (u64 *)page_address(page); + + /* FIXME: Need to validate the TCE itself */ + /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */ + tbl[idx % TCES_PER_PAGE] = tce; + return H_SUCCESS; + } + } + + /* Didn't find the liobn, punt it to userspace */ + return H_TOO_HARD; +} diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index af862c30b70e..6fe469eabce8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -538,6 +538,116 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } +static long kvmppc_stt_npages(unsigned long window_size) +{ + return ALIGN((window_size >> SPAPR_TCE_SHIFT) + * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; +} + +static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) +{ + struct kvm *kvm = stt->kvm; + int i; + + mutex_lock(&kvm->lock); + list_del(&stt->list); + for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) + __free_page(stt->pages[i]); + kfree(stt); + mutex_unlock(&kvm->lock); + + kvm_put_kvm(kvm); +} + +static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; + struct page *page; + + if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) + return VM_FAULT_SIGBUS; + + page = stt->pages[vmf->pgoff]; + get_page(page); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { + .fault = kvm_spapr_tce_fault, +}; + +static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_spapr_tce_vm_ops; + return 0; +} + +static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) +{ + struct kvmppc_spapr_tce_table *stt = filp->private_data; + + release_spapr_tce_table(stt); + return 0; +} + +static struct file_operations kvm_spapr_tce_fops = { + .mmap = kvm_spapr_tce_mmap, + .release = kvm_spapr_tce_release, +}; + +long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args) +{ + struct kvmppc_spapr_tce_table *stt = NULL; + long npages; + int ret = -ENOMEM; + int i; + + /* Check this LIOBN hasn't been previously allocated */ + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == args->liobn) + return -EBUSY; + } + + npages = kvmppc_stt_npages(args->window_size); + + stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *), + GFP_KERNEL); + if (!stt) + goto fail; + + stt->liobn = args->liobn; + stt->window_size = args->window_size; + stt->kvm = kvm; + + for (i = 0; i < npages; i++) { + stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!stt->pages[i]) + goto fail; + } + + kvm_get_kvm(kvm); + + mutex_lock(&kvm->lock); + list_add(&stt->list, &kvm->arch.spapr_tce_tables); + + mutex_unlock(&kvm->lock); + + return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR); + +fail: + if (stt) { + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + } + return ret; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { @@ -559,13 +669,17 @@ int kvmppc_core_init_vm(struct kvm *kvm) /* Allocate hashed page table */ r = kvmppc_alloc_hpt(kvm); + if (r) + return r; - return r; + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + return 0; } void kvmppc_core_destroy_vm(struct kvm *kvm) { kvmppc_free_hpt(kvm); + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } /* These are stubs for now */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 319ff63b1f31..e6adaadcdff2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -754,7 +754,7 @@ hcall_real_table: .long 0 /* 0x14 - H_CLEAR_REF */ .long .kvmppc_h_protect - hcall_real_table .long 0 /* 0x1c - H_GET_TCE */ - .long 0 /* 0x20 - H_SET_TCE */ + .long .kvmppc_h_put_tce - hcall_real_table .long 0 /* 0x24 - H_SET_SPRG0 */ .long .kvmppc_h_set_dabr - hcall_real_table .long 0 /* 0x2c */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6fc9ee499b61..c78ceb9d5605 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -202,6 +202,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_SPAPR_TCE: + r = 1; + break; #endif default: r = 0; @@ -653,6 +658,19 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CREATE_SPAPR_TCE: { + struct kvm_create_spapr_tce create_tce; + struct kvm *kvm = filp->private_data; + + r = -EFAULT; + if (copy_from_user(&create_tce, argp, sizeof(create_tce))) + goto out; + r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); + goto out; + } +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + default: r = -ENOTTY; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index a156294fc22a..61f56502732e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -550,6 +550,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 #define KVM_CAP_PPC_BOOKE_SREGS 62 +#define KVM_CAP_SPAPR_TCE 63 #ifdef KVM_CAP_IRQ_ROUTING @@ -752,6 +753,7 @@ struct kvm_clock_data { /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 371fefd6f2dc46668e00871930dde613b88d4bde Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:23:08 +0000 Subject: KVM: PPC: Allow book3s_hv guests to use SMT processor modes This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded. This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4. To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code. When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core. We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus. When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy in host state to blocked). It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as the none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest. Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 13 ++ arch/powerpc/include/asm/kvm.h | 1 + arch/powerpc/include/asm/kvm_book3s_asm.h | 2 + arch/powerpc/include/asm/kvm_host.h | 46 ++++- arch/powerpc/include/asm/kvm_ppc.h | 13 ++ arch/powerpc/kernel/asm-offsets.c | 6 + arch/powerpc/kernel/exceptions-64s.S | 31 ++- arch/powerpc/kernel/idle_power7.S | 2 - arch/powerpc/kvm/book3s_hv.c | 316 +++++++++++++++++++++++++++--- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 168 +++++++++++++++- arch/powerpc/kvm/powerpc.c | 4 + arch/powerpc/sysdev/xics/icp-native.c | 9 + include/linux/kvm.h | 1 + 13 files changed, 567 insertions(+), 45 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a1d344d5ff4c..681871311d3e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -180,6 +180,19 @@ KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time. If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 cpus max. +On powerpc using book3s_hv mode, the vcpus are mapped onto virtual +threads in one or more virtual CPU cores. (This is because the +hardware requires all the hardware threads in a CPU core to be in the +same partition.) The KVM_CAP_PPC_SMT capability indicates the number +of vcpus per virtual core (vcore). The vcore id is obtained by +dividing the vcpu id by the number of vcpus per vcore. The vcpus in a +given vcore will always be in the same physical core as each other +(though that might be a different physical core from time to time). +Userspace can control the threading (SMT) mode of the guest by its +allocation of vcpu ids. For example, if userspace wants +single-threaded guest vcpus, it should make all vcpu ids be a multiple +of the number of vcpus per vcore. + 4.8 KVM_GET_DIRTY_LOG (vm ioctl) Capability: basic diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index c3ec990daf45..471bb3d85e0b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -24,6 +24,7 @@ /* Select powerpc specific features in */ #define __KVM_HAVE_SPAPR_TCE +#define __KVM_HAVE_PPC_SMT struct kvm_regs { __u64 pc; diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index b7b039532fbc..9cfd5436782d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -78,6 +78,8 @@ struct kvmppc_host_state { #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu *kvm_vcpu; + struct kvmppc_vcore *kvm_vcore; + unsigned long xics_phys; u64 dabr; u64 host_mmcr[3]; u32 host_pmc[6]; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5616e39a7fa4..0d6d569e19c7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -25,10 +25,14 @@ #include #include #include +#include +#include #include #include +#include -#define KVM_MAX_VCPUS 1 +#define KVM_MAX_VCPUS NR_CPUS +#define KVM_MAX_VCORES NR_CPUS #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -167,9 +171,34 @@ struct kvm_arch { int tlbie_lock; struct list_head spapr_tce_tables; unsigned short last_vcpu[NR_CPUS]; + struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; #endif /* CONFIG_KVM_BOOK3S_64_HV */ }; +/* + * Struct for a virtual core. + * Note: entry_exit_count combines an entry count in the bottom 8 bits + * and an exit count in the next 8 bits. This is so that we can + * atomically increment the entry count iff the exit count is 0 + * without taking the lock. + */ +struct kvmppc_vcore { + int n_runnable; + int n_blocked; + int num_threads; + int entry_exit_count; + int n_woken; + int nap_count; + u16 pcpu; + u8 vcore_running; + u8 in_guest; + struct list_head runnable_threads; + spinlock_t lock; +}; + +#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) +#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8) + struct kvmppc_pte { ulong eaddr; u64 vpage; @@ -365,14 +394,29 @@ struct kvm_vcpu_arch { struct slb_shadow *slb_shadow; struct dtl *dtl; struct dtl *dtl_end; + + struct kvmppc_vcore *vcore; + int ret; int trap; + int state; + int ptid; + wait_queue_head_t cpu_run; + struct kvm_vcpu_arch_shared *shared; unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; + + struct list_head run_list; + struct task_struct *run_task; + struct kvm_run *kvm_run; #endif }; +#define KVMPPC_VCPU_BUSY_IN_HOST 0 +#define KVMPPC_VCPU_BLOCKED 1 +#define KVMPPC_VCPU_RUNNABLE 2 + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 99f6fcf4cf88..6ef734428634 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -33,6 +33,9 @@ #else #include #endif +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include +#endif enum emulation_result { EMULATE_DONE, /* no further processing */ @@ -169,4 +172,14 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); +#ifdef CONFIG_KVM_BOOK3S_64_HV +static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) +{ + paca[cpu].kvm_hstate.xics_phys = addr; +} +#else +static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) +{} +#endif + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c70d106bf1a4..d0f2387fd792 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -471,6 +471,10 @@ int main(void) DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); + DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); + DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); + DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); + DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - offsetof(struct kvmppc_vcpu_book3s, vcpu)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); @@ -530,6 +534,8 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_64_HV HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); + HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); + HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 163c041cec24..5bc06fdfa6c0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -49,19 +49,32 @@ BEGIN_FTR_SECTION * state loss at this time. */ mfspr r13,SPRN_SRR1 - rlwinm r13,r13,47-31,30,31 - cmpwi cr0,r13,1 - bne 1f - b .power7_wakeup_noloss -1: cmpwi cr0,r13,2 - bne 1f - b .power7_wakeup_loss + rlwinm. r13,r13,47-31,30,31 + beq 9f + + /* waking up from powersave (nap) state */ + cmpwi cr1,r13,2 /* Total loss of HV state is fatal, we could try to use the * PIR to locate a PACA, then use an emergency stack etc... * but for now, let's just stay stuck here */ -1: cmpwi cr0,r13,3 - beq . + bgt cr1,. + GET_PACA(r13) + +#ifdef CONFIG_KVM_BOOK3S_64_HV + lbz r0,PACAPROCSTART(r13) + cmpwi r0,0x80 + bne 1f + li r0,0 + stb r0,PACAPROCSTART(r13) + b kvm_start_guest +1: +#endif + + beq cr1,2f + b .power7_wakeup_noloss +2: b .power7_wakeup_loss +9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index f8f0bc7f1d4f..3a70845a51c7 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -73,7 +73,6 @@ _GLOBAL(power7_idle) b . _GLOBAL(power7_wakeup_loss) - GET_PACA(r13) ld r1,PACAR1(r13) REST_NVGPRS(r1) REST_GPR(2, r1) @@ -87,7 +86,6 @@ _GLOBAL(power7_wakeup_loss) rfid _GLOBAL(power7_wakeup_noloss) - GET_PACA(r13) ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6fe469eabce8..36b6d98f1197 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -51,12 +52,16 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore; } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { } +static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu); +static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu); + void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) { u64 now; @@ -74,11 +79,15 @@ void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL); } + kvmppc_vcpu_blocked(vcpu); + kvm_vcpu_block(vcpu); vcpu->stat.halt_wakeup++; if (vcpu->arch.dec_expires != ~(u64)0) hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + + kvmppc_vcpu_unblocked(vcpu); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) @@ -429,9 +438,16 @@ int kvmppc_core_check_processor_compat(void) struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - int err = -ENOMEM; + int err = -EINVAL; + int core; + struct kvmppc_vcore *vcore; unsigned long lpcr; + core = id / threads_per_core; + if (core >= KVM_MAX_VCORES) + goto out; + + err = -ENOMEM; vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); if (!vcpu) goto out; @@ -454,6 +470,38 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) kvmppc_mmu_book3s_hv_init(vcpu); + /* + * Some vcpus may start out in stopped state. If we initialize + * them to busy-in-host state they will stop other vcpus in the + * vcore from running. Instead we initialize them to blocked + * state, effectively considering them to be stopped until we + * see the first run ioctl for them. + */ + vcpu->arch.state = KVMPPC_VCPU_BLOCKED; + + init_waitqueue_head(&vcpu->arch.cpu_run); + + mutex_lock(&kvm->lock); + vcore = kvm->arch.vcores[core]; + if (!vcore) { + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + if (vcore) { + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + } + kvm->arch.vcores[core] = vcore; + } + mutex_unlock(&kvm->lock); + + if (!vcore) + goto free_vcpu; + + spin_lock(&vcore->lock); + ++vcore->num_threads; + ++vcore->n_blocked; + spin_unlock(&vcore->lock); + vcpu->arch.vcore = vcore; + return vcpu; free_vcpu: @@ -468,21 +516,121 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kfree(vcpu); } +static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + spin_lock(&vc->lock); + vcpu->arch.state = KVMPPC_VCPU_BLOCKED; + ++vc->n_blocked; + if (vc->n_runnable > 0 && + vc->n_runnable + vc->n_blocked == vc->num_threads) { + vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, + arch.run_list); + wake_up(&vcpu->arch.cpu_run); + } + spin_unlock(&vc->lock); +} + +static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + spin_lock(&vc->lock); + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + --vc->n_blocked; + spin_unlock(&vc->lock); +} + extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern void xics_wake_cpu(int cpu); -static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu) +static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, + struct kvm_vcpu *vcpu) { - u64 now; + struct kvm_vcpu *v; - if (signal_pending(current)) { - run->exit_reason = KVM_EXIT_INTR; - return -EINTR; + if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) + return; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + --vc->n_runnable; + /* decrement the physical thread id of each following vcpu */ + v = vcpu; + list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) + --v->arch.ptid; + list_del(&vcpu->arch.run_list); +} + +static void kvmppc_start_thread(struct kvm_vcpu *vcpu) +{ + int cpu; + struct paca_struct *tpaca; + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + cpu = vc->pcpu + vcpu->arch.ptid; + tpaca = &paca[cpu]; + tpaca->kvm_hstate.kvm_vcpu = vcpu; + tpaca->kvm_hstate.kvm_vcore = vc; + smp_wmb(); +#ifdef CONFIG_PPC_ICP_NATIVE + if (vcpu->arch.ptid) { + tpaca->cpu_start = 0x80; + tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST; + wmb(); + xics_wake_cpu(cpu); + ++vc->n_woken; } +#endif +} - flush_fp_to_thread(current); - flush_altivec_to_thread(current); - flush_vsx_to_thread(current); - preempt_disable(); +static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc) +{ + int i; + + HMT_low(); + i = 0; + while (vc->nap_count < vc->n_woken) { + if (++i >= 1000000) { + pr_err("kvmppc_wait_for_nap timeout %d %d\n", + vc->nap_count, vc->n_woken); + break; + } + cpu_relax(); + } + HMT_medium(); +} + +/* + * Check that we are on thread 0 and that any other threads in + * this core are off-line. + */ +static int on_primary_thread(void) +{ + int cpu = smp_processor_id(); + int thr = cpu_thread_in_core(cpu); + + if (thr) + return 0; + while (++thr < threads_per_core) + if (cpu_online(cpu + thr)) + return 0; + return 1; +} + +/* + * Run a set of guest threads on a physical core. + * Called with vc->lock held. + */ +static int kvmppc_run_core(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu, *vnext; + long ret; + u64 now; + + /* don't start if any threads have a signal pending */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + if (signal_pending(vcpu->arch.run_task)) + return 0; /* * Make sure we are running on thread 0, and that @@ -490,36 +638,150 @@ static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu) * XXX we should also block attempts to bring any * secondary threads online. */ - if (threads_per_core > 1) { - int cpu = smp_processor_id(); - int thr = cpu_thread_in_core(cpu); - - if (thr) - goto out; - while (++thr < threads_per_core) - if (cpu_online(cpu + thr)) - goto out; + if (threads_per_core > 1 && !on_primary_thread()) { + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + vcpu->arch.ret = -EBUSY; + goto out; } - kvm_guest_enter(); + vc->n_woken = 0; + vc->nap_count = 0; + vc->entry_exit_count = 0; + vc->vcore_running = 1; + vc->in_guest = 0; + vc->pcpu = smp_processor_id(); + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + kvmppc_start_thread(vcpu); + vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, + arch.run_list); + + spin_unlock(&vc->lock); + preempt_disable(); + kvm_guest_enter(); __kvmppc_vcore_entry(NULL, vcpu); + /* wait for secondary threads to finish writing their state to memory */ + spin_lock(&vc->lock); + if (vc->nap_count < vc->n_woken) + kvmppc_wait_for_nap(vc); + /* prevent other vcpu threads from doing kvmppc_start_thread() now */ + vc->vcore_running = 2; + spin_unlock(&vc->lock); + + /* make sure updates to secondary vcpu structs are visible now */ + smp_mb(); kvm_guest_exit(); preempt_enable(); kvm_resched(vcpu); now = get_tb(); - /* cancel pending dec exception if dec is positive */ - if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) - kvmppc_core_dequeue_dec(vcpu); - - return kvmppc_handle_exit(run, vcpu, current); + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + /* cancel pending dec exception if dec is positive */ + if (now < vcpu->arch.dec_expires && + kvmppc_core_pending_dec(vcpu)) + kvmppc_core_dequeue_dec(vcpu); + if (!vcpu->arch.trap) { + if (signal_pending(vcpu->arch.run_task)) { + vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->arch.ret = -EINTR; + } + continue; /* didn't get to run */ + } + ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, + vcpu->arch.run_task); + vcpu->arch.ret = ret; + vcpu->arch.trap = 0; + } + spin_lock(&vc->lock); out: - preempt_enable(); - return -EBUSY; + vc->vcore_running = 0; + list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, + arch.run_list) { + if (vcpu->arch.ret != RESUME_GUEST) { + kvmppc_remove_runnable(vc, vcpu); + wake_up(&vcpu->arch.cpu_run); + } + } + + return 1; +} + +static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + int ptid; + int wait_state; + struct kvmppc_vcore *vc; + DEFINE_WAIT(wait); + + /* No need to go into the guest when all we do is going out */ + if (signal_pending(current)) { + kvm_run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + + kvm_run->exit_reason = 0; + vcpu->arch.ret = RESUME_GUEST; + vcpu->arch.trap = 0; + + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_vsx_to_thread(current); + + /* + * Synchronize with other threads in this virtual core + */ + vc = vcpu->arch.vcore; + spin_lock(&vc->lock); + /* This happens the first time this is called for a vcpu */ + if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED) + --vc->n_blocked; + vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; + ptid = vc->n_runnable; + vcpu->arch.run_task = current; + vcpu->arch.kvm_run = kvm_run; + vcpu->arch.ptid = ptid; + list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + ++vc->n_runnable; + + wait_state = TASK_INTERRUPTIBLE; + while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { + if (signal_pending(current)) { + if (!vc->vcore_running) { + kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->arch.ret = -EINTR; + break; + } + /* have to wait for vcore to stop executing guest */ + wait_state = TASK_UNINTERRUPTIBLE; + smp_send_reschedule(vc->pcpu); + } + + if (!vc->vcore_running && + vc->n_runnable + vc->n_blocked == vc->num_threads) { + /* we can run now */ + if (kvmppc_run_core(vc)) + continue; + } + + if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0) + kvmppc_start_thread(vcpu); + + /* wait for other threads to come in, or wait for vcore */ + prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); + spin_unlock(&vc->lock); + schedule(); + finish_wait(&vcpu->arch.cpu_run, &wait); + spin_lock(&vc->lock); + } + + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) + kvmppc_remove_runnable(vc, vcpu); + spin_unlock(&vc->lock); + + return vcpu->arch.ret; } int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e6adaadcdff2..c9bf177b7cf2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -30,8 +30,6 @@ * * ****************************************************************************/ -#define SHADOW_VCPU_OFF PACA_KVM_SVCPU - .globl kvmppc_skip_interrupt kvmppc_skip_interrupt: mfspr r13,SPRN_SRR0 @@ -79,6 +77,32 @@ _GLOBAL(kvmppc_hv_entry_trampoline) * * *****************************************************************************/ +#define XICS_XIRR 4 +#define XICS_QIRR 0xc + +/* + * We come in here when wakened from nap mode on a secondary hw thread. + * Relocation is off and most register values are lost. + * r13 points to the PACA. + */ + .globl kvm_start_guest +kvm_start_guest: + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + /* get vcpu pointer */ + ld r4, HSTATE_KVM_VCPU(r13) + + /* We got here with an IPI; clear it */ + ld r5, HSTATE_XICS_PHYS(r13) + li r0, 0xff + li r6, XICS_QIRR + li r7, XICS_XIRR + lwzcix r8, r5, r7 /* ack the interrupt */ + sync + stbcix r0, r5, r6 /* clear it */ + stwcix r8, r5, r7 /* EOI it */ + .global kvmppc_hv_entry kvmppc_hv_entry: @@ -200,7 +224,20 @@ kvmppc_hv_entry: slbia ptesync - /* Switch to guest partition. */ + /* Increment entry count iff exit count is zero. */ + ld r5,HSTATE_KVM_VCORE(r13) + addi r9,r5,VCORE_ENTRY_EXIT +21: lwarx r3,0,r9 + cmpwi r3,0x100 /* any threads starting to exit? */ + bge secondary_too_late /* if so we're too late to the party */ + addi r3,r3,1 + stwcx. r3,0,r9 + bne 21b + + /* Primary thread switches to guest partition. */ + lwz r6,VCPU_PTID(r4) + cmpwi r6,0 + bne 20f ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ ld r6,KVM_SDR1(r9) lwz r7,KVM_LPID(r9) @@ -210,7 +247,15 @@ kvmppc_hv_entry: mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_LPID,r7 isync - ld r8,VCPU_LPCR(r4) + li r0,1 + stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ + b 10f + + /* Secondary threads wait for primary to have done partition switch */ +20: lbz r0,VCORE_IN_GUEST(r5) + cmpwi r0,0 + beq 20b +10: ld r8,VCPU_LPCR(r4) mtspr SPRN_LPCR,r8 isync @@ -225,10 +270,12 @@ kvmppc_hv_entry: * Invalidate the TLB if we could possibly have stale TLB * entries for this partition on this core due to the use * of tlbiel. + * XXX maybe only need this on primary thread? */ ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ lwz r5,VCPU_VCPUID(r4) lhz r6,PACAPACAINDEX(r13) + rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */ lhz r8,VCPU_LAST_CPU(r4) sldi r7,r6,1 /* see if this is the same vcpu */ add r7,r7,r9 /* as last ran on this pcpu */ @@ -512,8 +559,60 @@ hcall_real_cont: ptesync hdec_soon: - /* Switch back to host partition */ + /* Increment the threads-exiting-guest count in the 0xff00 + bits of vcore->entry_exit_count */ + lwsync + ld r5,HSTATE_KVM_VCORE(r13) + addi r6,r5,VCORE_ENTRY_EXIT +41: lwarx r3,0,r6 + addi r0,r3,0x100 + stwcx. r0,0,r6 + bne 41b + + /* + * At this point we have an interrupt that we have to pass + * up to the kernel or qemu; we can't handle it in real mode. + * Thus we have to do a partition switch, so we have to + * collect the other threads, if we are the first thread + * to take an interrupt. To do this, we set the HDEC to 0, + * which causes an HDEC interrupt in all threads within 2ns + * because the HDEC register is shared between all 4 threads. + * However, we don't need to bother if this is an HDEC + * interrupt, since the other threads will already be on their + * way here in that case. + */ + cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER + beq 40f + cmpwi r3,0x100 /* Are we the first here? */ + bge 40f + cmpwi r3,1 + ble 40f + li r0,0 + mtspr SPRN_HDEC,r0 +40: + + /* Secondary threads wait for primary to do partition switch */ ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + ld r5,HSTATE_KVM_VCORE(r13) + lwz r3,VCPU_PTID(r9) + cmpwi r3,0 + beq 15f + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + b 16f + + /* Primary thread waits for all the secondaries to exit guest */ +15: lwz r3,VCORE_ENTRY_EXIT(r5) + srwi r0,r3,8 + clrldi r3,r3,56 + cmpw r3,r0 + bne 15b + isync + + /* Primary thread switches back to host partition */ ld r6,KVM_HOST_SDR1(r4) lwz r7,KVM_HOST_LPID(r4) li r8,LPID_RSVD /* switch to reserved LPID */ @@ -522,10 +621,12 @@ hdec_soon: mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_LPID,r7 isync + li r0,0 + stb r0,VCORE_IN_GUEST(r5) lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 - ld r8,KVM_HOST_LPCR(r4) +16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync @@ -634,6 +735,11 @@ hdec_soon: mr r3, r9 bl .kvmppc_save_fp + /* Secondary threads go off to take a nap */ + lwz r0,VCPU_PTID(r3) + cmpwi r0,0 + bne secondary_nap + /* * Reload DEC. HDEC interrupts were disabled when * we reloaded the host's LPCR value. @@ -840,6 +946,56 @@ _GLOBAL(kvmppc_h_set_dabr) li r3,0 blr +secondary_too_late: + ld r5,HSTATE_KVM_VCORE(r13) + HMT_LOW +13: lbz r3,VCORE_IN_GUEST(r5) + cmpwi r3,0 + bne 13b + HMT_MEDIUM + ld r11,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + ld r5,SLBSHADOW_SAVEAREA(r11) + ld r6,SLBSHADOW_SAVEAREA+8(r11) + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r11,r11,16 + .endr + b 50f + +secondary_nap: + /* Clear any pending IPI */ +50: ld r5, HSTATE_XICS_PHYS(r13) + li r0, 0xff + li r6, XICS_QIRR + stbcix r0, r5, r6 + + /* increment the nap count and then go to nap mode */ + ld r4, HSTATE_KVM_VCORE(r13) + addi r4, r4, VCORE_NAP_COUNT + lwsync /* make previous updates visible */ +51: lwarx r3, 0, r4 + addi r3, r3, 1 + stwcx. r3, 0, r4 + bne 51b + isync + + mfspr r4, SPRN_LPCR + li r0, LPCR_PECE + andc r4, r4, r0 + ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */ + mtspr SPRN_LPCR, r4 + li r0, 0 + std r0, HSTATE_SCRATCH0(r13) + ptesync + ld r0, HSTATE_SCRATCH0(r13) +1: cmpd r0, r0 + bne 1b + nap + b . + /* * Save away FP, VMX and VSX registers. * r3 = vcpu pointer diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c78ceb9d5605..4c549664c987 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "timing.h" #include "../mm/mmu_decl.h" @@ -207,6 +208,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SPAPR_TCE: r = 1; break; + case KVM_CAP_PPC_SMT: + r = threads_per_core; + break; #endif default: r = 0; diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 1f15ad436140..ba382b59b926 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ #include #include #include +#include struct icp_ipl { union { @@ -139,6 +141,12 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) icp_native_set_qirr(cpu, IPI_PRIORITY); } +void xics_wake_cpu(int cpu) +{ + icp_native_set_qirr(cpu, IPI_PRIORITY); +} +EXPORT_SYMBOL_GPL(xics_wake_cpu); + static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) { int cpu = smp_processor_id(); @@ -185,6 +193,7 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr, } icp_native_regs[cpu] = ioremap(addr, size); + kvmppc_set_xics_phys(cpu, addr); if (!icp_native_regs[cpu]) { pr_warning("icp_native: Failed ioremap for CPU %d, " "interrupt server #0x%x, addr %#lx\n", diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 61f56502732e..e2a378d97160 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -551,6 +551,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_GET_TSC_KHZ 61 #define KVM_CAP_PPC_BOOKE_SREGS 62 #define KVM_CAP_SPAPR_TCE 63 +#define KVM_CAP_PPC_SMT 64 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From aa04b4cc5be64b4fb9ef4e0fdf2418e2f4737fb2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:25:44 +0000 Subject: KVM: PPC: Allocate RMAs (Real Mode Areas) at boot for use by guests This adds infrastructure which will be needed to allow book3s_hv KVM to run on older POWER processors, including PPC970, which don't support the Virtual Real Mode Area (VRMA) facility, but only the Real Mode Offset (RMO) facility. These processors require a physically contiguous, aligned area of memory for each guest. When the guest does an access in real mode (MMU off), the address is compared against a limit value, and if it is lower, the address is ORed with an offset value (from the Real Mode Offset Register (RMOR)) and the result becomes the real address for the access. The size of the RMA has to be one of a set of supported values, which usually includes 64MB, 128MB, 256MB and some larger powers of 2. Since we are unlikely to be able to allocate 64MB or more of physically contiguous memory after the kernel has been running for a while, we allocate a pool of RMAs at boot time using the bootmem allocator. The size and number of the RMAs can be set using the kvm_rma_size=xx and kvm_rma_count=xx kernel command line options. KVM exports a new capability, KVM_CAP_PPC_RMA, to signal the availability of the pool of preallocated RMAs. The capability value is 1 if the processor can use an RMA but doesn't require one (because it supports the VRMA facility), or 2 if the processor requires an RMA for each guest. This adds a new ioctl, KVM_ALLOCATE_RMA, which allocates an RMA from the pool and returns a file descriptor which can be used to map the RMA. It also returns the size of the RMA in the argument structure. Having an RMA means we will get multiple KMV_SET_USER_MEMORY_REGION ioctl calls from userspace. To cope with this, we now preallocate the kvm->arch.ram_pginfo array when the VM is created with a size sufficient for up to 64GB of guest memory. Subsequently we will get rid of this array and use memory associated with each memslot instead. This moves most of the code that translates the user addresses into host pfns (page frame numbers) out of kvmppc_prepare_vrma up one level to kvmppc_core_prepare_memory_region. Also, instead of having to look up the VMA for each page in order to check the page size, we now check that the pages we get are compound pages of 16MB. However, if we are adding memory that is mapped to an RMA, we don't bother with calling get_user_pages_fast and instead just offset from the base pfn for the RMA. Typically the RMA gets added after vcpus are created, which makes it inconvenient to have the LPCR (logical partition control register) value in the vcpu->arch struct, since the LPCR controls whether the processor uses RMA or VRMA for the guest. This moves the LPCR value into the kvm->arch struct and arranges for the MER (mediated external request) bit, which is the only bit that varies between vcpus, to be set in assembly code when going into the guest if there is a pending external interrupt request. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 32 ++++ arch/powerpc/include/asm/kvm.h | 5 + arch/powerpc/include/asm/kvm_book3s.h | 8 - arch/powerpc/include/asm/kvm_host.h | 15 +- arch/powerpc/include/asm/kvm_ppc.h | 10 ++ arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/asm-offsets.c | 4 +- arch/powerpc/kernel/setup_64.c | 3 + arch/powerpc/kvm/Makefile | 3 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 97 +----------- arch/powerpc/kvm/book3s_hv.c | 259 ++++++++++++++++++++++++++++++-- arch/powerpc/kvm/book3s_hv_builtin.c | 152 +++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 19 ++- arch/powerpc/kvm/powerpc.c | 13 ++ include/linux/kvm.h | 3 + 15 files changed, 505 insertions(+), 119 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_hv_builtin.c (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 681871311d3e..b0e4b9cd6a66 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1398,6 +1398,38 @@ the entries written by kernel-handled H_PUT_TCE calls, and also lets userspace update the TCE table directly which is useful in some circumstances. +4.63 KVM_ALLOCATE_RMA + +Capability: KVM_CAP_PPC_RMA +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_allocate_rma (out) +Returns: file descriptor for mapping the allocated RMA + +This allocates a Real Mode Area (RMA) from the pool allocated at boot +time by the kernel. An RMA is a physically-contiguous, aligned region +of memory used on older POWER processors to provide the memory which +will be accessed by real-mode (MMU off) accesses in a KVM guest. +POWER processors support a set of sizes for the RMA that usually +includes 64MB, 128MB, 256MB and some larger powers of two. + +/* for KVM_ALLOCATE_RMA */ +struct kvm_allocate_rma { + __u64 rma_size; +}; + +The return value is a file descriptor which can be passed to mmap(2) +to map the allocated RMA into userspace. The mapped area can then be +passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the +RMA for a virtual machine. The size of the RMA in bytes (which is +fixed at host kernel boot time) is returned in the rma_size field of +the argument structure. + +The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl +is supported; 2 if the processor requires all virtual machines to have +an RMA, or 1 if the processor can use an RMA but doesn't require it, +because it supports the Virtual RMA (VRMA) facility. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 471bb3d85e0b..a4f6c85431f8 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -282,4 +282,9 @@ struct kvm_create_spapr_tce { __u32 window_size; }; +/* for KVM_ALLOCATE_RMA */ +struct kvm_allocate_rma { + __u64 rma_size; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5537c45d626c..3f91ebd4ae43 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -298,14 +298,6 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { - /* Recalculate LPCR:MER based on the presence of - * a pending external interrupt - */ - if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &pending_now) || - test_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &pending_now)) - vcpu->arch.lpcr |= LPCR_MER; - else - vcpu->arch.lpcr &= ~((u64)LPCR_MER); } static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0d6d569e19c7..f572d9cc31bd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -156,6 +158,14 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; +struct kvmppc_rma_info { + void *base_virt; + unsigned long base_pfn; + unsigned long npages; + struct list_head list; + atomic_t use_count; +}; + struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; @@ -169,6 +179,10 @@ struct kvm_arch { unsigned long sdr1; unsigned long host_sdr1; int tlbie_lock; + int n_rma_pages; + unsigned long lpcr; + unsigned long rmor; + struct kvmppc_rma_info *rma; struct list_head spapr_tce_tables; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; @@ -295,7 +309,6 @@ struct kvm_vcpu_arch { ulong guest_owned_ext; ulong purr; ulong spurr; - ulong lpcr; ulong dscr; ulong amr; ulong uamor; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6ef734428634..d121f49d62b8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -124,6 +124,10 @@ extern void kvmppc_map_vrma(struct kvm *kvm, extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); +extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, + struct kvm_allocate_rma *rma); +extern struct kvmppc_rma_info *kvm_alloc_rma(void); +extern void kvm_release_rma(struct kvmppc_rma_info *ri); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, @@ -177,9 +181,15 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { paca[cpu].kvm_hstate.xics_phys = addr; } + +extern void kvm_rma_init(void); + #else static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) {} + +static inline void kvm_rma_init(void) +{} #endif #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 36a611b398c5..20a053c14270 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -242,6 +242,7 @@ #define LPCR_VRMA_LP1 (1ul << (63-16)) #define LPCR_VRMASD_SH (63-16) #define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */ +#define LPCR_RMLS_SH (63-37) #define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ #define LPCR_PECE 0x00007000 /* powersave exit cause enable */ #define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d0f2387fd792..f4aba938166b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -437,6 +437,8 @@ int main(void) DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); + DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); + DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); #endif @@ -459,7 +461,7 @@ int main(void) DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); - DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr)); + DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a88bf2713d41..532054f24ecb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "setup.h" @@ -580,6 +581,8 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); + kvm_rma_init(); + ppc64_boot_msg(0x15, "Setup Done"); } diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1de3d54901d4..08428e2c188d 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -56,7 +56,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_64_mmu_hv.o kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_rm_mmu.o \ - book3s_64_vio_hv.o + book3s_64_vio_hv.o \ + book3s_hv_builtin.o kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 4a4fbec61a17..96ba96a16abf 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -79,103 +79,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm) void kvmppc_free_hpt(struct kvm *kvm) { - unsigned long i; - struct kvmppc_pginfo *pginfo; - clear_bit(kvm->arch.lpid, lpid_inuse); free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); - - if (kvm->arch.ram_pginfo) { - pginfo = kvm->arch.ram_pginfo; - kvm->arch.ram_pginfo = NULL; - for (i = 0; i < kvm->arch.ram_npages; ++i) - put_page(pfn_to_page(pginfo[i].pfn)); - kfree(pginfo); - } -} - -static unsigned long user_page_size(unsigned long addr) -{ - struct vm_area_struct *vma; - unsigned long size = PAGE_SIZE; - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, addr); - if (vma) - size = vma_kernel_pagesize(vma); - up_read(¤t->mm->mmap_sem); - return size; -} - -static pfn_t hva_to_pfn(unsigned long addr) -{ - struct page *page[1]; - int npages; - - might_sleep(); - - npages = get_user_pages_fast(addr, 1, 1, page); - - if (unlikely(npages != 1)) - return 0; - - return page_to_pfn(page[0]); -} - -long kvmppc_prepare_vrma(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) -{ - unsigned long psize, porder; - unsigned long i, npages; - struct kvmppc_pginfo *pginfo; - pfn_t pfn; - unsigned long hva; - - /* First see what page size we have */ - psize = user_page_size(mem->userspace_addr); - /* For now, only allow 16MB pages */ - if (psize != 1ul << VRMA_PAGE_ORDER || (mem->memory_size & (psize - 1))) { - pr_err("bad psize=%lx memory_size=%llx @ %llx\n", - psize, mem->memory_size, mem->userspace_addr); - return -EINVAL; - } - porder = __ilog2(psize); - - npages = mem->memory_size >> porder; - pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo), GFP_KERNEL); - if (!pginfo) { - pr_err("kvmppc_prepare_vrma: couldn't alloc %lu bytes\n", - npages * sizeof(struct kvmppc_pginfo)); - return -ENOMEM; - } - - for (i = 0; i < npages; ++i) { - hva = mem->userspace_addr + (i << porder); - if (user_page_size(hva) != psize) - goto err; - pfn = hva_to_pfn(hva); - if (pfn == 0) { - pr_err("oops, no pfn for hva %lx\n", hva); - goto err; - } - if (pfn & ((1ul << (porder - PAGE_SHIFT)) - 1)) { - pr_err("oops, unaligned pfn %llx\n", pfn); - put_page(pfn_to_page(pfn)); - goto err; - } - pginfo[i].pfn = pfn; - } - - kvm->arch.ram_npages = npages; - kvm->arch.ram_psize = psize; - kvm->arch.ram_porder = porder; - kvm->arch.ram_pginfo = pginfo; - - return 0; - - err: - kfree(pginfo); - return -EINVAL; } void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) @@ -199,6 +104,8 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) for (i = 0; i < npages; ++i) { pfn = pginfo[i].pfn; + if (!pfn) + break; /* can't use hpt_hash since va > 64 bits */ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; /* diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 36b6d98f1197..04da135cae61 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -40,11 +42,22 @@ #include #include #include +#include #include #include #include #include +/* + * For now, limit memory to 64GB and require it to be large pages. + * This value is chosen because it makes the ram_pginfo array be + * 64kB in size, which is about as large as we want to be trying + * to allocate with kmalloc. + */ +#define MAX_MEM_ORDER 36 + +#define LARGE_PAGE_ORDER 24 /* 16MB pages */ + /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -129,7 +142,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu) pr_err(" ESID = %.16llx VSID = %.16llx\n", vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", - vcpu->arch.lpcr, vcpu->kvm->arch.sdr1, + vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, vcpu->arch.last_inst); } @@ -441,7 +454,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) int err = -EINVAL; int core; struct kvmppc_vcore *vcore; - unsigned long lpcr; core = id / threads_per_core; if (core >= KVM_MAX_VCORES) @@ -464,10 +476,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.pvr = mfspr(SPRN_PVR); kvmppc_set_pvr(vcpu, vcpu->arch.pvr); - lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES); - lpcr |= LPCR_VPM0 | LPCR_VRMA_L | (4UL << LPCR_DPFD_SH) | LPCR_HDICE; - vcpu->arch.lpcr = lpcr; - kvmppc_mmu_book3s_hv_init(vcpu); /* @@ -910,24 +918,216 @@ fail: return ret; } +/* Work out RMLS (real mode limit selector) field value for a given RMA size. + Assumes POWER7. */ +static inline int lpcr_rmls(unsigned long rma_size) +{ + switch (rma_size) { + case 32ul << 20: /* 32 MB */ + return 8; + case 64ul << 20: /* 64 MB */ + return 3; + case 128ul << 20: /* 128 MB */ + return 7; + case 256ul << 20: /* 256 MB */ + return 4; + case 1ul << 30: /* 1 GB */ + return 2; + case 16ul << 30: /* 16 GB */ + return 1; + case 256ul << 30: /* 256 GB */ + return 0; + default: + return -1; + } +} + +static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kvmppc_rma_info *ri = vma->vm_file->private_data; + struct page *page; + + if (vmf->pgoff >= ri->npages) + return VM_FAULT_SIGBUS; + + page = pfn_to_page(ri->base_pfn + vmf->pgoff); + get_page(page); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct kvm_rma_vm_ops = { + .fault = kvm_rma_fault, +}; + +static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &kvm_rma_vm_ops; + return 0; +} + +static int kvm_rma_release(struct inode *inode, struct file *filp) +{ + struct kvmppc_rma_info *ri = filp->private_data; + + kvm_release_rma(ri); + return 0; +} + +static struct file_operations kvm_rma_fops = { + .mmap = kvm_rma_mmap, + .release = kvm_rma_release, +}; + +long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) +{ + struct kvmppc_rma_info *ri; + long fd; + + ri = kvm_alloc_rma(); + if (!ri) + return -ENOMEM; + + fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR); + if (fd < 0) + kvm_release_rma(ri); + + ret->rma_size = ri->npages << PAGE_SHIFT; + return fd; +} + +static struct page *hva_to_page(unsigned long addr) +{ + struct page *page[1]; + int npages; + + might_sleep(); + + npages = get_user_pages_fast(addr, 1, 1, page); + + if (unlikely(npages != 1)) + return 0; + + return page[0]; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - if (mem->guest_phys_addr == 0 && mem->memory_size != 0) - return kvmppc_prepare_vrma(kvm, mem); + unsigned long psize, porder; + unsigned long i, npages, totalpages; + unsigned long pg_ix; + struct kvmppc_pginfo *pginfo; + unsigned long hva; + struct kvmppc_rma_info *ri = NULL; + struct page *page; + + /* For now, only allow 16MB pages */ + porder = LARGE_PAGE_ORDER; + psize = 1ul << porder; + if ((mem->memory_size & (psize - 1)) || + (mem->guest_phys_addr & (psize - 1))) { + pr_err("bad memory_size=%llx @ %llx\n", + mem->memory_size, mem->guest_phys_addr); + return -EINVAL; + } + + npages = mem->memory_size >> porder; + totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder; + + /* More memory than we have space to track? */ + if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER))) + return -EINVAL; + + /* Do we already have an RMA registered? */ + if (mem->guest_phys_addr == 0 && kvm->arch.rma) + return -EINVAL; + + if (totalpages > kvm->arch.ram_npages) + kvm->arch.ram_npages = totalpages; + + /* Is this one of our preallocated RMAs? */ + if (mem->guest_phys_addr == 0) { + struct vm_area_struct *vma; + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, mem->userspace_addr); + if (vma && vma->vm_file && + vma->vm_file->f_op == &kvm_rma_fops && + mem->userspace_addr == vma->vm_start) + ri = vma->vm_file->private_data; + up_read(¤t->mm->mmap_sem); + } + + if (ri) { + unsigned long rma_size; + unsigned long lpcr; + long rmls; + + rma_size = ri->npages << PAGE_SHIFT; + if (rma_size > mem->memory_size) + rma_size = mem->memory_size; + rmls = lpcr_rmls(rma_size); + if (rmls < 0) { + pr_err("Can't use RMA of 0x%lx bytes\n", rma_size); + return -EINVAL; + } + atomic_inc(&ri->use_count); + kvm->arch.rma = ri; + kvm->arch.n_rma_pages = rma_size >> porder; + lpcr = kvm->arch.lpcr & ~(LPCR_VPM0 | LPCR_VRMA_L); + lpcr |= rmls << LPCR_RMLS_SH; + kvm->arch.lpcr = lpcr; + kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", + ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); + } + + pg_ix = mem->guest_phys_addr >> porder; + pginfo = kvm->arch.ram_pginfo + pg_ix; + for (i = 0; i < npages; ++i, ++pg_ix) { + if (ri && pg_ix < kvm->arch.n_rma_pages) { + pginfo[i].pfn = ri->base_pfn + + (pg_ix << (porder - PAGE_SHIFT)); + continue; + } + hva = mem->userspace_addr + (i << porder); + page = hva_to_page(hva); + if (!page) { + pr_err("oops, no pfn for hva %lx\n", hva); + goto err; + } + /* Check it's a 16MB page */ + if (!PageHead(page) || + compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) { + pr_err("page at %lx isn't 16MB (o=%d)\n", + hva, compound_order(page)); + goto err; + } + pginfo[i].pfn = page_to_pfn(page); + } + return 0; + + err: + return -EINVAL; } void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - if (mem->guest_phys_addr == 0 && mem->memory_size != 0) + if (mem->guest_phys_addr == 0 && mem->memory_size != 0 && + !kvm->arch.rma) kvmppc_map_vrma(kvm, mem); } int kvmppc_core_init_vm(struct kvm *kvm) { long r; + unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER); + long err = -ENOMEM; + unsigned long lpcr; /* Allocate hashed page table */ r = kvmppc_alloc_hpt(kvm); @@ -935,11 +1135,52 @@ int kvmppc_core_init_vm(struct kvm *kvm) return r; INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + + kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo), + GFP_KERNEL); + if (!kvm->arch.ram_pginfo) { + pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n", + npages * sizeof(struct kvmppc_pginfo)); + goto out_free; + } + + kvm->arch.ram_npages = 0; + kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER; + kvm->arch.ram_porder = LARGE_PAGE_ORDER; + kvm->arch.rma = NULL; + kvm->arch.n_rma_pages = 0; + + lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES); + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VRMA_L; + kvm->arch.lpcr = lpcr; + + return 0; + + out_free: + kvmppc_free_hpt(kvm); + return err; } void kvmppc_core_destroy_vm(struct kvm *kvm) { + struct kvmppc_pginfo *pginfo; + unsigned long i; + + if (kvm->arch.ram_pginfo) { + pginfo = kvm->arch.ram_pginfo; + kvm->arch.ram_pginfo = NULL; + for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i) + if (pginfo[i].pfn) + put_page(pfn_to_page(pginfo[i].pfn)); + kfree(pginfo); + } + if (kvm->arch.rma) { + kvm_release_rma(kvm->arch.rma); + kvm->arch.rma = NULL; + } + kvmppc_free_hpt(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c new file mode 100644 index 000000000000..736df3cbbc55 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -0,0 +1,152 @@ +/* + * Copyright 2011 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * This maintains a list of RMAs (real mode areas) for KVM guests to use. + * Each RMA has to be physically contiguous and of a size that the + * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, + * and other larger sizes. Since we are unlikely to be allocate that + * much physically contiguous memory after the system is up and running, + * we preallocate a set of RMAs in early boot for KVM to use. + */ +static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ +static unsigned long kvm_rma_count; + +static int __init early_parse_rma_size(char *p) +{ + if (!p) + return 1; + + kvm_rma_size = memparse(p, &p); + + return 0; +} +early_param("kvm_rma_size", early_parse_rma_size); + +static int __init early_parse_rma_count(char *p) +{ + if (!p) + return 1; + + kvm_rma_count = simple_strtoul(p, NULL, 0); + + return 0; +} +early_param("kvm_rma_count", early_parse_rma_count); + +static struct kvmppc_rma_info *rma_info; +static LIST_HEAD(free_rmas); +static DEFINE_SPINLOCK(rma_lock); + +/* Work out RMLS (real mode limit selector) field value for a given RMA size. + Assumes POWER7. */ +static inline int lpcr_rmls(unsigned long rma_size) +{ + switch (rma_size) { + case 32ul << 20: /* 32 MB */ + return 8; + case 64ul << 20: /* 64 MB */ + return 3; + case 128ul << 20: /* 128 MB */ + return 7; + case 256ul << 20: /* 256 MB */ + return 4; + case 1ul << 30: /* 1 GB */ + return 2; + case 16ul << 30: /* 16 GB */ + return 1; + case 256ul << 30: /* 256 GB */ + return 0; + default: + return -1; + } +} + +/* + * Called at boot time while the bootmem allocator is active, + * to allocate contiguous physical memory for the real memory + * areas for guests. + */ +void kvm_rma_init(void) +{ + unsigned long i; + unsigned long j, npages; + void *rma; + struct page *pg; + + /* Only do this on POWER7 in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE_206)) + return; + + if (!kvm_rma_size || !kvm_rma_count) + return; + + /* Check that the requested size is one supported in hardware */ + if (lpcr_rmls(kvm_rma_size) < 0) { + pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); + return; + } + + npages = kvm_rma_size >> PAGE_SHIFT; + rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info)); + for (i = 0; i < kvm_rma_count; ++i) { + rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size); + pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma, + kvm_rma_size >> 20); + rma_info[i].base_virt = rma; + rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT; + rma_info[i].npages = npages; + list_add_tail(&rma_info[i].list, &free_rmas); + atomic_set(&rma_info[i].use_count, 0); + + pg = pfn_to_page(rma_info[i].base_pfn); + for (j = 0; j < npages; ++j) { + atomic_inc(&pg->_count); + ++pg; + } + } +} + +struct kvmppc_rma_info *kvm_alloc_rma(void) +{ + struct kvmppc_rma_info *ri; + + ri = NULL; + spin_lock(&rma_lock); + if (!list_empty(&free_rmas)) { + ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list); + list_del(&ri->list); + atomic_inc(&ri->use_count); + } + spin_unlock(&rma_lock); + return ri; +} +EXPORT_SYMBOL_GPL(kvm_alloc_rma); + +void kvm_release_rma(struct kvmppc_rma_info *ri) +{ + if (atomic_dec_and_test(&ri->use_count)) { + spin_lock(&rma_lock); + list_add_tail(&ri->list, &free_rmas); + spin_unlock(&rma_lock); + + } +} +EXPORT_SYMBOL_GPL(kvm_release_rma); + diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c9bf177b7cf2..9ee223c35285 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -235,10 +235,10 @@ kvmppc_hv_entry: bne 21b /* Primary thread switches to guest partition. */ + ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ lwz r6,VCPU_PTID(r4) cmpwi r6,0 bne 20f - ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ ld r6,KVM_SDR1(r9) lwz r7,KVM_LPID(r9) li r0,LPID_RSVD /* switch to reserved LPID */ @@ -255,8 +255,18 @@ kvmppc_hv_entry: 20: lbz r0,VCORE_IN_GUEST(r5) cmpwi r0,0 beq 20b -10: ld r8,VCPU_LPCR(r4) - mtspr SPRN_LPCR,r8 + + /* Set LPCR. Set the MER bit if there is a pending external irq. */ +10: ld r8,KVM_LPCR(r9) + ld r0,VCPU_PENDING_EXC(r4) + li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL) + oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + and. r0,r0,r7 + beq 11f + ori r8,r8,LPCR_MER +11: mtspr SPRN_LPCR,r8 + ld r8,KVM_RMOR(r9) + mtspr SPRN_RMOR,r8 isync /* Check if HDEC expires soon */ @@ -464,7 +474,8 @@ hcall_real_cont: /* Check for mediated interrupts (could be done earlier really ...) */ cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL bne+ 1f - ld r5,VCPU_LPCR(r9) + ld r5,VCPU_KVM(r9) + ld r5,KVM_LPCR(r5) andi. r0,r11,MSR_EE beq 1f andi. r0,r5,LPCR_MER diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c549664c987..72c506505fa4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -211,6 +211,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_SMT: r = threads_per_core; break; + case KVM_CAP_PPC_RMA: + r = 1; + break; #endif default: r = 0; @@ -673,6 +676,16 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); goto out; } + + case KVM_ALLOCATE_RMA: { + struct kvm *kvm = filp->private_data; + struct kvm_allocate_rma rma; + + r = kvm_vm_ioctl_allocate_rma(kvm, &rma); + if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) + r = -EFAULT; + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ default: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index e2a378d97160..2c366b52f505 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -552,6 +552,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_BOOKE_SREGS 62 #define KVM_CAP_SPAPR_TCE 63 #define KVM_CAP_PPC_SMT 64 +#define KVM_CAP_PPC_RMA 65 #ifdef KVM_CAP_IRQ_ROUTING @@ -755,6 +756,8 @@ struct kvm_clock_data { #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 969391c58a4efb8411d6881179945f425ad9cbb5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:26:11 +0000 Subject: powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to indicate that we have a usable hypervisor mode, and another to indicate that the processor conforms to PowerISA version 2.06. We also add another bit to indicate that the processor conforms to ISA version 2.01 and set that for PPC970 and derivatives. Some PPC970 chips (specifically those in Apple machines) have a hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode is not useful in the sense that there is no way to run any code in supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1 bits in HID4 are always 0, and we use that as a way of detecting that hypervisor mode is not useful. Where we have a feature section in assembly code around code that only applies on POWER7 in hypervisor mode, we use a construct like END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) The definition of END_FTR_SECTION_IFSET is such that the code will be enabled (not overwritten with nops) only if all bits in the provided mask are set. Note that the CPU feature check in __tlbie() only needs to check the ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called if we are running bare-metal, i.e. in hypervisor mode. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/cputable.h | 14 ++++++++------ arch/powerpc/include/asm/reg.h | 16 ++++++++++++---- arch/powerpc/kernel/cpu_setup_power7.S | 4 ++-- arch/powerpc/kernel/cpu_setup_ppc970.S | 26 ++++++++++++++++++++++---- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 ++- arch/powerpc/kvm/book3s_hv.c | 3 ++- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- arch/powerpc/kvm/book3s_segment.S | 2 +- arch/powerpc/mm/hash_native_64.c | 4 ++-- 11 files changed, 56 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index c0d842cfd012..e30442c539ce 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -179,8 +179,9 @@ extern const char *powerpc_base_platform; #define LONG_ASM_CONST(x) 0 #endif - -#define CPU_FTR_HVMODE_206 LONG_ASM_CONST(0x0000000800000000) +#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000200000000) +#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000400000000) +#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000800000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000) @@ -401,9 +402,10 @@ extern const char *powerpc_base_platform; CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ - CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS) + CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \ + CPU_FTR_HVMODE) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -417,13 +419,13 @@ extern const char *powerpc_base_platform; CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_ICSWX | CPU_FTR_CFAR) + CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 20a053c14270..ddbe57ae8584 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -307,6 +307,7 @@ #define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ #define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ #define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ +#define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */ #define HID0_EMCP (1<<31) /* Enable Machine Check pin */ #define HID0_EBA (1<<29) /* Enable Bus Address Parity */ #define HID0_EBD (1<<28) /* Enable Bus Data Parity */ @@ -362,6 +363,13 @@ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ #define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define HID4_LPES0 (1ul << (63-0)) /* LPAR env. sel. bit 0 */ +#define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ +#define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */ +#define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */ +#define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */ +#define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */ +#define HID4_LPID1_SH 0 /* partition ID top 2 bits */ #define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ #define SPRN_HID5 0x3F6 /* 970 HID5 */ #define SPRN_HID6 0x3F9 /* BE HID 6 */ @@ -811,28 +819,28 @@ mfspr rX,SPRN_SPRG_PACA; \ FTR_SECTION_ELSE_NESTED(66); \ mfspr rX,SPRN_SPRG_HPACA; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #define SET_PACA(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ mtspr SPRN_SPRG_PACA,rX; \ FTR_SECTION_ELSE_NESTED(66); \ mtspr SPRN_SPRG_HPACA,rX; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #define GET_SCRATCH0(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ mfspr rX,SPRN_SPRG_SCRATCH0; \ FTR_SECTION_ELSE_NESTED(66); \ mfspr rX,SPRN_SPRG_HSCRATCH0; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #define SET_SCRATCH0(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ mtspr SPRN_SPRG_SCRATCH0,rX; \ FTR_SECTION_ELSE_NESTED(66); \ mtspr SPRN_SPRG_HSCRATCH0,rX; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #else /* CONFIG_PPC_BOOK3S_64 */ #define GET_SCRATCH0(rX) mfspr rX,SPRN_SPRG_SCRATCH0 diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index 2ef6749688e9..76797c5105d6 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -45,12 +45,12 @@ _GLOBAL(__restore_cpu_power7) blr __init_hvmode_206: - /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ mfmsr r3 rldicl. r0,r3,4,63 bnelr ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) xor r5,r5,r6 std r5,CPU_SPEC_FEATURES(r4) blr diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S index 27f2507279d8..12fac8df01c5 100644 --- a/arch/powerpc/kernel/cpu_setup_ppc970.S +++ b/arch/powerpc/kernel/cpu_setup_ppc970.S @@ -76,7 +76,7 @@ _GLOBAL(__setup_cpu_ppc970) /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beqlr + beq no_hv_mode mfspr r0,SPRN_HID0 li r11,5 /* clear DOZE and SLEEP */ @@ -90,7 +90,7 @@ _GLOBAL(__setup_cpu_ppc970MP) /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beqlr + beq no_hv_mode mfspr r0,SPRN_HID0 li r11,0x15 /* clear DOZE and SLEEP */ @@ -109,6 +109,14 @@ load_hids: sync isync + /* Try to set LPES = 01 in HID4 */ + mfspr r0,SPRN_HID4 + clrldi r0,r0,1 /* clear LPES0 */ + ori r0,r0,HID4_LPES1 /* set LPES1 */ + sync + mtspr SPRN_HID4,r0 + isync + /* Save away cpu state */ LOAD_REG_ADDR(r5,cpu_state_storage) @@ -117,11 +125,21 @@ load_hids: std r3,CS_HID0(r5) mfspr r3,SPRN_HID1 std r3,CS_HID1(r5) - mfspr r3,SPRN_HID4 - std r3,CS_HID4(r5) + mfspr r4,SPRN_HID4 + std r4,CS_HID4(r5) mfspr r3,SPRN_HID5 std r3,CS_HID5(r5) + /* See if we successfully set LPES1 to 1; if not we are in Apple mode */ + andi. r4,r4,HID4_LPES1 + bnelr + +no_hv_mode: + /* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */ + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) + andc r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) blr /* Called with no MMU context (typically MSR:IR/DR off) to diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5bc06fdfa6c0..a5345380bef3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -75,7 +75,7 @@ BEGIN_FTR_SECTION b .power7_wakeup_noloss 2: b .power7_wakeup_loss 9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) @@ -173,7 +173,7 @@ hardware_interrupt_hv: _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE_206) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index efeb88184182..0a5a899846bb 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -167,7 +167,7 @@ void setup_paca(struct paca_struct *new_paca) * if we do a GET_PACA() before the feature fixups have been * applied */ - if (cpu_has_feature(CPU_FTR_HVMODE_206)) + if (cpu_has_feature(CPU_FTR_HVMODE)) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 96ba96a16abf..212dcd8fc50b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -128,7 +128,8 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) int kvmppc_mmu_hv_init(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE_206)) + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_206)) return -EINVAL; memset(lpid_inuse, 0, sizeof(lpid_inuse)); set_bit(mfspr(SPRN_LPID), lpid_inuse); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 04da135cae61..dc70e7745ab3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -443,7 +443,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvmppc_core_check_processor_compat(void) { - if (cpu_has_feature(CPU_FTR_HVMODE_206)) + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_206)) return 0; return -EIO; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 736df3cbbc55..7315ec6e8177 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -90,8 +90,8 @@ void kvm_rma_init(void) void *rma; struct page *pg; - /* Only do this on POWER7 in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE_206)) + /* Only do this in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE)) return; if (!kvm_rma_size || !kvm_rma_count) diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 134501691ad0..aed32e517212 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -170,7 +170,7 @@ BEGIN_FTR_SECTION mfspr r4,SPRN_HSRR1 andi. r12,r12,0x3ffd b 2f -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif 1: mfsrr0 r3 mfsrr1 r4 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index dfd764896db0..b44f5f803052 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -51,7 +51,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va &= ~0xffful; va |= ssize << 8; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) - : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) + : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; default: @@ -61,7 +61,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va |= ssize << 8; va |= 1; /* L */ asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) - : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) + : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; } -- cgit v1.2.3 From 9e368f2915601cd5bc7f5fd638b58435b018bbd7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:40:08 +0000 Subject: KVM: PPC: book3s_hv: Add support for PPC970-family processors This adds support for running KVM guests in supervisor mode on those PPC970 processors that have a usable hypervisor mode. Unfortunately, Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to 1), but the YDL PowerStation does have a usable hypervisor mode. There are several differences between the PPC970 and POWER7 in how guests are managed. These differences are accommodated using the CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature bits. Notably, on PPC970: * The LPCR, LPID or RMOR registers don't exist, and the functions of those registers are provided by bits in HID4 and one bit in HID0. * External interrupts can be directed to the hypervisor, but unlike POWER7 they are masked by MSR[EE] in non-hypervisor modes and use SRR0/1 not HSRR0/1. * There is no virtual RMA (VRMA) mode; the guest must use an RMO (real mode offset) area. * The TLB entries are not tagged with the LPID, so it is necessary to flush the whole TLB on partition switch. Furthermore, when switching partitions we have to ensure that no other CPU is executing the tlbie or tlbsync instructions in either the old or the new partition, otherwise undefined behaviour can occur. * The PMU has 8 counters (PMC registers) rather than 6. * The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist. * The SLB has 64 entries rather than 32. * There is no mediated external interrupt facility, so if we switch to a guest that has a virtual external interrupt pending but the guest has MSR[EE] = 0, we have to arrange to have an interrupt pending for it so that we can get control back once it re-enables interrupts. We do that by sending ourselves an IPI with smp_send_reschedule after hard-disabling interrupts. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/exception-64s.h | 4 + arch/powerpc/include/asm/kvm_book3s_asm.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kvm/Kconfig | 13 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 30 ++-- arch/powerpc/kvm/book3s_hv.c | 60 ++++++-- arch/powerpc/kvm/book3s_hv_builtin.c | 11 +- arch/powerpc/kvm/book3s_hv_interrupts.S | 30 ++++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 230 +++++++++++++++++++++++++++++- arch/powerpc/kvm/powerpc.c | 3 + arch/powerpc/mm/hash_native_64.c | 2 +- 14 files changed, 354 insertions(+), 42 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 69435da8f2ba..8057f4f6980f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -246,6 +246,10 @@ label##_hv: \ KVMTEST(vec); \ _SOFTEN_TEST(EXC_HV) +#define SOFTEN_TEST_HV_201(vec) \ + KVMTEST(vec); \ + _SOFTEN_TEST(EXC_STD) + #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM; \ SET_SCRATCH0(r13); /* save r13 */ \ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 9cfd5436782d..ef7b3688c3b6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -82,7 +82,7 @@ struct kvmppc_host_state { unsigned long xics_phys; u64 dabr; u64 host_mmcr[3]; - u32 host_pmc[6]; + u32 host_pmc[8]; u64 host_purr; u64 host_spurr; u64 host_dscr; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f572d9cc31bd..cc22b282d755 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -353,7 +353,7 @@ struct kvm_vcpu_arch { u32 dbsr; u64 mmcr[3]; - u32 pmc[6]; + u32 pmc[8]; #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f4aba938166b..54b935f2f5de 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -128,6 +128,7 @@ int main(void) DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a5345380bef3..41b02c792aa3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -171,7 +171,7 @@ hardware_interrupt_hv: KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_HV_201) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 5d9b78ebbaa6..eeb42e06f2d7 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -67,23 +67,20 @@ config KVM_BOOK3S_64 If unsure, say N. config KVM_BOOK3S_64_HV - bool "KVM support for POWER7 using hypervisor mode in host" + bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 ---help--- Support running unmodified book3s_64 guest kernels in - virtual machines on POWER7 processors that have hypervisor - mode available to the host. + virtual machines on POWER7 and PPC970 processors that have + hypervisor mode available to the host. If you say Y here, KVM will use the hardware virtualization facilities of POWER7 (and later) processors, meaning that guest operating systems will run at full hardware speed using supervisor and user modes. However, this also means that KVM is not usable under PowerVM (pHyp), is only usable - on POWER7 (or later) processors, and can only emulate - POWER5+, POWER6 and POWER7 processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. + on POWER7 (or later) processors and PPC970-family processors, + and cannot emulate a different processor from the host processor. If unsure, say N. diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 212dcd8fc50b..bc3a2ea94217 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -42,6 +42,8 @@ #define VRMA_PAGE_ORDER 24 #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ +/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ +#define MAX_LPID_970 63 #define NR_LPIDS (LPID_RSVD + 1) unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)]; @@ -69,9 +71,6 @@ long kvmppc_alloc_hpt(struct kvm *kvm) kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); kvm->arch.lpid = lpid; - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); - kvm->arch.host_lpid = mfspr(SPRN_LPID); - kvm->arch.host_lpcr = mfspr(SPRN_LPCR); pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); return 0; @@ -128,12 +127,24 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) int kvmppc_mmu_hv_init(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_206)) + unsigned long host_lpid, rsvd_lpid; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; + memset(lpid_inuse, 0, sizeof(lpid_inuse)); - set_bit(mfspr(SPRN_LPID), lpid_inuse); - set_bit(LPID_RSVD, lpid_inuse); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + host_lpid = mfspr(SPRN_LPID); /* POWER7 */ + rsvd_lpid = LPID_RSVD; + } else { + host_lpid = 0; /* PPC970 */ + rsvd_lpid = MAX_LPID_970; + } + + set_bit(host_lpid, lpid_inuse); + /* rsvd_lpid is reserved for use in partition switching */ + set_bit(rsvd_lpid, lpid_inuse); return 0; } @@ -157,7 +168,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; - vcpu->arch.slb_nr = 32; /* Assume POWER7 for now */ + if (cpu_has_feature(CPU_FTR_ARCH_206)) + vcpu->arch.slb_nr = 32; /* POWER7 */ + else + vcpu->arch.slb_nr = 64; mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index dc70e7745ab3..cc0d7f1b19ab 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -443,8 +443,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvmppc_core_check_processor_compat(void) { - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_HVMODE)) return 0; return -EIO; } @@ -731,6 +730,10 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINTR; } + /* On PPC970, check that we have an RMA region */ + if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) + return -EPERM; + kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -920,12 +923,14 @@ fail: } /* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7. */ + Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) { switch (rma_size) { case 32ul << 20: /* 32 MB */ - return 8; + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return 8; /* only supported on POWER7 */ + return -1; case 64ul << 20: /* 64 MB */ return 3; case 128ul << 20: /* 128 MB */ @@ -1059,6 +1064,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, mem->userspace_addr == vma->vm_start) ri = vma->vm_file->private_data; up_read(¤t->mm->mmap_sem); + if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) { + pr_err("CPU requires an RMO\n"); + return -EINVAL; + } } if (ri) { @@ -1077,10 +1086,25 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, atomic_inc(&ri->use_count); kvm->arch.rma = ri; kvm->arch.n_rma_pages = rma_size >> porder; - lpcr = kvm->arch.lpcr & ~(LPCR_VPM0 | LPCR_VRMA_L); - lpcr |= rmls << LPCR_RMLS_SH; + + /* Update LPCR and RMOR */ + lpcr = kvm->arch.lpcr; + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970; insert RMLS value (split field) in HID4 */ + lpcr &= ~((1ul << HID4_RMLS0_SH) | + (3ul << HID4_RMLS2_SH)); + lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | + ((rmls & 3) << HID4_RMLS2_SH); + /* RMOR is also in HID4 */ + lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) + << HID4_RMOR_SH; + } else { + /* POWER7 */ + lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); + lpcr |= rmls << LPCR_RMLS_SH; + kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + } kvm->arch.lpcr = lpcr; - kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); } @@ -1151,11 +1175,25 @@ int kvmppc_core_init_vm(struct kvm *kvm) kvm->arch.rma = NULL; kvm->arch.n_rma_pages = 0; - lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES); - lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | - LPCR_VPM0 | LPCR_VRMA_L; - kvm->arch.lpcr = lpcr; + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970; HID4 is effectively the LPCR */ + unsigned long lpid = kvm->arch.lpid; + kvm->arch.host_lpid = 0; + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); + lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); + lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | + ((lpid & 0xf) << HID4_LPID5_SH); + } else { + /* POWER7; init LPCR for virtual RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VRMA_L; + } + kvm->arch.lpcr = lpcr; return 0; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7315ec6e8177..d43120355eec 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -55,12 +55,14 @@ static LIST_HEAD(free_rmas); static DEFINE_SPINLOCK(rma_lock); /* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7. */ + Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) { switch (rma_size) { case 32ul << 20: /* 32 MB */ - return 8; + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return 8; /* only supported on POWER7 */ + return -1; case 64ul << 20: /* 64 MB */ return 3; case 128ul << 20: /* 128 MB */ @@ -90,8 +92,9 @@ void kvm_rma_init(void) void *rma; struct page *pg; - /* Only do this in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE)) + /* Only do this on PPC970 in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) return; if (!kvm_rma_size || !kvm_rma_count) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 532afaf19841..3f7b674dd4bf 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -50,8 +50,10 @@ _GLOBAL(__kvmppc_vcore_entry) SAVE_NVGPRS(r1) /* Save host DSCR */ +BEGIN_FTR_SECTION mfspr r3, SPRN_DSCR std r3, HSTATE_DSCR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save host DABR */ mfspr r3, SPRN_DABR @@ -86,12 +88,20 @@ _GLOBAL(__kvmppc_vcore_entry) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, HSTATE_PMC(r13) stw r5, HSTATE_PMC + 4(r13) stw r6, HSTATE_PMC + 8(r13) stw r7, HSTATE_PMC + 12(r13) stw r8, HSTATE_PMC + 16(r13) stw r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + stw r10, HSTATE_PMC + 24(r13) + stw r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 31: /* @@ -105,6 +115,26 @@ _GLOBAL(__kvmppc_vcore_entry) add r8,r8,r7 std r8,HSTATE_DECEXP(r13) + /* + * On PPC970, if the guest vcpu has an external interrupt pending, + * send ourselves an IPI so as to interrupt the guest once it + * enables interrupts. (It must have interrupts disabled, + * otherwise we would already have delivered the interrupt.) + */ +BEGIN_FTR_SECTION + ld r0, VCPU_PENDING_EXC(r4) + li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) + oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + and. r0, r0, r7 + beq 32f + mr r31, r4 + lhz r3, PACAPACAINDEX(r13) + bl smp_send_reschedule + nop + mr r4, r31 +32: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* Jump to partition switch code */ bl .kvmppc_hv_entry_trampoline nop diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index edb0aae901a3..fcfe6b055558 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -56,7 +56,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* only handle 4k, 64k and 16M pages for now */ porder = 12; if (pteh & HPTE_V_LARGE) { - if ((ptel & 0xf000) == 0x1000) { + if (cpu_has_feature(CPU_FTR_ARCH_206) && + (ptel & 0xf000) == 0x1000) { /* 64k page */ porder = 16; } else if ((ptel & 0xff000) == 0) { @@ -126,7 +127,8 @@ static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, va_low &= 0x7ff; if (v & HPTE_V_LARGE) { rb |= 1; /* L field */ - if (r & 0xff000) { + if (cpu_has_feature(CPU_FTR_ARCH_206) && + (r & 0xff000)) { /* non-16MB large page, must be 64k */ /* (masks depend on page size) */ rb |= 0x1000; /* page encoding in LP field */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9ee223c35285..6dd33581a228 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -148,12 +148,20 @@ kvmppc_hv_entry: lwz r7, VCPU_PMC + 12(r4) lwz r8, VCPU_PMC + 16(r4) lwz r9, VCPU_PMC + 20(r4) +BEGIN_FTR_SECTION + lwz r10, VCPU_PMC + 24(r4) + lwz r11, VCPU_PMC + 28(r4) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r5 mtspr SPRN_PMC3, r6 mtspr SPRN_PMC4, r7 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) @@ -165,9 +173,11 @@ kvmppc_hv_entry: /* Load up FP, VMX and VSX registers */ bl kvmppc_load_fp +BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) mtspr SPRN_DSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* * Set the decrementer to the guest decrementer. @@ -210,6 +220,7 @@ kvmppc_hv_entry: mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 +BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) @@ -217,6 +228,7 @@ kvmppc_hv_entry: mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 mtspr SPRN_AMOR,r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Clear out SLB */ li r6,0 @@ -224,6 +236,14 @@ kvmppc_hv_entry: slbia ptesync +BEGIN_FTR_SECTION + b 30f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* + * POWER7 host -> guest partition switch code. + * We don't have to lock against concurrent tlbies, + * but we do have to coordinate across hardware threads. + */ /* Increment entry count iff exit count is zero. */ ld r5,HSTATE_KVM_VCORE(r13) addi r9,r5,VCORE_ENTRY_EXIT @@ -315,9 +335,94 @@ kvmppc_hv_entry: ld r8,VCPU_SPURR(r4) mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + b 31f + + /* + * PPC970 host -> guest partition switch code. + * We have to lock against concurrent tlbies, + * using native_tlbie_lock to lock against host tlbies + * and kvm->arch.tlbie_lock to lock against guest tlbies. + * We also have to invalidate the TLB since its + * entries aren't tagged with the LPID. + */ +30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + + /* first take native_tlbie_lock */ + .section ".toc","aw" +toc_tlbie_lock: + .tc native_tlbie_lock[TC],native_tlbie_lock + .previous + ld r3,toc_tlbie_lock@toc(2) + lwz r8,PACA_LOCK_TOKEN(r13) +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ + li r0,0x18f + rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ + or r0,r7,r0 + ptesync + sync + mtspr SPRN_HID4,r0 /* switch to reserved LPID */ + isync + li r0,0 + stw r0,0(r3) /* drop native_tlbie_lock */ + + /* invalidate the whole TLB */ + li r0,256 + mtctr r0 + li r6,0 +25: tlbiel r6 + addi r6,r6,0x1000 + bdnz 25b + ptesync + + /* Take the guest's tlbie_lock */ + addi r3,r9,KVM_TLBIE_LOCK +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + ld r6,KVM_SDR1(r9) + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + + /* Set up HID4 with the guest's LPID etc. */ + sync + mtspr SPRN_HID4,r7 + isync + + /* drop the guest's tlbie_lock */ + li r0,0 + stw r0,0(r3) + + /* Check if HDEC expires soon */ + mfspr r3,SPRN_HDEC + cmpwi r3,10 + li r12,BOOK3S_INTERRUPT_HV_DECREMENTER + mr r9,r4 + blt hdec_soon + + /* Enable HDEC interrupts */ + mfspr r0,SPRN_HID0 + li r3,1 + rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 + sync + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 /* Load up guest SLB entries */ - lwz r5,VCPU_SLB_MAX(r4) +31: lwz r5,VCPU_SLB_MAX(r4) cmpwi r5,0 beq 9f mtctr r5 @@ -472,6 +577,7 @@ kvmppc_interrupt: hcall_real_cont: /* Check for mediated interrupts (could be done earlier really ...) */ +BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL bne+ 1f ld r5,VCPU_KVM(r9) @@ -481,6 +587,7 @@ hcall_real_cont: andi. r0,r5,LPCR_MER bne bounce_ext_interrupt 1: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save DEC */ mfspr r5,SPRN_DEC @@ -492,9 +599,11 @@ hcall_real_cont: /* Save HEIR (HV emulation assist reg) in last_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,-1 +BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 11: stw r3,VCPU_LAST_INST(r9) /* Save more register state */ @@ -508,8 +617,10 @@ hcall_real_cont: stw r7, VCPU_DSISR(r9) std r8, VCPU_CTR(r9) /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */ +BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE beq 6f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 7: std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) @@ -543,6 +654,7 @@ hcall_real_cont: /* * Save the guest PURR/SPURR */ +BEGIN_FTR_SECTION mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR ld r7,VCPU_PURR(r9) @@ -562,6 +674,7 @@ hcall_real_cont: add r4,r4,r6 mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) /* Clear out SLB */ li r5,0 @@ -570,6 +683,14 @@ hcall_real_cont: ptesync hdec_soon: +BEGIN_FTR_SECTION + b 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* + * POWER7 guest -> host partition switch code. + * We don't have to lock against tlbies but we do + * have to coordinate the hardware threads. + */ /* Increment the threads-exiting-guest count in the 0xff00 bits of vcore->entry_exit_count */ lwsync @@ -640,9 +761,82 @@ hdec_soon: 16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync + b 33f + + /* + * PPC970 guest -> host partition switch code. + * We have to lock against concurrent tlbies, and + * we have to flush the whole TLB. + */ +32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + + /* Take the guest's tlbie_lock */ + lwz r8,PACA_LOCK_TOKEN(r13) + addi r3,r4,KVM_TLBIE_LOCK +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ + li r0,0x18f + rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ + or r0,r7,r0 + ptesync + sync + mtspr SPRN_HID4,r0 /* switch to reserved LPID */ + isync + li r0,0 + stw r0,0(r3) /* drop guest tlbie_lock */ + + /* invalidate the whole TLB */ + li r0,256 + mtctr r0 + li r6,0 +25: tlbiel r6 + addi r6,r6,0x1000 + bdnz 25b + ptesync + + /* take native_tlbie_lock */ + ld r3,toc_tlbie_lock@toc(2) +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r6,KVM_HOST_SDR1(r4) + mtspr SPRN_SDR1,r6 /* switch to host page table */ + + /* Set up host HID4 value */ + sync + mtspr SPRN_HID4,r7 + isync + li r0,0 + stw r0,0(r3) /* drop native_tlbie_lock */ + + lis r8,0x7fff /* MAX_INT@h */ + mtspr SPRN_HDEC,r8 + + /* Disable HDEC interrupts */ + mfspr r0,SPRN_HID0 + li r3,0 + rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 + sync + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 /* load host SLB entries */ - ld r8,PACA_SLBSHADOWPTR(r13) +33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED ld r5,SLBSHADOW_SAVEAREA(r8) @@ -654,12 +848,14 @@ hdec_soon: .endr /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore host DABR and DABRX */ ld r5,HSTATE_DABR(r13) @@ -668,10 +864,12 @@ hdec_soon: mtspr SPRN_DABRX,r6 /* Switch DSCR back to host value */ +BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) std r8, VCPU_DSCR(r7) mtspr SPRN_DSCR, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save non-volatile GPRs */ std r14, VCPU_GPR(r14)(r9) @@ -735,21 +933,31 @@ hdec_soon: mfspr r6, SPRN_PMC4 mfspr r7, SPRN_PMC5 mfspr r8, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, VCPU_PMC(r9) stw r4, VCPU_PMC + 4(r9) stw r5, VCPU_PMC + 8(r9) stw r6, VCPU_PMC + 12(r9) stw r7, VCPU_PMC + 16(r9) stw r8, VCPU_PMC + 20(r9) +BEGIN_FTR_SECTION + stw r10, VCPU_PMC + 24(r9) + stw r11, VCPU_PMC + 28(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 22: /* save FP state */ mr r3, r9 bl .kvmppc_save_fp - /* Secondary threads go off to take a nap */ + /* Secondary threads go off to take a nap on POWER7 */ +BEGIN_FTR_SECTION lwz r0,VCPU_PTID(r3) cmpwi r0,0 bne secondary_nap +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* * Reload DEC. HDEC interrupts were disabled when @@ -771,12 +979,20 @@ hdec_soon: lwz r6, HSTATE_PMC + 12(r13) lwz r8, HSTATE_PMC + 16(r13) lwz r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + lwz r10, HSTATE_PMC + 24(r13) + lwz r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) @@ -802,7 +1018,7 @@ hdec_soon: cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* RFI into the highmem handler, or branch to interrupt handler */ - mfmsr r6 +12: mfmsr r6 mtctr r12 li r0, MSR_RI andc r6, r6, r0 @@ -812,7 +1028,11 @@ hdec_soon: beqctr RFI -11: mtspr SPRN_HSRR0, r8 +11: +BEGIN_FTR_SECTION + b 12b +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_HSRR0, r8 mtspr SPRN_HSRR1, r7 ba 0x500 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c506505fa4..a107c9be0fb1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -213,6 +213,9 @@ int kvm_dev_ioctl_check_extension(long ext) break; case KVM_CAP_PPC_RMA: r = 1; + /* PPC970 requires an RMA */ + if (cpu_has_feature(CPU_FTR_ARCH_201)) + r = 2; break; #endif default: diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index b44f5f803052..90039bc64119 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -37,7 +37,7 @@ #define HPTE_LOCK_BIT 3 -static DEFINE_RAW_SPINLOCK(native_tlbie_lock); +DEFINE_RAW_SPINLOCK(native_tlbie_lock); static inline void __tlbie(unsigned long va, int psize, int ssize) { -- cgit v1.2.3 From 29d03158f9d400450c17bb25ee0533b52f651d04 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 1 Jul 2011 20:26:32 +0200 Subject: KVM: PPC: Remove prog_flags Commit c8f729d408 (KVM: PPC: Deliver program interrupts right away instead of queueing them) made away with all users of prog_flags, so we can just remove it from the headers. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3f91ebd4ae43..98da010252a3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -89,7 +89,6 @@ struct kvmppc_vcpu_book3s { u64 vsid_max; #endif int context_id[SID_CONTEXTS]; - ulong prog_flags; /* flags to inject when giving a 700 trap */ struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; -- cgit v1.2.3