From 2f699a59f399d65d51df6eb916bf2e0f7c6f8148 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 13 Aug 2014 14:39:44 +0530 Subject: KVM: PPC: BOOKE: Emulate debug registers and exception This patch emulates debug registers and debug exception to support guest using debug resource. This enables running gdb/kgdb etc in guest. On BOOKE architecture we cannot share debug resources between QEMU and guest because: When QEMU is using debug resources then debug exception must be always enabled. To achieve this we set MSR_DE and also set MSRP_DEP so guest cannot change MSR_DE. When emulating debug resource for guest we want guest to control MSR_DE (enable/disable debug interrupt on need). So above mentioned two configuration cannot be supported at the same time. So the result is that we cannot share debug resources between QEMU and Guest on BOOKE architecture. In the current design QEMU gets priority over guest, this means that if QEMU is using debug resources then guest cannot use them and if guest is using debug resource then QEMU can overwrite them. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..05e58b630601 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -206,6 +206,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu); +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu); + union kvmppc_one_reg { u32 wval; u64 dval; -- cgit v1.2.3 From d02d4d156e72baf9a6628c76eb53019124d3c82f Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 1 Sep 2014 17:19:56 +0300 Subject: KVM: PPC: Remove the tasklet used by the hrtimer Powerpc timer implementation is a copycat version of s390. Now that they removed the tasklet with commit ea74c0ea1b24a6978a6ebc80ba4dbc7b7848b32d follow this optimization. Signed-off-by: Mihai Caraman Signed-off-by: Bogdan Purcareata Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 4 +--- arch/powerpc/kvm/booke.c | 4 +--- arch/powerpc/kvm/powerpc.c | 8 +------- 5 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bad3491cc32c..d2432401d301 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -606,7 +606,6 @@ struct kvm_vcpu_arch { u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ struct hrtimer dec_timer; - struct tasklet_struct tasklet; u64 dec_jiffies; u64 dec_expires; unsigned long pending_exceptions; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 05e58b630601..73063ef53694 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -89,7 +89,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); -extern void kvmppc_decrementer_func(unsigned long data); +extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 1b5adda2a224..f23b6a553082 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -718,10 +718,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -EINVAL; } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4e81e6be11a..97ec5b7fce9c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1822,10 +1822,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) update_timer_ints(vcpu); } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - if (vcpu->arch.tcr & TCR_ARE) { vcpu->arch.dec = vcpu->arch.decar; kvmppc_emulate_dec(vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0c7d1917c23d..ecf057551a2f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -638,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); @@ -664,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - tasklet_schedule(&vcpu->arch.tasklet); + kvmppc_decrementer_func(vcpu); return HRTIMER_NORESTART; } @@ -683,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int ret; hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; vcpu->arch.dec_expires = ~(u64)0; -- cgit v1.2.3 From a59c1d9e609c4bbad9ec3b238221ecf3b9ca091b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 9 Sep 2014 22:37:35 +0530 Subject: powerpc/kvm: support to handle sw breakpoint This patch adds kernel side support for software breakpoint. Design is that, by using an illegal instruction, we trap to hypervisor via Emulation Assistance interrupt, where we check for the illegal instruction and accordingly we return to Host or Guest. Patch also adds support for software breakpoint in PR KVM. Signed-off-by: Madhavan Srinivasan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++++ arch/powerpc/kvm/book3s.c | 3 ++- arch/powerpc/kvm/book3s_hv.c | 41 ++++++++++++++++++++++++++++++++++---- arch/powerpc/kvm/book3s_pr.c | 3 +++ arch/powerpc/kvm/emulate.c | 15 ++++++++++++++ 5 files changed, 63 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 73063ef53694..dbd160f16cb0 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -38,6 +38,12 @@ #include #endif +/* + * KVMPPC_INST_SW_BREAKPOINT is debug Instruction + * for supporting software breakpoint. + */ +#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00 + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index f23b6a553082..27d1b7041746 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -715,7 +715,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + vcpu->guest_debug = dbg->control; + return 0; } void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 529d10a7a36f..e63587d30b70 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } +static int kvmppc_emulate_debug_inst(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + u32 last_inst; + + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) { + /* + * Fetch failed, so return to guest and + * try executing it again. + */ + return RESUME_GUEST; + } + + if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + return RESUME_HOST; + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* * This occurs if the guest executes an illegal instruction. - * We just generate a program interrupt to the guest, since - * we don't emulate any guest instructions at this stage. + * If the guest debug is disabled, generate a program interrupt + * to the guest. If guest debug is enabled, we need to check + * whether the instruction is a software breakpoint instruction. + * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - r = RESUME_GUEST; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + r = kvmppc_emulate_debug_inst(run, vcpu); + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + } break; /* * This occurs if the guest (kernel or userspace), does something that @@ -924,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, long int i; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, 0); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..6d7370890775 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1319,6 +1319,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, int r = 0; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index e96b50d0bdab..005222b580ea 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; + case 0: + /* + * Instruction with primary opcode 0. Based on PowerISA + * these are illegal instructions. + */ + if (inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + emulated = EMULATE_EXIT_USER; + advance = 0; + } else + emulated = EMULATE_FAIL; + + break; + default: emulated = EMULATE_FAIL; } -- cgit v1.2.3 From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Mon, 22 Sep 2014 14:54:42 -0700 Subject: kvm: Fix page ageing bugs 1. We were calling clear_flush_young_notify in unmap_one, but we are within an mmu notifier invalidate range scope. The spte exists no more (due to range_start) and the accessed bit info has already been propagated (due to kvm_pfn_set_accessed). Simply call clear_flush_young. 2. We clear_flush_young on a primary MMU PMD, but this may be mapped as a collection of PTEs by the secondary MMU (e.g. during log-dirty). This required expanding the interface of the clear_flush_young mmu notifier, so a lot of code has been trivially touched. 3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate the access bit by blowing the spte. This requires proper synchronizing with MMU notifier consumers, like every other removal of spte's does. Signed-off-by: Andres Lagar-Cavilla Acked-by: Rik van Riel Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 3 ++- arch/arm64/include/asm/kvm_host.h | 3 ++- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/book3s.h | 3 ++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++-- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/e500_mmu_host.c | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 38 ++++++++++++++++++++++--------------- drivers/iommu/amd_iommu_v2.c | 6 ++++-- include/linux/mmu_notifier.h | 24 ++++++++++++++++------- mm/mmu_notifier.c | 5 +++-- mm/rmap.c | 6 +++++- virt/kvm/kvm_main.c | 5 +++-- 16 files changed, 71 insertions(+), 41 deletions(-) (limited to 'arch/powerpc/include/asm/kvm_ppc.h') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 032a8538318a..8c3f7eb62b54 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index be9970a59497..a3c671b3acc9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm, void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 604000882352..d329bc5543a2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -56,7 +56,7 @@ extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..d4a92d7cea6a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -243,7 +243,7 @@ struct kvmppc_ops { int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..c16cfbfeb781 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -851,9 +851,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm->arch.kvm_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, start, end); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d6..d2b3ec088b8c 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, + unsigned long end); extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 72c20bb16d26..81460c5359c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (!kvm->arch.using_mmu_notifiers) return 0; - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..852fcd8951c4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, return 0; } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..b1f3f630315e 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -732,7 +732,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index eeeb573fcf6f..763d273cab1d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1035,7 +1035,7 @@ asmlinkage void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 47d534066325..3201e93ebd07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, struct rmap_iterator uninitialized_var(iter); int young = 0; - /* - * In case of absence of EPT Access and Dirty Bits supports, - * emulate the accessed bit for EPT, by checking if this page has - * an EPT mapping, and clearing it if it does. On the next access, - * a new EPT mapping will be established. - * This has some overhead, but not as much as the cost of swapping - * out actively used pages or breaking up actively used hugepages. - */ - if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data); - goto out; - } + BUG_ON(!shadow_accessed_mask); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } -out: trace_kvm_age_page(gfn, level, slot, young); return young; } @@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) kvm_flush_remote_tlbs(vcpu->kvm); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); + /* + * In case of absence of EPT Access and Dirty Bits supports, + * emulate the accessed bit for EPT, by checking if this page has + * an EPT mapping, and clearing it if it does. On the next access, + * a new EPT mapping will be established. + * This has some overhead, but not as much as the cost of swapping + * out actively used pages or breaking up actively used hugepages. + */ + if (!shadow_accessed_mask) { + /* + * We are holding the kvm->mmu_lock, and we are blowing up + * shadow PTEs. MMU notifier consumers need to be kept at bay. + * This is correct as long as we don't decouple the mmu_lock + * protected regions (like invalidate_range_start|end does). + */ + kvm->mmu_notifier_seq++; + return kvm_handle_hva_range(kvm, start, end, 0, + kvm_unmap_rmapp); + } + + return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 5f578e850fc5..90d734bbf467 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn, static int mn_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { - __mn_flush_page(mn, address); + for (; start < end; start += PAGE_SIZE) + __mn_flush_page(mn, start); return 0; } diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241e..88787bb4b3b9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -57,10 +57,13 @@ struct mmu_notifier_ops { * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. + * Start-end is necessary in case the secondary MMU is mapping the page + * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { if (mm_has_notifiers(mm)) - return __mmu_notifier_clear_flush_young(mm, address); + return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } @@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PAGE_SIZE); \ __young; \ }) @@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PMD_SIZE); \ __young; \ }) @@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { return 0; } diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 950813b1eb36..2c8da9825fe3 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm) * existed or not. */ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct mmu_notifier *mn; int young = 0, id; @@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->clear_flush_young) - young |= mn->ops->clear_flush_young(mn, mm, address); + young |= mn->ops->clear_flush_young(mn, mm, start, end); } srcu_read_unlock(&srcu, id); diff --git a/mm/rmap.c b/mm/rmap.c index 3e8491c504f8..bc74e0012809 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, continue; /* don't unmap */ } - if (ptep_clear_flush_young_notify(vma, address, pte)) + /* + * No need for _notify because we're within an + * mmu_notifier_invalidate_range_ {start|end} scope. + */ + if (ptep_clear_flush_young(vma, address, pte)) continue; /* Nuke the page table entry. */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ff42b11d2b9c..0316314d48f4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -369,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -377,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - young = kvm_age_hva(kvm, address); + young = kvm_age_hva(kvm, start, end); if (young) kvm_flush_remote_tlbs(kvm); -- cgit v1.2.3