diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-20 10:44:05 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-20 10:44:05 -0800 |
commit | 6a447b0e3151893f6d4a889956553c06d2e775c6 (patch) | |
tree | 0f0c149c03dd8c2e9a5fbe01d6de528b2724893e /include | |
parent | f4a2f7866faaf89ea1595b136e01fcb336b46aab (diff) | |
parent | d45f89f7437d0f2c8275b4434096164db106384d (diff) | |
download | linux-6a447b0e3151893f6d4a889956553c06d2e775c6.tar.bz2 |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"Much x86 work was pushed out to 5.12, but ARM more than made up for it.
ARM:
- PSCI relay at EL2 when "protected KVM" is enabled
- New exception injection code
- Simplification of AArch32 system register handling
- Fix PMU accesses when no PMU is enabled
- Expose CSV3 on non-Meltdown hosts
- Cache hierarchy discovery fixes
- PV steal-time cleanups
- Allow function pointers at EL2
- Various host EL2 entry cleanups
- Simplification of the EL2 vector allocation
s390:
- memcg accouting for s390 specific parts of kvm and gmap
- selftest for diag318
- new kvm_stat for when async_pf falls back to sync
x86:
- Tracepoints for the new pagetable code from 5.10
- Catch VFIO and KVM irqfd events before userspace
- Reporting dirty pages to userspace with a ring buffer
- SEV-ES host support
- Nested VMX support for wait-for-SIPI activity state
- New feature flag (AVX512 FP16)
- New system ioctl to report Hyper-V-compatible paravirtualization features
Generic:
- Selftest improvements"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (171 commits)
KVM: SVM: fix 32-bit compilation
KVM: SVM: Add AP_JUMP_TABLE support in prep for AP booting
KVM: SVM: Provide support to launch and run an SEV-ES guest
KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests
KVM: SVM: Provide support for SEV-ES vCPU loading
KVM: SVM: Provide support for SEV-ES vCPU creation/loading
KVM: SVM: Update ASID allocation to support SEV-ES guests
KVM: SVM: Set the encryption mask for the SVM host save area
KVM: SVM: Add NMI support for an SEV-ES guest
KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest
KVM: SVM: Do not report support for SMM for an SEV-ES guest
KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES
KVM: SVM: Add support for CR8 write traps for an SEV-ES guest
KVM: SVM: Add support for CR4 write traps for an SEV-ES guest
KVM: SVM: Add support for CR0 write traps for an SEV-ES guest
KVM: SVM: Add support for EFER write traps for an SEV-ES guest
KVM: SVM: Support string IO operations for an SEV-ES guest
KVM: SVM: Support MMIO for an SEV-ES guest
KVM: SVM: Create trace events for VMGEXIT MSR protocol processing
KVM: SVM: Create trace events for VMGEXIT processing
...
Diffstat (limited to 'include')
-rw-r--r-- | include/kvm/arm_pmu.h | 3 | ||||
-rw-r--r-- | include/kvm/arm_vgic.h | 1 | ||||
-rw-r--r-- | include/linux/eventfd.h | 6 | ||||
-rw-r--r-- | include/linux/irqchip/arm-gic-v4.h | 4 | ||||
-rw-r--r-- | include/linux/kvm_dirty_ring.h | 103 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 21 | ||||
-rw-r--r-- | include/linux/psci.h | 9 | ||||
-rw-r--r-- | include/linux/wait.h | 12 | ||||
-rw-r--r-- | include/trace/events/kvm.h | 63 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 56 |
10 files changed, 271 insertions, 7 deletions
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1d94acd0bc85..fc85f50fa0e9 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -24,13 +24,11 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); - bool ready; bool created; bool irq_level; struct irq_work overflow_work; }; -#define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -61,7 +59,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; -#define kvm_arm_pmu_v3_ready(v) (false) #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a8d8fdcd3723..3d74f1060bd1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -402,6 +402,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); int vgic_v4_load(struct kvm_vcpu *vcpu); +void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index dc4fd8a6644d..fa0a524baed0 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -41,6 +41,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); DECLARE_PER_CPU(int, eventfd_wake_count); @@ -82,6 +83,11 @@ static inline bool eventfd_signal_count(void) return false; } +static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + +} + #endif #endif /* _LINUX_EVENTFD_H */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b60..943c3411ca10 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { irq_hw_number_t vpe_db_lpi; /* VPE resident */ bool resident; + /* VPT parse complete */ + bool ready; union { /* GICv4.0 implementations */ struct { @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h new file mode 100644 index 000000000000..120e5e90fa1d --- /dev/null +++ b/include/linux/kvm_dirty_ring.h @@ -0,0 +1,103 @@ +#ifndef KVM_DIRTY_RING_H +#define KVM_DIRTY_RING_H + +#include <linux/kvm.h> + +/** + * kvm_dirty_ring: KVM internal dirty ring structure + * + * @dirty_index: free running counter that points to the next slot in + * dirty_ring->dirty_gfns, where a new dirty page should go + * @reset_index: free running counter that points to the next dirty page + * in dirty_ring->dirty_gfns for which dirty trap needs to + * be reenabled + * @size: size of the compact list, dirty_ring->dirty_gfns + * @soft_limit: when the number of dirty pages in the list reaches this + * limit, vcpu that owns this ring should exit to userspace + * to allow userspace to harvest all the dirty pages + * @dirty_gfns: the array to keep the dirty gfns + * @index: index of this dirty ring + */ +struct kvm_dirty_ring { + u32 dirty_index; + u32 reset_index; + u32 size; + u32 soft_limit; + struct kvm_dirty_gfn *dirty_gfns; + int index; +}; + +#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +/* + * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * not be included as well, so define these nop functions for the arch. + */ +static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return 0; +} + +static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, + int index, u32 size) +{ + return 0; +} + +static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + return NULL; +} + +static inline int kvm_dirty_ring_reset(struct kvm *kvm, + struct kvm_dirty_ring *ring) +{ + return 0; +} + +static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, + u32 slot, u64 offset) +{ +} + +static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, + u32 offset) +{ + return NULL; +} + +static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ +} + +static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return true; +} + +#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +u32 kvm_dirty_ring_get_rsvd_entries(void); +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); + +/* + * returns =0: successfully pushed + * <0: unable to push, need to wait + */ +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); + +/* for use in vm_operations_struct */ +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); + +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +#endif /* KVM_DIRTY_RING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7f2e2a09ebbd..f3b1013fb22c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #include <linux/kvm_types.h> #include <asm/kvm_host.h> +#include <linux/kvm_dirty_ring.h> #ifndef KVM_MAX_VCPU_ID #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS @@ -319,6 +320,7 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; + struct kvm_dirty_ring dirty_ring; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -349,6 +351,11 @@ struct kvm_memory_slot { u16 as_id; }; +static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; +} + static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; @@ -505,6 +512,7 @@ struct kvm { struct srcu_struct irq_srcu; pid_t userspace_pid; unsigned int max_halt_poll_ns; + u32 dirty_ring_size; }; #define kvm_err(fmt, ...) \ @@ -792,13 +800,12 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, offset_in_page(__gpa), v); \ }) -int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); @@ -1478,4 +1485,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +/* + * This defines how many reserved entries we want to keep before we + * kick the vcpu to the userspace to avoid dirty ring full. This + * value can be tuned to higher if e.g. PML is enabled on the host. + */ +#define KVM_DIRTY_RING_RSVD_ENTRIES 64 + +/* Max number of entries allowed for each kvm dirty ring */ +#define KVM_DIRTY_RING_MAX_ENTRIES 65536 + #endif diff --git a/include/linux/psci.h b/include/linux/psci.h index 2a1bfb890e58..4ca0060a3fc4 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -34,6 +34,15 @@ struct psci_operations { extern struct psci_operations psci_ops; +struct psci_0_1_function_ids { + u32 cpu_suspend; + u32 cpu_on; + u32 cpu_off; + u32 migrate; +}; + +struct psci_0_1_function_ids get_psci_0_1_function_ids(void); + #if defined(CONFIG_ARM_PSCI_FW) int __init psci_dt_init(void); #else diff --git a/include/linux/wait.h b/include/linux/wait.h index 27fb99cfeb02..fe10e8570a52 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -22,6 +22,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_BOOKMARK 0x04 #define WQ_FLAG_CUSTOM 0x08 #define WQ_FLAG_DONE 0x10 +#define WQ_FLAG_PRIORITY 0x20 /* * A single wait-queue entry structure: @@ -164,11 +165,20 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->entry, &wq_head->head); + struct list_head *head = &wq_head->head; + struct wait_queue_entry *wq; + + list_for_each_entry(wq, &wq_head->head, entry) { + if (!(wq->flags & WQ_FLAG_PRIORITY)) + break; + head = &wq->entry; + } + list_add(&wq_entry->entry, head); } /* diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 26cfb0fa8e7e..49d7d0fe29f6 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns, #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ trace_kvm_halt_poll_ns(false, vcpu_id, new, old) +TRACE_EVENT(kvm_dirty_ring_push, + TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset), + TP_ARGS(ring, slot, offset), + + TP_STRUCT__entry( + __field(int, index) + __field(u32, dirty_index) + __field(u32, reset_index) + __field(u32, slot) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->index = ring->index; + __entry->dirty_index = ring->dirty_index; + __entry->reset_index = ring->reset_index; + __entry->slot = slot; + __entry->offset = offset; + ), + + TP_printk("ring %d: dirty 0x%x reset 0x%x " + "slot %u offset 0x%llx (used %u)", + __entry->index, __entry->dirty_index, + __entry->reset_index, __entry->slot, __entry->offset, + __entry->dirty_index - __entry->reset_index) +); + +TRACE_EVENT(kvm_dirty_ring_reset, + TP_PROTO(struct kvm_dirty_ring *ring), + TP_ARGS(ring), + + TP_STRUCT__entry( + __field(int, index) + __field(u32, dirty_index) + __field(u32, reset_index) + ), + + TP_fast_assign( + __entry->index = ring->index; + __entry->dirty_index = ring->dirty_index; + __entry->reset_index = ring->reset_index; + ), + + TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)", + __entry->index, __entry->dirty_index, __entry->reset_index, + __entry->dirty_index - __entry->reset_index) +); + +TRACE_EVENT(kvm_dirty_ring_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + ), + + TP_printk("vcpu %d", __entry->vcpu_id) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ca41220b40b8..886802b8ffba 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1514,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ @@ -1557,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ |