summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/events/intel/uncore_snbep.c37
-rw-r--r--arch/x86/include/asm/asm.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c5
-rw-r--r--arch/x86/kernel/ldt.c2
-rw-r--r--arch/x86/kernel/pci-nommu.c90
-rw-r--r--arch/x86/kernel/smpboot.c45
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kvm/svm.c31
-rw-r--r--arch/x86/kvm/vmx.c95
-rw-r--r--arch/x86/kvm/x86.c15
-rw-r--r--arch/x86/mm/dump_pagetables.c11
-rw-r--r--arch/x86/power/hibernate_64.c2
15 files changed, 178 insertions, 166 deletions
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index c98b943e58b4..77076a102e34 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3028,10 +3028,27 @@ static struct intel_uncore_type bdx_uncore_cbox = {
.format_group = &hswep_uncore_cbox_format_group,
};
+static struct intel_uncore_type bdx_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &hswep_uncore_sbox_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
+#define BDX_MSR_UNCORE_SBOX 3
+
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
&hswep_uncore_pcu,
+ &bdx_uncore_sbox,
NULL,
};
@@ -3043,10 +3060,25 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
void bdx_uncore_cpu_init(void)
{
+ int pkg = topology_phys_to_logical_pkg(0);
+
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
+ /* BDX-DE doesn't have SBOX */
+ if (boot_cpu_data.x86_model == 86) {
+ uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+ /* Detect systems with no SBOXes */
+ } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
+ struct pci_dev *pdev;
+ u32 capid4;
+
+ pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3];
+ pci_read_config_dword(pdev, 0x94, &capid4);
+ if (((capid4 >> 6) & 0x3) == 0)
+ bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+ }
hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
}
@@ -3264,6 +3296,11 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
},
+ { /* PCU.3 (for Capability registers) */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ HSWEP_PCI_PCU_3),
+ },
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 386a6900e206..219faaec51df 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,7 +136,6 @@
#endif
#ifndef __ASSEMBLY__
-#ifndef __BPF__
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -146,6 +145,5 @@
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif
-#endif
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 949c977bc4c9..c25775fad4ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1013,6 +1013,7 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
+ u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4fa4206029e3..21a114914ba4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -749,13 +749,11 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
-extern void early_trap_init(void);
void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
-extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index dde444f932c1..3b20607d581b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -215,6 +215,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
apic_id = processor->local_apic_id;
enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
+ /* Ignore invalid ID */
+ if (apic_id == 0xffffffff)
+ return 0;
+
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 3182908b7e6c..7326078eaa7a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -398,11 +398,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
* little bit simple
*/
efi_map_sz = efi_get_runtime_map_size();
- efi_map_sz = ALIGN(efi_map_sz, 16);
params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
MAX_ELFCOREHDR_STR_LEN;
params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
- kbuf.bufsz = params_cmdline_sz + efi_map_sz +
+ kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) +
sizeof(struct setup_data) +
sizeof(struct efi_setup_data);
@@ -410,7 +409,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
if (!params)
return ERR_PTR(-ENOMEM);
efi_map_offset = params_cmdline_sz;
- efi_setup_data_offset = efi_map_offset + efi_map_sz;
+ efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
/* Copy setup header onto bootparams. Documentation/x86/boot.txt */
setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d41d896481b8..c9b14020f4dd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -166,7 +166,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
*/
pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
/* Filter out unsuppored __PAGE_KERNEL* bits: */
- pgprot_val(pte_prot) |= __supported_pte_mask;
+ pgprot_val(pte_prot) &= __supported_pte_mask;
pte = pfn_pte(pfn, pte_prot);
set_pte_at(mm, va, ptep, pte);
pte_unmap_unlock(ptep, ptl);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
deleted file mode 100644
index ac7ea3a8242f..000000000000
--- a/arch/x86/kernel/pci-nommu.c
+++ /dev/null
@@ -1,90 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Fallback functions when the main IOMMU code is not compiled in. This
- code is roughly equivalent to i386. */
-#include <linux/dma-direct.h>
-#include <linux/scatterlist.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/pci.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/iommu.h>
-#include <asm/dma.h>
-
-#define NOMMU_MAPPING_ERROR 0
-
-static int
-check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
-{
- if (hwdev && !dma_capable(hwdev, bus, size)) {
- if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
- printk(KERN_ERR
- "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
- name, (long long)bus, size,
- (long long)*hwdev->dma_mask);
- return 0;
- }
- return 1;
-}
-
-static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
- WARN_ON(size == 0);
- if (!check_addr("map_single", dev, bus, size))
- return NOMMU_MAPPING_ERROR;
- return bus;
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- WARN_ON(nents == 0 || sg[0].length == 0);
-
- for_each_sg(sg, s, nents, i) {
- BUG_ON(!sg_page(s));
- s->dma_address = sg_phys(s);
- if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
- return 0;
- s->dma_length = s->length;
- }
- return nents;
-}
-
-static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == NOMMU_MAPPING_ERROR;
-}
-
-const struct dma_map_ops nommu_dma_ops = {
- .alloc = dma_generic_alloc_coherent,
- .free = dma_generic_free_coherent,
- .map_sg = nommu_map_sg,
- .map_page = nommu_map_page,
- .is_phys = 1,
- .mapping_error = nommu_mapping_error,
- .dma_supported = x86_dma_supported,
-};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ff99e2b6fc54..45175b81dd5b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -77,6 +77,8 @@
#include <asm/i8259.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -390,15 +392,47 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+/*
+ * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
+ *
+ * These are Intel CPUs that enumerate an LLC that is shared by
+ * multiple NUMA nodes. The LLC on these systems is shared for
+ * off-package data access but private to the NUMA node (half
+ * of the package) for on-package access.
+ *
+ * CPUID (the source of the information about the LLC) can only
+ * enumerate the cache as being shared *or* unshared, but not
+ * this particular configuration. The CPU in this case enumerates
+ * the cache to be shared across the entire package (spanning both
+ * NUMA nodes).
+ */
+
+static const struct x86_cpu_id snc_cpu[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X },
+ {}
+};
+
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
- if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
- per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
- return topology_sane(c, o, "llc");
+ /* Do not match if we do not have a valid APICID for cpu: */
+ if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
+ return false;
- return false;
+ /* Do not match if LLC id does not match: */
+ if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
+ return false;
+
+ /*
+ * Allow the SNC topology without warning. Return of false
+ * means 'c' does not share the LLC of 'o'. This will be
+ * reflected to userspace.
+ */
+ if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
+ return false;
+
+ return topology_sane(c, o, "llc");
}
/*
@@ -456,7 +490,8 @@ static struct sched_domain_topology_level x86_topology[] = {
/*
* Set if a package/die has multiple NUMA nodes inside.
- * AMD Magny-Cours and Intel Cluster-on-Die have this.
+ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
+ * Sub-NUMA Clustering have this.
*/
static bool x86_has_numa_in_package;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ef32297ff17e..91e6da48cbb6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -317,7 +317,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
hpet2 -= hpet1;
tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
do_div(tmp, 1000000);
- do_div(deltatsc, tmp);
+ deltatsc = div64_u64(deltatsc, tmp);
return (unsigned long) deltatsc;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b58787daf9f8..1fc05e428aba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1423,12 +1423,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
+static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_guest_mode(vcpu))
+ return svm->nested.hsave->control.tsc_offset;
+
+ return vcpu->arch.tsc_offset;
+}
+
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
if (is_guest_mode(vcpu)) {
+ /* Write L1's TSC offset. */
g_tsc_offset = svm->vmcb->control.tsc_offset -
svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
@@ -3322,6 +3333,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
+ svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -3482,10 +3494,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
/* We don't want to see VMMCALLs from a nested guest */
clr_intercept(svm, INTERCEPT_VMMCALL);
+ svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
@@ -4035,12 +4049,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
struct vcpu_svm *svm = to_svm(vcpu);
switch (msr_info->index) {
- case MSR_IA32_TSC: {
- msr_info->data = svm->vmcb->control.tsc_offset +
- kvm_scale_tsc(vcpu, rdtsc());
-
- break;
- }
case MSR_STAR:
msr_info->data = svm->vmcb->save.star;
break;
@@ -4193,9 +4201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->vmcb->save.g_pat = data;
mark_dirty(svm->vmcb, VMCB_NPT);
break;
- case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, msr);
- break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
@@ -5265,9 +5270,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
}
if (!ret && svm) {
- trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
- host_irq, e->gsi,
- vcpu_info.vector,
+ trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
+ e->gsi, vcpu_info.vector,
vcpu_info.pi_desc_addr, set);
}
@@ -7102,6 +7106,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
+ .read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aafcc9881e88..aa66ccd6ed6c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2880,18 +2880,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx_update_msr_bitmap(&vmx->vcpu);
}
-/*
- * reads and returns guest's timestamp counter "register"
- * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
- * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
- */
-static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
+static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
{
- u64 host_tsc, tsc_offset;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- host_tsc = rdtsc();
- tsc_offset = vmcs_read64(TSC_OFFSET);
- return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
+ if (is_guest_mode(vcpu) &&
+ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+ return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
+
+ return vcpu->arch.tsc_offset;
}
/*
@@ -3524,9 +3521,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info);
- case MSR_IA32_TSC:
- msr_info->data = guest_read_tsc(vcpu);
- break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
@@ -3646,9 +3640,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
- case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, msr_info);
- break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
@@ -10608,6 +10599,16 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
return true;
}
+static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
+ !page_address_valid(vcpu, vmcs12->apic_access_addr))
+ return -EINVAL;
+ else
+ return 0;
+}
+
static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -11176,11 +11177,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
- vmcs_write64(TSC_OFFSET,
- vcpu->arch.tsc_offset + vmcs12->tsc_offset);
- else
- vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
@@ -11299,6 +11297,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -11420,6 +11421,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
u32 msr_entry_idx;
u32 exit_qual;
+ int r;
enter_guest_mode(vcpu);
@@ -11429,26 +11431,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
vmx_segment_cache_clear(vmx);
- if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
- leave_guest_mode(vcpu);
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- nested_vmx_entry_failure(vcpu, vmcs12,
- EXIT_REASON_INVALID_STATE, exit_qual);
- return 1;
- }
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ vcpu->arch.tsc_offset += vmcs12->tsc_offset;
+
+ r = EXIT_REASON_INVALID_STATE;
+ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+ goto fail;
nested_get_vmcs12_pages(vcpu, vmcs12);
+ r = EXIT_REASON_MSR_LOAD_FAIL;
msr_entry_idx = nested_vmx_load_msr(vcpu,
vmcs12->vm_entry_msr_load_addr,
vmcs12->vm_entry_msr_load_count);
- if (msr_entry_idx) {
- leave_guest_mode(vcpu);
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- nested_vmx_entry_failure(vcpu, vmcs12,
- EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
- return 1;
- }
+ if (msr_entry_idx)
+ goto fail;
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -11457,6 +11454,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
* the success flag) when L2 exits (see nested_vmx_vmexit()).
*/
return 0;
+
+fail:
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+ leave_guest_mode(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+ nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
+ return 1;
}
/*
@@ -12028,6 +12033,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
leave_guest_mode(vcpu);
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+
if (likely(!vmx->fail)) {
if (exit_reason == -1)
sync_vmcs12(vcpu, vmcs12);
@@ -12224,10 +12232,16 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 tscl = rdtsc();
- u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
- u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+ struct vcpu_vmx *vmx;
+ u64 tscl, guest_tscl, delta_tsc;
+
+ if (kvm_mwait_in_guest(vcpu->kvm))
+ return -EOPNOTSUPP;
+
+ vmx = to_vmx(vcpu);
+ tscl = rdtsc();
+ guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+ delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
/* Convert to host delta tsc if tsc scaling is enabled */
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
@@ -12533,7 +12547,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
- trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
+ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
vcpu_info.vector, vcpu_info.pi_desc_addr, set);
if (set)
@@ -12712,6 +12726,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+ .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset,
.set_tdp_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b2ff74b12ec4..51ecd381793b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1490,7 +1490,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
- u64 curr_offset = vcpu->arch.tsc_offset;
+ u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
@@ -1532,7 +1532,9 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+
+ return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
@@ -2362,6 +2364,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr_info);
+ break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
return 1;
@@ -2605,6 +2610,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_UCODE_REV:
msr_info->data = vcpu->arch.microcode_version;
break;
+ case MSR_IA32_TSC:
+ msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+ break;
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2819,7 +2827,8 @@ out:
static inline bool kvm_can_mwait_in_guest(void)
{
return boot_cpu_has(X86_FEATURE_MWAIT) &&
- !boot_cpu_has_bug(X86_BUG_MONITOR);
+ !boot_cpu_has_bug(X86_BUG_MONITOR) &&
+ boot_cpu_has(X86_FEATURE_ARAT);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..cc7ff5957194 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 48b14b534897..ccf4a49bb065 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -98,7 +98,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
} else {
/* No p4d for 4-level paging: point the pgd to the pud page table */
- pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
+ pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));
set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
}