diff options
Diffstat (limited to 'arch/x86')
34 files changed, 219 insertions, 140 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4430dd489620..5851411e60fb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -179,7 +179,8 @@ ifdef CONFIG_JUMP_LABEL endif ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) - KBUILD_CFLAGS += -maccumulate-outgoing-args + # This compiler flag is not supported by Clang: + KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) endif # Stackpointer is addressed different for 32 bit and 64 bit x86 diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h index 2e59dac07f9e..d732e608e3af 100644 --- a/arch/x86/boot/compressed/error.h +++ b/arch/x86/boot/compressed/error.h @@ -1,7 +1,9 @@ #ifndef BOOT_COMPRESSED_ERROR_H #define BOOT_COMPRESSED_ERROR_H +#include <linux/compiler.h> + void warn(char *m); -void error(char *m); +void error(char *m) __noreturn; #endif /* BOOT_COMPRESSED_ERROR_H */ diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 56589d0a804b..1d78f1739087 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -70,7 +70,7 @@ static unsigned long level4p; * Due to relocation, pointers must be assigned at run time not build time. */ static struct x86_mapping_info mapping_info = { - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + .page_flag = __PAGE_KERNEL_LARGE_EXEC, }; /* Locates and clears a region for a new top level page table. */ diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 9d05c7e67f60..a45e2114a846 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -761,7 +761,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7acb51c49fec..7a9df3beb89b 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -32,6 +32,7 @@ #define _ASM_ADD __ASM_SIZE(add) #define _ASM_SUB __ASM_SIZE(sub) #define _ASM_XADD __ASM_SIZE(xadd) +#define _ASM_MUL __ASM_SIZE(mul) #define _ASM_AX __ASM_REG(ax) #define _ASM_BX __ASM_REG(bx) diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 737da62bfeb0..474eb8c66fee 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -4,8 +4,9 @@ struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void *context; /* context for alloc_pgt_page */ - unsigned long pmd_flag; /* page flag for PMD entry */ + unsigned long page_flag; /* page flag for PMD or PUD entry */ unsigned long offset; /* ident mapping offset */ + bool direct_gbpages; /* PUD level 1GB page support */ }; int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f5bddf92faba..9c761fea0c98 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1020,6 +1020,8 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); + int (*write_log_dirty)(struct kvm_vcpu *vcpu); + /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index d5a22bac9988..0ff8fe71b255 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -98,7 +98,7 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, if (bytes < 8) { if (!IS_ALIGNED(dest, 4) || (bytes != 4)) - arch_wb_cache_pmem(addr, 1); + arch_wb_cache_pmem(addr, bytes); } else { if (!IS_ALIGNED(dest, 8)) { dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index 3dec769cadf7..83b6e9a0dce4 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -4,62 +4,3 @@ include include/uapi/asm-generic/Kbuild.asm genhdr-y += unistd_32.h genhdr-y += unistd_64.h genhdr-y += unistd_x32.h -header-y += a.out.h -header-y += auxvec.h -header-y += bitsperlong.h -header-y += boot.h -header-y += bootparam.h -header-y += byteorder.h -header-y += debugreg.h -header-y += e820.h -header-y += errno.h -header-y += fcntl.h -header-y += hw_breakpoint.h -header-y += hyperv.h -header-y += ioctl.h -header-y += ioctls.h -header-y += ipcbuf.h -header-y += ist.h -header-y += kvm.h -header-y += kvm_para.h -header-y += kvm_perf.h -header-y += ldt.h -header-y += mce.h -header-y += mman.h -header-y += msgbuf.h -header-y += msr-index.h -header-y += msr.h -header-y += mtrr.h -header-y += param.h -header-y += perf_regs.h -header-y += poll.h -header-y += posix_types.h -header-y += posix_types_32.h -header-y += posix_types_64.h -header-y += posix_types_x32.h -header-y += prctl.h -header-y += processor-flags.h -header-y += ptrace-abi.h -header-y += ptrace.h -header-y += resource.h -header-y += sembuf.h -header-y += setup.h -header-y += shmbuf.h -header-y += sigcontext.h -header-y += sigcontext32.h -header-y += siginfo.h -header-y += signal.h -header-y += socket.h -header-y += sockios.h -header-y += stat.h -header-y += statfs.h -header-y += svm.h -header-y += swab.h -header-y += termbits.h -header-y += termios.h -header-y += types.h -header-y += ucontext.h -header-y += unistd.h -header-y += vm86.h -header-y += vmx.h -header-y += vsyscall.h diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ee8f11800295..bb5abe8f5fd4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -799,8 +799,9 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); - /* AMD CPUs don't reset SS attributes on SYSRET */ - set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ + if (!cpu_has(c, X86_FEATURE_XENPV)) + set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 7889ae492af0..45db4d2ebd01 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -10,7 +10,7 @@ * Author: Peter Oruba <peter.oruba@amd.com> * * Based on work by: - * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> + * Tigran Aivazian <aivazian.tigran@gmail.com> * * early loader: * Copyright (C) 2013 Advanced Micro Devices, Inc. @@ -352,8 +352,6 @@ void reload_ucode_amd(void) u32 rev, dummy; mc = (struct microcode_amd *)amd_ucode_patch; - if (!mc) - return; rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b4a4cd39b358..e53d3c909840 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -1,7 +1,7 @@ /* * CPU Microcode Update Driver for Linux * - * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> + * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com> * 2006 Shaohua Li <shaohua.li@intel.com> * 2013-2016 Borislav Petkov <bp@alien8.de> * diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 8325d8a09ab0..afdfd237b59f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1,7 +1,7 @@ /* * Intel CPU Microcode Update Driver for Linux * - * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> + * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com> * 2006 Shaohua Li <shaohua.li@intel.com> * * Intel CPU microcode early update for Linux diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index be22f5a2192e..4e3b8a587c88 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -418,6 +418,7 @@ struct legacy_pic default_legacy_pic = { }; struct legacy_pic *legacy_pic = &default_legacy_pic; +EXPORT_SYMBOL(legacy_pic); static int __init i8259A_init_ops(void) { diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index ce640428d6fe..6f5ca4ebe6e5 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -114,7 +114,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) struct x86_mapping_info info = { .alloc_pgt_page = alloc_pgt_page, .context = image, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + .page_flag = __PAGE_KERNEL_LARGE_EXEC, }; unsigned long mstart, mend; pgd_t *level4p; @@ -123,6 +123,10 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); + + if (direct_gbpages) + info.direct_gbpages = true; + for (i = 0; i < nr_pfn_mapped; i++) { mstart = pfn_mapped[i].start << PAGE_SHIFT; mend = pfn_mapped[i].end << PAGE_SHIFT; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 603a1669a2ec..0b4d3c686b1e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1225,6 +1225,21 @@ void __init setup_arch(char **cmdline_p) kasan_init(); +#ifdef CONFIG_X86_32 + /* sync back kernel address range */ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +#endif + tboot_probe(); map_vsyscall(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bb1e8cc0bc84..10edd1e69a68 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -291,11 +291,11 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_32 /* - * Sync back kernel address range. We want to make sure that - * all kernel mappings, including percpu mappings, are available - * in the smpboot asm. We can't reliably pick up percpu - * mappings using vmalloc_fault(), because exception dispatch - * needs percpu data. + * Sync back kernel address range again. We already did this in + * setup_arch(), but percpu data also needs to be available in + * the smpboot asm. We can't reliably pick up percpu mappings + * using vmalloc_fault(), because exception dispatch needs + * percpu data. */ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index d4c8011a2293..4b1724059909 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,6 +514,9 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; + if (!intel_iommu_tboot_noforce) + return 1; + if (no_iommu || swiotlb || dmar_disabled) pr_warning("Forcing Intel-IOMMU to enabled\n"); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 558676538fca..5d3376f67794 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1498,6 +1498,21 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } +/** + * kvm_arch_write_log_dirty - emulate dirty page logging + * @vcpu: Guest mode vcpu + * + * Emulate arch specific page modification logging for the + * nested hypervisor + */ +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) +{ + if (kvm_x86_ops->write_log_dirty) + return kvm_x86_ops->write_log_dirty(vcpu); + + return 0; +} + bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d8ccb32f7308..27975807cc64 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -202,4 +202,5 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 314d2071b337..56241746abbd 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -226,6 +226,10 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (level == walker->level && write_fault && !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); +#if PTTYPE == PTTYPE_EPT + if (kvm_arch_write_log_dirty(vcpu)) + return -EINVAL; +#endif pte |= PT_GUEST_DIRTY_MASK; } if (pte == orig_pte) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c5fd459c4043..c6f4ad44aa95 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -248,6 +248,7 @@ struct __packed vmcs12 { u64 xss_exit_bitmap; u64 guest_physical_address; u64 vmcs_link_pointer; + u64 pml_address; u64 guest_ia32_debugctl; u64 guest_ia32_pat; u64 guest_ia32_efer; @@ -369,6 +370,7 @@ struct __packed vmcs12 { u16 guest_ldtr_selector; u16 guest_tr_selector; u16 guest_intr_status; + u16 guest_pml_index; u16 host_es_selector; u16 host_cs_selector; u16 host_ss_selector; @@ -407,6 +409,7 @@ struct nested_vmx { /* Has the level1 guest done vmxon? */ bool vmxon; gpa_t vmxon_ptr; + bool pml_full; /* The guest-physical address of the current VMCS L1 keeps for L2 */ gpa_t current_vmptr; @@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), FIELD(GUEST_TR_SELECTOR, guest_tr_selector), FIELD(GUEST_INTR_STATUS, guest_intr_status), + FIELD(GUEST_PML_INDEX, guest_pml_index), FIELD(HOST_ES_SELECTOR, host_es_selector), FIELD(HOST_CS_SELECTOR, host_cs_selector), FIELD(HOST_SS_SELECTOR, host_ss_selector), @@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), + FIELD64(PML_ADDRESS, pml_address), FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), FIELD64(GUEST_IA32_PAT, guest_ia32_pat), FIELD64(GUEST_IA32_EFER, guest_ia32_efer), @@ -1314,6 +1319,11 @@ static inline bool report_flexpriority(void) return flexpriority_enabled; } +static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) +{ + return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low); +} + static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) { return vmcs12->cpu_based_vm_exec_control & bit; @@ -1348,6 +1358,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) vmx_xsaves_supported(); } +static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML); +} + static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) { return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); @@ -2751,8 +2766,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_1GB_PAGE_BIT; - if (enable_ept_ad_bits) + if (enable_ept_ad_bits) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_PML; vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; + } } else vmx->nested.nested_vmx_ept_caps = 0; @@ -8114,7 +8132,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_PREEMPTION_TIMER: return false; case EXIT_REASON_PML_FULL: - /* We don't expose PML support to L1. */ + /* We emulate PML support to L1. */ return false; default: return true; @@ -9364,13 +9382,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason; + unsigned long exit_qualification = vcpu->arch.exit_qualification; - if (fault->error_code & PFERR_RSVD_MASK) + if (vmx->nested.pml_full) { + exit_reason = EXIT_REASON_PML_FULL; + vmx->nested.pml_full = false; + exit_qualification &= INTR_INFO_UNBLOCK_NMI; + } else if (fault->error_code & PFERR_RSVD_MASK) exit_reason = EXIT_REASON_EPT_MISCONFIG; else exit_reason = EXIT_REASON_EPT_VIOLATION; - nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); + + nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; } @@ -9713,6 +9738,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u64 address = vmcs12->pml_address; + int maxphyaddr = cpuid_maxphyaddr(vcpu); + + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) { + if (!nested_cpu_has_ept(vmcs12) || + !IS_ALIGNED(address, 4096) || + address >> maxphyaddr) + return -EINVAL; + } + + return 0; +} + static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { @@ -9886,7 +9927,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, bool from_vmentry, u32 *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exec_control; + u32 exec_control, vmcs12_exec_ctrl; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -10017,8 +10058,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) - exec_control |= vmcs12->secondary_vm_exec_control; + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { + vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & + ~SECONDARY_EXEC_ENABLE_PML; + exec_control |= vmcs12_exec_ctrl; + } if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { vmcs_write64(EOI_EXIT_BITMAP0, @@ -10248,6 +10292,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_pml_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, vmx->nested.nested_vmx_procbased_ctls_low, vmx->nested.nested_vmx_procbased_ctls_high) || @@ -10266,6 +10313,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx->nested.nested_vmx_entry_ctls_high)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || !nested_cr3_valid(vcpu, vmcs12->host_cr3)) @@ -11143,6 +11193,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm) kvm_flush_pml_buffers(kvm); } +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12; + struct vcpu_vmx *vmx = to_vmx(vcpu); + gpa_t gpa; + struct page *page = NULL; + u64 *pml_address; + + if (is_guest_mode(vcpu)) { + WARN_ON_ONCE(vmx->nested.pml_full); + + /* + * Check if PML is enabled for the nested guest. + * Whether eptp bit 6 is set is already checked + * as part of A/D emulation. + */ + vmcs12 = get_vmcs12(vcpu); + if (!nested_cpu_has_pml(vmcs12)) + return 0; + + if (vmcs12->guest_pml_index > PML_ENTITY_NUM) { + vmx->nested.pml_full = true; + return 1; + } + + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + + page = nested_get_page(vcpu, vmcs12->pml_address); + if (!page) + return 0; + + pml_address = kmap(page); + pml_address[vmcs12->guest_pml_index--] = gpa; + kunmap(page); + nested_release_page_clean(page); + } + + return 0; +} + static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t offset, unsigned long mask) @@ -11502,6 +11592,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .slot_disable_log_dirty = vmx_slot_disable_log_dirty, .flush_log_dirty = vmx_flush_log_dirty, .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + .write_log_dirty = vmx_write_pml_buffer, .pre_block = vmx_pre_block, .post_block = vmx_post_block, diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 7e48807b2fa1..45a53dfe1859 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic) movq %r12, 3*8(%rsp) movq %r14, 4*8(%rsp) movq %r13, 5*8(%rsp) - movq %rbp, 6*8(%rsp) + movq %r15, 6*8(%rsp) movq %r8, (%rsp) movq %r9, 1*8(%rsp) @@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic) /* main loop. clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ /* r11: temp3, rdx: temp4, r12 loopcnt */ - /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ + /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */ .p2align 4 .Lloop: source @@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic) source movq 32(%rdi), %r10 source - movq 40(%rdi), %rbp + movq 40(%rdi), %r15 source movq 48(%rdi), %r14 source @@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic) adcq %r11, %rax adcq %rdx, %rax adcq %r10, %rax - adcq %rbp, %rax + adcq %r15, %rax adcq %r14, %rax adcq %r13, %rax @@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic) dest movq %r10, 32(%rsi) dest - movq %rbp, 40(%rsi) + movq %r15, 40(%rsi) dest movq %r14, 48(%rsi) dest @@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic) movq 3*8(%rsp), %r12 movq 4*8(%rsp), %r14 movq 5*8(%rsp), %r13 - movq 6*8(%rsp), %rbp + movq 6*8(%rsp), %r15 addq $7*8, %rsp ret diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index 5761a4f19455..ab2d1d73e9e7 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -5,6 +5,7 @@ * kernel starts. This file is included in the compressed kernel and * normally linked in the regular. */ +#include <asm/asm.h> #include <asm/kaslr.h> #include <asm/msr.h> #include <asm/archrandom.h> @@ -79,7 +80,7 @@ unsigned long kaslr_get_random_long(const char *purpose) } /* Circular multiply for better bit diffusion */ - asm("mul %3" + asm(_ASM_MUL "%3" : "=a" (random), "=d" (raw) : "a" (random), "rm" (mix_const)); random += raw; diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 04210a29dd60..adab1595f4bd 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page, if (pmd_present(*pmd)) continue; - set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag)); + set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag)); } } @@ -30,6 +30,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, if (next > end) next = end; + if (info->direct_gbpages) { + pud_t pudval; + + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; + pudval = __pud((addr - info->offset) | info->page_flag); + set_pud(pud, pudval); + continue; + } + if (pud_present(*pud)) { pmd = pmd_offset(pud, 0); ident_pmd_init(info, pmd, addr, next); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 41270b96403d..95651dc58e09 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -94,10 +94,10 @@ __setup("noexec32=", nonx32_setup); */ void sync_global_pgds(unsigned long start, unsigned long end) { - unsigned long address; + unsigned long addr; - for (address = start; address <= end; address += PGDIR_SIZE) { - pgd_t *pgd_ref = pgd_offset_k(address); + for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) { + pgd_t *pgd_ref = pgd_offset_k(addr); const p4d_t *p4d_ref; struct page *page; @@ -106,7 +106,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) * handle synchonization on p4d level. */ BUILD_BUG_ON(pgd_none(*pgd_ref)); - p4d_ref = p4d_offset(pgd_ref, address); + p4d_ref = p4d_offset(pgd_ref, addr); if (p4d_none(*p4d_ref)) continue; @@ -117,8 +117,8 @@ void sync_global_pgds(unsigned long start, unsigned long end) p4d_t *p4d; spinlock_t *pgt_lock; - pgd = (pgd_t *)page_address(page) + pgd_index(address); - p4d = p4d_offset(pgd, address); + pgd = (pgd_t *)page_address(page) + pgd_index(addr); + p4d = p4d_offset(pgd, addr); /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 6b7ce6279133..aca6295350f3 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -100,5 +100,6 @@ void __init initmem_init(void) printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); + __vmalloc_start_set = true; setup_bootmem_allocator(); } diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 38868adf07ea..f6ae6830b341 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -9,7 +9,7 @@ #include <linux/mmiotrace.h> static unsigned long mmio_address; -module_param(mmio_address, ulong, 0); +module_param_hw(mmio_address, ulong, iomem, 0); MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " "(or 8 MB if read_far is non-zero)."); diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 6a61194ffd58..a6e21fee22ea 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void) { struct x86_mapping_info info = { .alloc_pgt_page = alloc_pgt_page, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + .page_flag = __PAGE_KERNEL_LARGE_EXEC, .offset = __PAGE_OFFSET, }; unsigned long mstart, mend; diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index a5c9910d234f..09a085bde0d4 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -125,7 +125,7 @@ int poke_user(struct task_struct *child, long addr, long data) else if ((addr >= offsetof(struct user, u_debugreg[0])) && (addr <= offsetof(struct user, u_debugreg[7]))) { addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; + addr = addr >> 3; if ((addr == 4) || (addr == 5)) return -EIO; child->thread.arch.debugregs[addr] = data; diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 46a9df99f3c5..7e1d35b6ad5c 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -2,16 +2,9 @@ #include <linux/sched.h> #include <linux/elf.h> #include <linux/crypto.h> +#include <linux/kbuild.h> #include <asm/mman.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - void foo(void) { #include <common-offsets.h> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index a732bc2b9dfc..7cd442690f9d 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -277,31 +277,19 @@ static bool __init xen_check_mwait(void) static bool __init xen_check_xsave(void) { - unsigned int err, eax, edx; + unsigned int cx, xsave_mask; - /* - * Xen 4.0 and older accidentally leaked the host XSAVE flag into guest - * view, despite not being able to support guests using the - * functionality. Probe for the actual availability of XSAVE by seeing - * whether xgetbv executes successfully or raises #UD. - */ - asm volatile("1: .byte 0x0f,0x01,0xd0\n\t" /* xgetbv */ - "xor %[err], %[err]\n" - "2:\n\t" - ".pushsection .fixup,\"ax\"\n\t" - "3: movl $1,%[err]\n\t" - "jmp 2b\n\t" - ".popsection\n\t" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=a" (eax), "=d" (edx) - : "c" (0)); - - return err == 0; + cx = cpuid_ecx(1); + + xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) | + (1 << (X86_FEATURE_OSXSAVE % 32)); + + /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ + return (cx & xsave_mask) == xsave_mask; } static void __init xen_init_capabilities(void) { - setup_clear_cpu_cap(X86_BUG_SYSRET_SS_ATTRS); setup_force_cpu_cap(X86_FEATURE_XENPV); setup_clear_cpu_cap(X86_FEATURE_DCA); setup_clear_cpu_cap(X86_FEATURE_APERFMPERF); @@ -317,10 +305,7 @@ static void __init xen_init_capabilities(void) else setup_clear_cpu_cap(X86_FEATURE_MWAIT); - if (xen_check_xsave()) { - setup_force_cpu_cap(X86_FEATURE_XSAVE); - setup_force_cpu_cap(X86_FEATURE_OSXSAVE); - } else { + if (!xen_check_xsave()) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_OSXSAVE); } @@ -988,6 +973,13 @@ void xen_setup_shared_info(void) #endif xen_setup_mfn_list_list(); + + /* + * Now that shared info is set up we can start using routines that + * point to pvclock area. + */ + if (system_state == SYSTEM_BOOTING) + xen_init_time_ops(); } /* This is called once we have the cpu_possible_mask */ @@ -1286,8 +1278,6 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; - xen_init_time_ops(); - /* * Set up some pagetable state before starting to set any ptes. */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 9d9ae6650aa1..7397d8b8459d 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2025,7 +2025,8 @@ static unsigned long __init xen_read_phys_ulong(phys_addr_t addr) /* * Translate a virtual address to a physical one without relying on mapped - * page tables. + * page tables. Don't rely on big pages being aligned in (guest) physical + * space! */ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) { @@ -2046,7 +2047,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) sizeof(pud))); if (!pud_present(pud)) return 0; - pa = pud_pfn(pud) << PAGE_SHIFT; + pa = pud_val(pud) & PTE_PFN_MASK; if (pud_large(pud)) return pa + (vaddr & ~PUD_MASK); @@ -2054,7 +2055,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) sizeof(pmd))); if (!pmd_present(pmd)) return 0; - pa = pmd_pfn(pmd) << PAGE_SHIFT; + pa = pmd_val(pmd) & PTE_PFN_MASK; if (pmd_large(pmd)) return pa + (vaddr & ~PMD_MASK); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 090c7eb4dca9..a1895a8e85c1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -406,7 +406,7 @@ static void __init xen_time_init(void) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } -void __init xen_init_time_ops(void) +void __ref xen_init_time_ops(void) { pv_time_ops = xen_time_ops; |