diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/crypto/ghash-clmulni-intel_glue.c | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/e820.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/head_32.S | 4 | ||||
-rw-r--r-- | arch/x86/kernel/resource.c | 48 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 5 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 16 | ||||
-rw-r--r-- | arch/x86/lguest/i386_head.S | 105 | ||||
-rw-r--r-- | arch/x86/pci/i386.c | 18 |
14 files changed, 188 insertions, 36 deletions
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index cbcc8d8ea93a..7a6e68e4f748 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -10,6 +10,7 @@ * by the Free Software Foundation. */ +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 5be1542fbfaf..e99d55d74df5 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -72,6 +72,9 @@ struct e820map { #define BIOS_BEGIN 0x000a0000 #define BIOS_END 0x00100000 +#define BIOS_ROM_BASE 0xffe00000 +#define BIOS_ROM_END 0xffffffff + #ifdef __KERNEL__ /* see comment in arch/x86/kernel/e820.c */ extern struct e820map e820; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9e6fe391094e..f702f82aa1eb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,7 +79,7 @@ #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 -#define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_MAX_CPUID_ENTRIES 80 #define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9e13763b6092..1e994754d323 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o +obj-y += resource.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d7cdf5bc1e63..c0dbd9ac24f0 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -622,13 +622,13 @@ ENTRY(initial_code) __PAGE_ALIGNED_BSS .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE -initial_pg_pmd: +ENTRY(initial_pg_pmd) .fill 1024*KPMDS,4,0 #else ENTRY(initial_page_table) .fill 1024,4,0 #endif -initial_pg_fixmap: +ENTRY(initial_pg_fixmap) .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c new file mode 100644 index 000000000000..2a26819bb6a8 --- /dev/null +++ b/arch/x86/kernel/resource.c @@ -0,0 +1,48 @@ +#include <linux/ioport.h> +#include <asm/e820.h> + +static void resource_clip(struct resource *res, resource_size_t start, + resource_size_t end) +{ + resource_size_t low = 0, high = 0; + + if (res->end < start || res->start > end) + return; /* no conflict */ + + if (res->start < start) + low = start - res->start; + + if (res->end > end) + high = res->end - end; + + /* Keep the area above or below the conflict, whichever is larger */ + if (low > high) + res->end = start - 1; + else + res->start = end + 1; +} + +static void remove_e820_regions(struct resource *avail) +{ + int i; + struct e820entry *entry; + + for (i = 0; i < e820.nr_map; i++) { + entry = &e820.map[i]; + + resource_clip(avail, entry->addr, + entry->addr + entry->size - 1); + } +} + +void arch_remove_reservations(struct resource *avail) +{ + /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + if (avail->flags & IORESOURCE_MEM) { + if (avail->start < BIOS_END) + avail->start = BIOS_END; + resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); + + remove_e820_regions(avail); + } +} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 21c6746338af..85268f8eadf6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -769,7 +769,6 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); - resource_alloc_from_bottom = 0; iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1ca12298ffc7..b81a9b7c2ca4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { + case 0x00000001: + /* Mask out xsave bit as long as it is not supported by SVM */ + entry->ecx &= ~(bit(X86_FEATURE_XSAVE)); + break; case 0x80000001: if (nested) entry->ecx |= (1 << 2); /* Set SVM bit */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ff21fdda0c53..81fcbe9515c5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4227,11 +4227,6 @@ static int vmx_get_lpage_level(void) return PT_PDPE_LEVEL; } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cdac9e592aa5..b989e1f1e5d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void kvm_on_user_return(struct user_return_notifier *urn) { unsigned slot; @@ -4569,9 +4564,11 @@ static void kvm_timer_init(void) #ifdef CONFIG_CPU_FREQ struct cpufreq_policy policy; memset(&policy, 0, sizeof(policy)); - cpufreq_get_policy(&policy, get_cpu()); + cpu = get_cpu(); + cpufreq_get_policy(&policy, cpu); if (policy.cpuinfo.max_freq) max_tsc_khz = policy.cpuinfo.max_freq; + put_cpu(); #endif cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); @@ -5522,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; kvm_x86_ops->set_cr4(vcpu, sregs->cr4); + if (sregs->cr4 & X86_CR4_OSXSAVE) + update_cpuid(vcpu); if (!is_long_mode(vcpu) && is_pae(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); mmu_reset_needed = 1; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2cea414489f3..c600da830ce0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return kvm_read_cr0_bits(vcpu, X86_CR0_PG); } +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 73b1e1a1f489..4996cf5f73a0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3) { lguest_data.pgdir = cr3; lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); - cr3_changed = true; + + /* These two page tables are simple, linear, and used during boot */ + if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) + cr3_changed = true; } static unsigned long lguest_read_cr3(void) @@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * to forget all of them. Fortunately, this is very rare. * * ... except in early boot when the kernel sets up the initial pagetables, - * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell - * the Host anything changed until we've done the first page table switch, - * which brings boot back to 0.25 seconds. + * which makes booting astonishingly slow: 48 seconds! So we don't even tell + * the Host anything changed until we've done the first real page table switch, + * which brings boot back to 4.3 seconds. */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { @@ -1002,7 +1005,7 @@ static void lguest_time_init(void) clockevents_register_device(&lguest_clockevent); /* Finally, we unblock the timer interrupt. */ - enable_lguest_irq(0); + clear_bit(0, lguest_data.blocked_interrupts); } /* @@ -1349,9 +1352,6 @@ __init void lguest_init(void) */ switch_to_new_gdt(0); - /* We actually boot with all memory mapped, but let's say 128MB. */ - max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; - /* * The Host<->Guest Switcher lives at the top of our address space, and * the Host told us how big it is when we made LGUEST_INIT hypercall: diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 4f420c2f2d55..e7d5382ef263 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -4,6 +4,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/processor-flags.h> +#include <asm/pgtable.h> /*G:020 * Our story starts with the kernel booting into startup_32 in @@ -37,9 +38,113 @@ ENTRY(lguest_entry) /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp + call init_pagetables + /* Jumps are relative: we're running __PAGE_OFFSET too low. */ jmp lguest_init+__PAGE_OFFSET +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they + * don't have a stack at this point, so we can't just use call and ret. + */ +init_pagetables: +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif +#define pa(X) ((X) - __PAGE_OFFSET) + +/* Enough space to fit pagetables for the low memory linear map */ +MAPPING_BEYOND_END = \ + PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT +#ifdef CONFIG_X86_PAE + + /* + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. + * + * Note the upper half of each PMD or PTE are always zero at this stage. + */ + +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ + + xorl %ebx,%ebx /* %ebx is kept at zero */ + + movl $pa(__brk_base), %edi + movl $pa(initial_pg_pmd), %edx + movl $PTE_IDENT_ATTR, %eax +10: + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ + movl %ecx,(%edx) /* Store PMD entry */ + /* Upper half already zero */ + addl $8,%edx + movl $512,%ecx +11: + stosl + xchgl %eax,%ebx + stosl + xchgl %eax,%ebx + addl $0x1000,%eax + loop 11b + + /* + * End condition: we must map up to the end + MAPPING_BEYOND_END. + */ + movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp + cmpl %ebp,%eax + jb 10b +1: + addl $__PAGE_OFFSET, %edi + movl %edi, pa(_brk_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax + movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) +#else /* Not PAE */ + +page_pde_offset = (__PAGE_OFFSET >> 20); + + movl $pa(__brk_base), %edi + movl $pa(initial_page_table), %edx + movl $PTE_IDENT_ATTR, %eax +10: + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ + movl %ecx,(%edx) /* Store identity PDE entry */ + movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ + addl $4,%edx + movl $1024, %ecx +11: + stosl + addl $0x1000,%eax + loop 11b + /* + * End condition: we must map up to the end + MAPPING_BEYOND_END. + */ + movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp + cmpl %ebp,%eax + jb 10b + addl $__PAGE_OFFSET, %edi + movl %edi, pa(_brk_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax + movl %eax,pa(initial_page_table+0xffc) +#endif + ret + /*G:055 * We create a macro which puts the assembler code between lgstart_ and lgend_ * markers. These templates are put in the .text section: they can't be diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index c4bb261c106e..b1805b78842f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; - resource_size_t start = round_down(res->end - size + 1, align); + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { - - /* - * If we're avoiding ISA aliases, the largest contiguous I/O - * port space is 256 bytes. Clearing bits 9 and 10 preserves - * all 256-byte and smaller alignments, so the result will - * still be correctly aligned. - */ - if (!skip_isa_ioresource_align(dev)) - start &= ~0x300; - } else if (res->flags & IORESOURCE_MEM) { - if (start < BIOS_END) - start = res->end; /* fail; no space */ + if (skip_isa_ioresource_align(dev)) + return start; + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; } return start; } |