From b8bcfe997e46150fedcc3f5b26b846400122fdd9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:05:19 -0800 Subject: x86/paravirt: remove lazy mode in interrupts Impact: simplification, robustness Make paravirt_lazy_mode() always return PARAVIRT_LAZY_NONE when in an interrupt. This prevents interrupt code from accidentally inheriting an outer lazy state, and instead does everything synchronously. Outer batched operations are left deferred. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra Cc: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 3 +++ arch/x86/mm/fault.c | 6 ++---- arch/x86/mm/highmem_32.c | 2 -- arch/x86/mm/iomap_32.c | 1 - arch/x86/mm/pageattr.c | 14 -------------- 5 files changed, 5 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 63dd358d8ee1..8ab250ac498b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -282,6 +282,9 @@ void paravirt_leave_lazy_cpu(void) enum paravirt_lazy_mode paravirt_get_lazy_mode(void) { + if (in_interrupt()) + return PARAVIRT_LAZY_NONE; + return __get_cpu_var(paravirt_lazy_mode); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..cfbb4a738011 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) { + if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else { + else BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - } return pmd_k; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 00f127c80b0e..e81dfa408157 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -87,7 +87,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); return (void *)vaddr; } @@ -117,7 +116,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif } - arch_flush_lazy_mmu_mode(); pagefault_enable(); } diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 04102d42ff42..b6a61f3d7ef8 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -74,7 +74,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) kpte_clear_flush(kmap_pte-idx, vaddr); - arch_flush_lazy_mmu_mode(); pagefault_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9c4294986af7..9015e5e412b5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -824,13 +824,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); - /* - * If we're called with lazy mmu updates enabled, the - * in-memory pte state may be stale. Flush pending updates to - * bring them up to date. - */ - arch_flush_lazy_mmu_mode(); - cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; @@ -873,13 +866,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); - /* - * If we've been called with lazy mmu updates enabled, then - * make sure that everything gets flushed out before we - * return. - */ - arch_flush_lazy_mmu_mode(); - out: return ret; } -- cgit v1.2.3 From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:24:03 -0800 Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with arch_start_context_switch Impact: simplification, prepare for later changes Make lazy cpu mode more specific to context switching, so that it makes sense to do more context-switch specific things in the callbacks. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 8 +++----- arch/x86/kernel/paravirt.c | 13 ------------- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/xen/mmu.c | 5 +---- include/asm-frv/pgtable.h | 4 ++-- include/asm-generic/pgtable.h | 21 +++++++++++---------- kernel/sched.c | 2 +- 8 files changed, 20 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0617d5cc9712..7b28abac323f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1420,19 +1420,17 @@ void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); void paravirt_leave_lazy(enum paravirt_lazy_mode mode); -#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -static inline void arch_enter_lazy_cpu_mode(void) +#define __HAVE_ARCH_START_CONTEXT_SWITCH +static inline void arch_start_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); } -static inline void arch_leave_lazy_cpu_mode(void) +static inline void arch_end_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -void arch_flush_lazy_cpu_mode(void); - #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8ab250ac498b..5eea9548216b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -301,19 +301,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_enable(); } -void arch_flush_lazy_cpu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { - WARN_ON(preempt_count() == 1); - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 14014d766cad..57e49a8278a9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index abb7e6a7f0c6..7115e6085326 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(); /* * Switch FS and GS. diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb6afa4ec95c..6b98f87232ac 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1119,10 +1119,8 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); - arch_flush_lazy_cpu_mode(); - } } static void xen_drop_mm_ref(struct mm_struct *mm) @@ -1135,7 +1133,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) load_cr3(swapper_pg_dir); else leave_mm(smp_processor_id()); - arch_flush_lazy_cpu_mode(); } /* Get the "official" set of cpus referring to our pagetable. */ diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index e16fdb1f4f4f..235e34a7a340 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; } #define pgtable_cache_init() do {} while (0) #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) + +#define arch_start_context_switch() do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70aba..922f03671dd8 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. + * A facility to provide batching of the reload of page tables and + * other process state with the actual context switch code for + * paravirtualized guests. By convention, only one of the batched + * update (lazy) modes (CPU, MMU) should be active at any given time, + * entry should never be nested, and entry and exits should always be + * paired. This is for sanity of maintaining and reasoning about the + * kernel code. In this case, the exit (end of the context switch) is + * in architecture-specific code, and so doesn't need a generic + * definition. */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH +#define arch_start_context_switch() do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/kernel/sched.c b/kernel/sched.c index 5757e03cfac0..7530fdd7c982 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_enter_lazy_cpu_mode(); + arch_start_context_switch(); if (unlikely(!mm)) { next->active_mm = oldmm; -- cgit v1.2.3 From b407fc57b815b2016186220baabc76cc8264206e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:46:21 -0800 Subject: x86/paravirt: flush pending mmu updates on context switch Impact: allow preemption during lazy mmu updates If we're in lazy mmu mode when context switching, leave lazy mmu mode, but remember the task's state in TIF_LAZY_MMU_UPDATES. When we resume the task, check this flag and re-enter lazy mmu mode if its set. This sets things up for allowing lazy mmu mode while preemptible, though that won't actually be active until the next change. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 1 - arch/x86/include/asm/thread_info.h | 2 ++ arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/paravirt.c | 13 ++++++++++--- arch/x86/kernel/vmi_32.c | 14 ++++++++++---- arch/x86/lguest/boot.c | 14 ++++++++++---- arch/x86/xen/enlighten.c | 6 +++--- arch/x86/xen/mmu.c | 7 ++++++- arch/x86/xen/xen-ops.h | 1 - 9 files changed, 42 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7b28abac323f..58d2481b01a6 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1418,7 +1418,6 @@ void paravirt_enter_lazy_cpu(void); void paravirt_leave_lazy_cpu(void); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); -void paravirt_leave_lazy(enum paravirt_lazy_mode mode); #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(void) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index df9d5f78385e..2f34d643b567 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -94,6 +94,7 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 478bca986eca..5d7f6e76b5dc 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -201,7 +201,7 @@ static void kvm_leave_lazy_mmu(void) struct kvm_para_state *state = kvm_para_state(); mmu_queue_flush(state); - paravirt_leave_lazy(paravirt_get_lazy_mode()); + paravirt_leave_lazy_mmu(); state->mode = paravirt_get_lazy_mode(); } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5eea9548216b..430a0e30577b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -252,7 +252,7 @@ static inline void enter_lazy(enum paravirt_lazy_mode mode) __get_cpu_var(paravirt_lazy_mode) = mode; } -void paravirt_leave_lazy(enum paravirt_lazy_mode mode) +static void leave_lazy(enum paravirt_lazy_mode mode) { BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); BUG_ON(preemptible()); @@ -267,17 +267,24 @@ void paravirt_enter_lazy_mmu(void) void paravirt_leave_lazy_mmu(void) { - paravirt_leave_lazy(PARAVIRT_LAZY_MMU); + leave_lazy(PARAVIRT_LAZY_MMU); } void paravirt_enter_lazy_cpu(void) { + if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_thread_flag(TIF_LAZY_MMU_UPDATES); + } enter_lazy(PARAVIRT_LAZY_CPU); } void paravirt_leave_lazy_cpu(void) { - paravirt_leave_lazy(PARAVIRT_LAZY_CPU); + leave_lazy(PARAVIRT_LAZY_CPU); + + if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } enum paravirt_lazy_mode paravirt_get_lazy_mode(void) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 2cc4a90e2cb3..950929c607d3 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -473,16 +473,22 @@ static void vmi_enter_lazy_cpu(void) vmi_ops.set_lazy_mode(2); } +static void vmi_leave_lazy_cpu(void) +{ + vmi_ops.set_lazy_mode(0); + paravirt_leave_lazy_cpu(); +} + static void vmi_enter_lazy_mmu(void) { paravirt_enter_lazy_mmu(); vmi_ops.set_lazy_mode(1); } -static void vmi_leave_lazy(void) +static void vmi_leave_lazy_mmu(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); vmi_ops.set_lazy_mode(0); + paravirt_leave_lazy_mmu(); } static inline int __init check_vmi_rom(struct vrom_header *rom) @@ -718,12 +724,12 @@ static inline int __init activate_vmi(void) para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, set_lazy_mode, SetLazyMode); - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, + para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu, set_lazy_mode, SetLazyMode); para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, set_lazy_mode, SetLazyMode); - para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, set_lazy_mode, SetLazyMode); /* user and kernel flush are just handled with different flags to FlushTLB */ diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9fe4ddaa8f6f..41a5562e710e 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -147,10 +147,16 @@ static void lazy_hcall(unsigned long call, /* When lazy mode is turned off reset the per-cpu lazy mode variable and then * issue the do-nothing hypercall to flush any stored calls. */ -static void lguest_leave_lazy_mode(void) +static void lguest_leave_lazy_mmu_mode(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + paravirt_leave_lazy_mmu(); +} + +static void lguest_leave_lazy_cpu_mode(void) +{ + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + paravirt_leave_lazy_cpu(); } /*G:033 @@ -1026,7 +1032,7 @@ __init void lguest_init(void) pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; @@ -1039,7 +1045,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr2 = lguest_read_cr2; pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; - pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 82cd39a6cbd3..f586e63b9a63 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -void xen_leave_lazy(void) +static void xen_leave_lazy_cpu(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); + paravirt_leave_lazy_cpu(); } static unsigned long xen_store_tr(void) @@ -819,7 +819,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .lazy_mode = { .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy, + .leave = xen_leave_lazy_cpu, }, }; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6b98f87232ac..f5f8faa4f76c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1816,6 +1816,11 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } +static void xen_leave_lazy_mmu(void) +{ + xen_mc_flush(); + paravirt_leave_lazy_mmu(); +} const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, @@ -1891,7 +1896,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, + .leave = xen_leave_lazy_mmu, }, .set_fixmap = xen_set_fixmap, diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f5ef2632ea2..f897cdffccb6 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); void xen_reserve_top(void); -void xen_leave_lazy(void); void xen_post_allocator_init(void); char * __init xen_memory_setup(void); -- cgit v1.2.3 From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 11:18:57 -0800 Subject: x86/paravirt: finish change from lazy cpu to context switch start/end Impact: fix lazy context switch API Pass the previous and next tasks into the context switch start end calls, so that the called functions can properly access the task state (esp in end_context_switch, in which the next task is not yet completely current). Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 17 ++++++++++------- arch/x86/include/asm/pgtable.h | 2 ++ arch/x86/kernel/paravirt.c | 14 ++++++-------- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/vmi_32.c | 12 ++++++------ arch/x86/lguest/boot.c | 8 ++++---- arch/x86/xen/enlighten.c | 10 ++++------ include/asm-frv/pgtable.h | 2 +- include/asm-generic/pgtable.h | 2 +- kernel/sched.c | 2 +- 11 files changed, 37 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 58d2481b01a6..dfdee0ca57d3 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -56,6 +56,7 @@ struct desc_ptr; struct tss_struct; struct mm_struct; struct desc_struct; +struct task_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -203,7 +204,8 @@ struct pv_cpu_ops { void (*swapgs)(void); - struct pv_lazy_ops lazy_mode; + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); }; struct pv_irq_ops { @@ -1414,20 +1416,21 @@ enum paravirt_lazy_mode { }; enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_enter_lazy_cpu(void); -void paravirt_leave_lazy_cpu(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); #define __HAVE_ARCH_START_CONTEXT_SWITCH -static inline void arch_start_context_switch(void) +static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); + PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); } -static inline void arch_end_context_switch(void) +static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); + PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d0812e155f1d..24e42836e921 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -83,6 +83,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) +#define arch_end_context_switch(prev) do {} while(0) + #endif /* CONFIG_PARAVIRT */ /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 430a0e30577b..cf1437503bab 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -270,20 +270,20 @@ void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } -void paravirt_enter_lazy_cpu(void) +void paravirt_start_context_switch(struct task_struct *prev) { if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { arch_leave_lazy_mmu_mode(); - set_thread_flag(TIF_LAZY_MMU_UPDATES); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); } enter_lazy(PARAVIRT_LAZY_CPU); } -void paravirt_leave_lazy_cpu(void) +void paravirt_end_context_switch(struct task_struct *next) { leave_lazy(PARAVIRT_LAZY_CPU); - if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES)) + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) arch_enter_lazy_mmu_mode(); } @@ -399,10 +399,8 @@ struct pv_cpu_ops pv_cpu_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, - .lazy_mode = { - .enter = paravirt_nop, - .leave = paravirt_nop, - }, + .start_context_switch = paravirt_nop, + .end_context_switch = paravirt_nop, }; struct pv_apic_ops pv_apic_ops = { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 57e49a8278a9..d766c7616fd7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_end_context_switch(); + arch_end_context_switch(next_p); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7115e6085326..e8a9aaf9df88 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_end_context_switch(); + arch_end_context_switch(next_p); /* * Switch FS and GS. diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 950929c607d3..55a5d6938e5e 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -467,16 +467,16 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_enter_lazy_cpu(void) +static void vmi_start_context_switch(struct task_struct *prev) { - paravirt_enter_lazy_cpu(); + paravirt_start_context_switch(prev); vmi_ops.set_lazy_mode(2); } -static void vmi_leave_lazy_cpu(void) +static void vmi_end_context_switch(struct task_struct *next) { vmi_ops.set_lazy_mode(0); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } static void vmi_enter_lazy_mmu(void) @@ -722,9 +722,9 @@ static inline int __init activate_vmi(void) para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); para_fill(pv_cpu_ops.io_delay, IODelay); - para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, set_lazy_mode, SetLazyMode); - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu, + para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, set_lazy_mode, SetLazyMode); para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 41a5562e710e..5287081b3567 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -153,10 +153,10 @@ static void lguest_leave_lazy_mmu_mode(void) paravirt_leave_lazy_mmu(); } -static void lguest_leave_lazy_cpu_mode(void) +static void lguest_end_context_switch(struct task_struct *next) { hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } /*G:033 @@ -1031,8 +1031,8 @@ __init void lguest_init(void) pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; - pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode; + pv_cpu_ops.start_context_switch = paravirt_start_context_switch; + pv_cpu_ops.end_context_switch = lguest_end_context_switch; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f586e63b9a63..70b355d3a86c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy_cpu(void) +static void xen_end_context_switch(struct task_struct *next) { xen_mc_flush(); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } static unsigned long xen_store_tr(void) @@ -817,10 +817,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { /* Xen takes care of %gs when switching to usermode for us */ .swapgs = paravirt_nop, - .lazy_mode = { - .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy_cpu, - }, + .start_context_switch = paravirt_start_context_switch, + .end_context_switch = xen_end_context_switch, }; static const struct pv_apic_ops xen_apic_ops __initdata = { diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 235e34a7a340..09887045d03f 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -74,7 +74,7 @@ static inline int pte_file(pte_t pte) { return 0; } #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 922f03671dd8..e410f602cab1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -291,7 +291,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, * definition. */ #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/kernel/sched.c b/kernel/sched.c index 7530fdd7c982..133762aece50 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_start_context_switch(); + arch_start_context_switch(prev); if (unlikely(!mm)) { next->active_mm = oldmm; -- cgit v1.2.3 From 2829b449276aed45f3d649efb21e3418e39dd5d1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:53:19 -0800 Subject: x86/paravirt: allow preemption with lazy mmu mode Impact: remove obsolete checks, simplification Lift restrictions on preemption with lazy mmu mode, as it is now allowed. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/kernel/paravirt.c | 7 ++++--- arch/x86/xen/mmu.c | 8 +------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index cf1437503bab..bf2e86eee80c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -247,7 +247,6 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA static inline void enter_lazy(enum paravirt_lazy_mode mode) { BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - BUG_ON(preemptible()); __get_cpu_var(paravirt_lazy_mode) = mode; } @@ -255,7 +254,6 @@ static inline void enter_lazy(enum paravirt_lazy_mode mode) static void leave_lazy(enum paravirt_lazy_mode mode) { BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); - BUG_ON(preemptible()); __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; } @@ -272,6 +270,8 @@ void paravirt_leave_lazy_mmu(void) void paravirt_start_context_switch(struct task_struct *prev) { + BUG_ON(preemptible()); + if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { arch_leave_lazy_mmu_mode(); set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); @@ -281,6 +281,8 @@ void paravirt_start_context_switch(struct task_struct *prev) void paravirt_end_context_switch(struct task_struct *next) { + BUG_ON(preemptible()); + leave_lazy(PARAVIRT_LAZY_CPU); if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) @@ -300,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f5f8faa4f76c..3f2d0fe5e6a8 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -419,10 +419,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - /* updates to init_mm may be done without lock */ - if (mm == &init_mm) - preempt_disable(); - ADD_STATS(set_pte_at, 1); // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); ADD_STATS(set_pte_at_current, mm == current->mm); @@ -443,9 +439,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, } xen_set_pte(ptep, pteval); -out: - if (mm == &init_mm) - preempt_enable(); +out: return; } pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, -- cgit v1.2.3 From ab2f75f0b760d2b0c9a875b669a1b51dce02c85a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 00:18:50 -0800 Subject: x86/paravirt: use percpu_ rather than __get_cpu_var Impact: minor optimisation percpu_read/write is a slightly more direct way of getting to percpu data. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/paravirt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index bf2e86eee80c..254e8aa8bfdb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -246,16 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA static inline void enter_lazy(enum paravirt_lazy_mode mode) { - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); + BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - __get_cpu_var(paravirt_lazy_mode) = mode; + percpu_write(paravirt_lazy_mode, mode); } static void leave_lazy(enum paravirt_lazy_mode mode) { - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); + BUG_ON(percpu_read(paravirt_lazy_mode) != mode); - __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; + percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); } void paravirt_enter_lazy_mmu(void) @@ -294,7 +294,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) if (in_interrupt()) return PARAVIRT_LAZY_NONE; - return __get_cpu_var(paravirt_lazy_mode); + return percpu_read(paravirt_lazy_mode); } void arch_flush_lazy_mmu_mode(void) -- cgit v1.2.3 From 5caecb9432428241d0c641897f07ff4003f1b55f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 20 Feb 2009 23:01:26 -0800 Subject: xen: disable preempt for leave_lazy_mmu xen_mc_flush() requires preemption to be disabled for its own sanity, so disable it while we're flushing. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3f2d0fe5e6a8..0e572380413b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1812,8 +1812,10 @@ __init void xen_post_allocator_init(void) static void xen_leave_lazy_mmu(void) { + preempt_disable(); xen_mc_flush(); paravirt_leave_lazy_mmu(); + preempt_enable(); } const struct pv_mmu_ops xen_mmu_ops __initdata = { -- cgit v1.2.3 From 59d7187142bbe9b404a403ed0f874d3227305f26 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 26 Feb 2009 15:48:33 -0800 Subject: xen: separate p2m allocation from setting When doing very early p2m setting, we need to separate setting from allocation, so split things up accordingly. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 61 +++++++++++++++++++++++++++++++++++++++--------------- arch/x86/xen/mmu.h | 3 +++ 2 files changed, 47 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0e572380413b..e0a55b7a6ceb 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -233,47 +233,74 @@ unsigned long get_phys_to_machine(unsigned long pfn) } EXPORT_SYMBOL_GPL(get_phys_to_machine); -static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) +/* install a new p2m_top page */ +bool install_p2mtop_page(unsigned long pfn, unsigned long *p) { - unsigned long *p; + unsigned topidx = p2m_top_index(pfn); + unsigned long **pfnp, *mfnp; unsigned i; - p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(p == NULL); + pfnp = &p2m_top[topidx]; + mfnp = &p2m_top_mfn[topidx]; for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pp, p2m_missing, p) != p2m_missing) - free_page((unsigned long)p); - else + if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { *mfnp = virt_to_mfn(p); + return true; + } + + return false; } -void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +static void alloc_p2m(unsigned long pfn) { - unsigned topidx, idx; + unsigned long *p; - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + if (!install_p2mtop_page(pfn, p)) + free_page((unsigned long)p); +} + +/* Try to install p2m mapping; fail if intermediate bits missing */ +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { BUG_ON(mfn != INVALID_P2M_ENTRY); - return; + return true; } topidx = p2m_top_index(pfn); if (p2m_top[topidx] == p2m_missing) { - /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) - return; - alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); + return true; + return false; } idx = p2m_index(pfn); p2m_top[topidx][idx] = mfn; + + return true; +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + alloc_p2m(pfn); + + if (!__set_phys_to_machine(pfn, mfn)) + BUG(); + } } unsigned long arbitrary_virt_to_mfn(void *vaddr) diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 24d1b44a337d..da7302624897 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -11,6 +11,9 @@ enum pt_level { }; +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +bool install_p2mtop_page(unsigned long pfn, unsigned long *p); + void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -- cgit v1.2.3 From 7571a60446030d2576d881438447e86a0755a83b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 15:34:59 -0800 Subject: xen: split construction of p2m mfn tables from registration Build the p2m_mfn_list_list early with the rest of the p2m table, but register it later when the real shared_info structure is in place. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e0a55b7a6ceb..67d2ab45cd90 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -void xen_setup_mfn_list_list(void) +static void __init xen_build_mfn_list_list(void) { unsigned pfn, idx; @@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void) unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); } +} +void xen_setup_mfn_list_list(void) +{ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = @@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top[topidx] = &mfn_list[pfn]; } + + xen_build_mfn_list_list(); } unsigned long get_phys_to_machine(unsigned long pfn) -- cgit v1.2.3 From 6ed6bf428aff64fe37cdc54b239d598fee6016f1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 13:02:18 -0800 Subject: xen: clean up xen_load_gdt Makes the logic a bit clearer. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 70b355d3a86c..5776dc270297 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -301,10 +301,21 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames = mcs.args; for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = arbitrary_virt_to_mfn((void *)va); + int level; + pte_t *ptep = lookup_address(va, &level); + unsigned long pfn, mfn; + void *virt; + + BUG_ON(ptep == NULL); + + pfn = pte_pfn(*ptep); + mfn = pfn_to_mfn(pfn); + virt = __va(PFN_PHYS(pfn)); + + frames[f] = mfn; make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(mfn_to_virt(frames[f])); + make_lowmem_page_readonly(virt); } MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); -- cgit v1.2.3 From 3ce5fa7ebff74b6a4dc5fdcdc22e6979f5a4ff85 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 15:26:00 -0800 Subject: xen: make xen_load_gdt simpler Remove use of multicall machinery which is unused (gdt loading is never performance critical). This removes the implicit use of percpu variables, which simplifies understanding how the percpu code's use of load_gdt interacts with this code. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5776dc270297..48b399bd6e0d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -284,12 +284,11 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { - unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; int f; - struct multicall_space mcs; /* A GDT can be up to 64k in size, which corresponds to 8192 8-byte entries, or 16 4k pages.. */ @@ -297,9 +296,6 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - mcs = xen_mc_entry(sizeof(*frames) * pages); - frames = mcs.args; - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { int level; pte_t *ptep = lookup_address(va, &level); @@ -314,13 +310,15 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames[f] = mfn; + printk("xen_load_gdt: %d va=%p mfn=%lx pfn=%lx va'=%p\n", + f, (void *)va, mfn, pfn, virt); + make_lowmem_page_readonly((void *)va); make_lowmem_page_readonly(virt); } - MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); - - xen_mc_issue(PARAVIRT_LAZY_CPU); + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); } static void load_TLS_descriptor(struct thread_struct *t, -- cgit v1.2.3 From b4b7e58590d0e94ed78bd6be1aa163caba7b6c74 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 16:34:27 -0800 Subject: xen: remove xen_load_gdt debug Don't need the noise. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 48b399bd6e0d..75b7a0f90380 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -310,9 +310,6 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames[f] = mfn; - printk("xen_load_gdt: %d va=%p mfn=%lx pfn=%lx va'=%p\n", - f, (void *)va, mfn, pfn, virt); - make_lowmem_page_readonly((void *)va); make_lowmem_page_readonly(virt); } -- cgit v1.2.3 From e9e2d1ffcfdb38bed11a3064aa74bea9ee38ed80 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Thu, 5 Mar 2009 20:13:57 +0100 Subject: NULL noise: arch/x86/xen/smp.c Fix this sparse warnings: arch/x86/xen/smp.c:316:52: warning: Using plain integer as NULL pointer arch/x86/xen/smp.c:421:60: warning: Using plain integer as NULL pointer Signed-off-by: Hannes Eder Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 8d470562ffc9..304d832710cd 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) BUG_ON(rc); while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); barrier(); } @@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); break; } } -- cgit v1.2.3 From e826fe1ba1563a9272345da8e3279a930ac160a7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Mar 2009 17:09:27 -0800 Subject: xen: mask XSAVE from cpuid Xen leaves XSAVE set in cpuid, but doesn't allow cr4.OSXSAVE to be set. This confuses the kernel and it ends up crashing on an xsetbv instruction. At boot time, try to set cr4.OSXSAVE, and mask XSAVE out of cpuid it we can't. This will produce a spurious error from Xen, but allows us to support XSAVE if/when Xen does. This also factors out the cpuid mask decisions to boot time. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 50 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 75b7a0f90380..da33e0c5870d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -168,21 +168,23 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ - if (*ax == 1) - maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ - (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + if (*ax == 1) { + maskecx = cpuid_leaf1_ecx_mask; + maskedx = cpuid_leaf1_edx_mask; + } asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -190,9 +192,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); + + *cx &= maskecx; *dx &= maskedx; } +static __init void xen_init_cpuid_mask(void) +{ + unsigned int ax, bx, cx, dx; + + cpuid_leaf1_edx_mask = + ~((1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + if (!xen_initial_domain()) + cpuid_leaf1_edx_mask &= + ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ + (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + + ax = 1; + xen_cpuid(&ax, &bx, &cx, &dx); + + /* cpuid claims we support xsave; try enabling it to see what happens */ + if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { + unsigned long cr4; + + set_in_cr4(X86_CR4_OSXSAVE); + + cr4 = read_cr4(); + + if ((cr4 & X86_CR4_OSXSAVE) == 0) + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); + + clear_in_cr4(X86_CR4_OSXSAVE); + } +} + static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -901,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void) xen_init_irq_ops(); + xen_init_cpuid_mask(); + #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. -- cgit v1.2.3 From 68509cdcde6583ee1a9542899d1270449c7d5903 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Mar 2009 03:59:04 -0700 Subject: x86-64: remove PGE from must-have feature list PGE may not be available when running paravirtualized, so test the cpuid bit before using it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/required-features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d5cd6c586881..a4737dddfd58 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) +#define NEED_PGE 0 #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -- cgit v1.2.3 From 1e7449730853e7c9ae9a2458b2ced7ba12559a0e Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: Xen: Add virt_to_pfn helper function Signed-off-by: Alex Nixon --- arch/x86/include/asm/xen/page.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a918dde46b5..018a0a400799 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) -- cgit v1.2.3 From 5f241e65f2be4661a33e1937e1c829252a80b2b8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Mar 2009 17:08:48 -0700 Subject: x86-64: non-paravirt systems always has PSE and PGE A paravirtualized system may not have PSE or PGE available to guests, so they are not required features. However, without paravirt we can assume that any x86-64 implementation will have them available. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/required-features.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index a4737dddfd58..64cf2d24fad1 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -48,9 +48,15 @@ #endif #ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT +/* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_PGE 0 +#else +#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) +#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) +#endif +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -- cgit v1.2.3 From 4185f35404dc96f8525298c7c548aee419f3b3f4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Mar 2009 13:30:55 -0700 Subject: xen/mmu: some early pagetable cleanups 1. make sure early-allocated ptes are pinned, so they can be later unpinned 2. don't pin pmd+pud, just make them RO 3. scatter some __inits around Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 40 ++++++++++++++++++++++++++++------------ arch/x86/xen/xen-ops.h | 2 -- 2 files changed, 28 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 67d2ab45cd90..df87c803cecc 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1013,7 +1013,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -void __init xen_mark_init_mm_pinned(void) +static void __init xen_mark_init_mm_pinned(void) { xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1461,10 +1461,29 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) } #endif +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) { +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); +} + +/* Used for pmd and pud */ +static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) +{ #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ #endif @@ -1473,18 +1492,15 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) +static __init void xen_release_pte_init(unsigned long pfn) { + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +static __init void xen_release_pmd_init(unsigned long pfn) { - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } /* This needs to make sure the new pte page is pinned iff its being @@ -1873,9 +1889,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd = xen_alloc_pmd_init, .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, + .release_pmd = xen_release_pmd_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, @@ -1914,8 +1930,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, + .alloc_pud = xen_alloc_pmd_init, + .release_pud = xen_release_pmd_init, #endif /* PAGETABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f897cdffccb6..5c50a1017a37 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -56,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); -void xen_mark_init_mm_pinned(void); - void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP -- cgit v1.2.3 From 8de07bbdede03598801cf33ab23dcbcd28a918d2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 17:36:57 -0800 Subject: xen/mmu: weaken flush_tlb_other test Impact: fixes crashing bug There's no particular problem with getting an empty cpu mask, so just shortcut-return if we get one. Avoids crash reported by Christophe Saout Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index df87c803cecc..e425a32e0a90 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1293,8 +1293,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - BUG_ON(cpumask_empty(cpus)); - BUG_ON(!mm); + if (cpumask_empty(cpus)) + return; /* nothing to do */ mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; -- cgit v1.2.3 From 707ebbc81c61eb480d8a51ca61e355e240df1d32 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Mar 2009 11:29:02 -0700 Subject: xen: set _PAGE_NX in __supported_pte_mask before pagetable construction Some 64-bit machines don't support the NX flag in ptes. Check for NX before constructing the kernel pagetables. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index da33e0c5870d..80f4c5343495 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -912,7 +913,6 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -980,6 +980,11 @@ asmlinkage void __init xen_start_kernel(void) if (!xen_initial_domain()) __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); +#ifdef CONFIG_X86_64 + /* Work out if we support NX */ + check_efer(); +#endif + /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -- cgit v1.2.3 From 6d02c42698f99eccb290ac53d4f10ca883b9f90c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 29 Mar 2009 22:57:15 -0700 Subject: xen: clean up gate trap/interrupt constants Use GATE_INTERRUPT/TRAP rather than 0xe/f. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 80f4c5343495..12a3159333bc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -428,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != 0xf && val->type != 0xe) + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; @@ -436,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == 0xe) - info->flags |= 4; + if (val->type == GATE_INTERRUPT) + info->flags |= 1 << 2; return 1; } -- cgit v1.2.3 From 169aafbc8d3f05431b5cfeb60294a12b8ef2bcee Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 7 Apr 2009 13:37:26 -0700 Subject: lguest: update lazy mmu changes to match lguest's use of kvm hypercalls Duplicate hcall -> kvm_hypercall0 convertion from "lguest: use KVM hypercalls". Signed-off-by: Jeremy Fitzhardinge Cc: Matias Zabaljauregui Cc: Rusty Russell --- arch/x86/lguest/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5ab239711cc2..cfb2d68dc795 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -168,7 +168,7 @@ static void lazy_hcall3(unsigned long call, * issue the do-nothing hypercall to flush any stored calls. */ static void lguest_leave_lazy_mmu_mode(void) { - hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + kvm_hypercall0(LHCALL_FLUSH_ASYNC); paravirt_leave_lazy_mmu(); } -- cgit v1.2.3 From e7c064889606aab3569669078c69b87b2c527e72 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Mar 2009 23:48:41 -0800 Subject: xen: add FIX_TEXT_POKE to fixmap FIX_TEXT_POKE[01] are used to map kernel addresses, so they're mapping pfns, not mfns. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 77b242c9a11e..a96f5b9393ea 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1812,6 +1812,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif + case FIX_TEXT_POKE0: + case FIX_TEXT_POKE1: + /* All local page mappings */ pte = pfn_pte(phys, prot); break; -- cgit v1.2.3 From 6b2e8523df148c15ea5abf13075026fb8bdb3f86 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 May 2009 11:56:49 -0700 Subject: xen: reserve Xen start_info rather than e820 reserving Use reserve_early rather than e820 reservations for Xen start info and mfn->pfn table, so that the memory use is a bit more self-documenting. [ Impact: cleanup ] Signed-off-by: Jeremy Fitzhardinge Cc: Xen-devel Cc: Linus Torvalds LKML-Reference: <4A032EF1.6070708@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/xen/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 15c6c68db6a2..ad0047f47cd4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -61,9 +61,9 @@ char * __init xen_memory_setup(void) * - xen_start_info * See comment above "struct start_info" in */ - e820_add_region(__pa(xen_start_info->mfn_list), - xen_start_info->pt_base - xen_start_info->mfn_list, - E820_RESERVED); + reserve_early(__pa(xen_start_info->mfn_list), + __pa(xen_start_info->pt_base), + "XEN START INFO"); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); -- cgit v1.2.3 From 6cac5a924668a56c7ccefc345805f1fe0536a90e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 29 Mar 2009 19:56:29 -0700 Subject: xen/x86-64: fix breakpoints and hardware watchpoints Native x86-64 uses the IST mechanism to run int3 and debug traps on an alternative stack. Xen does not do this, and so the frames were being misinterpreted by the ptrace code. This change special-cases these two exceptions by using Xen variants which run on the normal kernel stack properly. Impact: avoid crash or bad data when IST trap is invoked under Xen Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/traps.h | 3 +++ arch/x86/kernel/entry_64.S | 5 +++++ arch/x86/xen/enlighten.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d5342515b86..c44e5002f2ff 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -13,6 +13,9 @@ asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); +asmlinkage void xen_debug(void); +asmlinkage void xen_int3(void); +asmlinkage void xen_stack_segment(void); asmlinkage void overflow(void); asmlinkage void bounds(void); asmlinkage void invalid_op(void); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38946c6e8433..bb01ce080b80 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1379,6 +1379,11 @@ END(xen_failsafe_callback) paranoidzeroentry_ist debug do_debug DEBUG_STACK paranoidzeroentry_ist int3 do_int3 DEBUG_STACK paranoiderrorentry stack_segment do_stack_segment +#ifdef CONFIG_XEN +zeroentry xen_debug do_debug +zeroentry xen_int3 do_int3 +errorentry xen_stack_segment do_stack_segment +#endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault #ifdef CONFIG_X86_MCE diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 12a3159333bc..7566e13c0cac 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -428,11 +430,26 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { + unsigned long addr; + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; - info->address = gate_offset(*val); + + addr = gate_offset(*val); +#ifdef CONFIG_X86_64 + if (addr == (unsigned long)debug) + addr = (unsigned long)xen_debug; + else if (addr == (unsigned long)int3) + addr = (unsigned long)xen_int3; + else if (addr == (unsigned long)stack_segment) + addr = (unsigned long)xen_stack_segment; + else + WARN_ON(val->ist != 0); +#endif /* CONFIG_X86_64 */ + info->address = addr; + info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ -- cgit v1.2.3 From b80119bb35a49a4e8dbfb9708872adfd5cf38dee Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:22:08 -0700 Subject: xen/x86-64: clean up warnings about IST-using traps Ignore known IST-using traps. Aside from the debugger traps, they're low-level faults which Xen will handle for us, so the kernel needn't worry about them. Keep warning in case unknown trap starts using IST. Impact: suppress spurious warnings Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7566e13c0cac..e9df942aa143 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -439,14 +439,32 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, addr = gate_offset(*val); #ifdef CONFIG_X86_64 + /* + * Look for known traps using IST, and substitute them + * appropriately. The debugger ones are the only ones we care + * about. Xen will handle faults like double_fault and + * machine_check, so we should never see them. Warn if + * there's an unexpected IST-using fault handler. + */ if (addr == (unsigned long)debug) addr = (unsigned long)xen_debug; else if (addr == (unsigned long)int3) addr = (unsigned long)xen_int3; else if (addr == (unsigned long)stack_segment) addr = (unsigned long)xen_stack_segment; - else - WARN_ON(val->ist != 0); + else if (addr == (unsigned long)double_fault || + addr == (unsigned long)nmi) { + /* Don't need to handle these */ + return 0; +#ifdef CONFIG_X86_MCE + } else if (addr == (unsigned long)machine_check) { + return 0; +#endif + } else { + /* Some other trap using IST? */ + if (WARN_ON(val->ist != 0)) + return 0; + } #endif /* CONFIG_X86_64 */ info->address = addr; -- cgit v1.2.3 From a789ed5fb6d0256c4177c2cc27e06520ddbe4d4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 24 Apr 2009 00:26:50 -0700 Subject: xen: cache cr0 value to avoid trap'n'emulate for read_cr0 stts() is implemented in terms of read_cr0/write_cr0 to update the state of the TS bit. This happens during context switch, and so is fairly performance critical. Rather than falling back to a trap-and-emulate native read_cr0, implement our own by caching the last-written value from write_cr0 (the TS bit is the only one we really care about). Impact: optimise Xen context switches Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e9df942aa143..0a1700a2be9c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -658,10 +658,26 @@ static void xen_clts(void) xen_mc_issue(PARAVIRT_LAZY_CPU); } +static DEFINE_PER_CPU(unsigned long, xen_cr0_value); + +static unsigned long xen_read_cr0(void) +{ + unsigned long cr0 = percpu_read(xen_cr0_value); + + if (unlikely(cr0 == 0)) { + cr0 = native_read_cr0(); + percpu_write(xen_cr0_value, cr0); + } + + return cr0; +} + static void xen_write_cr0(unsigned long cr0) { struct multicall_space mcs; + percpu_write(xen_cr0_value, cr0); + /* Only pay attention to cr0.TS; everything else is ignored. */ mcs = xen_mc_entry(0); @@ -847,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .clts = xen_clts, - .read_cr0 = native_read_cr0, + .read_cr0 = xen_read_cr0, .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, -- cgit v1.2.3