From a0f2dee0cd651efb5fac6a1d35b0a14460ebcdd4 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 21 Nov 2014 11:04:39 +0000 Subject: xen: add a dma_addr_t dev_addr argument to xen_dma_map_page dev_addr is the machine address of the page. The new parameter can be used by the ARM and ARM64 implementations of xen_dma_map_page to find out if the page is a local page (pfn == mfn) or a foreign page (pfn != mfn). dev_addr could be retrieved again from the physical address, using pfn_to_mfn, but it requires accessing an rbtree. Since we already have the dev_addr in our hands at the call site there is no need to get the mfn twice. Signed-off-by: Stefano Stabellini Reviewed-by: Catalin Marinas Acked-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page-coherent.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 7f02fe4e2c7b..acd844c017d3 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -22,8 +22,8 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, } static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) { } + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { } static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, -- cgit v1.2.3 From a4dba130891271084344c12537731542ec77cb85 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 21 Nov 2014 11:07:39 +0000 Subject: xen/arm/arm64: introduce xen_arch_need_swiotlb Introduce an arch specific function to find out whether a particular dma mapping operation needs to bounce on the swiotlb buffer. On ARM and ARM64, if the page involved is a foreign page and the device is not coherent, we need to bounce because at unmap time we cannot execute any required cache maintenance operations (we don't know how to find the pfn from the mfn). No change of behaviour for x86. Signed-off-by: Stefano Stabellini Reviewed-by: David Vrabel Reviewed-by: Catalin Marinas Acked-by: Ian Campbell Acked-by: Konrad Rzeszutek Wilk --- arch/arm/include/asm/xen/page.h | 4 ++++ arch/arm/xen/mm.c | 7 +++++++ arch/x86/include/asm/xen/page.h | 7 +++++++ drivers/xen/swiotlb-xen.c | 5 ++++- 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 135c24a5ba26..68c739b3fdf4 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -107,4 +107,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) #define xen_remap(cookie, size) ioremap_cache((cookie), (size)) #define xen_unmap(cookie) iounmap((cookie)) +bool xen_arch_need_swiotlb(struct device *dev, + unsigned long pfn, + unsigned long mfn); + #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index ab700e1e5922..28ebf3ecee4e 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -100,6 +100,13 @@ void __xen_dma_sync_single_for_device(struct device *hwdev, __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir); } +bool xen_arch_need_swiotlb(struct device *dev, + unsigned long pfn, + unsigned long mfn) +{ + return ((pfn != mfn) && !is_device_dma_coherent(dev)); +} + int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c949923a5668..f58ef6c0613b 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr); #define xen_remap(cookie, size) ioremap((cookie), (size)); #define xen_unmap(cookie) iounmap((cookie)) +static inline bool xen_arch_need_swiotlb(struct device *dev, + unsigned long pfn, + unsigned long mfn) +{ + return false; +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index ad2c5eb8a9c7..3725ee4ff43c 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -399,7 +399,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (dma_capable(dev, dev_addr, size) && - !range_straddles_page_boundary(phys, size) && !swiotlb_force) { + !range_straddles_page_boundary(phys, size) && + !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) && + !swiotlb_force) { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed * by the function. */ @@ -557,6 +559,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, dma_addr_t dev_addr = xen_phys_to_bus(paddr); if (swiotlb_force || + xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { phys_addr_t map = swiotlb_tbl_map_single(hwdev, -- cgit v1.2.3 From 066d79e4e2a41fbd0da62bd994f9b77ef8fa040a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 2 Dec 2014 15:19:12 -0500 Subject: xen/pci: Defer initialization of MSI ops on HVM guests If the hardware supports APIC virtualization we may decide not to use pirqs and instead use APIC/x2APIC directly, meaning that we don't want to set x86_msi.setup_msi_irqs and x86_msi.teardown_msi_irq to Xen-specific routines. However, x2APIC is not set up by the time pci_xen_hvm_init() is called so we need to postpone setting these ops until later, when we know which APIC mode is used. (Note that currently x2APIC is never initialized on HVM guests. This may change in the future) Signed-off-by: Boris Ostrovsky Acked-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/x86/pci/xen.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 093f5f4272d3..1370716907e1 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -431,6 +431,14 @@ int __init pci_xen_init(void) return 0; } +#ifdef CONFIG_PCI_MSI +void __init xen_msi_init(void) +{ + x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +} +#endif + int __init pci_xen_hvm_init(void) { if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs)) @@ -445,8 +453,11 @@ int __init pci_xen_hvm_init(void) #endif #ifdef CONFIG_PCI_MSI - x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; - x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + /* + * We need to wait until after x2apic is initialized + * before we can set MSI IRQ ops. + */ + x86_platform.apic_post_init = xen_msi_init; #endif return 0; } -- cgit v1.2.3 From 14520c92cbd7619ae097a3087b140c992b5c270b Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 2 Dec 2014 15:19:13 -0500 Subject: xen/pci: Use APIC directly when APIC virtualization hardware is available When hardware supports APIC/x2APIC virtualization we don't need to use pirqs for MSI handling and instead use APIC since most APIC accesses (MMIO or MSR) will now be processed without VMEXITs. As an example, netperf on the original code produces this profile (collected wih 'xentrace -e 0x0008ffff -T 5'): 342 cpu_change 260 CPUID 34638 HLT 64067 INJ_VIRQ 28374 INTR 82733 INTR_WINDOW 10 NPF 24337 TRAP 370610 vlapic_accept_pic_intr 307528 VMENTRY 307527 VMEXIT 140998 VMMCALL 127 wrap_buffer After applying this patch the same test shows 230 cpu_change 260 CPUID 36542 HLT 174 INJ_VIRQ 27250 INTR 222 INTR_WINDOW 20 NPF 24999 TRAP 381812 vlapic_accept_pic_intr 166480 VMENTRY 166479 VMEXIT 77208 VMMCALL 81 wrap_buffer ApacheBench results (ab -n 10000 -c 200) improve by about 10% Signed-off-by: Boris Ostrovsky Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Andrew Cooper Signed-off-by: David Vrabel --- arch/x86/include/asm/xen/cpuid.h | 91 ++++++++++++++++++++++++++++++++++++++++ arch/x86/pci/xen.c | 16 +++++++ 2 files changed, 107 insertions(+) create mode 100644 arch/x86/include/asm/xen/cpuid.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h new file mode 100644 index 000000000000..0d809e9fc975 --- /dev/null +++ b/arch/x86/include/asm/xen/cpuid.h @@ -0,0 +1,91 @@ +/****************************************************************************** + * arch-x86/cpuid.h + * + * CPUID interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ + +/* + * For compatibility with other hypervisor interfaces, the Xen cpuid leaves + * can be found at the first otherwise unused 0x100 aligned boundary starting + * from 0x40000000. + * + * e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid + * leaves will start at 0x40000100 + */ + +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000x00) + * EAX: Largest Xen-information leaf. All leaves up to an including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000x01) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000x02) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +/* + * Leaf 5 (0x40000x04) + * HVM-specific features + */ + +/* EAX Features */ +/* Virtualized APIC registers */ +#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) +/* Virtualized x2APIC accesses */ +#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) +/* Memory mapped from other domains has valid IOMMU entries */ +#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) + +#define XEN_CPUID_MAX_NUM_LEAVES 4 + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 1370716907e1..37914ef19c0a 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include static int xen_pcifront_enable_irq(struct pci_dev *dev) @@ -434,6 +436,20 @@ int __init pci_xen_init(void) #ifdef CONFIG_PCI_MSI void __init xen_msi_init(void) { + if (!disable_apic) { + /* + * If hardware supports (x2)APIC virtualization (as indicated + * by hypervisor's leaf 4) then we don't need to use pirqs/ + * event channels for MSI handling and instead use regular + * APIC processing + */ + uint32_t eax = cpuid_eax(xen_cpuid_base() + 4); + + if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) || + ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic)) + return; + } + x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; x86_msi.teardown_msi_irq = xen_teardown_msi_irq; } -- cgit v1.2.3