From 063334f30543597430f172bd7690d21e3590e148 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 3 Feb 2017 16:57:22 -0500 Subject: xen/x86: Remove PVH support We are replacing existing PVH guests with new implementation. We are keeping xen_pvh_domain() macro (for now set to zero) because when we introduce new PVH implementation later in this series we will reuse current PVH-specific code (xen_pvh_gnttab_setup()), and that code is conditioned by 'if (xen_pvh_domain())'. (We will also need a noop xen_pvh_domain() for !CONFIG_XEN_PVH). Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Reviewed-by: Konrad Rzeszutek Wilk --- include/xen/xen.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include') diff --git a/include/xen/xen.h b/include/xen/xen.h index f0f0252cff9a..d0f96840f71f 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -29,17 +29,6 @@ extern enum xen_domain_type xen_domain_type; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ -#ifdef CONFIG_XEN_PVH -/* This functionality exists only for x86. The XEN_PVHVM support exists - * only in x86 world - hence on ARM it will be always disabled. - * N.B. ARM guests are neither PV nor HVM nor PVHVM. - * It's a bit like PVH but is different also (it's further towards the H - * end of the spectrum than even PVH). - */ -#include -#define xen_pvh_domain() (xen_pv_domain() && \ - xen_feature(XENFEAT_auto_translated_physmap)) -#else #define xen_pvh_domain() (0) -#endif + #endif /* _XEN_XEN_H */ -- cgit v1.2.3 From cee2cfb7d18db1d7bcd0952b5e0e3f19c004023c Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 3 Feb 2017 16:57:22 -0500 Subject: xen/pvh: Import PVH-related Xen public interfaces Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Reviewed-by: Konrad Rzeszutek Wilk --- include/xen/interface/elfnote.h | 12 ++- include/xen/interface/hvm/hvm_vcpu.h | 143 +++++++++++++++++++++++++++++++++ include/xen/interface/hvm/start_info.h | 98 ++++++++++++++++++++++ 3 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 include/xen/interface/hvm/hvm_vcpu.h create mode 100644 include/xen/interface/hvm/start_info.h (limited to 'include') diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index f90b03454659..9e9f9bf7c66d 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -192,10 +192,20 @@ */ #define XEN_ELFNOTE_SUPPORTED_FEATURES 17 +/* + * Physical entry point into the kernel. + * + * 32bit entry point into the kernel. When requested to launch the + * guest kernel in a HVM container, Xen will use this entry point to + * launch the guest in 32bit protected mode with paging disabled. + * Ignored otherwise. + */ +#define XEN_ELFNOTE_PHYS32_ENTRY 18 + /* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ diff --git a/include/xen/interface/hvm/hvm_vcpu.h b/include/xen/interface/hvm/hvm_vcpu.h new file mode 100644 index 000000000000..32ca83edd44d --- /dev/null +++ b/include/xen/interface/hvm/hvm_vcpu.h @@ -0,0 +1,143 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2015, Roger Pau Monne + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__ +#define __XEN_PUBLIC_HVM_HVM_VCPU_H__ + +#include "../xen.h" + +struct vcpu_hvm_x86_32 { + uint32_t eax; + uint32_t ecx; + uint32_t edx; + uint32_t ebx; + uint32_t esp; + uint32_t ebp; + uint32_t esi; + uint32_t edi; + uint32_t eip; + uint32_t eflags; + + uint32_t cr0; + uint32_t cr3; + uint32_t cr4; + + uint32_t pad1; + + /* + * EFER should only be used to set the NXE bit (if required) + * when starting a vCPU in 32bit mode with paging enabled or + * to set the LME/LMA bits in order to start the vCPU in + * compatibility mode. + */ + uint64_t efer; + + uint32_t cs_base; + uint32_t ds_base; + uint32_t ss_base; + uint32_t es_base; + uint32_t tr_base; + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t ss_limit; + uint32_t es_limit; + uint32_t tr_limit; + uint16_t cs_ar; + uint16_t ds_ar; + uint16_t ss_ar; + uint16_t es_ar; + uint16_t tr_ar; + + uint16_t pad2[3]; +}; + +/* + * The layout of the _ar fields of the segment registers is the + * following: + * + * Bits [0,3]: type (bits 40-43). + * Bit 4: s (descriptor type, bit 44). + * Bit [5,6]: dpl (descriptor privilege level, bits 45-46). + * Bit 7: p (segment-present, bit 47). + * Bit 8: avl (available for system software, bit 52). + * Bit 9: l (64-bit code segment, bit 53). + * Bit 10: db (meaning depends on the segment, bit 54). + * Bit 11: g (granularity, bit 55) + * Bits [12,15]: unused, must be blank. + * + * A more complete description of the meaning of this fields can be + * obtained from the Intel SDM, Volume 3, section 3.4.5. + */ + +struct vcpu_hvm_x86_64 { + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr3; + uint64_t cr4; + uint64_t efer; + + /* + * Using VCPU_HVM_MODE_64B implies that the vCPU is launched + * directly in long mode, so the cached parts of the segment + * registers get set to match that environment. + * + * If the user wants to launch the vCPU in compatibility mode + * the 32-bit structure should be used instead. + */ +}; + +struct vcpu_hvm_context { +#define VCPU_HVM_MODE_32B 0 /* 32bit fields of the structure will be used. */ +#define VCPU_HVM_MODE_64B 1 /* 64bit fields of the structure will be used. */ + uint32_t mode; + + uint32_t pad; + + /* CPU registers. */ + union { + struct vcpu_hvm_x86_32 x86_32; + struct vcpu_hvm_x86_64 x86_64; + } cpu_regs; +}; +typedef struct vcpu_hvm_context vcpu_hvm_context_t; + +#endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/hvm/start_info.h b/include/xen/interface/hvm/start_info.h new file mode 100644 index 000000000000..648415976ead --- /dev/null +++ b/include/xen/interface/hvm/start_info.h @@ -0,0 +1,98 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2016, Citrix Systems, Inc. + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ +#define __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ + +/* + * Start of day structure passed to PVH guests and to HVM guests in %ebx. + * + * NOTE: nothing will be loaded at physical address 0, so a 0 value in any + * of the address fields should be treated as not present. + * + * 0 +----------------+ + * | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE + * | | ("xEn3" with the 0x80 bit of the "E" set). + * 4 +----------------+ + * | version | Version of this structure. Current version is 0. New + * | | versions are guaranteed to be backwards-compatible. + * 8 +----------------+ + * | flags | SIF_xxx flags. + * 12 +----------------+ + * | nr_modules | Number of modules passed to the kernel. + * 16 +----------------+ + * | modlist_paddr | Physical address of an array of modules + * | | (layout of the structure below). + * 24 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 32 +----------------+ + * | rsdp_paddr | Physical address of the RSDP ACPI data structure. + * 40 +----------------+ + * + * The layout of each entry in the module structure is the following: + * + * 0 +----------------+ + * | paddr | Physical address of the module. + * 8 +----------------+ + * | size | Size of the module in bytes. + * 16 +----------------+ + * | cmdline_paddr | Physical address of the command line, + * | | a zero-terminated ASCII string. + * 24 +----------------+ + * | reserved | + * 32 +----------------+ + * + * The address and sizes are always a 64bit little endian unsigned integer. + * + * NB: Xen on x86 will always try to place all the data below the 4GiB + * boundary. + */ +#define XEN_HVM_START_MAGIC_VALUE 0x336ec578 + +/* + * C representation of the x86/HVM start info layout. + * + * The canonical definition of this layout is above, this is just a way to + * represent the layout described there using C types. + */ +struct hvm_start_info { + uint32_t magic; /* Contains the magic value 0x336ec578 */ + /* ("xEn3" with the 0x80 bit of the "E" set).*/ + uint32_t version; /* Version of this structure. */ + uint32_t flags; /* SIF_xxx flags. */ + uint32_t nr_modules; /* Number of modules passed to the kernel. */ + uint64_t modlist_paddr; /* Physical address of an array of */ + /* hvm_modlist_entry. */ + uint64_t cmdline_paddr; /* Physical address of the command line. */ + uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */ + /* structure. */ +}; + +struct hvm_modlist_entry { + uint64_t paddr; /* Physical address of the module. */ + uint64_t size; /* Size of the module in bytes. */ + uint64_t cmdline_paddr; /* Physical address of the command line. */ + uint64_t reserved; +}; + +#endif /* __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ */ -- cgit v1.2.3 From 7243b93345f7f8de260e8f5b4670803e64fcbb00 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sun, 5 Feb 2017 19:50:52 -0500 Subject: xen/pvh: Bootstrap PVH guest Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall page, initialize boot_params, enable early page tables. Since this stub is executed before kernel entry point we cannot use variables in .bss which is cleared by kernel. We explicitly place variables that are initialized here into .data. While adjusting xen_hvm_init_shared_info() make it use cpuid_e?x() instead of cpuid() (wherever possible). Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/Kconfig | 2 +- arch/x86/xen/Makefile | 1 + arch/x86/xen/enlighten.c | 124 +++++++++++++++++++++++++++++++++--- arch/x86/xen/xen-pvh.S | 161 +++++++++++++++++++++++++++++++++++++++++++++++ include/xen/xen.h | 5 ++ 5 files changed, 282 insertions(+), 11 deletions(-) create mode 100644 arch/x86/xen/xen-pvh.S (limited to 'include') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index c7b15f3e2cf3..76b6dbd627df 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -53,5 +53,5 @@ config XEN_DEBUG_FS config XEN_PVH bool "Support for running as a PVH guest" - depends on X86_64 && XEN && XEN_PVHVM + depends on XEN && XEN_PVHVM && ACPI def_bool n diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index e47e52787d32..cb0164aee156 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o +obj-$(CONFIG_XEN_PVH) += xen-pvh.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 828f1b226f56..d2144f7c8fab 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -176,6 +177,20 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); +#ifdef CONFIG_XEN_PVH +/* + * PVH variables. + * + * xen_pvh and pvh_bootparams need to live in data segment since they + * are used after startup_{32|64}, which clear .bss, are invoked. + */ +bool xen_pvh __attribute__((section(".data"))) = 0; +struct boot_params pvh_bootparams __attribute__((section(".data"))); + +struct hvm_start_info pvh_start_info; +unsigned int pvh_start_info_sz = sizeof(pvh_start_info); +#endif + static void clamp_max_cpus(void) { #ifdef CONFIG_SMP @@ -1656,6 +1671,90 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif } +#ifdef CONFIG_XEN_PVH +static void __init init_pvh_bootparams(void) +{ + struct xen_memory_map memmap; + unsigned int i; + int rc; + + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); + + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map); + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map); + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc) { + xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); + BUG(); + } + + if (memmap.nr_entries < E820MAX - 1) { + pvh_bootparams.e820_map[memmap.nr_entries].addr = + ISA_START_ADDRESS; + pvh_bootparams.e820_map[memmap.nr_entries].size = + ISA_END_ADDRESS - ISA_START_ADDRESS; + pvh_bootparams.e820_map[memmap.nr_entries].type = + E820_RESERVED; + memmap.nr_entries++; + } else + xen_raw_printk("Warning: Can fit ISA range into e820\n"); + + sanitize_e820_map(pvh_bootparams.e820_map, + ARRAY_SIZE(pvh_bootparams.e820_map), + &memmap.nr_entries); + + pvh_bootparams.e820_entries = memmap.nr_entries; + for (i = 0; i < pvh_bootparams.e820_entries; i++) + e820_add_region(pvh_bootparams.e820_map[i].addr, + pvh_bootparams.e820_map[i].size, + pvh_bootparams.e820_map[i].type); + + pvh_bootparams.hdr.cmd_line_ptr = + pvh_start_info.cmdline_paddr; + + /* The first module is always ramdisk. */ + if (pvh_start_info.nr_modules) { + struct hvm_modlist_entry *modaddr = + __va(pvh_start_info.modlist_paddr); + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr; + pvh_bootparams.hdr.ramdisk_size = modaddr->size; + } + + /* + * See Documentation/x86/boot.txt. + * + * Version 2.12 supports Xen entry point but we will use default x86/PC + * environment (i.e. hardware_subarch 0). + */ + pvh_bootparams.hdr.version = 0x212; + pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ +} + +/* + * This routine (and those that it might call) should not use + * anything that lives in .bss since that segment will be cleared later. + */ +void __init xen_prepare_pvh(void) +{ + u32 msr; + u64 pfn; + + if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) { + xen_raw_printk("Error: Unexpected magic value (0x%08x)\n", + pvh_start_info.magic); + BUG(); + } + + xen_pvh = 1; + + msr = cpuid_ebx(xen_cpuid_base() + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + init_pvh_bootparams(); +} +#endif + void __ref xen_hvm_init_shared_info(void) { int cpu; @@ -1695,20 +1794,29 @@ void __ref xen_hvm_init_shared_info(void) static void __init init_hvm_pv_info(void) { int major, minor; - uint32_t eax, ebx, ecx, edx, pages, msr, base; - u64 pfn; + uint32_t eax, ebx, ecx, edx, base; base = xen_cpuid_base(); - cpuid(base + 1, &eax, &ebx, &ecx, &edx); + eax = cpuid_eax(base + 1); major = eax >> 16; minor = eax & 0xffff; printk(KERN_INFO "Xen version %d.%d.\n", major, minor); - cpuid(base + 2, &pages, &msr, &ecx, &edx); + xen_domain_type = XEN_HVM_DOMAIN; - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + /* PVH set up hypercall page in xen_prepare_pvh(). */ + if (xen_pvh_domain()) + pv_info.name = "Xen PVH"; + else { + u64 pfn; + uint32_t msr; + + pv_info.name = "Xen HVM"; + msr = cpuid_ebx(base + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + } xen_setup_features(); @@ -1717,10 +1825,6 @@ static void __init init_hvm_pv_info(void) this_cpu_write(xen_vcpu_id, ebx); else this_cpu_write(xen_vcpu_id, smp_processor_id()); - - pv_info.name = "Xen HVM"; - - xen_domain_type = XEN_HVM_DOMAIN; } #endif diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S new file mode 100644 index 000000000000..5e246716d58f --- /dev/null +++ b/arch/x86/xen/xen-pvh.S @@ -0,0 +1,161 @@ +/* + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ + + .code32 + .text +#define _pa(x) ((x) - __START_KERNEL_map) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + __HEAD + +/* + * Entry point for PVH guests. + * + * Xen ABI specifies the following register state when we come here: + * + * - `ebx`: contains the physical memory address where the loader has placed + * the boot start info structure. + * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared. + * - `cr4`: all bits are cleared. + * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’ + * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified. + * - `ds`, `es`: must be a 32-bit read/write data segment with a base of + * ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all + * unspecified. + * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit + * of '0x67'. + * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared. + * Bit 8 (TF) must be cleared. Other bits are all unspecified. + * + * All other processor registers and flag bits are unspecified. The OS is in + * charge of setting up it's own stack, GDT and IDT. + */ + +ENTRY(pvh_start_xen) + cld + + lgdt (_pa(gdt)) + + mov $(__BOOT_DS),%eax + mov %eax,%ds + mov %eax,%es + mov %eax,%ss + + /* Stash hvm_start_info. */ + mov $_pa(pvh_start_info), %edi + mov %ebx, %esi + mov _pa(pvh_start_info_sz), %ecx + shr $2,%ecx + rep + movsl + + mov $_pa(early_stack_end), %esp + + /* Enable PAE mode. */ + mov %cr4, %eax + orl $X86_CR4_PAE, %eax + mov %eax, %cr4 + +#ifdef CONFIG_X86_64 + /* Enable Long mode. */ + mov $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + /* Enable pre-constructed page tables. */ + mov $_pa(init_level4_pgt), %eax + mov %eax, %cr3 + mov $(X86_CR0_PG | X86_CR0_PE), %eax + mov %eax, %cr0 + + /* Jump to 64-bit mode. */ + ljmp $__KERNEL_CS, $_pa(1f) + + /* 64-bit entry point. */ + .code64 +1: + call xen_prepare_pvh + + /* startup_64 expects boot_params in %rsi. */ + mov $_pa(pvh_bootparams), %rsi + mov $_pa(startup_64), %rax + jmp *%rax + +#else /* CONFIG_X86_64 */ + + call mk_early_pgtbl_32 + + mov $_pa(initial_page_table), %eax + mov %eax, %cr3 + + mov %cr0, %eax + or $(X86_CR0_PG | X86_CR0_PE), %eax + mov %eax, %cr0 + + ljmp $__BOOT_CS, $1f +1: + call xen_prepare_pvh + mov $_pa(pvh_bootparams), %esi + + /* startup_32 doesn't expect paging and PAE to be on. */ + ljmp $__BOOT_CS, $_pa(2f) +2: + mov %cr0, %eax + and $~X86_CR0_PG, %eax + mov %eax, %cr0 + mov %cr4, %eax + and $~X86_CR4_PAE, %eax + mov %eax, %cr4 + + ljmp $__BOOT_CS, $_pa(startup_32) +#endif +END(pvh_start_xen) + + .section ".init.data","aw" + .balign 8 +gdt: + .word gdt_end - gdt_start + .long _pa(gdt_start) + .word 0 +gdt_start: + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* reserved */ +#ifdef CONFIG_X86_64 + .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */ +#else + .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */ +#endif + .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */ +gdt_end: + + .balign 4 +early_stack: + .fill 256, 1, 0 +early_stack_end: + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, + _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) diff --git a/include/xen/xen.h b/include/xen/xen.h index d0f96840f71f..6e8b7fc79801 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -29,6 +29,11 @@ extern enum xen_domain_type xen_domain_type; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ +#ifdef CONFIG_XEN_PVH +extern bool xen_pvh; +#define xen_pvh_domain() (xen_hvm_domain() && xen_pvh) +#else #define xen_pvh_domain() (0) +#endif #endif /* _XEN_XEN_H */ -- cgit v1.2.3 From 332f791dc98d98116f4473b726f67c9321b0f31e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Feb 2017 14:39:56 +0100 Subject: xen: clean up xenbus internal headers The xenbus driver has an awful mixture of internally and globally visible headers: some of the internally used only stuff is defined in the global header include/xen/xenbus.h while some stuff defined in internal headers is used by other drivers, too. Clean this up by moving the externally used symbols to include/xen/xenbus.h and the symbols used internally only to a new header drivers/xen/xenbus/xenbus.h replacing xenbus_comms.h and xenbus_probe.h Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus.h | 97 ++++++++++++++++++++++++++++++ drivers/xen/xenbus/xenbus_client.c | 2 +- drivers/xen/xenbus/xenbus_comms.c | 2 +- drivers/xen/xenbus/xenbus_comms.h | 51 ---------------- drivers/xen/xenbus/xenbus_dev_backend.c | 2 +- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +- drivers/xen/xenbus/xenbus_probe.c | 3 +- drivers/xen/xenbus/xenbus_probe.h | 88 --------------------------- drivers/xen/xenbus/xenbus_probe_backend.c | 3 +- drivers/xen/xenbus/xenbus_probe_frontend.c | 3 +- drivers/xen/xenbus/xenbus_xs.c | 3 +- drivers/xen/xenfs/super.c | 2 +- drivers/xen/xenfs/xenstored.c | 2 +- include/xen/xenbus.h | 12 ++-- 14 files changed, 113 insertions(+), 161 deletions(-) create mode 100644 drivers/xen/xenbus/xenbus.h delete mode 100644 drivers/xen/xenbus/xenbus_comms.h delete mode 100644 drivers/xen/xenbus/xenbus_probe.h (limited to 'include') diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h new file mode 100644 index 000000000000..a6b007dfdaa8 --- /dev/null +++ b/drivers/xen/xenbus/xenbus.h @@ -0,0 +1,97 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_XENBUS_H +#define _XENBUS_XENBUS_H + +#define XEN_BUS_ID_SIZE 20 + +struct xen_bus_type { + char *root; + unsigned int levels; + int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); + int (*probe)(struct xen_bus_type *bus, const char *type, + const char *dir); + void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, + unsigned int len); + struct bus_type bus; +}; + +enum xenstore_init { + XS_UNKNOWN, + XS_PV, + XS_HVM, + XS_LOCAL, +}; + +extern enum xenstore_init xen_store_domain_type; +extern const struct attribute_group *xenbus_dev_groups[]; + +int xs_init(void); +int xb_init_comms(void); +void xb_deinit_comms(void); +int xb_write(const void *data, unsigned int len); +int xb_read(void *data, unsigned int len); +int xb_data_to_read(void); +int xb_wait_for_data_to_read(void); + +int xenbus_match(struct device *_dev, struct device_driver *_drv); +int xenbus_dev_probe(struct device *_dev); +int xenbus_dev_remove(struct device *_dev); +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name); +int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename); +int xenbus_probe_devices(struct xen_bus_type *bus); + +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); + +void xenbus_dev_shutdown(struct device *_dev); + +int xenbus_dev_suspend(struct device *dev); +int xenbus_dev_resume(struct device *dev); +int xenbus_dev_cancel(struct device *dev); + +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown); + +int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + +void xenbus_ring_ops_init(void); + +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); + +#endif diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 915d77785193..29f82338ab75 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -47,7 +47,7 @@ #include #include -#include "xenbus_probe.h" +#include "xenbus.h" #define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE)) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index ecdecce80a6c..c21ec02643e1 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -40,7 +40,7 @@ #include #include #include -#include "xenbus_comms.h" +#include "xenbus.h" static int xenbus_irq; diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h deleted file mode 100644 index 867a2e425208..000000000000 --- a/drivers/xen/xenbus/xenbus_comms.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Private include for xenbus communications. - * - * Copyright (C) 2005 Rusty Russell, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _XENBUS_COMMS_H -#define _XENBUS_COMMS_H - -#include - -int xs_init(void); -int xb_init_comms(void); -void xb_deinit_comms(void); - -/* Low level routines. */ -int xb_write(const void *data, unsigned len); -int xb_read(void *data, unsigned len); -int xb_data_to_read(void); -int xb_wait_for_data_to_read(void); -extern struct xenstore_domain_interface *xen_store_interface; -extern int xen_store_evtchn; -extern enum xenstore_init xen_store_domain_type; - -extern const struct file_operations xen_xenbus_fops; - -#endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c index 4a41ac9af966..1126701e212e 100644 --- a/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -16,7 +16,7 @@ #include #include -#include "xenbus_comms.h" +#include "xenbus.h" static int xenbus_backend_open(struct inode *inode, struct file *filp) { diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 79130b310247..e2bc9b301494 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -57,12 +57,12 @@ #include #include -#include "xenbus_comms.h" - #include #include #include +#include "xenbus.h" + /* * An element of a list of outstanding transactions, for which we're * still waiting a reply. diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4bdf654041e9..6baffbb6acf9 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -62,8 +62,7 @@ #include -#include "xenbus_comms.h" -#include "xenbus_probe.h" +#include "xenbus.h" int xen_store_evtchn; diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h deleted file mode 100644 index c9ec7ca1f7ab..000000000000 --- a/drivers/xen/xenbus/xenbus_probe.h +++ /dev/null @@ -1,88 +0,0 @@ -/****************************************************************************** - * xenbus_probe.h - * - * Talks to Xen Store to figure out what devices we have. - * - * Copyright (C) 2005 Rusty Russell, IBM Corporation - * Copyright (C) 2005 XenSource Ltd. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _XENBUS_PROBE_H -#define _XENBUS_PROBE_H - -#define XEN_BUS_ID_SIZE 20 - -struct xen_bus_type { - char *root; - unsigned int levels; - int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); - int (*probe)(struct xen_bus_type *bus, const char *type, - const char *dir); - void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, - unsigned int len); - struct bus_type bus; -}; - -enum xenstore_init { - XS_UNKNOWN, - XS_PV, - XS_HVM, - XS_LOCAL, -}; - -extern const struct attribute_group *xenbus_dev_groups[]; - -extern int xenbus_match(struct device *_dev, struct device_driver *_drv); -extern int xenbus_dev_probe(struct device *_dev); -extern int xenbus_dev_remove(struct device *_dev); -extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name); -extern int xenbus_probe_node(struct xen_bus_type *bus, - const char *type, - const char *nodename); -extern int xenbus_probe_devices(struct xen_bus_type *bus); - -extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); - -extern void xenbus_dev_shutdown(struct device *_dev); - -extern int xenbus_dev_suspend(struct device *dev); -extern int xenbus_dev_resume(struct device *dev); -extern int xenbus_dev_cancel(struct device *dev); - -extern void xenbus_otherend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len, - int ignore_on_shutdown); - -extern int xenbus_read_otherend_details(struct xenbus_device *xendev, - char *id_node, char *path_node); - -void xenbus_ring_ops_init(void); - -#endif diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 37929df829a3..f46b4dc72c76 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -53,8 +53,7 @@ #include #include -#include "xenbus_comms.h" -#include "xenbus_probe.h" +#include "xenbus.h" /* backend/// => -- */ static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 6d40a972ffb2..d7b77a62e6e7 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -27,8 +27,7 @@ #include -#include "xenbus_comms.h" -#include "xenbus_probe.h" +#include "xenbus.h" diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 6afb993c5809..4c49d8709765 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -48,8 +48,7 @@ #include #include #include -#include "xenbus_comms.h" -#include "xenbus_probe.h" +#include "xenbus.h" struct xs_stored_msg { struct list_head list; diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 8559a71f36b1..328c3987b112 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -16,10 +16,10 @@ #include #include +#include #include "xenfs.h" #include "../privcmd.h" -#include "../xenbus/xenbus_comms.h" #include diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c index fef20dbc6a5c..82fd2a396d96 100644 --- a/drivers/xen/xenfs/xenstored.c +++ b/drivers/xen/xenfs/xenstored.c @@ -4,9 +4,9 @@ #include #include +#include #include "xenfs.h" -#include "../xenbus/xenbus_comms.h" static ssize_t xsd_read(struct file *file, char __user *buf, size_t size, loff_t *off) diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 271ba62503c7..98f73a20725c 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -175,16 +176,9 @@ void xs_suspend(void); void xs_resume(void); void xs_suspend_cancel(void); -/* Used by xenbus_dev to borrow kernel's store connection. */ -void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); - struct work_struct; -/* Prepare for domain suspend: then resume or cancel the suspend. */ -void xenbus_suspend(void); -void xenbus_resume(void); void xenbus_probe(struct work_struct *); -void xenbus_suspend_cancel(void); #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ @@ -235,4 +229,8 @@ const char *xenbus_strstate(enum xenbus_state state); int xenbus_dev_is_online(struct xenbus_device *dev); int xenbus_frontend_closed(struct xenbus_device *dev); +extern const struct file_operations xen_xenbus_fops; +extern struct xenstore_domain_interface *xen_store_interface; +extern int xen_store_evtchn; + #endif /* _XEN_XENBUS_H */ -- cgit v1.2.3 From 5584ea250ae44f929feb4c7bd3877d1c5edbf813 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 9 Feb 2017 14:39:57 +0100 Subject: xen: modify xenstore watch event interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today a Xenstore watch event is delivered via a callback function declared as: void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); As all watch events only ever come with two parameters (path and token) changing the prototype to: void (*callback)(struct xenbus_watch *, const char *path, const char *token); is the natural thing to do. Apply this change and adapt all users. Cc: konrad.wilk@oracle.com Cc: roger.pau@citrix.com Cc: wei.liu2@citrix.com Cc: paul.durrant@citrix.com Cc: netdev@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Paul Durrant Reviewed-by: Wei Liu Reviewed-by: Roger Pau Monné Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/block/xen-blkback/xenbus.c | 6 +++--- drivers/net/xen-netback/xenbus.c | 8 ++++---- drivers/xen/cpu_hotplug.c | 5 ++--- drivers/xen/manage.c | 6 +++--- drivers/xen/xen-balloon.c | 2 +- drivers/xen/xen-pciback/xenbus.c | 2 +- drivers/xen/xenbus/xenbus.h | 6 +++--- drivers/xen/xenbus/xenbus_client.c | 4 ++-- drivers/xen/xenbus/xenbus_dev_frontend.c | 21 ++++++++------------- drivers/xen/xenbus/xenbus_probe.c | 11 ++++------- drivers/xen/xenbus/xenbus_probe_backend.c | 8 ++++---- drivers/xen/xenbus/xenbus_probe_frontend.c | 14 +++++++------- drivers/xen/xenbus/xenbus_xs.c | 29 ++++++++++++++--------------- include/xen/xenbus.h | 6 +++--- 14 files changed, 59 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 415e79b69d34..8fe61b5dc5a6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -38,8 +38,8 @@ struct backend_info { static struct kmem_cache *xen_blkif_cachep; static void connect(struct backend_info *); static int connect_ring(struct backend_info *); -static void backend_changed(struct xenbus_watch *, const char **, - unsigned int); +static void backend_changed(struct xenbus_watch *, const char *, + const char *); static void xen_blkif_free(struct xen_blkif *blkif); static void xen_vbd_free(struct xen_vbd *vbd); @@ -661,7 +661,7 @@ fail: * ready, connect. */ static void backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { int err; unsigned major; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 85b742e1c42f..bb854f92f5a5 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -734,7 +734,7 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) } static void xen_net_rate_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { struct xenvif *vif = container_of(watch, struct xenvif, credit_watch); struct xenbus_device *dev = xenvif_to_xenbus_device(vif); @@ -791,7 +791,7 @@ static void xen_unregister_credit_watch(struct xenvif *vif) } static void xen_mcast_ctrl_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { struct xenvif *vif = container_of(watch, struct xenvif, mcast_ctrl_watch); @@ -866,8 +866,8 @@ static void unregister_hotplug_status_watch(struct backend_info *be) } static void hotplug_status_changed(struct xenbus_watch *watch, - const char **vec, - unsigned int vec_size) + const char *path, + const char *token) { struct backend_info *be = container_of(watch, struct backend_info, diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 0bab60a37464..0003912a8111 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -68,13 +68,12 @@ static void vcpu_hotplug(unsigned int cpu) } static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { unsigned int cpu; char *cpustr; - const char *node = vec[XS_WATCH_PATH]; - cpustr = strstr(node, "cpu/"); + cpustr = strstr(path, "cpu/"); if (cpustr != NULL) { sscanf(cpustr, "cpu/%u", &cpu); vcpu_hotplug(cpu); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 357a8db859c9..c1ec8ee80924 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -218,7 +218,7 @@ static struct shutdown_handler shutdown_handlers[] = { }; static void shutdown_handler(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { char *str; struct xenbus_transaction xbt; @@ -266,8 +266,8 @@ static void shutdown_handler(struct xenbus_watch *watch, } #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handler(struct xenbus_watch *watch, const char **vec, - unsigned int len) +static void sysrq_handler(struct xenbus_watch *watch, const char *path, + const char *token) { char sysrq_key = '\0'; struct xenbus_transaction xbt; diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 79865b8901ba..e7715cb62eef 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -55,7 +55,7 @@ static int register_balloon(struct device *dev); /* React to a change in the target key */ static void watch_target(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { unsigned long long new_target; int err; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 3f0aee0a068b..3814b44bf1f7 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -652,7 +652,7 @@ out: } static void xen_pcibk_be_watch(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { struct xen_pcibk_device *pdev = container_of(watch, struct xen_pcibk_device, be_watch); diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index a6b007dfdaa8..51995276f549 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -40,8 +40,8 @@ struct xen_bus_type { int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir); - void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, - unsigned int len); + void (*otherend_changed)(struct xenbus_watch *watch, const char *path, + const char *token); struct bus_type bus; }; @@ -84,7 +84,7 @@ int xenbus_dev_resume(struct device *dev); int xenbus_dev_cancel(struct device *dev); void xenbus_otherend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len, + const char *path, const char *token, int ignore_on_shutdown); int xenbus_read_otherend_details(struct xenbus_device *xendev, diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 29f82338ab75..82a8866758ee 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, - const char **, unsigned int)) + const char *, const char *)) { int err; @@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, - const char **, unsigned int), + const char *, const char *), const char *pathfmt, ...) { int err; diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index e2bc9b301494..e4b984777507 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -258,26 +258,23 @@ out_fail: } static void watch_fired(struct xenbus_watch *watch, - const char **vec, - unsigned int len) + const char *path, + const char *token) { struct watch_adapter *adap; struct xsd_sockmsg hdr; - const char *path, *token; - int path_len, tok_len, body_len, data_len = 0; + const char *token_caller; + int path_len, tok_len, body_len; int ret; LIST_HEAD(staging_q); adap = container_of(watch, struct watch_adapter, watch); - path = vec[XS_WATCH_PATH]; - token = adap->token; + token_caller = adap->token; path_len = strlen(path) + 1; - tok_len = strlen(token) + 1; - if (len > 2) - data_len = vec[len] - vec[2] + 1; - body_len = path_len + tok_len + data_len; + tok_len = strlen(token_caller) + 1; + body_len = path_len + tok_len; hdr.type = XS_WATCH_EVENT; hdr.len = body_len; @@ -288,9 +285,7 @@ static void watch_fired(struct xenbus_watch *watch, if (!ret) ret = queue_reply(&staging_q, path, path_len); if (!ret) - ret = queue_reply(&staging_q, token, tok_len); - if (!ret && len > 2) - ret = queue_reply(&staging_q, vec[2], data_len); + ret = queue_reply(&staging_q, token_caller, tok_len); if (!ret) { /* success: pass reply list onto watcher */ diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 6baffbb6acf9..74888cacd0b0 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -169,7 +169,7 @@ int xenbus_read_otherend_details(struct xenbus_device *xendev, EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); void xenbus_otherend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len, + const char *path, const char *token, int ignore_on_shutdown) { struct xenbus_device *dev = @@ -180,18 +180,15 @@ void xenbus_otherend_changed(struct xenbus_watch *watch, /* Protect us against watches firing on old details when the otherend details change, say immediately after a resume. */ if (!dev->otherend || - strncmp(dev->otherend, vec[XS_WATCH_PATH], - strlen(dev->otherend))) { - dev_dbg(&dev->dev, "Ignoring watch at %s\n", - vec[XS_WATCH_PATH]); + strncmp(dev->otherend, path, strlen(dev->otherend))) { + dev_dbg(&dev->dev, "Ignoring watch at %s\n", path); return; } state = xenbus_read_driver_state(dev->otherend); dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", - state, xenbus_strstate(state), dev->otherend_watch.node, - vec[XS_WATCH_PATH]); + state, xenbus_strstate(state), dev->otherend_watch.node, path); /* * Ignore xenbus transitions during shutdown. This prevents us doing diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index f46b4dc72c76..b0bed4faf44c 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -181,9 +181,9 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, } static void frontend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { - xenbus_otherend_changed(watch, vec, len, 0); + xenbus_otherend_changed(watch, path, token, 0); } static struct xen_bus_type xenbus_backend = { @@ -204,11 +204,11 @@ static struct xen_bus_type xenbus_backend = { }; static void backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { DPRINTK(""); - xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); + xenbus_dev_changed(path, &xenbus_backend); } static struct xenbus_watch be_watch = { diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index d7b77a62e6e7..19e45ce21f89 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -86,9 +86,9 @@ static int xenbus_uevent_frontend(struct device *_dev, static void backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { - xenbus_otherend_changed(watch, vec, len, 1); + xenbus_otherend_changed(watch, path, token, 1); } static void xenbus_frontend_delayed_resume(struct work_struct *w) @@ -153,11 +153,11 @@ static struct xen_bus_type xenbus_frontend = { }; static void frontend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char *path, const char *token) { DPRINTK(""); - xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); + xenbus_dev_changed(path, &xenbus_frontend); } @@ -332,13 +332,13 @@ static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); static int backend_state; static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, - const char **v, unsigned int l) + const char *path, const char *token) { - if (xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", + if (xenbus_scanf(XBT_NIL, path, "", "%i", &backend_state) != 1) backend_state = XenbusStateUnknown; printk(KERN_DEBUG "XENBUS: backend %s %s\n", - v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + path, xenbus_strstate(backend_state)); wake_up(&backend_state_wq); } diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 4c49d8709765..ebc768f44abe 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -64,8 +64,8 @@ struct xs_stored_msg { /* Queued watch events. */ struct { struct xenbus_watch *handle; - char **vec; - unsigned int vec_size; + const char *path; + const char *token; } watch; } u; }; @@ -765,7 +765,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch) if (msg->u.watch.handle != watch) continue; list_del(&msg->list); - kfree(msg->u.watch.vec); + kfree(msg->u.watch.path); kfree(msg); } spin_unlock(&watch_events_lock); @@ -833,11 +833,10 @@ static int xenwatch_thread(void *unused) if (ent != &watch_events) { msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); + msg->u.watch.handle->callback(msg->u.watch.handle, + msg->u.watch.path, + msg->u.watch.token); + kfree(msg->u.watch.path); kfree(msg); } @@ -903,24 +902,24 @@ static int process_msg(void) body[msg->hdr.len] = '\0'; if (msg->hdr.type == XS_WATCH_EVENT) { - msg->u.watch.vec = split(body, msg->hdr.len, - &msg->u.watch.vec_size); - if (IS_ERR(msg->u.watch.vec)) { - err = PTR_ERR(msg->u.watch.vec); + if (count_strings(body, msg->hdr.len) != 2) { + err = -EINVAL; kfree(msg); + kfree(body); goto out; } + msg->u.watch.path = (const char *)body; + msg->u.watch.token = (const char *)strchr(body, '\0') + 1; spin_lock(&watches_lock); - msg->u.watch.handle = find_watch( - msg->u.watch.vec[XS_WATCH_TOKEN]); + msg->u.watch.handle = find_watch(msg->u.watch.token); if (msg->u.watch.handle != NULL) { spin_lock(&watch_events_lock); list_add_tail(&msg->list, &watch_events); wake_up(&watch_events_waitq); spin_unlock(&watch_events_lock); } else { - kfree(msg->u.watch.vec); + kfree(body); kfree(msg); } spin_unlock(&watches_lock); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 98f73a20725c..869c816d5f8c 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -61,7 +61,7 @@ struct xenbus_watch /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, - const char **vec, unsigned int len); + const char *path, const char *token); }; @@ -193,11 +193,11 @@ void xenbus_probe(struct work_struct *); int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, - const char **, unsigned int)); + const char *, const char *)); __printf(4, 5) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, - const char **, unsigned int), + const char *, const char *), const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); -- cgit v1.2.3 From ab520be8cd5d56867fc95cfbc34b90880faf1f9d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 13 Feb 2017 17:03:23 +0000 Subject: xen/privcmd: Add IOCTL_PRIVCMD_DM_OP Recently a new dm_op[1] hypercall was added to Xen to provide a mechanism for restricting device emulators (such as QEMU) to a limited set of hypervisor operations, and being able to audit those operations in the kernel of the domain in which they run. This patch adds IOCTL_PRIVCMD_DM_OP as gateway for __HYPERVISOR_dm_op. NOTE: There is no requirement for user-space code to bounce data through locked memory buffers (as with IOCTL_PRIVCMD_HYPERCALL) since privcmd has enough information to lock the original buffers directly. [1] http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=524a98c2 Signed-off-by: Paul Durrant Acked-by: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- arch/arm/xen/enlighten.c | 1 + arch/arm/xen/hypercall.S | 1 + arch/arm64/xen/hypercall.S | 1 + arch/x86/include/asm/xen/hypercall.h | 7 ++ drivers/xen/privcmd.c | 139 +++++++++++++++++++++++++++++++++++ include/uapi/xen/privcmd.h | 13 ++++ include/xen/arm/hypercall.h | 1 + include/xen/interface/hvm/dm_op.h | 32 ++++++++ include/xen/interface/xen.h | 1 + 9 files changed, 196 insertions(+) create mode 100644 include/xen/interface/hvm/dm_op.h (limited to 'include') diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 11d9f2898b16..81e3217b12d3 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -457,4 +457,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); +EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index a648dfc3be30..b0b80c0f09f3 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -92,6 +92,7 @@ HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); +HYPERCALL3(dm_op); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 947830a459d2..401ceb71540c 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -84,6 +84,7 @@ HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); +HYPERCALL3(dm_op); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a12a047184ee..f6d20f6cca12 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -472,6 +472,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg) return _hypercall2(int, xenpmu_op, op, arg); } +static inline int +HYPERVISOR_dm_op( + domid_t dom, unsigned int nr_bufs, void *bufs) +{ + return _hypercall3(int, dm_op, dom, nr_bufs, bufs); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5e5c7aef0c9f..1a6f1860e008 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +45,17 @@ MODULE_LICENSE("GPL"); #define PRIV_VMA_LOCKED ((void *)1) +static unsigned int privcmd_dm_op_max_num = 16; +module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644); +MODULE_PARM_DESC(dm_op_max_nr_bufs, + "Maximum number of buffers per dm_op hypercall"); + +static unsigned int privcmd_dm_op_buf_max_size = 4096; +module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, + 0644); +MODULE_PARM_DESC(dm_op_buf_max_size, + "Maximum size of a dm_op hypercall buffer"); + static int privcmd_vma_range_is_mapped( struct vm_area_struct *vma, unsigned long addr, @@ -548,6 +561,128 @@ out_unlock: goto out; } +static int lock_pages( + struct privcmd_dm_op_buf kbufs[], unsigned int num, + struct page *pages[], unsigned int nr_pages) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + unsigned int requested; + int pinned; + + requested = DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, + PAGE_SIZE); + if (requested > nr_pages) + return -ENOSPC; + + pinned = get_user_pages_fast( + (unsigned long) kbufs[i].uptr, + requested, FOLL_WRITE, pages); + if (pinned < 0) + return pinned; + + nr_pages -= pinned; + pages += pinned; + } + + return 0; +} + +static void unlock_pages(struct page *pages[], unsigned int nr_pages) +{ + unsigned int i; + + if (!pages) + return; + + for (i = 0; i < nr_pages; i++) { + if (pages[i]) + put_page(pages[i]); + } +} + +static long privcmd_ioctl_dm_op(void __user *udata) +{ + struct privcmd_dm_op kdata; + struct privcmd_dm_op_buf *kbufs; + unsigned int nr_pages = 0; + struct page **pages = NULL; + struct xen_dm_op_buf *xbufs = NULL; + unsigned int i; + long rc; + + if (copy_from_user(&kdata, udata, sizeof(kdata))) + return -EFAULT; + + if (kdata.num == 0) + return 0; + + if (kdata.num > privcmd_dm_op_max_num) + return -E2BIG; + + kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL); + if (!kbufs) + return -ENOMEM; + + if (copy_from_user(kbufs, kdata.ubufs, + sizeof(*kbufs) * kdata.num)) { + rc = -EFAULT; + goto out; + } + + for (i = 0; i < kdata.num; i++) { + if (kbufs[i].size > privcmd_dm_op_buf_max_size) { + rc = -E2BIG; + goto out; + } + + if (!access_ok(VERIFY_WRITE, kbufs[i].uptr, + kbufs[i].size)) { + rc = -EFAULT; + goto out; + } + + nr_pages += DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, + PAGE_SIZE); + } + + pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + rc = -ENOMEM; + goto out; + } + + xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL); + if (!xbufs) { + rc = -ENOMEM; + goto out; + } + + rc = lock_pages(kbufs, kdata.num, pages, nr_pages); + if (rc) + goto out; + + for (i = 0; i < kdata.num; i++) { + set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); + xbufs[i].size = kbufs[i].size; + } + + xen_preemptible_hcall_begin(); + rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs); + xen_preemptible_hcall_end(); + +out: + unlock_pages(pages, nr_pages); + kfree(xbufs); + kfree(pages); + kfree(kbufs); + + return rc; +} + static long privcmd_ioctl(struct file *file, unsigned int cmd, unsigned long data) { @@ -571,6 +706,10 @@ static long privcmd_ioctl(struct file *file, ret = privcmd_ioctl_mmap_batch(udata, 2); break; + case IOCTL_PRIVCMD_DM_OP: + ret = privcmd_ioctl_dm_op(udata); + break; + default: break; } diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 7ddeeda93809..f8c5d75b99e1 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -77,6 +77,17 @@ struct privcmd_mmapbatch_v2 { int __user *err; /* array of error codes */ }; +struct privcmd_dm_op_buf { + void __user *uptr; + size_t size; +}; + +struct privcmd_dm_op { + domid_t dom; + __u16 num; + const struct privcmd_dm_op_buf __user *ubufs; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -98,5 +109,7 @@ struct privcmd_mmapbatch_v2 { _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) #define IOCTL_PRIVCMD_MMAPBATCH_V2 \ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) +#define IOCTL_PRIVCMD_DM_OP \ + _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/arm/hypercall.h b/include/xen/arm/hypercall.h index 9d874db13c0e..73db4b2eeb89 100644 --- a/include/xen/arm/hypercall.h +++ b/include/xen/arm/hypercall.h @@ -53,6 +53,7 @@ int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); +int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, void *bufs); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/include/xen/interface/hvm/dm_op.h b/include/xen/interface/hvm/dm_op.h new file mode 100644 index 000000000000..ee9e480bc559 --- /dev/null +++ b/include/xen/interface/hvm/dm_op.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016, Citrix Systems Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_DM_OP_H__ +#define __XEN_PUBLIC_HVM_DM_OP_H__ + +struct xen_dm_op_buf { + GUEST_HANDLE(void) h; + xen_ulong_t size; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_dm_op_buf); + +#endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 1b0d189cd3d3..4f4830ef8f93 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -81,6 +81,7 @@ #define __HYPERVISOR_tmem_op 38 #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ #define __HYPERVISOR_xenpmu_op 40 +#define __HYPERVISOR_dm_op 41 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 -- cgit v1.2.3 From 4610d240d691768203fdd210a5da0a2e02eddb76 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 13 Feb 2017 17:03:24 +0000 Subject: xen/privcmd: add IOCTL_PRIVCMD_RESTRICT The purpose if this ioctl is to allow a user of privcmd to restrict its operation such that it will no longer service arbitrary hypercalls via IOCTL_PRIVCMD_HYPERCALL, and will check for a matching domid when servicing IOCTL_PRIVCMD_DM_OP or IOCTL_PRIVCMD_MMAP*. The aim of this is to limit the attack surface for a compromised device model. Signed-off-by: Paul Durrant Signed-off-by: Boris Ostrovsky --- drivers/xen/privcmd.c | 88 +++++++++++++++++++++++++++++++++++++++++----- include/uapi/xen/privcmd.h | 2 ++ 2 files changed, 81 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1a6f1860e008..2077a3ac7c0c 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -56,16 +56,25 @@ module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, MODULE_PARM_DESC(dm_op_buf_max_size, "Maximum size of a dm_op hypercall buffer"); +struct privcmd_data { + domid_t domid; +}; + static int privcmd_vma_range_is_mapped( struct vm_area_struct *vma, unsigned long addr, unsigned long nr_pages); -static long privcmd_ioctl_hypercall(void __user *udata) +static long privcmd_ioctl_hypercall(struct file *file, void __user *udata) { + struct privcmd_data *data = file->private_data; struct privcmd_hypercall hypercall; long ret; + /* Disallow arbitrary hypercalls if restricted */ + if (data->domid != DOMID_INVALID) + return -EPERM; + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) return -EFAULT; @@ -242,8 +251,9 @@ static int mmap_gfn_range(void *data, void *state) return 0; } -static long privcmd_ioctl_mmap(void __user *udata) +static long privcmd_ioctl_mmap(struct file *file, void __user *udata) { + struct privcmd_data *data = file->private_data; struct privcmd_mmap mmapcmd; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -258,6 +268,10 @@ static long privcmd_ioctl_mmap(void __user *udata) if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) return -EFAULT; + /* If restriction is in place, check the domid matches */ + if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom) + return -EPERM; + rc = gather_array(&pagelist, mmapcmd.num, sizeof(struct privcmd_mmap_entry), mmapcmd.entry); @@ -429,8 +443,10 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) static const struct vm_operations_struct privcmd_vm_ops; -static long privcmd_ioctl_mmap_batch(void __user *udata, int version) +static long privcmd_ioctl_mmap_batch( + struct file *file, void __user *udata, int version) { + struct privcmd_data *data = file->private_data; int ret; struct privcmd_mmapbatch_v2 m; struct mm_struct *mm = current->mm; @@ -459,6 +475,10 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) return -EINVAL; } + /* If restriction is in place, check the domid matches */ + if (data->domid != DOMID_INVALID && data->domid != m.dom) + return -EPERM; + nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; @@ -603,8 +623,9 @@ static void unlock_pages(struct page *pages[], unsigned int nr_pages) } } -static long privcmd_ioctl_dm_op(void __user *udata) +static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) { + struct privcmd_data *data = file->private_data; struct privcmd_dm_op kdata; struct privcmd_dm_op_buf *kbufs; unsigned int nr_pages = 0; @@ -616,6 +637,10 @@ static long privcmd_ioctl_dm_op(void __user *udata) if (copy_from_user(&kdata, udata, sizeof(kdata))) return -EFAULT; + /* If restriction is in place, check the domid matches */ + if (data->domid != DOMID_INVALID && data->domid != kdata.dom) + return -EPERM; + if (kdata.num == 0) return 0; @@ -683,6 +708,23 @@ out: return rc; } +static long privcmd_ioctl_restrict(struct file *file, void __user *udata) +{ + struct privcmd_data *data = file->private_data; + domid_t dom; + + if (copy_from_user(&dom, udata, sizeof(dom))) + return -EFAULT; + + /* Set restriction to the specified domain, or check it matches */ + if (data->domid == DOMID_INVALID) + data->domid = dom; + else if (data->domid != dom) + return -EINVAL; + + return 0; +} + static long privcmd_ioctl(struct file *file, unsigned int cmd, unsigned long data) { @@ -691,23 +733,27 @@ static long privcmd_ioctl(struct file *file, switch (cmd) { case IOCTL_PRIVCMD_HYPERCALL: - ret = privcmd_ioctl_hypercall(udata); + ret = privcmd_ioctl_hypercall(file, udata); break; case IOCTL_PRIVCMD_MMAP: - ret = privcmd_ioctl_mmap(udata); + ret = privcmd_ioctl_mmap(file, udata); break; case IOCTL_PRIVCMD_MMAPBATCH: - ret = privcmd_ioctl_mmap_batch(udata, 1); + ret = privcmd_ioctl_mmap_batch(file, udata, 1); break; case IOCTL_PRIVCMD_MMAPBATCH_V2: - ret = privcmd_ioctl_mmap_batch(udata, 2); + ret = privcmd_ioctl_mmap_batch(file, udata, 2); break; case IOCTL_PRIVCMD_DM_OP: - ret = privcmd_ioctl_dm_op(udata); + ret = privcmd_ioctl_dm_op(file, udata); + break; + + case IOCTL_PRIVCMD_RESTRICT: + ret = privcmd_ioctl_restrict(file, udata); break; default: @@ -717,6 +763,28 @@ static long privcmd_ioctl(struct file *file, return ret; } +static int privcmd_open(struct inode *ino, struct file *file) +{ + struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL); + + if (!data) + return -ENOMEM; + + /* DOMID_INVALID implies no restriction */ + data->domid = DOMID_INVALID; + + file->private_data = data; + return 0; +} + +static int privcmd_release(struct inode *ino, struct file *file) +{ + struct privcmd_data *data = file->private_data; + + kfree(data); + return 0; +} + static void privcmd_close(struct vm_area_struct *vma) { struct page **pages = vma->vm_private_data; @@ -785,6 +853,8 @@ static int privcmd_vma_range_is_mapped( const struct file_operations xen_privcmd_fops = { .owner = THIS_MODULE, .unlocked_ioctl = privcmd_ioctl, + .open = privcmd_open, + .release = privcmd_release, .mmap = privcmd_mmap, }; EXPORT_SYMBOL_GPL(xen_privcmd_fops); diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index f8c5d75b99e1..63ee95c9dabb 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -111,5 +111,7 @@ struct privcmd_dm_op { _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) #define IOCTL_PRIVCMD_DM_OP \ _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op)) +#define IOCTL_PRIVCMD_RESTRICT \ + _IOC(_IOC_NONE, 'P', 6, sizeof(domid_t)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ -- cgit v1.2.3