From c2419b4a4727f67af2fc2cd68b0d878b75e781bb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 31 May 2011 10:50:10 -0400 Subject: xen: allow enable use of VGA console on dom0 Get the information about the VGA console hardware from Xen, and put it into the form the bootloader normally generates, so that the rest of the kernel can deal with VGA as usual. [ Impact: make VGA console work in dom0 ] Signed-off-by: Jeremy Fitzhardinge [v1: Rebased on 2.6.39] [v2: Removed incorrect comments and fixed compile warnings] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten.c | 8 ++++++ arch/x86/xen/vga.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-ops.h | 11 ++++++++ include/xen/interface/xen.h | 39 ++++++++++++++++++++++++++ 5 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 arch/x86/xen/vga.c diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 17c565de3d64..a6575b949b11 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -18,5 +18,5 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o - +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e3c6a06cf725..4abd2d5d04f7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1241,6 +1241,14 @@ asmlinkage void __init xen_start_kernel(void) if (pci_xen) x86_init.pci.arch_init = pci_xen_init; } else { + const struct dom0_vga_console_info *info = + (void *)((char *)xen_start_info + + xen_start_info->console.dom0.info_off); + + xen_init_vga(info, xen_start_info->console.dom0.info_size); + xen_start_info->console.domU.mfn = 0; + xen_start_info->console.domU.evtchn = 0; + /* Make sure ACS will be enabled */ pci_request_acs(); } diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c new 
file mode 100644 index 000000000000..1cd7f4d11e29 --- /dev/null +++ b/arch/x86/xen/vga.c @@ -0,0 +1,67 @@ +#include +#include + +#include +#include + +#include + +#include "xen-ops.h" + +void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) +{ + struct screen_info *screen_info = &boot_params.screen_info; + + /* This is drawn from a dump from vgacon:startup in + * standard Linux. */ + screen_info->orig_video_mode = 3; + screen_info->orig_video_isVGA = 1; + screen_info->orig_video_lines = 25; + screen_info->orig_video_cols = 80; + screen_info->orig_video_ega_bx = 3; + screen_info->orig_video_points = 16; + screen_info->orig_y = screen_info->orig_video_lines - 1; + + switch (info->video_type) { + case XEN_VGATYPE_TEXT_MODE_3: + if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) + + sizeof(info->u.text_mode_3)) + break; + screen_info->orig_video_lines = info->u.text_mode_3.rows; + screen_info->orig_video_cols = info->u.text_mode_3.columns; + screen_info->orig_x = info->u.text_mode_3.cursor_x; + screen_info->orig_y = info->u.text_mode_3.cursor_y; + screen_info->orig_video_points = + info->u.text_mode_3.font_height; + break; + + case XEN_VGATYPE_VESA_LFB: + if (size < offsetof(struct dom0_vga_console_info, + u.vesa_lfb.gbl_caps)) + break; + screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB; + screen_info->lfb_width = info->u.vesa_lfb.width; + screen_info->lfb_height = info->u.vesa_lfb.height; + screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel; + screen_info->lfb_base = info->u.vesa_lfb.lfb_base; + screen_info->lfb_size = info->u.vesa_lfb.lfb_size; + screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line; + screen_info->red_size = info->u.vesa_lfb.red_size; + screen_info->red_pos = info->u.vesa_lfb.red_pos; + screen_info->green_size = info->u.vesa_lfb.green_size; + screen_info->green_pos = info->u.vesa_lfb.green_pos; + screen_info->blue_size = info->u.vesa_lfb.blue_size; + screen_info->blue_pos = 
info->u.vesa_lfb.blue_pos; + screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; + screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + if (size >= offsetof(struct dom0_vga_console_info, + u.vesa_lfb.gbl_caps) + + sizeof(info->u.vesa_lfb.gbl_caps)) + screen_info->capabilities = info->u.vesa_lfb.gbl_caps; + if (size >= offsetof(struct dom0_vga_console_info, + u.vesa_lfb.mode_attrs) + + sizeof(info->u.vesa_lfb.mode_attrs)) + screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; + break; + } +} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3112f55638c4..e14c54eff23d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -88,6 +88,17 @@ static inline void xen_uninit_lock_cpu(int cpu) } #endif +struct dom0_vga_console_info; + +#ifdef CONFIG_XEN_DOM0 +void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); +#else +static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, + size_t size) +{ +} +#endif + /* Declare an asm function, along with symbols needed to make it inlineable */ #define DECL_ASM(ret, name, ...) \ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index b33257bc7e83..29b362eab6cc 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -449,6 +449,45 @@ struct start_info { int8_t cmd_line[MAX_GUEST_CMDLINE]; }; +struct dom0_vga_console_info { + uint8_t video_type; +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). 
*/ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; + + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; + } vesa_lfb; + } u; +}; + /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ -- cgit v1.2.3 From afec6e04922d0c8c7e244be2e544bac5e7e36294 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Fri, 17 Jun 2011 15:06:20 -0600 Subject: xen: prepare tmem shim to handle frontswap Provide the shim code for frontswap for Xen tmem even if the frontswap patchset is not present yet. (The egg is before the chicken.) Signed-off-by: Dan Magenheimer Reviewed-by: Konrad Wilk --- drivers/xen/Kconfig | 7 +++ drivers/xen/Makefile | 2 +- drivers/xen/tmem.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 166 insertions(+), 13 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..be8732c32185 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -105,4 +105,11 @@ config SWIOTLB_XEN depends on PCI select SWIOTLB +config XEN_TMEM + bool + default y if (CLEANCACHE || FRONTSWAP) + help + Shim to interface in-kernel Transcendent Memory hooks + (e.g. cleancache and frontswap) to Xen tmem hypercalls. 
+ endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index bbc18258ecc5..7c1bc0a4d9f4 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,5 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ -obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) @@ -15,6 +14,7 @@ obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 816a44959ef0..d369965e8f8a 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -1,7 +1,7 @@ /* * Xen implementation for transcendent memory (tmem) * - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. 
* Author: Dan Magenheimer */ @@ -9,8 +9,14 @@ #include #include #include +#include #include +/* temporary ifdef until include/linux/frontswap.h is upstream */ +#ifdef CONFIG_FRONTSWAP +#include +#endif + #include #include #include @@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -static int xen_tmem_destroy_pool(u32 pool_id) -{ - struct tmem_oid oid = { { 0 } }; - - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); -} - -int tmem_enabled; +int tmem_enabled __read_mostly; +EXPORT_SYMBOL(tmem_enabled); static int __init enable_tmem(char *s) { @@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) __setup("tmem", enable_tmem); +#ifdef CONFIG_CLEANCACHE +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + /* cleancache ops */ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, @@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = { .init_shared_fs = tmem_cleancache_init_shared_fs, .init_fs = tmem_cleancache_init_fs }; +#endif -static int __init xen_tmem_init(void) +#ifdef CONFIG_FRONTSWAP +/* frontswap tmem operations */ + +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ +static int tmem_frontswap_poolid; + +/* + * Swizzling increases objects per swaptype, increasing tmem concurrency + * for heavy swaploads. 
Later, larger nr_cpus -> larger SWIZ_BITS + */ +#define SWIZ_BITS 4 +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) +#define iswiz(_ind) (_ind >> SWIZ_BITS) + +static inline struct tmem_oid oswiz(unsigned type, u32 ind) { - struct cleancache_ops old_ops; + struct tmem_oid oid = { .oid = { 0 } }; + oid.oid[0] = _oswiz(type, ind); + return oid; +} +/* returns 0 if the page was successfully put into frontswap, -1 if not */ +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* + * returns 0 if the page was successfully gotten from frontswap, -1 if + * was not present (should never happen!) 
+ */ +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* flush a single page from frontswap */ +static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + int pool = tmem_frontswap_poolid; + + if (pool < 0) + return; + if (ind64 != ind) + return; + (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); +} + +/* flush all pages from the passed swaptype */ +static void tmem_frontswap_flush_area(unsigned type) +{ + int pool = tmem_frontswap_poolid; + int ind; + + if (pool < 0) + return; + for (ind = SWIZ_MASK; ind >= 0; ind--) + (void)xen_tmem_flush_object(pool, oswiz(type, ind)); +} + +static void tmem_frontswap_init(unsigned ignored) +{ + struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; + + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ + if (tmem_frontswap_poolid < 0) + tmem_frontswap_poolid = + xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); +} + +static int __initdata use_frontswap = 1; + +static int __init no_frontswap(char *s) +{ + use_frontswap = 0; + return 1; +} + +__setup("nofrontswap", no_frontswap); + +static struct frontswap_ops tmem_frontswap_ops = { + .put_page = tmem_frontswap_put_page, + .get_page = tmem_frontswap_get_page, + .flush_page = tmem_frontswap_flush_page, + .flush_area = tmem_frontswap_flush_area, + .init = tmem_frontswap_init +}; +#endif + +static int __init xen_tmem_init(void) +{ if (!xen_domain()) return 0; +#ifdef CONFIG_FRONTSWAP + if (tmem_enabled && use_frontswap) { + char *s = ""; + struct 
frontswap_ops old_ops = + frontswap_register_ops(&tmem_frontswap_ops); + + tmem_frontswap_poolid = -1; + if (old_ops.init != NULL) + s = " (WARNING: frontswap_ops overridden)"; + printk(KERN_INFO "frontswap enabled, RAM provided by " + "Xen Transcendent Memory\n"); + } +#endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - old_ops = cleancache_register_ops(&tmem_cleancache_ops); + struct cleancache_ops old_ops = + cleancache_register_ops(&tmem_cleancache_ops); if (old_ops.init_fs != NULL) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " -- cgit v1.2.3 From 08b8bfc1c649cf82edf43f686cbb9a4bac809679 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 12 Jun 2011 09:21:13 -0700 Subject: xen: Add __attribute__((format(printf... where appropriate Use the compiler to verify printf formats and arguments. Fix fallout. Signed-off-by: Joe Perches Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 2 +- include/xen/hvc-console.h | 4 +++- include/xen/xenbus.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6cc0db1bf522..3f129b45451a 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -684,7 +684,7 @@ again: err = xenbus_switch_state(dev, XenbusStateConnected); if (err) - xenbus_dev_fatal(dev, err, "switching to Connected state", + xenbus_dev_fatal(dev, err, "%s: switching to Connected state", dev->nodename); return; diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index c3adde32669b..901724dc528d 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -6,11 +6,13 @@ extern struct console xenboot_console; #ifdef CONFIG_HVC_XEN void xen_console_resume(void); void xen_raw_console_write(const char *str); 
+__attribute__((format(printf, 1, 2))) void xen_raw_printk(const char *fmt, ...); #else static inline void xen_console_resume(void) { } static inline void xen_raw_console_write(const char *str) { } -static inline void xen_raw_printk(const char *fmt, ...) { } +static inline __attribute__((format(printf, 1, 2))) +void xen_raw_printk(const char *fmt, ...) { } #endif #endif /* XEN_HVC_CONSOLE_H */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 5467369e0889..aceeca799fd7 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -223,7 +223,9 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port); enum xenbus_state xenbus_read_driver_state(const char *path); +__attribute__((format(printf, 3, 4))) void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); +__attribute__((format(printf, 3, 4))) void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); const char *xenbus_strstate(enum xenbus_state state); -- cgit v1.2.3 From cc85e93342c030f8ba07f572afa159ec4518231f Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Wed, 29 Jun 2011 14:39:26 +0200 Subject: xen: Populate xenbus device attributes The xenbus bus type uses device_create_file to assign all used device attributes. However it does not remove them when the device goes away. This patch uses the dev_attrs field of the bus type to specify default attributes for all devices. 
Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 41 ++++++++++-------------------- drivers/xen/xenbus/xenbus_probe.h | 2 ++ drivers/xen/xenbus/xenbus_probe_backend.c | 6 +---- drivers/xen/xenbus/xenbus_probe_frontend.c | 6 +---- 4 files changed, 18 insertions(+), 37 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 739769551e33..2ed0b045c69a 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -378,26 +378,31 @@ static void xenbus_dev_release(struct device *dev) kfree(to_xenbus_device(dev)); } -static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } -static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); -static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); -static ssize_t xendev_show_modalias(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); + +struct device_attribute xenbus_dev_attrs[] = { + __ATTR_RO(nodename), + __ATTR_RO(devtype), + __ATTR_RO(modalias), + __ATTR_NULL +}; +EXPORT_SYMBOL_GPL(xenbus_dev_attrs); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, @@ -449,25 +454,7 @@ int 
xenbus_probe_node(struct xen_bus_type *bus, if (err) goto fail; - err = device_create_file(&xendev->dev, &dev_attr_nodename); - if (err) - goto fail_unregister; - - err = device_create_file(&xendev->dev, &dev_attr_devtype); - if (err) - goto fail_remove_nodename; - - err = device_create_file(&xendev->dev, &dev_attr_modalias); - if (err) - goto fail_remove_devtype; - return 0; -fail_remove_devtype: - device_remove_file(&xendev->dev, &dev_attr_devtype); -fail_remove_nodename: - device_remove_file(&xendev->dev, &dev_attr_nodename); -fail_unregister: - device_unregister(&xendev->dev); fail: kfree(xendev); return err; diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 888b9900ca08..b814935378c7 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -48,6 +48,8 @@ struct xen_bus_type struct bus_type bus; }; +extern struct device_attribute xenbus_dev_attrs[]; + extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 6cf467bf63ec..ec510e562820 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -183,10 +183,6 @@ static void frontend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 0); } -static struct device_attribute xenbus_backend_dev_attrs[] = { - __ATTR_NULL -}; - static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type// */ @@ -200,7 +196,7 @@ static struct xen_bus_type xenbus_backend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_backend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, }, }; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 
b6a2690c9d49..ed2ba474a560 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 1); } -static struct device_attribute xenbus_frontend_dev_attrs[] = { - __ATTR_NULL -}; - static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, .resume = xenbus_dev_resume, @@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_frontend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, .pm = &xenbus_pm_ops, }, -- cgit v1.2.3 From 149bb2fab547253e6359e76f1b86b95247110e68 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Wed, 29 Jun 2011 14:40:08 +0200 Subject: xen: Add module alias to autoload backend drivers All the Xen backend drivers are assigned to a special bus type xen-backend. This patch exports xen-backend:* names through modalias and uevent to autoload them. 
Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 3 ++- drivers/xen/xenbus/xenbus_probe_backend.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 2ed0b045c69a..bd2f90c9ac8b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -393,7 +393,8 @@ static ssize_t devtype_show(struct device *dev, static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); } struct device_attribute xenbus_dev_attrs[] = { diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index ec510e562820..60adf919d78d 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev, if (xdev == NULL) return -ENODEV; + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; + /* stuff we want to pass to /sbin/hotplug */ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) return -ENOMEM; -- cgit v1.2.3 From a50777c791031d7345ce95785ea6220f67339d90 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Fri, 8 Jul 2011 12:26:21 -0600 Subject: xen: tmem: self-ballooning and frontswap-selfshrinking This patch introduces two in-kernel drivers for Xen transcendent memory ("tmem") functionality that complement cleancache and frontswap. Both use control theory to dynamically adjust and optimize memory utilization. 
Selfballooning controls the in-kernel Xen balloon driver, targeting a goal value (vm_committed_as), thus pushing less frequently used clean page cache pages (through the cleancache code) into Xen tmem where Xen can balance needs across all VMs residing on the physical machine. Frontswap-selfshrinking controls the number of pages in frontswap, driving it towards zero (effectively doing a partial swapoff) when in-kernel memory pressure subsides, freeing up RAM for other VMs. More detail is provided in the header comment of xen-selfballooning.c. Signed-off-by: Dan Magenheimer [v8: konrad.wilk@oracle.com: set default enablement depending on frontswap] [v7: konrad.wilk@oracle.com: fix capitalization and punctuation in comments] [v6: fix frontswap-selfshrinking initialization] [v6: konrad.wilk@oracle.com: fix init pr_infos; add comments about swap] [v5: konrad.wilk@oracle.com: add NULL to attr list; move inits up to decls] [v4: dkiper@net-space.pl: use strict_strtoul plus a few syntactic nits] [v3: konrad.wilk@oracle.com: fix potential divides-by-zero] [v3: konrad.wilk@oracle.com: add many more comments, fix nits] [v2: rebased to linux-3.0-rc1] [v2: Ian.Campbell@citrix.com: reorganize as new file (xen-selfballoon.c)] [v2: dkiper@net-space.pl: proper access to vm_committed_as] [v2: dkiper@net-space.pl: accounting fixes] Cc: Jan Beulich Cc: Jeremy Fitzhardinge Cc: --- drivers/xen/Kconfig | 17 ++ drivers/xen/Makefile | 1 + drivers/xen/xen-balloon.c | 2 + drivers/xen/xen-selfballoon.c | 485 ++++++++++++++++++++++++++++++++++++++++++ include/xen/balloon.h | 10 + include/xen/tmem.h | 5 + 6 files changed, 520 insertions(+) create mode 100644 drivers/xen/xen-selfballoon.c create mode 100644 include/xen/tmem.h diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..fd3266696a4d 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -9,6 +9,23 @@ config XEN_BALLOON the system to expand the domain's memory allocation, or alternatively return unneeded 
memory to the system. +config XEN_SELFBALLOONING + bool "Dynamically self-balloon kernel memory to target" + depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP + default n + help + Self-ballooning dynamically balloons available kernel memory driven + by the current usage of anonymous memory ("committed AS") and + controlled by various sysfs-settable parameters. Configuring + FRONTSWAP is highly recommended; if it is not configured, self- + ballooning is disabled by default but can be enabled with the + 'selfballooning' kernel boot parameter. If FRONTSWAP is configured, + frontswap-selfshrinking is enabled by default but can be disabled + with the 'noselfshrink' kernel boot parameter; and self-ballooning + is enabled by default but can be disabled with the 'noselfballooning' + kernel boot parameter. Note that systems without a sufficiently + large swap device should not enable self-ballooning. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index bbc18258ecc5..458057464dd1 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o +obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index a4ff225ee868..5c9dc43c1e94 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -98,6 +98,8 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); + register_xen_selfballooning(&balloon_sysdev); + target_watch.callback = watch_target; xenstore_notifier.notifier_call = balloon_init_watcher; diff --git a/drivers/xen/xen-selfballoon.c 
b/drivers/xen/xen-selfballoon.c new file mode 100644 index 000000000000..010937b5a7c9 --- /dev/null +++ b/drivers/xen/xen-selfballoon.c @@ -0,0 +1,485 @@ +/****************************************************************************** + * Xen selfballoon driver (and optional frontswap self-shrinking driver) + * + * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. + * + * This code complements the cleancache and frontswap patchsets to optimize + * support for Xen Transcendent Memory ("tmem"). The policy it implements + * is rudimentary and will likely improve over time, but it does work well + * enough today. + * + * Two functionalities are implemented here which both use "control theory" + * (feedback) to optimize memory utilization. In a virtualized environment + * such as Xen, RAM is often a scarce resource and we would like to ensure + * that each of a possibly large number of virtual machines is using RAM + * efficiently, i.e. using as little as possible when under light load + * and obtaining as much as possible when memory demands are high. + * Since RAM needs vary highly dynamically and sometimes dramatically, + * "hysteresis" is used, that is, memory target is determined not just + * on current data but also on past data stored in the system. + * + * "Selfballooning" creates memory pressure by managing the Xen balloon + * driver to decrease and increase available kernel memory, driven + * largely by the target value of "Committed_AS" (see /proc/meminfo). + * Since Committed_AS does not account for clean mapped pages (i.e. pages + * in RAM that are identical to pages on disk), selfballooning has the + * effect of pushing less frequently used clean pagecache pages out of + * kernel RAM and, presumably using cleancache, into Xen tmem where + * Xen can more efficiently optimize RAM utilization for such pages. 
+ * + * When kernel memory demand unexpectedly increases faster than Xen, via + * the selfballoon driver, is able to (or chooses to) provide usable RAM, + * the kernel may invoke swapping. In most cases, frontswap is able + * to absorb this swapping into Xen tmem. However, due to the fact + * that the kernel swap subsystem assumes swapping occurs to a disk, + * swapped pages may sit on the disk for a very long time; even if + * the kernel knows the page will never be used again. This is because + * the disk space costs very little and can be overwritten when + * necessary. When such stale pages are in frontswap, however, they + * are taking up valuable real estate. "Frontswap selfshrinking" works + * to resolve this: When frontswap activity is otherwise stable + * and the guest kernel is not under memory pressure, the "frontswap + * selfshrinking" accounts for this by providing pressure to remove some + * pages from frontswap and return them to kernel memory. + * + * For both "selfballooning" and "frontswap-selfshrinking", a worker + * thread is used and sysfs tunables are provided to adjust the frequency + * and rate of adjustments to achieve the goal, as well as to disable one + * or both functions independently. + * + * While some argue that this functionality can and should be implemented + * in userspace, it has been observed that bad things happen (e.g. OOMs). + * + * System configuration note: Selfballooning should not be enabled on + * systems without a sufficiently large swap device configured; for best + * results, it is recommended that total swap be increased by the size + * of the guest memory. Also, while technically not required to be + * configured, it is highly recommended that frontswap also be configured + * and enabled when selfballooning is running. 
So, selfballooning + * is disabled by default if frontswap is not configured and can only + * be enabled with the "selfballooning" kernel boot option; similarly + * selfballooning is enabled by default if frontswap is configured and + * can be disabled with the "noselfballooning" kernel boot option. Finally, + * when frontswap is configured, frontswap-selfshrinking can be disabled + * with the "noselfshrink" kernel boot option. + * + * Selfballooning is disallowed in domain0 and force-disabled. + * + */ + +#include +#include +#include + +#include + +#include + +/* Enable/disable with sysfs. */ +static int xen_selfballooning_enabled __read_mostly; + +/* + * Controls rate at which memory target (this iteration) approaches + * ultimate goal when memory need is increasing (up-hysteresis) or + * decreasing (down-hysteresis). Higher values of hysteresis cause + * slower increases/decreases. The default values for the various + * parameters were deemed reasonable by experimentation, may be + * workload-dependent, and can all be adjusted via sysfs. + */ +static unsigned int selfballoon_downhysteresis __read_mostly = 8; +static unsigned int selfballoon_uphysteresis __read_mostly = 1; + +/* In HZ, controls frequency of worker invocation. */ +static unsigned int selfballoon_interval __read_mostly = 5; + +static void selfballoon_process(struct work_struct *work); +static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); + +#ifdef CONFIG_FRONTSWAP +#include + +/* Enable/disable with sysfs. */ +static bool frontswap_selfshrinking __read_mostly; + +/* Enable/disable with kernel boot option. */ +static bool use_frontswap_selfshrink __initdata = true; + +/* + * The default values for the following parameters were deemed reasonable + * by experimentation, may be workload-dependent, and can all be + * adjusted via sysfs. + */ + +/* Control rate for frontswap shrinking. Higher hysteresis is slower. 
*/ +static unsigned int frontswap_hysteresis __read_mostly = 20; + +/* + * Number of selfballoon worker invocations to wait before observing that + * frontswap selfshrinking should commence. Note that selfshrinking does + * not use a separate worker thread. + */ +static unsigned int frontswap_inertia __read_mostly = 3; + +/* Countdown to next invocation of frontswap_shrink() */ +static unsigned long frontswap_inertia_counter; + +/* + * Invoked by the selfballoon worker thread, uses current number of pages + * in frontswap (frontswap_curr_pages()), previous status, and control + * values (hysteresis and inertia) to determine if frontswap should be + * shrunk and what the new frontswap size should be. Note that + * frontswap_shrink is essentially a partial swapoff that immediately + * transfers pages from the "swap device" (frontswap) back into kernel + * RAM; despite the name, frontswap "shrinking" is very different from + * the "shrinker" interface used by the kernel MM subsystem to reclaim + * memory. + */ +static void frontswap_selfshrink(void) +{ + static unsigned long cur_frontswap_pages; + static unsigned long last_frontswap_pages; + static unsigned long tgt_frontswap_pages; + + last_frontswap_pages = cur_frontswap_pages; + cur_frontswap_pages = frontswap_curr_pages(); + if (!cur_frontswap_pages || + (cur_frontswap_pages > last_frontswap_pages)) { + frontswap_inertia_counter = frontswap_inertia; + return; + } + if (frontswap_inertia_counter && --frontswap_inertia_counter) + return; + if (cur_frontswap_pages <= frontswap_hysteresis) + tgt_frontswap_pages = 0; + else + tgt_frontswap_pages = cur_frontswap_pages - + (cur_frontswap_pages / frontswap_hysteresis); + frontswap_shrink(tgt_frontswap_pages); +} + +static int __init xen_nofrontswap_selfshrink_setup(char *s) +{ + use_frontswap_selfshrink = false; + return 1; +} + +__setup("noselfshrink", xen_nofrontswap_selfshrink_setup); + +/* Disable with kernel boot option. 
*/ +static bool use_selfballooning __initdata = true; + +static int __init xen_noselfballooning_setup(char *s) +{ + use_selfballooning = false; + return 1; +} + +__setup("noselfballooning", xen_noselfballooning_setup); +#else /* !CONFIG_FRONTSWAP */ +/* Enable with kernel boot option. */ +static bool use_selfballooning __initdata = false; + +static int __init xen_selfballooning_setup(char *s) +{ + use_selfballooning = true; + return 1; +} + +__setup("selfballooning", xen_selfballooning_setup); +#endif /* CONFIG_FRONTSWAP */ + +/* + * Use current balloon size, the goal (vm_committed_as), and hysteresis + * parameters to set a new target balloon size + */ +static void selfballoon_process(struct work_struct *work) +{ + unsigned long cur_pages, goal_pages, tgt_pages; + bool reset_timer = false; + + if (xen_selfballooning_enabled) { + cur_pages = balloon_stats.current_pages; + tgt_pages = cur_pages; /* default is no change */ + goal_pages = percpu_counter_read_positive(&vm_committed_as) + + balloon_stats.current_pages - totalram_pages; +#ifdef CONFIG_FRONTSWAP + /* allow space for frontswap pages to be repatriated */ + if (frontswap_selfshrinking && frontswap_enabled) + goal_pages += frontswap_curr_pages(); +#endif + if (cur_pages > goal_pages) + tgt_pages = cur_pages - + ((cur_pages - goal_pages) / + selfballoon_downhysteresis); + else if (cur_pages < goal_pages) + tgt_pages = cur_pages + + ((goal_pages - cur_pages) / + selfballoon_uphysteresis); + /* else if cur_pages == goal_pages, no change */ + balloon_set_new_target(tgt_pages); + reset_timer = true; + } +#ifdef CONFIG_FRONTSWAP + if (frontswap_selfshrinking && frontswap_enabled) { + frontswap_selfshrink(); + reset_timer = true; + } +#endif + if (reset_timer) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); +} + +#ifdef CONFIG_SYSFS + +#include +#include + +#define SELFBALLOON_SHOW(name, format, args...) 
\ + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } + +SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); + +static ssize_t store_selfballooning(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + bool was_enabled = xen_selfballooning_enabled; + unsigned long tmp; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = strict_strtoul(buf, 10, &tmp); + if (err || ((tmp != 0) && (tmp != 1))) + return -EINVAL; + + xen_selfballooning_enabled = !!tmp; + if (!was_enabled && xen_selfballooning_enabled) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); + + return count; +} + +static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR, + show_selfballooning, store_selfballooning); + +SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); + +static ssize_t store_selfballoon_interval(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_interval = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, + show_selfballoon_interval, store_selfballoon_interval); + +SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); + +static ssize_t store_selfballoon_downhys(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_downhysteresis = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, + show_selfballoon_downhys, store_selfballoon_downhys); + + 
+SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); + +static ssize_t store_selfballoon_uphys(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_uphysteresis = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, + show_selfballoon_uphys, store_selfballoon_uphys); + +#ifdef CONFIG_FRONTSWAP +SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); + +static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + bool was_enabled = frontswap_selfshrinking; + unsigned long tmp; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &tmp); + if (err || ((tmp != 0) && (tmp != 1))) + return -EINVAL; + frontswap_selfshrinking = !!tmp; + if (!was_enabled && !xen_selfballooning_enabled && + frontswap_selfshrinking) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); + + return count; +} + +static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, + show_frontswap_selfshrinking, store_frontswap_selfshrinking); + +SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); + +static ssize_t store_frontswap_inertia(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + frontswap_inertia = val; + frontswap_inertia_counter = val; + return count; +} + +static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, + show_frontswap_inertia, store_frontswap_inertia); + +SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); + +static 
ssize_t store_frontswap_hysteresis(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + frontswap_hysteresis = val; + return count; +} + +static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, + show_frontswap_hysteresis, store_frontswap_hysteresis); + +#endif /* CONFIG_FRONTSWAP */ + +static struct attribute *selfballoon_attrs[] = { + &attr_selfballooning.attr, + &attr_selfballoon_interval.attr, + &attr_selfballoon_downhysteresis.attr, + &attr_selfballoon_uphysteresis.attr, +#ifdef CONFIG_FRONTSWAP + &attr_frontswap_selfshrinking.attr, + &attr_frontswap_hysteresis.attr, + &attr_frontswap_inertia.attr, +#endif + NULL +}; + +static struct attribute_group selfballoon_group = { + .name = "selfballoon", + .attrs = selfballoon_attrs +}; +#endif + +int register_xen_selfballooning(struct sys_device *sysdev) +{ + int error = -1; + +#ifdef CONFIG_SYSFS + error = sysfs_create_group(&sysdev->kobj, &selfballoon_group); +#endif + return error; +} +EXPORT_SYMBOL(register_xen_selfballooning); + +static int __init xen_selfballoon_init(void) +{ + bool enable = false; + + if (!xen_domain()) + return -ENODEV; + + if (xen_initial_domain()) { + pr_info("xen/balloon: Xen selfballooning driver " + "disabled for domain0.\n"); + return -ENODEV; + } + + xen_selfballooning_enabled = tmem_enabled && use_selfballooning; + if (xen_selfballooning_enabled) { + pr_info("xen/balloon: Initializing Xen " + "selfballooning driver.\n"); + enable = true; + } +#ifdef CONFIG_FRONTSWAP + frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; + if (frontswap_selfshrinking) { + pr_info("xen/balloon: Initializing frontswap " + "selfshrinking driver.\n"); + enable = true; + } +#endif + if (!enable) + return -ENODEV; + + schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); 
+ + return 0; +} + +subsys_initcall(xen_selfballoon_init); + +MODULE_LICENSE("GPL"); diff --git a/include/xen/balloon.h b/include/xen/balloon.h index a2b22f01a51d..4076ed72afbd 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -23,3 +23,13 @@ void balloon_set_new_target(unsigned long target); int alloc_xenballooned_pages(int nr_pages, struct page** pages); void free_xenballooned_pages(int nr_pages, struct page** pages); + +struct sys_device; +#ifdef CONFIG_XEN_SELFBALLOONING +extern int register_xen_selfballooning(struct sys_device *sysdev); +#else +static inline int register_xen_selfballooning(struct sys_device *sysdev) +{ + return -ENOSYS; +} +#endif diff --git a/include/xen/tmem.h b/include/xen/tmem.h new file mode 100644 index 000000000000..82e2c83a32f5 --- /dev/null +++ b/include/xen/tmem.h @@ -0,0 +1,5 @@ +#ifndef _XEN_TMEM_H +#define _XEN_TMEM_H +/* defined in drivers/xen/tmem.c */ +extern int tmem_enabled; +#endif /* _XEN_TMEM_H */ -- cgit v1.2.3 From 30edc14bf39afde24ef7db2de66c91805db80828 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 13 Oct 2009 17:22:20 -0400 Subject: xen/pciback: xen pci backend driver. This is the host side counterpart to the frontend driver in drivers/pci/xen-pcifront.c. The PV protocol is also implemented by frontend drivers in other OSes too, such as the BSDs. The PV protocol is rather simple. There is page shared with the guest, which has the 'struct xen_pci_sharedinfo' embossed in it. The backend has a thread that is kicked every-time the structure is changed and based on the operation field it performs specific tasks: XEN_PCI_OP_conf_[read|write]: Read/Write 0xCF8/0xCFC filtered data. (conf_space*.c) Based on which field is probed, we either enable/disable the PCI device, change power state, read VPD, etc. The major goal of this call is to provide a Physical IRQ (PIRQ) to the guest. The PIRQ is Xen hypervisor global IRQ value irrespective of the IRQ is tied in to the IO-APIC, or is a vector. 
For GSI type interrupts, the PIRQ==GSI holds. For MSI/MSI-X the PIRQ value != Linux IRQ number (though PIRQ==vector). Please note that with Xen, all interrupts (except those level shared ones) are injected directly to the guest - there is no host interaction. XEN_PCI_OP_[enable|disable]_msi[|x] (pciback_ops.c) Enables/disables the MSI/MSI-X capability of the device. These operations set up the MSI/MSI-X vectors for the guest and pass them to the frontend. When the device is activated, the interrupts are directly injected into the guest without involving the host. XEN_PCI_OP_aer_[detected|resume|mmio|slotreset]: In case of failure, perform the appropriate AER commands on the guest. Right now that is a cop-out - we just kill the guest. Besides implementing those commands, it can also - hide a PCI device from the host. When booting up, the user can specify xen-pciback.hide=(1:0:0)(BDF..) so that the host does not try to use the device. The driver was lifted from linux-2.6.18.hg tree and fixed up so that it could compile under v3.0. Per a suggestion from Jesse Barnes, the driver was moved to drivers/xen/xen-pciback.
Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/Kconfig | 43 + drivers/xen/Makefile | 1 + drivers/xen/xen-pciback/Makefile | 17 + drivers/xen/xen-pciback/conf_space.c | 435 +++++++ drivers/xen/xen-pciback/conf_space.h | 126 ++ drivers/xen/xen-pciback/conf_space_capability.c | 66 + drivers/xen/xen-pciback/conf_space_capability.h | 26 + .../xen/xen-pciback/conf_space_capability_msi.c | 94 ++ drivers/xen/xen-pciback/conf_space_capability_pm.c | 113 ++ .../xen/xen-pciback/conf_space_capability_vpd.c | 40 + drivers/xen/xen-pciback/conf_space_header.c | 318 +++++ drivers/xen/xen-pciback/conf_space_quirks.c | 140 +++ drivers/xen/xen-pciback/conf_space_quirks.h | 35 + drivers/xen/xen-pciback/controller.c | 442 +++++++ drivers/xen/xen-pciback/passthrough.c | 178 +++ drivers/xen/xen-pciback/pci_stub.c | 1285 ++++++++++++++++++++ drivers/xen/xen-pciback/pciback.h | 133 ++ drivers/xen/xen-pciback/pciback_ops.c | 131 ++ drivers/xen/xen-pciback/slot.c | 191 +++ drivers/xen/xen-pciback/vpci.c | 244 ++++ drivers/xen/xen-pciback/xenbus.c | 709 +++++++++++ 21 files changed, 4767 insertions(+) create mode 100644 drivers/xen/xen-pciback/Makefile create mode 100644 drivers/xen/xen-pciback/conf_space.c create mode 100644 drivers/xen/xen-pciback/conf_space.h create mode 100644 drivers/xen/xen-pciback/conf_space_capability.c create mode 100644 drivers/xen/xen-pciback/conf_space_capability.h create mode 100644 drivers/xen/xen-pciback/conf_space_capability_msi.c create mode 100644 drivers/xen/xen-pciback/conf_space_capability_pm.c create mode 100644 drivers/xen/xen-pciback/conf_space_capability_vpd.c create mode 100644 drivers/xen/xen-pciback/conf_space_header.c create mode 100644 drivers/xen/xen-pciback/conf_space_quirks.c create mode 100644 drivers/xen/xen-pciback/conf_space_quirks.h create mode 100644 drivers/xen/xen-pciback/controller.c create mode 100644 drivers/xen/xen-pciback/passthrough.c create mode 100644 drivers/xen/xen-pciback/pci_stub.c 
create mode 100644 drivers/xen/xen-pciback/pciback.h create mode 100644 drivers/xen/xen-pciback/pciback_ops.c create mode 100644 drivers/xen/xen-pciback/slot.c create mode 100644 drivers/xen/xen-pciback/vpci.c create mode 100644 drivers/xen/xen-pciback/xenbus.c diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..8af0792dfd67 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -105,4 +105,47 @@ config SWIOTLB_XEN depends on PCI select SWIOTLB +config XEN_PCIDEV_BACKEND + tristate "Xen PCI-device backend driver" + depends on PCI && X86 && XEN + depends on XEN_BACKEND + help + The PCI device backend driver allows the kernel to export arbitrary + PCI devices to other guests. If you select this to be a module, you + will need to make sure no other driver has bound to the device(s) + you want to make visible to other guests. + +choice + prompt "PCI Backend Mode" + depends on XEN_PCIDEV_BACKEND + +config XEN_PCIDEV_BACKEND_VPCI + bool "Virtual PCI" + help + This PCI Backend hides the true PCI topology and makes the frontend + think there is a single PCI bus with only the exported devices on it. + For example, a device at 03:05.0 will be re-assigned to 00:00.0. A + second device at 02:1a.1 will be re-assigned to 00:01.1. + +config XEN_PCIDEV_BACKEND_PASS + bool "Passthrough" + help + This PCI Backend provides a real view of the PCI topology to the + frontend (for example, a device at 06:01.b will still appear at + 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed + PCI devices to its driver domains. This may be required for drivers + which depend on finding their hardware in certain bus/slot + locations. + +endchoice + +config XEN_PCIDEV_BE_DEBUG + bool "Xen PCI Backend Debugging" + depends on XEN_PCIDEV_BACKEND + default n + help + Allows observing all of the traffic from the frontend/backend + when reading and writing to the configuration registers. + If in doubt, say no.
+ endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index bbc18258ecc5..35a72ef3afac 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile new file mode 100644 index 000000000000..106dae748cdb --- /dev/null +++ b/drivers/xen/xen-pciback/Makefile @@ -0,0 +1,17 @@ +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o + +pciback-y := pci_stub.o pciback_ops.o xenbus.o +pciback-y += conf_space.o conf_space_header.o \ + conf_space_capability.o \ + conf_space_capability_vpd.o \ + conf_space_capability_pm.o \ + conf_space_quirks.o +pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o + +ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c new file mode 100644 index 000000000000..370c18e58d7a --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space.c @@ -0,0 +1,435 @@ +/* + * PCI Backend - Functions for creating a virtual configuration space for + * exported PCI Devices. + * It's dangerous to allow PCI Driver Domains to change their + * device's resources (memory, i/o ports, interrupts). We need to + * restrict changes to certain PCI Configuration registers: + * BARs, INTERRUPT_PIN, most registers in the header... 
+ * + * Author: Ryan Wilson + */ + +#include +#include +#include "pciback.h" +#include "conf_space.h" +#include "conf_space_quirks.h" + +static int permissive; +module_param(permissive, bool, 0644); + +#define DEFINE_PCI_CONFIG(op, size, type) \ +int pciback_##op##_config_##size \ +(struct pci_dev *dev, int offset, type value, void *data) \ +{ \ + return pci_##op##_config_##size(dev, offset, value); \ +} + +DEFINE_PCI_CONFIG(read, byte, u8 *) +DEFINE_PCI_CONFIG(read, word, u16 *) +DEFINE_PCI_CONFIG(read, dword, u32 *) + +DEFINE_PCI_CONFIG(write, byte, u8) +DEFINE_PCI_CONFIG(write, word, u16) +DEFINE_PCI_CONFIG(write, dword, u32) + +static int conf_space_read(struct pci_dev *dev, + const struct config_field_entry *entry, + int offset, u32 *value) +{ + int ret = 0; + const struct config_field *field = entry->field; + + *value = 0; + + switch (field->size) { + case 1: + if (field->u.b.read) + ret = field->u.b.read(dev, offset, (u8 *) value, + entry->data); + break; + case 2: + if (field->u.w.read) + ret = field->u.w.read(dev, offset, (u16 *) value, + entry->data); + break; + case 4: + if (field->u.dw.read) + ret = field->u.dw.read(dev, offset, value, entry->data); + break; + } + return ret; +} + +static int conf_space_write(struct pci_dev *dev, + const struct config_field_entry *entry, + int offset, u32 value) +{ + int ret = 0; + const struct config_field *field = entry->field; + + switch (field->size) { + case 1: + if (field->u.b.write) + ret = field->u.b.write(dev, offset, (u8) value, + entry->data); + break; + case 2: + if (field->u.w.write) + ret = field->u.w.write(dev, offset, (u16) value, + entry->data); + break; + case 4: + if (field->u.dw.write) + ret = field->u.dw.write(dev, offset, value, + entry->data); + break; + } + return ret; +} + +static inline u32 get_mask(int size) +{ + if (size == 1) + return 0xff; + else if (size == 2) + return 0xffff; + else + return 0xffffffff; +} + +static inline int valid_request(int offset, int size) +{ + /* Validate request 
(no un-aligned requests) */ + if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) + return 1; + return 0; +} + +static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, + int offset) +{ + if (offset >= 0) { + new_val_mask <<= (offset * 8); + new_val <<= (offset * 8); + } else { + new_val_mask >>= (offset * -8); + new_val >>= (offset * -8); + } + val = (val & ~new_val_mask) | (new_val & new_val_mask); + + return val; +} + +static int pcibios_err_to_errno(int err) +{ + switch (err) { + case PCIBIOS_SUCCESSFUL: + return XEN_PCI_ERR_success; + case PCIBIOS_DEVICE_NOT_FOUND: + return XEN_PCI_ERR_dev_not_found; + case PCIBIOS_BAD_REGISTER_NUMBER: + return XEN_PCI_ERR_invalid_offset; + case PCIBIOS_FUNC_NOT_SUPPORTED: + return XEN_PCI_ERR_not_implemented; + case PCIBIOS_SET_FAILED: + return XEN_PCI_ERR_access_denied; + } + return err; +} + +int pciback_config_read(struct pci_dev *dev, int offset, int size, + u32 *ret_val) +{ + int err = 0; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + int req_start, req_end, field_start, field_end; + /* if read fails for any reason, return 0 + * (as if device didn't respond) */ + u32 value = 0, tmp_val; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n", + pci_name(dev), size, offset); + + if (!valid_request(offset, size)) { + err = XEN_PCI_ERR_invalid_offset; + goto out; + } + + /* Get the real value first, then modify as appropriate */ + switch (size) { + case 1: + err = pci_read_config_byte(dev, offset, (u8 *) &value); + break; + case 2: + err = pci_read_config_word(dev, offset, (u16 *) &value); + break; + case 4: + err = pci_read_config_dword(dev, offset, &value); + break; + } + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + + req_start = offset; + req_end = offset + size; + field_start = OFFSET(cfg_entry); + field_end 
= OFFSET(cfg_entry) + field->size; + + if ((req_start >= field_start && req_start < field_end) + || (req_end > field_start && req_end <= field_end)) { + err = conf_space_read(dev, cfg_entry, field_start, + &tmp_val); + if (err) + goto out; + + value = merge_value(value, tmp_val, + get_mask(field->size), + field_start - req_start); + } + } + +out: + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n", + pci_name(dev), size, offset, value); + + *ret_val = value; + return pcibios_err_to_errno(err); +} + +int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) +{ + int err = 0, handled = 0; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + u32 tmp_val; + int req_start, req_end, field_start, field_end; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG + "pciback: %s: write request %d bytes at 0x%x = %x\n", + pci_name(dev), size, offset, value); + + if (!valid_request(offset, size)) + return XEN_PCI_ERR_invalid_offset; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + + req_start = offset; + req_end = offset + size; + field_start = OFFSET(cfg_entry); + field_end = OFFSET(cfg_entry) + field->size; + + if ((req_start >= field_start && req_start < field_end) + || (req_end > field_start && req_end <= field_end)) { + tmp_val = 0; + + err = pciback_config_read(dev, field_start, + field->size, &tmp_val); + if (err) + break; + + tmp_val = merge_value(tmp_val, value, get_mask(size), + req_start - field_start); + + err = conf_space_write(dev, cfg_entry, field_start, + tmp_val); + + /* handled is set true here, but not every byte + * may have been written! Properly detecting if + * every byte is handled is unnecessary as the + * flag is used to detect devices that need + * special helpers to work correctly. 
+ */ + handled = 1; + } + } + + if (!handled && !err) { + /* By default, anything not specificially handled above is + * read-only. The permissive flag changes this behavior so + * that anything not specifically handled above is writable. + * This means that some fields may still be read-only because + * they have entries in the config_field list that intercept + * the write and do nothing. */ + if (dev_data->permissive || permissive) { + switch (size) { + case 1: + err = pci_write_config_byte(dev, offset, + (u8) value); + break; + case 2: + err = pci_write_config_word(dev, offset, + (u16) value); + break; + case 4: + err = pci_write_config_dword(dev, offset, + (u32) value); + break; + } + } else if (!dev_data->warned_on_write) { + dev_data->warned_on_write = 1; + dev_warn(&dev->dev, "Driver tried to write to a " + "read-only configuration space field at offset" + " 0x%x, size %d. This may be harmless, but if " + "you have problems with your device:\n" + "1) see permissive attribute in sysfs\n" + "2) report problems to the xen-devel " + "mailing list along with details of your " + "device obtained from lspci.\n", offset, size); + } + } + + return pcibios_err_to_errno(err); +} + +void pciback_config_free_dyn_fields(struct pci_dev *dev) +{ + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry, *t; + const struct config_field *field; + + dev_dbg(&dev->dev, "free-ing dynamically allocated virtual " + "configuration space fields\n"); + if (!dev_data) + return; + + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { + field = cfg_entry->field; + + if (field->clean) { + field->clean((struct config_field *)field); + + kfree(cfg_entry->data); + + list_del(&cfg_entry->list); + kfree(cfg_entry); + } + + } +} + +void pciback_config_reset_dev(struct pci_dev *dev) +{ + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + + 
dev_dbg(&dev->dev, "resetting virtual configuration space\n"); + if (!dev_data) + return; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + + if (field->reset) + field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); + } +} + +void pciback_config_free_dev(struct pci_dev *dev) +{ + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry, *t; + const struct config_field *field; + + dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); + if (!dev_data) + return; + + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { + list_del(&cfg_entry->list); + + field = cfg_entry->field; + + if (field->release) + field->release(dev, OFFSET(cfg_entry), cfg_entry->data); + + kfree(cfg_entry); + } +} + +int pciback_config_add_field_offset(struct pci_dev *dev, + const struct config_field *field, + unsigned int base_offset) +{ + int err = 0; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry; + void *tmp; + + cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); + if (!cfg_entry) { + err = -ENOMEM; + goto out; + } + + cfg_entry->data = NULL; + cfg_entry->field = field; + cfg_entry->base_offset = base_offset; + + /* silently ignore duplicate fields */ + err = pciback_field_is_dup(dev, OFFSET(cfg_entry)); + if (err) + goto out; + + if (field->init) { + tmp = field->init(dev, OFFSET(cfg_entry)); + + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto out; + } + + cfg_entry->data = tmp; + } + + dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", + OFFSET(cfg_entry)); + list_add_tail(&cfg_entry->list, &dev_data->config_fields); + +out: + if (err) + kfree(cfg_entry); + + return err; +} + +/* This sets up the device's virtual configuration space to keep track of + * certain registers (like the base address registers (BARs) so that we can + * keep the client from manipulating them directly. 
+ */ +int pciback_config_init_dev(struct pci_dev *dev) +{ + int err = 0; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + + dev_dbg(&dev->dev, "initializing virtual configuration space\n"); + + INIT_LIST_HEAD(&dev_data->config_fields); + + err = pciback_config_header_add_fields(dev); + if (err) + goto out; + + err = pciback_config_capability_add_fields(dev); + if (err) + goto out; + + err = pciback_config_quirks_init(dev); + +out: + return err; +} + +int pciback_config_init(void) +{ + return pciback_config_capability_init(); +} diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h new file mode 100644 index 000000000000..50ebef216828 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space.h @@ -0,0 +1,126 @@ +/* + * PCI Backend - Common data structures for overriding the configuration space + * + * Author: Ryan Wilson + */ + +#ifndef __XEN_PCIBACK_CONF_SPACE_H__ +#define __XEN_PCIBACK_CONF_SPACE_H__ + +#include +#include + +/* conf_field_init can return an errno in a ptr with ERR_PTR() */ +typedef void *(*conf_field_init) (struct pci_dev *dev, int offset); +typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data); +typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data); + +typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value, + void *data); +typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value, + void *data); +typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value, + void *data); +typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value, + void *data); +typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value, + void *data); +typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value, + void *data); + +/* These are the fields within the configuration space which we + * are interested in intercepting reads/writes to and changing their + * values. 
+ */ +struct config_field { + unsigned int offset; + unsigned int size; + unsigned int mask; + conf_field_init init; + conf_field_reset reset; + conf_field_free release; + void (*clean) (struct config_field *field); + union { + struct { + conf_dword_write write; + conf_dword_read read; + } dw; + struct { + conf_word_write write; + conf_word_read read; + } w; + struct { + conf_byte_write write; + conf_byte_read read; + } b; + } u; + struct list_head list; +}; + +struct config_field_entry { + struct list_head list; + const struct config_field *field; + unsigned int base_offset; + void *data; +}; + +#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) + +/* Add fields to a device - the add_fields macro expects to get a pointer to + * the first entry in an array (of which the ending is marked by size==0) + */ +int pciback_config_add_field_offset(struct pci_dev *dev, + const struct config_field *field, + unsigned int offset); + +static inline int pciback_config_add_field(struct pci_dev *dev, + const struct config_field *field) +{ + return pciback_config_add_field_offset(dev, field, 0); +} + +static inline int pciback_config_add_fields(struct pci_dev *dev, + const struct config_field *field) +{ + int i, err = 0; + for (i = 0; field[i].size != 0; i++) { + err = pciback_config_add_field(dev, &field[i]); + if (err) + break; + } + return err; +} + +static inline int pciback_config_add_fields_offset(struct pci_dev *dev, + const struct config_field *field, + unsigned int offset) +{ + int i, err = 0; + for (i = 0; field[i].size != 0; i++) { + err = pciback_config_add_field_offset(dev, &field[i], offset); + if (err) + break; + } + return err; +} + +/* Read/Write the real configuration space */ +int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value, + void *data); +int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value, + void *data); +int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value, + void 
*data); +int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value, + void *data); +int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value, + void *data); +int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value, + void *data); + +int pciback_config_capability_init(void); + +int pciback_config_header_add_fields(struct pci_dev *dev); +int pciback_config_capability_add_fields(struct pci_dev *dev); + +#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c new file mode 100644 index 000000000000..0ea84d6335f4 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -0,0 +1,66 @@ +/* + * PCI Backend - Handles the virtual fields found on the capability lists + * in the configuration space. + * + * Author: Ryan Wilson + */ + +#include +#include +#include "pciback.h" +#include "conf_space.h" +#include "conf_space_capability.h" + +static LIST_HEAD(capabilities); + +static const struct config_field caplist_header[] = { + { + .offset = PCI_CAP_LIST_ID, + .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ + .u.w.read = pciback_read_config_word, + .u.w.write = NULL, + }, + {} +}; + +static inline void register_capability(struct pciback_config_capability *cap) +{ + list_add_tail(&cap->cap_list, &capabilities); +} + +int pciback_config_capability_add_fields(struct pci_dev *dev) +{ + int err = 0; + struct pciback_config_capability *cap; + int cap_offset; + + list_for_each_entry(cap, &capabilities, cap_list) { + cap_offset = pci_find_capability(dev, cap->capability); + if (cap_offset) { + dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", + cap->capability, cap_offset); + + err = pciback_config_add_fields_offset(dev, + caplist_header, + cap_offset); + if (err) + goto out; + err = pciback_config_add_fields_offset(dev, + cap->fields, + cap_offset); + if (err) + goto out; + } + } + +out: + return err; +} + +int 
pciback_config_capability_init(void) +{ + register_capability(&pciback_config_capability_vpd); + register_capability(&pciback_config_capability_pm); + + return 0; +} diff --git a/drivers/xen/xen-pciback/conf_space_capability.h b/drivers/xen/xen-pciback/conf_space_capability.h new file mode 100644 index 000000000000..8da3ac415f29 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability.h @@ -0,0 +1,26 @@ +/* + * PCI Backend - Data structures for special overlays for structures on + * the capability list. + * + * Author: Ryan Wilson + */ + +#ifndef __PCIBACK_CONFIG_CAPABILITY_H__ +#define __PCIBACK_CONFIG_CAPABILITY_H__ + +#include +#include + +struct pciback_config_capability { + struct list_head cap_list; + + int capability; + + /* If the device has the capability found above, add these fields */ + const struct config_field *fields; +}; + +extern struct pciback_config_capability pciback_config_capability_vpd; +extern struct pciback_config_capability pciback_config_capability_pm; + +#endif diff --git a/drivers/xen/xen-pciback/conf_space_capability_msi.c b/drivers/xen/xen-pciback/conf_space_capability_msi.c new file mode 100644 index 000000000000..78f74b1852d4 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability_msi.c @@ -0,0 +1,94 @@ +/* + * PCI Backend -- Configuration overlay for MSI capability + */ +#include +#include +#include "conf_space.h" +#include "conf_space_capability.h" +#include +#include +#include "pciback.h" + +int pciback_enable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + int otherend = pdev->xdev->otherend_id; + int status; + + status = pci_enable_msi(dev); + + if (status) { + printk(KERN_ERR "error enable msi for guest %x status %x\n", + otherend, status); + op->value = 0; + return XEN_PCI_ERR_op_failed; + } + + /* The value the guest needs is actually the IDT vector, not + * the local domain's IRQ number. */ + + op->value = dev->irq ?
xen_pirq_from_irq(dev->irq) : 0; + return 0; +} + +int pciback_disable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + pci_disable_msi(dev); + + op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + return 0; +} + +int pciback_enable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + int i, result; + struct msix_entry *entries; + + if (op->value > SH_INFO_MAX_VEC) + return -EINVAL; + + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) + return -ENOMEM; + + for (i = 0; i < op->value; i++) { + entries[i].entry = op->msix_entries[i].entry; + entries[i].vector = op->msix_entries[i].vector; + } + + result = pci_enable_msix(dev, entries, op->value); + + if (result == 0) { + for (i = 0; i < op->value; i++) { + op->msix_entries[i].entry = entries[i].entry; + if (entries[i].vector) + op->msix_entries[i].vector = + xen_pirq_from_irq(entries[i].vector); + } + } else { + printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n", + pci_name(dev), result); + } + kfree(entries); + + op->value = result; + + return result; +} + +int pciback_disable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + + pci_disable_msix(dev); + + /* + * SR-IOV devices (which don't have any legacy IRQ) have + * an undefined IRQ value of zero. + */ + op->value = dev->irq ? 
xen_pirq_from_irq(dev->irq) : 0; + return 0; +} + diff --git a/drivers/xen/xen-pciback/conf_space_capability_pm.c b/drivers/xen/xen-pciback/conf_space_capability_pm.c new file mode 100644 index 000000000000..04426165a9e5 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability_pm.c @@ -0,0 +1,113 @@ +/* + * PCI Backend - Configuration space overlay for power management + * + * Author: Ryan Wilson + */ + +#include +#include "conf_space.h" +#include "conf_space_capability.h" + +static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, + void *data) +{ + int err; + u16 real_value; + + err = pci_read_config_word(dev, offset, &real_value); + if (err) + goto out; + + *value = real_value & ~PCI_PM_CAP_PME_MASK; + +out: + return err; +} + +/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. + * Can't allow driver domain to enable PMEs - they're shared */ +#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) + +static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, + void *data) +{ + int err; + u16 old_value; + pci_power_t new_state, old_state; + + err = pci_read_config_word(dev, offset, &old_value); + if (err) + goto out; + + old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); + new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); + + new_value &= PM_OK_BITS; + if ((old_value & PM_OK_BITS) != new_value) { + new_value = (old_value & ~PM_OK_BITS) | new_value; + err = pci_write_config_word(dev, offset, new_value); + if (err) + goto out; + } + + /* Let pci core handle the power management change */ + dev_dbg(&dev->dev, "set power state to %x\n", new_state); + err = pci_set_power_state(dev, new_state); + if (err) { + err = PCIBIOS_SET_FAILED; + goto out; + } + + out: + return err; +} + +/* Ensure PMEs are disabled */ +static void *pm_ctrl_init(struct pci_dev *dev, int offset) +{ + int err; + u16 value; + + err = pci_read_config_word(dev, offset, &value); + if (err) + goto out; 
+ + if (value & PCI_PM_CTRL_PME_ENABLE) { + value &= ~PCI_PM_CTRL_PME_ENABLE; + err = pci_write_config_word(dev, offset, value); + } + +out: + return ERR_PTR(err); +} + +static const struct config_field caplist_pm[] = { + { + .offset = PCI_PM_PMC, + .size = 2, + .u.w.read = pm_caps_read, + }, + { + .offset = PCI_PM_CTRL, + .size = 2, + .init = pm_ctrl_init, + .u.w.read = pciback_read_config_word, + .u.w.write = pm_ctrl_write, + }, + { + .offset = PCI_PM_PPB_EXTENSIONS, + .size = 1, + .u.b.read = pciback_read_config_byte, + }, + { + .offset = PCI_PM_DATA_REGISTER, + .size = 1, + .u.b.read = pciback_read_config_byte, + }, + {} +}; + +struct pciback_config_capability pciback_config_capability_pm = { + .capability = PCI_CAP_ID_PM, + .fields = caplist_pm, +}; diff --git a/drivers/xen/xen-pciback/conf_space_capability_vpd.c b/drivers/xen/xen-pciback/conf_space_capability_vpd.c new file mode 100644 index 000000000000..e7b4d662b53d --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability_vpd.c @@ -0,0 +1,40 @@ +/* + * PCI Backend - Configuration space overlay for Vital Product Data + * + * Author: Ryan Wilson + */ + +#include +#include "conf_space.h" +#include "conf_space_capability.h" + +static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, + void *data) +{ + /* Disallow writes to the vital product data */ + if (value & PCI_VPD_ADDR_F) + return PCIBIOS_SET_FAILED; + else + return pci_write_config_word(dev, offset, value); +} + +static const struct config_field caplist_vpd[] = { + { + .offset = PCI_VPD_ADDR, + .size = 2, + .u.w.read = pciback_read_config_word, + .u.w.write = vpd_address_write, + }, + { + .offset = PCI_VPD_DATA, + .size = 4, + .u.dw.read = pciback_read_config_dword, + .u.dw.write = NULL, + }, + {} +}; + +struct pciback_config_capability pciback_config_capability_vpd = { + .capability = PCI_CAP_ID_VPD, + .fields = caplist_vpd, +}; diff --git a/drivers/xen/xen-pciback/conf_space_header.c 
b/drivers/xen/xen-pciback/conf_space_header.c new file mode 100644 index 000000000000..3ae7da137f7e --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -0,0 +1,318 @@ +/* + * PCI Backend - Handles the virtual fields in the configuration space headers. + * + * Author: Ryan Wilson + */ + +#include +#include +#include "pciback.h" +#include "conf_space.h" + +struct pci_bar_info { + u32 val; + u32 len_val; + int which; +}; + +#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) +#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) + +static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) +{ + int err; + + if (!pci_is_enabled(dev) && is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable\n", + pci_name(dev)); + err = pci_enable_device(dev); + if (err) + return err; + } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable\n", + pci_name(dev)); + pci_disable_device(dev); + } + + if (!dev->is_busmaster && is_master_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: set bus master\n", + pci_name(dev)); + pci_set_master(dev); + } + + if (value & PCI_COMMAND_INVALIDATE) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG + "pciback: %s: enable memory-write-invalidate\n", + pci_name(dev)); + err = pci_set_mwi(dev); + if (err) { + printk(KERN_WARNING + "pciback: %s: cannot enable " + "memory-write-invalidate (%d)\n", + pci_name(dev), err); + value &= ~PCI_COMMAND_INVALIDATE; + } + } + + return pci_write_config_word(dev, offset, value); +} + +static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) +{ + struct pci_bar_info *bar = data; + + if (unlikely(!bar)) { + printk(KERN_WARNING "pciback: driver data not found for %s\n", + pci_name(dev)); + return XEN_PCI_ERR_op_failed; + } + + /* A write to obtain the length must happen as a 32-bit 
write. + * This does not (yet) support writing individual bytes + */ + if (value == ~PCI_ROM_ADDRESS_ENABLE) + bar->which = 1; + else { + u32 tmpval; + pci_read_config_dword(dev, offset, &tmpval); + if (tmpval != bar->val && value == bar->val) { + /* Allow restoration of bar value. */ + pci_write_config_dword(dev, offset, bar->val); + } + bar->which = 0; + } + + /* Do we need to support enabling/disabling the rom address here? */ + + return 0; +} + +/* For the BARs, only allow writes which write ~0 or + * the correct resource information + * (Needed for when the driver probes the resource usage) + */ +static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) +{ + struct pci_bar_info *bar = data; + + if (unlikely(!bar)) { + printk(KERN_WARNING "pciback: driver data not found for %s\n", + pci_name(dev)); + return XEN_PCI_ERR_op_failed; + } + + /* A write to obtain the length must happen as a 32-bit write. + * This does not (yet) support writing individual bytes + */ + if (value == ~0) + bar->which = 1; + else { + u32 tmpval; + pci_read_config_dword(dev, offset, &tmpval); + if (tmpval != bar->val && value == bar->val) { + /* Allow restoration of bar value. */ + pci_write_config_dword(dev, offset, bar->val); + } + bar->which = 0; + } + + return 0; +} + +static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) +{ + struct pci_bar_info *bar = data; + + if (unlikely(!bar)) { + printk(KERN_WARNING "pciback: driver data not found for %s\n", + pci_name(dev)); + return XEN_PCI_ERR_op_failed; + } + + *value = bar->which ? 
bar->len_val : bar->val; + + return 0; +} + +static inline void read_dev_bar(struct pci_dev *dev, + struct pci_bar_info *bar_info, int offset, + u32 len_mask) +{ + pci_read_config_dword(dev, offset, &bar_info->val); + pci_write_config_dword(dev, offset, len_mask); + pci_read_config_dword(dev, offset, &bar_info->len_val); + pci_write_config_dword(dev, offset, bar_info->val); +} + +static void *bar_init(struct pci_dev *dev, int offset) +{ + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); + + if (!bar) + return ERR_PTR(-ENOMEM); + + read_dev_bar(dev, bar, offset, ~0); + bar->which = 0; + + return bar; +} + +static void *rom_init(struct pci_dev *dev, int offset) +{ + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); + + if (!bar) + return ERR_PTR(-ENOMEM); + + read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); + bar->which = 0; + + return bar; +} + +static void bar_reset(struct pci_dev *dev, int offset, void *data) +{ + struct pci_bar_info *bar = data; + + bar->which = 0; +} + +static void bar_release(struct pci_dev *dev, int offset, void *data) +{ + kfree(data); +} + +static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, + void *data) +{ + *value = (u8) dev->irq; + + return 0; +} + +static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) +{ + u8 cur_value; + int err; + + err = pci_read_config_byte(dev, offset, &cur_value); + if (err) + goto out; + + if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) + || value == PCI_BIST_START) + err = pci_write_config_byte(dev, offset, value); + +out: + return err; +} + +static const struct config_field header_common[] = { + { + .offset = PCI_COMMAND, + .size = 2, + .u.w.read = pciback_read_config_word, + .u.w.write = command_write, + }, + { + .offset = PCI_INTERRUPT_LINE, + .size = 1, + .u.b.read = interrupt_read, + }, + { + .offset = PCI_INTERRUPT_PIN, + .size = 1, + .u.b.read = pciback_read_config_byte, + }, + { + /* Any side effects of letting 
driver domain control cache line? */ + .offset = PCI_CACHE_LINE_SIZE, + .size = 1, + .u.b.read = pciback_read_config_byte, + .u.b.write = pciback_write_config_byte, + }, + { + .offset = PCI_LATENCY_TIMER, + .size = 1, + .u.b.read = pciback_read_config_byte, + }, + { + .offset = PCI_BIST, + .size = 1, + .u.b.read = pciback_read_config_byte, + .u.b.write = bist_write, + }, + {} +}; + +#define CFG_FIELD_BAR(reg_offset) \ + { \ + .offset = reg_offset, \ + .size = 4, \ + .init = bar_init, \ + .reset = bar_reset, \ + .release = bar_release, \ + .u.dw.read = bar_read, \ + .u.dw.write = bar_write, \ + } + +#define CFG_FIELD_ROM(reg_offset) \ + { \ + .offset = reg_offset, \ + .size = 4, \ + .init = rom_init, \ + .reset = bar_reset, \ + .release = bar_release, \ + .u.dw.read = bar_read, \ + .u.dw.write = rom_write, \ + } + +static const struct config_field header_0[] = { + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_2), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_3), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_4), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_5), + CFG_FIELD_ROM(PCI_ROM_ADDRESS), + {} +}; + +static const struct config_field header_1[] = { + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), + CFG_FIELD_ROM(PCI_ROM_ADDRESS1), + {} +}; + +int pciback_config_header_add_fields(struct pci_dev *dev) +{ + int err; + + err = pciback_config_add_fields(dev, header_common); + if (err) + goto out; + + switch (dev->hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + err = pciback_config_add_fields(dev, header_0); + break; + + case PCI_HEADER_TYPE_BRIDGE: + err = pciback_config_add_fields(dev, header_1); + break; + + default: + err = -EINVAL; + printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n", + pci_name(dev), dev->hdr_type); + break; + } + +out: + return err; +} diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c new file mode 100644 index 000000000000..45c31fb391ec --- 
/dev/null +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -0,0 +1,140 @@ +/* + * PCI Backend - Handle special overlays for broken devices. + * + * Author: Ryan Wilson + * Author: Chris Bookholt + */ + +#include +#include +#include "pciback.h" +#include "conf_space.h" +#include "conf_space_quirks.h" + +LIST_HEAD(pciback_quirks); + +static inline const struct pci_device_id * +match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) +{ + if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && + (id->device == PCI_ANY_ID || id->device == dev->device) && + (id->subvendor == PCI_ANY_ID || + id->subvendor == dev->subsystem_vendor) && + (id->subdevice == PCI_ANY_ID || + id->subdevice == dev->subsystem_device) && + !((id->class ^ dev->class) & id->class_mask)) + return id; + return NULL; +} + +struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev) +{ + struct pciback_config_quirk *tmp_quirk; + + list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list) + if (match_one_device(&tmp_quirk->devid, dev) != NULL) + goto out; + tmp_quirk = NULL; + printk(KERN_DEBUG + "quirk didn't match any device pciback knows about\n"); +out: + return tmp_quirk; +} + +static inline void register_quirk(struct pciback_config_quirk *quirk) +{ + list_add_tail(&quirk->quirks_list, &pciback_quirks); +} + +int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) +{ + int ret = 0; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + if (OFFSET(cfg_entry) == reg) { + ret = 1; + break; + } + } + return ret; +} + +int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field + *field) +{ + int err = 0; + + switch (field->size) { + case 1: + field->u.b.read = pciback_read_config_byte; + field->u.b.write = pciback_write_config_byte; + break; + case 2: + field->u.w.read = pciback_read_config_word; + field->u.w.write = 
pciback_write_config_word; + break; + case 4: + field->u.dw.read = pciback_read_config_dword; + field->u.dw.write = pciback_write_config_dword; + break; + default: + err = -EINVAL; + goto out; + } + + err = pciback_config_add_field(dev, field); + +out: + return err; +} + +int pciback_config_quirks_init(struct pci_dev *dev) +{ + struct pciback_config_quirk *quirk; + int ret = 0; + + quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); + if (!quirk) { + ret = -ENOMEM; + goto out; + } + + quirk->devid.vendor = dev->vendor; + quirk->devid.device = dev->device; + quirk->devid.subvendor = dev->subsystem_vendor; + quirk->devid.subdevice = dev->subsystem_device; + quirk->devid.class = 0; + quirk->devid.class_mask = 0; + quirk->devid.driver_data = 0UL; + + quirk->pdev = dev; + + register_quirk(quirk); +out: + return ret; +} + +void pciback_config_field_free(struct config_field *field) +{ + kfree(field); +} + +int pciback_config_quirk_release(struct pci_dev *dev) +{ + struct pciback_config_quirk *quirk; + int ret = 0; + + quirk = pciback_find_quirk(dev); + if (!quirk) { + ret = -ENXIO; + goto out; + } + + list_del(&quirk->quirks_list); + kfree(quirk); + +out: + return ret; +} diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h new file mode 100644 index 000000000000..acd0e1ae8fc5 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_quirks.h @@ -0,0 +1,35 @@ +/* + * PCI Backend - Data structures for special overlays for broken devices.
+ * + * Ryan Wilson + * Chris Bookholt + */ + +#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ +#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ + +#include +#include + +struct pciback_config_quirk { + struct list_head quirks_list; + struct pci_device_id devid; + struct pci_dev *pdev; +}; + +struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev); + +int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field + *field); + +int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg); + +int pciback_config_quirks_init(struct pci_dev *dev); + +void pciback_config_field_free(struct config_field *field); + +int pciback_config_quirk_release(struct pci_dev *dev); + +int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg); + +#endif diff --git a/drivers/xen/xen-pciback/controller.c b/drivers/xen/xen-pciback/controller.c new file mode 100644 index 000000000000..7f04f116daec --- /dev/null +++ b/drivers/xen/xen-pciback/controller.c @@ -0,0 +1,442 @@ +/* + * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. + * Alex Williamson + * + * PCI "Controller" Backend - virtualize PCI bus topology based on PCI + * controllers. Devices under the same PCI controller are exposed on the + * same virtual domain:bus. Within a bus, device slots are virtualized + * to compact the bus. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include "pciback.h" + +#define PCI_MAX_BUSSES 255 +#define PCI_MAX_SLOTS 32 + +struct controller_dev_entry { + struct list_head list; + struct pci_dev *dev; + unsigned int devfn; +}; + +struct controller_list_entry { + struct list_head list; + struct pci_controller *controller; + unsigned int domain; + unsigned int bus; + unsigned int next_devfn; + struct list_head dev_list; +}; + +struct controller_dev_data { + struct list_head list; + unsigned int next_domain; + unsigned int next_bus; + spinlock_t lock; +}; + +struct walk_info { + struct pciback_device *pdev; + int resource_count; + int root_num; +}; + +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn) +{ + struct controller_dev_data *dev_data = pdev->pci_dev_data; + struct controller_dev_entry *dev_entry; + struct controller_list_entry *cntrl_entry; + struct pci_dev *dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_data->lock, flags); + + list_for_each_entry(cntrl_entry, &dev_data->list, list) { + if (cntrl_entry->domain != domain || + cntrl_entry->bus != bus) + continue; + + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { + if (devfn == dev_entry->devfn) { + dev = dev_entry->dev; + goto found; + } + } + } +found: + spin_unlock_irqrestore(&dev_data->lock, flags); + + return dev; +} + +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) +{ + struct controller_dev_data *dev_data = pdev->pci_dev_data; + struct controller_dev_entry *dev_entry; + struct controller_list_entry *cntrl_entry; + struct pci_controller 
*dev_controller = PCI_CONTROLLER(dev); + unsigned long flags; + int ret = 0, found = 0; + + spin_lock_irqsave(&dev_data->lock, flags); + + /* Look to see if we already have a domain:bus for this controller */ + list_for_each_entry(cntrl_entry, &dev_data->list, list) { + if (cntrl_entry->controller == dev_controller) { + found = 1; + break; + } + } + + if (!found) { + cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC); + if (!cntrl_entry) { + ret = -ENOMEM; + goto out; + } + + cntrl_entry->controller = dev_controller; + cntrl_entry->next_devfn = PCI_DEVFN(0, 0); + + cntrl_entry->domain = dev_data->next_domain; + cntrl_entry->bus = dev_data->next_bus++; + if (dev_data->next_bus > PCI_MAX_BUSSES) { + dev_data->next_domain++; + dev_data->next_bus = 0; + } + + INIT_LIST_HEAD(&cntrl_entry->dev_list); + + list_add_tail(&cntrl_entry->list, &dev_data->list); + } + + if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) { + /* + * While it seems unlikely, this can actually happen if + * a controller has P2P bridges under it. + */ + xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x " + "is full, no room to export %04x:%02x:%02x.%x", + cntrl_entry->domain, cntrl_entry->bus, + pci_domain_nr(dev->bus), dev->bus->number, + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + ret = -ENOSPC; + goto out; + } + + dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC); + if (!dev_entry) { + if (list_empty(&cntrl_entry->dev_list)) { + list_del(&cntrl_entry->list); + kfree(cntrl_entry); + } + ret = -ENOMEM; + goto out; + } + + dev_entry->dev = dev; + dev_entry->devfn = cntrl_entry->next_devfn; + + list_add_tail(&dev_entry->list, &cntrl_entry->dev_list); + + cntrl_entry->next_devfn += PCI_DEVFN(1, 0); + +out: + spin_unlock_irqrestore(&dev_data->lock, flags); + + /* TODO: Publish virtual domain:bus:slot.func here. 
*/ + + return ret; +} + +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +{ + struct controller_dev_data *dev_data = pdev->pci_dev_data; + struct controller_list_entry *cntrl_entry; + struct controller_dev_entry *dev_entry = NULL; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_data->lock, flags); + + list_for_each_entry(cntrl_entry, &dev_data->list, list) { + if (cntrl_entry->controller != PCI_CONTROLLER(dev)) + continue; + + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { + if (dev_entry->dev == dev) { + found_dev = dev_entry->dev; + goto found; + } + } + } +found: + if (!found_dev) { + spin_unlock_irqrestore(&dev_data->lock, flags); + return; + } + + list_del(&dev_entry->list); + kfree(dev_entry); + + if (list_empty(&cntrl_entry->dev_list)) { + list_del(&cntrl_entry->list); + kfree(cntrl_entry); + } + + spin_unlock_irqrestore(&dev_data->lock, flags); + pcistub_put_pci_dev(found_dev); +} + +int pciback_init_devices(struct pciback_device *pdev) +{ + struct controller_dev_data *dev_data; + + dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + spin_lock_init(&dev_data->lock); + + INIT_LIST_HEAD(&dev_data->list); + + /* Starting domain:bus numbers */ + dev_data->next_domain = 0; + dev_data->next_bus = 0; + + pdev->pci_dev_data = dev_data; + + return 0; +} + +static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data) +{ + struct walk_info *info = data; + struct acpi_resource_address64 addr; + acpi_status status; + int i, len, err; + char str[32], tmp[3]; + unsigned char *ptr, *buf; + + status = acpi_resource_to_address64(res, &addr); + + /* Do we care about this range? Let's check.
*/ + if (!ACPI_SUCCESS(status) || + !(addr.resource_type == ACPI_MEMORY_RANGE || + addr.resource_type == ACPI_IO_RANGE) || + !addr.address_length || addr.producer_consumer != ACPI_PRODUCER) + return AE_OK; + + /* + * Furthermore, we really only care to tell the guest about + * address ranges that require address translation of some sort. + */ + if (!(addr.resource_type == ACPI_MEMORY_RANGE && + addr.info.mem.translation) && + !(addr.resource_type == ACPI_IO_RANGE && + addr.info.io.translation)) + return AE_OK; + + /* Store the resource in xenbus for the guest */ + len = snprintf(str, sizeof(str), "root-%d-resource-%d", + info->root_num, info->resource_count); + if (unlikely(len >= (sizeof(str) - 1))) + return AE_OK; + + buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL); + if (!buf) + return AE_OK; + + /* Clean out resource_source */ + res->data.address64.resource_source.index = 0xFF; + res->data.address64.resource_source.string_length = 0; + res->data.address64.resource_source.string_ptr = NULL; + + ptr = (unsigned char *)res; + + /* Turn the acpi_resource into an ASCII byte stream */ + for (i = 0; i < sizeof(*res); i++) { + snprintf(tmp, sizeof(tmp), "%02x", ptr[i]); + strncat(buf, tmp, 2); + } + + err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename, + str, "%s", buf); + + if (!err) + info->resource_count++; + + kfree(buf); + + return AE_OK; +} + +int pciback_publish_pci_roots(struct pciback_device *pdev, + publish_pci_root_cb publish_root_cb) +{ + struct controller_dev_data *dev_data = pdev->pci_dev_data; + struct controller_list_entry *cntrl_entry; + int i, root_num, len, err = 0; + unsigned int domain, bus; + char str[64]; + struct walk_info info; + + spin_lock(&dev_data->lock); + + list_for_each_entry(cntrl_entry, &dev_data->list, list) { + /* First publish all the domain:bus info */ + err = publish_root_cb(pdev, cntrl_entry->domain, + cntrl_entry->bus); + if (err) + goto out; + + /* + * Now figure out which root-%d this belongs to + * so we can associate 
resources with it. + */ + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", &root_num); + + if (err != 1) + goto out; + + for (i = 0; i < root_num; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + str, "%x:%x", &domain, &bus); + if (err != 2) + goto out; + + /* Is this the one we just published? */ + if (domain == cntrl_entry->domain && + bus == cntrl_entry->bus) + break; + } + + if (i == root_num) + goto out; + + info.pdev = pdev; + info.resource_count = 0; + info.root_num = i; + + /* Let ACPI do the heavy lifting on decoding resources */ + acpi_walk_resources(cntrl_entry->controller->acpi_handle, + METHOD_NAME__CRS, write_xenbus_resource, + &info); + + /* No resources. OK. On to the next one */ + if (!info.resource_count) + continue; + + /* Store the number of resources we wrote for this root-%d */ + len = snprintf(str, sizeof(str), "root-%d-resources", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%d", info.resource_count); + if (err) + goto out; + } + + /* Finally, write some magic to synchronize with the guest.
*/ + len = snprintf(str, sizeof(str), "root-resource-magic"); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%lx", (sizeof(struct acpi_resource) * 2) + 1); + +out: + spin_unlock(&dev_data->lock); + + return err; +} + +void pciback_release_devices(struct pciback_device *pdev) +{ + struct controller_dev_data *dev_data = pdev->pci_dev_data; + struct controller_list_entry *cntrl_entry, *c; + struct controller_dev_entry *dev_entry, *d; + + list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) { + list_for_each_entry_safe(dev_entry, d, + &cntrl_entry->dev_list, list) { + list_del(&dev_entry->list); + pcistub_put_pci_dev(dev_entry->dev); + kfree(dev_entry); + } + list_del(&cntrl_entry->list); + kfree(cntrl_entry); + } + + kfree(dev_data); + pdev->pci_dev_data = NULL; +} + +int pciback_get_pcifront_dev(struct pci_dev *pcidev, + struct pciback_device *pdev, + unsigned int *domain, unsigned int *bus, unsigned int *devfn) +{ + struct controller_dev_data *dev_data = pdev->pci_dev_data; + struct controller_dev_entry *dev_entry; + struct controller_list_entry *cntrl_entry; + unsigned long flags; + int found = 0; + spin_lock_irqsave(&dev_data->lock, flags); + + list_for_each_entry(cntrl_entry, &dev_data->list, list) { + list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { + if ((dev_entry->dev->bus->number == + pcidev->bus->number) && + (dev_entry->dev->devfn == + pcidev->devfn) && + (pci_domain_nr(dev_entry->dev->bus) == + pci_domain_nr(pcidev->bus))) { + found = 1; + *domain = cntrl_entry->domain; + *bus = cntrl_entry->bus; + *devfn = dev_entry->devfn; + goto out; + } + } + } +out: + spin_unlock_irqrestore(&dev_data->lock, flags); + return found; + +} + diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c new file mode 100644 index 000000000000..5386bebf7f9a --- /dev/null +++ b/drivers/xen/xen-pciback/passthrough.c @@ -0,0 +1,178 
@@
+/*
+ * PCI Backend - Provides restricted access to the real PCI bus topology
+ *               to the frontend
+ *
+ *   Author: Ryan Wilson
+ */
+
+/*
+ * NOTE(review): the header names of the next three includes were missing
+ * from this copy of the patch.  They are restored from what this file uses
+ * (list heads, struct pci_dev, spinlocks) - confirm against the original
+ * commit.
+ */
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+struct passthrough_dev_data {
+	/* Access to dev_list must be protected by lock */
+	struct list_head dev_list;
+	spinlock_t lock;
+};
+
+/*
+ * Look up an exported device by its real domain/bus/devfn.
+ *
+ * Returns the matching struct pci_dev from this backend's dev_list, or NULL
+ * when nothing matches.  No additional reference is taken on the result.
+ */
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+				    unsigned int domain, unsigned int bus,
+				    unsigned int devfn)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry;
+	struct pci_dev *dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
+		    && bus == (unsigned int)dev_entry->dev->bus->number
+		    && devfn == dev_entry->dev->devfn) {
+			dev = dev_entry->dev;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	return dev;
+}
+
+/*
+ * Track @dev in this backend's list and publish it through @publish_cb
+ * under its real domain/bus/devfn.
+ *
+ * Returns -ENOMEM if the list entry cannot be allocated, otherwise the
+ * value returned by @publish_cb.  The entry stays on the list even when
+ * publishing fails.
+ */
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+			int devid, publish_pci_dev_cb publish_cb)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry;
+	unsigned long flags;
+	unsigned int domain, bus, devfn;
+	int err;
+
+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
+	if (!dev_entry)
+		return -ENOMEM;
+	dev_entry->dev = dev;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+	list_add_tail(&dev_entry->list, &dev_data->dev_list);
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	/* Publish this device. */
+	domain = (unsigned int)pci_domain_nr(dev->bus);
+	bus = (unsigned int)dev->bus->number;
+	devfn = dev->devfn;
+	err = publish_cb(pdev, domain, bus, devfn, devid);
+
+	return err;
+}
+
+/*
+ * Remove every list entry that refers to @dev and, if one was found, hand
+ * the device back via pcistub_put_pci_dev() after the lock is dropped.
+ */
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry, *t;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+		if (dev_entry->dev == dev) {
+			list_del(&dev_entry->list);
+			found_dev = dev_entry->dev;
+			kfree(dev_entry);
+		}
+	}
+
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	if (found_dev)
+		pcistub_put_pci_dev(found_dev);
+}
+
+/*
+ * Allocate and initialise the per-backend device list.
+ * Returns 0 on success or -ENOMEM.
+ */
+int pciback_init_devices(struct pciback_device *pdev)
+{
+	struct passthrough_dev_data *dev_data;
+
+	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
+	if (!dev_data)
+		return -ENOMEM;
+
+	spin_lock_init(&dev_data->lock);
+
+	INIT_LIST_HEAD(&dev_data->dev_list);
+
+	pdev->pci_dev_data = dev_data;
+
+	return 0;
+}
+
+/*
+ * Call @publish_root_cb once for every listed device that has no exported
+ * parent bridge.  Stops at, and returns, the first non-zero callback result;
+ * returns 0 otherwise.
+ *
+ * NOTE(review): the callback runs with dev_data->lock held, and this is the
+ * only user of the lock that does not disable interrupts - confirm the
+ * callback cannot sleep.
+ */
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+			      publish_pci_root_cb publish_root_cb)
+{
+	int err = 0;
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry, *e;
+	struct pci_dev *dev;
+	int found;
+	unsigned int domain, bus;
+
+	spin_lock(&dev_data->lock);
+
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+		/* Only publish this device as a root if none of its
+		 * parent bridges are exported
+		 */
+		found = 0;
+		dev = dev_entry->dev->bus->self;
+		for (; !found && dev != NULL; dev = dev->bus->self) {
+			list_for_each_entry(e, &dev_data->dev_list, list) {
+				if (dev == e->dev) {
+					found = 1;
+					break;
+				}
+			}
+		}
+
+		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
+		bus = (unsigned int)dev_entry->dev->bus->number;
+
+		if (!found) {
+			err = publish_root_cb(pdev, domain, bus);
+			if (err)
+				break;
+		}
+	}
+
+	spin_unlock(&dev_data->lock);
+
+	return err;
+}
+
+/*
+ * Give every listed device back via pcistub_put_pci_dev() and free the
+ * list itself.  The list is walked without taking dev_data->lock.
+ */
+void pciback_release_devices(struct pciback_device *pdev)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry, *t;
+
+	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+		list_del(&dev_entry->list);
+		pcistub_put_pci_dev(dev_entry->dev);
+		kfree(dev_entry);
+	}
+
+	kfree(dev_data);
+	pdev->pci_dev_data = NULL;
+}
+
+/*
+ * Report @pcidev's own domain/bus/devfn as the frontend-visible address.
+ * @pdev is not consulted; always returns 1.
+ */
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+			     struct pciback_device *pdev,
+			     unsigned int *domain, unsigned int *bus,
+			     unsigned int *devfn)
+
+{
+	*domain = pci_domain_nr(pcidev->bus);
+	*bus = pcidev->bus->number;
+	*devfn = pcidev->devfn;
+	return 1;
+}
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
new file mode 100644
index 000000000000..0b5a16b81c8c
--- /dev/null
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -0,0 +1,1285 @@
+/*
+ * PCI Stub Driver - Grabs devices in backend to be exported later
+ *
+ * Ryan Wilson
+ * Chris Bookholt
+ */
+/*
+ * NOTE(review): the header names of the next thirteen includes were missing
+ * from this copy of the patch.  They are restored from the facilities this
+ * file uses - confirm names and order against the original commit.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/pci.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include <asm/xen/hypervisor.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+static char *pci_devs_to_hide;
+wait_queue_head_t aer_wait_queue;
+/* Semaphore that synchronises AER handling with pciback remove/reconfigure
+ * operations: a pciback device must not be removed in the middle of an AER
+ * operation.
+ */
+static DECLARE_RWSEM(pcistub_sem);
+module_param_named(hide, pci_devs_to_hide, charp, 0444);
+
+struct pcistub_device_id {
+	struct list_head slot_list;
+	int domain;
+	unsigned char bus;
+	unsigned int devfn;
+};
+static LIST_HEAD(pcistub_device_ids);
+static DEFINE_SPINLOCK(device_ids_lock);
+
+struct pcistub_device {
+	struct kref kref;
+	struct list_head dev_list;
+	spinlock_t lock;
+
+	struct pci_dev *dev;
+	struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
+};
+
+/* Access to pcistub_devices & seized_devices
lists and the initialize_devices
+ * flag must be locked with pcistub_devices_lock
+ */
+static DEFINE_SPINLOCK(pcistub_devices_lock);
+static LIST_HEAD(pcistub_devices);
+
+/* wait for device_initcall before initializing our devices
+ * (see pcistub_init_devices_late)
+ */
+static int initialize_devices;
+static LIST_HEAD(seized_devices);
+
+/* Allocate a stub for @dev.  The stub starts with one kref and holds one
+ * pci_dev reference.  Returns NULL if the allocation or pci_dev_get() fails.
+ */
+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
+
+	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
+	if (!psdev)
+		return NULL;
+
+	psdev->dev = pci_dev_get(dev);
+	if (!psdev->dev) {
+		kfree(psdev);
+		return NULL;
+	}
+
+	kref_init(&psdev->kref);
+	spin_lock_init(&psdev->lock);
+
+	return psdev;
+}
+
+/* Don't call this directly as it's called by pcistub_device_put */
+static void pcistub_device_release(struct kref *kref)
+{
+	struct pcistub_device *psdev;
+
+	psdev = container_of(kref, struct pcistub_device, kref);
+
+	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+
+	/* Clean-up the device */
+	pciback_reset_device(psdev->dev);
+	pciback_config_free_dyn_fields(psdev->dev);
+	pciback_config_free_dev(psdev->dev);
+	kfree(pci_get_drvdata(psdev->dev));
+	pci_set_drvdata(psdev->dev, NULL);
+
+	pci_dev_put(psdev->dev);
+
+	kfree(psdev);
+}
+
+/* Take an additional reference on @psdev. */
+static inline void pcistub_device_get(struct pcistub_device *psdev)
+{
+	kref_get(&psdev->kref);
+}
+
+/* Drop a reference on @psdev; the last put runs pcistub_device_release(). */
+static inline void pcistub_device_put(struct pcistub_device *psdev)
+{
+	kref_put(&psdev->kref, pcistub_device_release);
+}
+
+/* Find the stub for domain:bus:slot.func on the pcistub_devices list.
+ *
+ * Returns the stub with an extra reference held (the caller must balance it
+ * with pcistub_device_put()), or NULL if there is no such device.
+ */
+static struct pcistub_device *pcistub_device_find(int domain, int bus,
+						  int slot, int func)
+{
+	struct pcistub_device *psdev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev != NULL
+		    && domain == pci_domain_nr(psdev->dev->bus)
+		    && bus == psdev->dev->bus->number
+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+			pcistub_device_get(psdev);
+			goto out;
+		}
+	}
+
+	/* didn't find it */
+	psdev = NULL;
+
+out:
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return psdev;
+}
+
+/* Claim @psdev for @pdev.
+ *
+ * Returns the stub's pci_dev, with a stub reference kept, if the stub was
+ * not already in use.  Returns NULL, with the reference dropped again, if
+ * another pciback device already owns it.
+ */
+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
+						  struct pcistub_device *psdev)
+{
+	struct pci_dev *pci_dev = NULL;
+	unsigned long flags;
+
+	pcistub_device_get(psdev);
+
+	spin_lock_irqsave(&psdev->lock, flags);
+	if (!psdev->pdev) {
+		psdev->pdev = pdev;
+		pci_dev = psdev->dev;
+	}
+	spin_unlock_irqrestore(&psdev->lock, flags);
+
+	if (!pci_dev)
+		pcistub_device_put(psdev);
+
+	return pci_dev;
+}
+
+/* Claim the seized device at domain:bus:slot.func for @pdev.
+ * Returns its pci_dev, or NULL if it is unknown or already in use.
+ */
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
+					    int domain, int bus,
+					    int slot, int func)
+{
+	struct pcistub_device *psdev;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev != NULL
+		    && domain == pci_domain_nr(psdev->dev->bus)
+		    && bus == psdev->dev->bus->number
+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return found_dev;
+}
+
+/* Claim the seized device @dev for @pdev.
+ * Returns @dev, or NULL if it is unknown or already in use.
+ */
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
+				    struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev == dev) {
+			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return found_dev;
+}
+
+/* Return @dev to the stub: reset it, clear the owning pciback device and
+ * drop the reference that was kept when the device was claimed.
+ */
+void pcistub_put_pci_dev(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev, *found_psdev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev == dev) {
+			found_psdev = psdev;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	/* A device that is not on our list has no stub to clean up;
+	 * bail out instead of dereferencing a NULL found_psdev below.
+	 */
+	if (!found_psdev) {
+		dev_warn(&dev->dev, "device is not owned by pciback\n");
+		return;
+	}
+
+	/*hold this lock for avoiding breaking link between
+	* pcistub and pciback when AER is in processing
+	*/
+	down_write(&pcistub_sem);
+	/* Cleanup our device
+	 * (so it's ready for the next domain)
+	 */
+	pciback_reset_device(found_psdev->dev);
+	pciback_config_free_dyn_fields(found_psdev->dev);
+	pciback_config_reset_dev(found_psdev->dev);
+
+	spin_lock_irqsave(&found_psdev->lock, flags);
+	found_psdev->pdev = NULL;
+	spin_unlock_irqrestore(&found_psdev->lock, flags);
+
+	pcistub_device_put(found_psdev);
+	up_write(&pcistub_sem);
+}
+
+/* Returns 1 if @dev, or any bridge above it, matches @pdev_id; else 0. */
+static int __devinit pcistub_match_one(struct pci_dev *dev,
+				       struct pcistub_device_id *pdev_id)
+{
+	/* Match the specified device by domain, bus, slot, func and also if
+	 * any of the device's parent bridges match.
+	 */
+	for (; dev != NULL; dev = dev->bus->self) {
+		if (pci_domain_nr(dev->bus) == pdev_id->domain
+		    && dev->bus->number == pdev_id->bus
+		    && dev->devfn == pdev_id->devfn)
+			return 1;
+
+		/* Sometimes topmost bridge links to itself. */
+		if (dev == dev->bus->self)
+			break;
+	}
+
+	return 0;
+}
+
+/* Returns 1 if @dev matches any entry of pcistub_device_ids, else 0. */
+static int __devinit pcistub_match(struct pci_dev *dev)
+{
+	struct pcistub_device_id *pdev_id;
+	unsigned long flags;
+	int found = 0;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
+		if (pcistub_match_one(dev, pdev_id)) {
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return found;
+}
+
+/* Attach pciback driver data to @dev, set up its config space emulation,
+ * enable it once and then reset it.  Returns 0 or a negative errno; on
+ * failure the driver data is freed and cleared again.
+ */
+static int __devinit pcistub_init_device(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data;
+	int err = 0;
+
+	dev_dbg(&dev->dev, "initializing...\n");
+
+	/* The PCI backend is not intended to be a module (or to work with
+	 * removable PCI devices (yet). If it were, pciback_config_free()
+	 * would need to be called somewhere to free the memory allocated
+	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
+	 */
+	dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
+	if (!dev_data) {
+		err = -ENOMEM;
+		goto out;
+	}
+	pci_set_drvdata(dev, dev_data);
+
+	dev_dbg(&dev->dev, "initializing config\n");
+
+	/* NOTE(review): this re-initialises the global wait queue for every
+	 * device that is set up - confirm that is intended.
+	 */
+	init_waitqueue_head(&aer_wait_queue);
+	err = pciback_config_init_dev(dev);
+	if (err)
+		goto out;
+
+	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
+	 * must do this here because pcibios_enable_device may specify
+	 * the pci device's true irq (and possibly its other resources)
+	 * if they differ from what's in the configuration space.
+	 * This makes the assumption that the device's resources won't
+	 * change after this point (otherwise this code may break!)
+	 */
+	dev_dbg(&dev->dev, "enabling device\n");
+	err = pci_enable_device(dev);
+	if (err)
+		goto config_release;
+
+	/* Now disable the device (this also ensures some private device
+	 * data is setup before we export)
+	 */
+	dev_dbg(&dev->dev, "reset device\n");
+	pciback_reset_device(dev);
+
+	return 0;
+
+config_release:
+	pciback_config_free_dev(dev);
+
+out:
+	pci_set_drvdata(dev, NULL);
+	kfree(dev_data);
+	return err;
+}
+
+/*
+ * Because some initialization still happens on
+ * devices during fs_initcall, we need to defer
+ * full initialization of our devices until
+ * device_initcall.
+ */
+static int __init pcistub_init_devices_late(void)
+{
+	struct pcistub_device *psdev;
+	unsigned long flags;
+	int err = 0;
+
+	pr_debug("pciback: pcistub_init_devices_late\n");
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	while (!list_empty(&seized_devices)) {
+		psdev = container_of(seized_devices.next,
+				     struct pcistub_device, dev_list);
+		list_del(&psdev->dev_list);
+
+		/* pcistub_init_device() is called with the lock dropped */
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		err = pcistub_init_device(psdev->dev);
+		if (err) {
+			dev_err(&psdev->dev->dev,
+				"error %d initializing device\n", err);
+			/* drop the pci_dev reference the stub was holding
+			 * before throwing the stub away
+			 */
+			pci_dev_put(psdev->dev);
+			kfree(psdev);
+			psdev = NULL;
+		}
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+		if (psdev)
+			list_add_tail(&psdev->dev_list, &pcistub_devices);
+	}
+
+	/* from now on pcistub_seize() initialises devices immediately */
+	initialize_devices = 1;
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	return 0;
+}
+
+/* Create a stub for @dev.  The device is initialised right away when late
+ * initialisation has already run, otherwise it is queued on seized_devices.
+ * Returns 0 or a negative errno.
+ */
+static int __devinit pcistub_seize(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	unsigned long flags;
+	int err = 0;
+
+	psdev = pcistub_device_alloc(dev);
+	if (!psdev)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	if (initialize_devices) {
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		/* don't want irqs disabled when calling pcistub_init_device */
+		err = pcistub_init_device(psdev->dev);
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+		if (!err)
+			list_add(&psdev->dev_list, &pcistub_devices);
+	} else {
+		dev_dbg(&dev->dev, "deferring initialization\n");
+		list_add(&psdev->dev_list, &seized_devices);
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	if (err)
+		pcistub_device_put(psdev);
+
+	return err;
+}
+
+/* PCI probe callback: seize @dev if it (or a parent bridge) is on the hide
+ * list and has a normal or bridge header.  Returns -ENODEV otherwise.
+ */
+static int __devinit pcistub_probe(struct pci_dev *dev,
+				   const struct pci_device_id *id)
+{
+	int err = 0;
+
+	dev_dbg(&dev->dev, "probing...\n");
+
+	if (pcistub_match(dev)) {
+
+		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
+		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+			dev_err(&dev->dev, "can't export pci devices that "
+				"don't have a normal (0) or bridge (1) "
+				"header type!\n");
+			err = -ENODEV;
+			goto out;
+		}
+
+		dev_info(&dev->dev, "seizing device\n");
+		err = pcistub_seize(dev);
+	} else
+		/* Didn't find the device */
+		err = -ENODEV;
+
+out:
+	return err;
+}
+
+/* PCI remove callback: release the quirks of @dev, detach it from a
+ * pciback device that still uses it (with a loud warning) and drop the
+ * list's reference on the stub.
+ */
+static void pcistub_remove(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev, *found_psdev = NULL;
+	unsigned long flags;
+
+	dev_dbg(&dev->dev, "removing\n");
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	pciback_config_quirk_release(dev);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev == dev) {
+			found_psdev = psdev;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	if (found_psdev) {
+		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
+			found_psdev->pdev);
+
+		if (found_psdev->pdev) {
+			printk(KERN_WARNING "pciback: ****** removing device "
+			       "%s while still in-use! ******\n",
+			       pci_name(found_psdev->dev));
+			printk(KERN_WARNING "pciback: ****** driver domain may "
+			       "still access this device's i/o resources!\n");
+			printk(KERN_WARNING "pciback: ****** shutdown driver "
+			       "domain before binding device\n");
+			printk(KERN_WARNING "pciback: ****** to other drivers "
+			       "or domains\n");
+
+			pciback_release_pci_dev(found_psdev->pdev,
+						found_psdev->dev);
+		}
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+		list_del(&found_psdev->dev_list);
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		/* the final put for releasing from the list */
+		pcistub_device_put(found_psdev);
+	}
+}
+
+/* Match every PCI device; pcistub_probe() does the real filtering. */
+static const struct pci_device_id pcistub_ids[] = {
+	{
+	 .vendor = PCI_ANY_ID,
+	 .device = PCI_ANY_ID,
+	 .subvendor = PCI_ANY_ID,
+	 .subdevice = PCI_ANY_ID,
+	 },
+	{0,},
+};
+
+#define PCI_NODENAME_MAX 40
+/* Write "aerfail" to the aerState node of the backend directory that
+ * belongs to the domain using @psdev, retrying the xenbus transaction
+ * while it ends with -EAGAIN.
+ */
+static void kill_domain_by_device(struct pcistub_device *psdev)
+{
+	struct xenbus_transaction xbt;
+	int err;
+	char nodename[PCI_NODENAME_MAX];
+
+	/* Without a stub there is no device to report against and no
+	 * domain to look up, so log and give up.
+	 */
+	if (!psdev) {
+		printk(KERN_ERR "pciback: device is NULL when do AER "
+		       "recovery/kill_domain\n");
+		return;
+	}
+	/* snprintf() always NUL-terminates within PCI_NODENAME_MAX */
+	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
+		psdev->pdev->xdev->otherend_id);
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		dev_err(&psdev->dev->dev,
+			"error %d when start xenbus transaction\n", err);
+		return;
+	}
+	/*PV AER handlers will set this flag*/
+	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		if (err == -EAGAIN)
+			goto again;
+		dev_err(&psdev->dev->dev,
+			"error %d when end xenbus transaction\n", err);
+		return;
+	}
+}
+
+/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
+ * backend need to have cooperation. In pciback, those steps will do similar
+ * jobs: send service request and waiting for front_end response.
+ *
+ * Returns the result reported by the frontend in aer_op->err,
+ * PCI_ERS_RESULT_NONE if the frontend device address cannot be determined,
+ * or the caller-supplied @result if the frontend does not answer within
+ * 300 seconds.
+*/
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
+		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
+{
+	pci_ers_result_t res = result;
+	struct xen_pcie_aer_op *aer_op;
+	int ret;
+
+	/*with PV AER drivers*/
+	aer_op = &(psdev->pdev->sh_info->aer_op);
+	aer_op->cmd = aer_cmd ;
+	/*useful for error_detected callback*/
+	aer_op->err = state;
+	/*pcifront_end BDF*/
+	ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
+		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
+	if (!ret) {
+		dev_err(&psdev->dev->dev,
+			"pciback: failed to get pcifront device\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+	wmb();
+
+	dev_dbg(&psdev->dev->dev,
+			"pciback: aer_op %x dom %x bus %x devfn %x\n",
+			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+	/*local flag to mark there's aer request, pciback callback will use this
+	* flag to judge whether we need to check pci-front give aer service
+	* ack signal
+	*/
+	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	/*It is possible that a pcifront conf_read_write ops request invokes
+	* the callback which cause the spurious execution of wake_up.
+	* Yet it is harmless and better than a spinlock here
+	*/
+	set_bit(_XEN_PCIB_active,
+		(unsigned long *)&psdev->pdev->sh_info->flags);
+	wmb();
+	notify_remote_via_irq(psdev->pdev->evtchn_irq);
+
+	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
+		(unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
+
+	if (!ret) {
+		if (test_bit(_XEN_PCIB_active,
+			(unsigned long *)&psdev->pdev->sh_info->flags)) {
+			dev_err(&psdev->dev->dev,
+				"pcifront aer process not responding!\n");
+			clear_bit(_XEN_PCIB_active,
+			  (unsigned long *)&psdev->pdev->sh_info->flags);
+			aer_op->err = PCI_ERS_RESULT_NONE;
+			/* NOTE(review): _PCIB_op_pending stays set on this
+			 * path - confirm that is intended.
+			 */
+			return res;
+		}
+	}
+	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	if (test_bit(_XEN_PCIF_active,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_dbg(&psdev->dev->dev,
+			"schedule pci_conf service in pciback \n");
+		test_and_schedule_op(psdev->pdev);
+	}
+
+	res = (pci_ers_result_t)aer_op->err;
+	return res;
+}
+
+/*
+* pciback_slot_reset: it will send the slot_reset request to pcifront in case
+* of the device driver could provide this service, and then wait for pcifront
+* ack.
+* @dev: pointer to PCI devices
+* return value is used by aer_core do_recovery policy
+*/
+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	pci_ers_result_t result;
+
+	result = PCI_ERS_RESULT_RECOVERED;
+	dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				dev->bus->number,
+				PCI_SLOT(dev->devfn),
+				PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto end;
+	}
+
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		goto end;
+	}
+	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
+
+	if (result == PCI_ERS_RESULT_NONE ||
+		result == PCI_ERS_RESULT_DISCONNECT) {
+		dev_dbg(&dev->dev,
+			"No AER slot_reset service or disconnected!\n");
+		kill_domain_by_device(psdev);
+	}
+end:
+	/* pcistub_device_find() holds a reference for every device it
+	 * returns, including one that turned out to be unassigned.
+	 */
+	if (psdev)
+		pcistub_device_put(psdev);
+	up_write(&pcistub_sem);
+	return result;
+
+}
+
+
+/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
+* in case of the device driver could provide this service, and then wait
+* for pcifront ack
+* @dev: pointer to PCI devices
+* return value is used by aer_core do_recovery policy
+*/
+
+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	pci_ers_result_t result;
+
+	result = PCI_ERS_RESULT_RECOVERED;
+	dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				dev->bus->number,
+				PCI_SLOT(dev->devfn),
+				PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto end;
+	}
+
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		goto end;
+	}
+	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
+
+	if (result == PCI_ERS_RESULT_NONE ||
+		result == PCI_ERS_RESULT_DISCONNECT) {
+		dev_dbg(&dev->dev,
+			"No AER mmio_enabled service or disconnected!\n");
+		kill_domain_by_device(psdev);
+	}
+end:
+	/* pcistub_device_find() holds a reference for every device it
+	 * returns, including one that turned out to be unassigned.
+	 */
+	if (psdev)
+		pcistub_device_put(psdev);
+	up_write(&pcistub_sem);
+	return result;
+}
+
+/*pciback_error_detected: it will send the error_detected request to pcifront
+* in case of the device driver could provide this service, and then wait
+* for pcifront ack.
+* @dev: pointer to PCI devices
+* @error: the current PCI connection state
+* return value is used by aer_core do_recovery policy
+*/
+
+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+	pci_channel_state_t error)
+{
+	struct pcistub_device *psdev;
+	pci_ers_result_t result;
+
+	result = PCI_ERS_RESULT_CAN_RECOVER;
+	dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				dev->bus->number,
+				PCI_SLOT(dev->devfn),
+				PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto end;
+	}
+
+	/* Guest owns the device yet no AER handler is registered: kill it */
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+		kill_domain_by_device(psdev);
+		goto end;
+	}
+	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
+
+	if (result == PCI_ERS_RESULT_NONE ||
+		result == PCI_ERS_RESULT_DISCONNECT) {
+		dev_dbg(&dev->dev,
+			"No AER error_detected service or disconnected!\n");
+		kill_domain_by_device(psdev);
+	}
+end:
+	/* pcistub_device_find() holds a reference for every device it
+	 * returns, including one that turned out to be unassigned.
+	 */
+	if (psdev)
+		pcistub_device_put(psdev);
+	up_write(&pcistub_sem);
+	return result;
+}
+
+/*pciback_error_resume: it will send the error_resume request to pcifront
+* in case of the device driver could provide this service, and then wait
+* for pcifront ack.
+* @dev: pointer to PCI devices
+*/
+
+static void pciback_error_resume(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				dev->bus->number,
+				PCI_SLOT(dev->devfn),
+				PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto end;
+	}
+
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		kill_domain_by_device(psdev);
+		goto end;
+	}
+	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
+		       PCI_ERS_RESULT_RECOVERED);
+end:
+	/* pcistub_device_find() holds a reference for every device it
+	 * returns, including one that turned out to be unassigned.
+	 */
+	if (psdev)
+		pcistub_device_put(psdev);
+	up_write(&pcistub_sem);
+	return;
+}
+
+/*add pciback AER handling*/
+static struct pci_error_handlers pciback_error_handler = {
+	.error_detected = pciback_error_detected,
+	.mmio_enabled = pciback_mmio_enabled,
+	.slot_reset = pciback_slot_reset,
+	.resume = pciback_error_resume,
+};
+
+/*
+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
+ * for a normal device. I don't want it to be loaded automatically.
+ */
+
+static struct pci_driver pciback_pci_driver = {
+	.name = "pciback",
+	.id_table = pcistub_ids,
+	.probe = pcistub_probe,
+	.remove = pcistub_remove,
+	.err_handler = &pciback_error_handler,
+};
+
+/* Parse "[domain:]bus:slot.func" (hex fields) from @buf; the domain is 0
+ * when it is omitted.  Returns 0 on success or -EINVAL.
+ */
+static inline int str_to_slot(const char *buf, int *domain, int *bus,
+			      int *slot, int *func)
+{
+	int err;
+
+	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
+	if (err == 4)
+		return 0;
+	else if (err < 0)
+		return -EINVAL;
+
+	/* try again without domain */
+	*domain = 0;
+	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
+	if (err == 3)
+		return 0;
+
+	return -EINVAL;
+}
+
+/* Parse "domain:bus:slot.func-reg:size:mask" (hex fields) from @buf.
+ * Returns 0 when all seven fields were read, else -EINVAL.
+ */
+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
+			       *slot, int *func, int *reg, int *size, int *mask)
+{
+	int err;
+
+	err =
+	    sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
+		   func, reg, size, mask);
+	if (err == 7)
+		return 0;
+	return -EINVAL;
+}
+
+/* Append domain:bus:slot.func to the list of devices to seize.
+ * Returns 0 or -ENOMEM.
+ */
+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
+{
+	struct pcistub_device_id *pci_dev_id;
+	unsigned long flags;
+
+	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
+	if (!pci_dev_id)
+		return -ENOMEM;
+
+	pci_dev_id->domain = domain;
+	pci_dev_id->bus = bus;
+	pci_dev_id->devfn = PCI_DEVFN(slot, func);
+
+	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
+		 domain, bus, slot, func);
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return 0;
+}
+
+/* Remove every entry for domain:bus:slot.func from the seize list.
+ * Returns 0 if at least one entry was removed, else -ENOENT.
+ */
+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
+{
+	struct pcistub_device_id *pci_dev_id, *t;
+	int devfn = PCI_DEVFN(slot, func);
+	int err = -ENOENT;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
+				 slot_list) {
+		if (pci_dev_id->domain == domain
+		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
+			/* Don't break; here because it's possible the same
+			 * slot could be in the list more than once
+			 */
+			list_del(&pci_dev_id->slot_list);
+			kfree(pci_dev_id);
+
+			err = 0;
+
+			pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
+				 "seize list\n", domain, bus, slot, func);
+		}
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return err;
+}
+
+/* Add a config-space quirk field (@reg, @size, @mask) to the seized device
+ * at domain:bus:slot.func.
+ *
+ * Returns 0, -ENODEV if the device is not known, -ENOMEM, or the error
+ * from pciback_config_quirks_add_field().
+ */
+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
+			   int size, int mask)
+{
+	int err = 0;
+	struct pcistub_device *psdev;
+	struct pci_dev *dev;
+	struct config_field *field;
+
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (!psdev->dev) {
+		err = -ENODEV;
+		goto release;
+	}
+	dev = psdev->dev;
+
+	field = kzalloc(sizeof(*field), GFP_ATOMIC);
+	if (!field) {
+		err = -ENOMEM;
+		goto release;
+	}
+
+	field->offset = reg;
+	field->size = size;
+	field->mask = mask;
+	field->init = NULL;
+	field->reset = NULL;
+	field->release = NULL;
+	field->clean = pciback_config_field_free;
+
+	err = pciback_config_quirks_add_field(dev, field);
+	if (err)
+		kfree(field);
+release:
+	/* balance the reference taken by pcistub_device_find() */
+	pcistub_device_put(psdev);
+out:
+	return err;
+}
+
+/* sysfs store for new_slot: add the written slot to the seize list.
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
+				size_t count)
+{
+	int domain, bus, slot, func;
+	int err;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+
+	err = pcistub_device_id_add(domain, bus, slot, func);
+
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+
+/* sysfs store for remove_slot: drop the written slot from the seize list.
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
+				   size_t count)
+{
+	int domain, bus, slot, func;
+	int err;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+
+	err = pcistub_device_id_remove(domain, bus, slot, func);
+
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+
+/* sysfs show for slots: one "dddd:bb:ss.f" line per seize-list entry,
+ * limited to PAGE_SIZE bytes.
+ */
+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
+{
+	struct pcistub_device_id *pci_dev_id;
+	size_t count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
+		if (count >= PAGE_SIZE)
+			break;
+
+		count += scnprintf(buf + count, PAGE_SIZE - count,
+				   "%04x:%02x:%02x.%01x\n",
+				   pci_dev_id->domain, pci_dev_id->bus,
+				   PCI_SLOT(pci_dev_id->devfn),
+				   PCI_FUNC(pci_dev_id->devfn));
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return count;
+}
+
+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+
+/* sysfs store for quirks: parse the written quirk and register it.
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+				 size_t count)
+{
+	int domain, bus, slot, func, reg, size, mask;
+	int err;
+
+	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
+			   &mask);
+	if (err)
+		goto out;
+
+	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
+
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+/* sysfs show for quirks: for every entry of pciback_quirks print the
+ * device address, its IDs and the config fields attached to the device,
+ * limited to PAGE_SIZE bytes.
+ */
+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+{
+	int count = 0;
+	unsigned long flags;
+	struct pciback_config_quirk *quirk;
+	struct pciback_dev_data *dev_data;
+	const struct config_field *field;
+	const struct config_field_entry *cfg_entry;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
+		if (count >= PAGE_SIZE)
+			goto out;
+
+		count += scnprintf(buf + count, PAGE_SIZE - count,
+				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
+				   quirk->pdev->bus->number,
+				   PCI_SLOT(quirk->pdev->devfn),
+				   PCI_FUNC(quirk->pdev->devfn),
+				   quirk->devid.vendor, quirk->devid.device,
+				   quirk->devid.subvendor,
+				   quirk->devid.subdevice);
+
+		dev_data = pci_get_drvdata(quirk->pdev);
+		/* same guard as permissive_show(): skip the field list
+		 * rather than dereference missing driver data
+		 */
+		if (!dev_data)
+			continue;
+
+		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+			field = cfg_entry->field;
+			if (count >= PAGE_SIZE)
+				goto out;
+
+			count += scnprintf(buf + count, PAGE_SIZE - count,
+					   "\t\t%08x:%01x:%08x\n",
+					   cfg_entry->base_offset +
+					   field->offset, field->size,
+					   field->mask);
+		}
+	}
+
+out:
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return count;
+}
+
+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+
+/* sysfs store for permissive: switch the written device to permissive
+ * config-space access.  Returns @count on success, -ENODEV if the device
+ * is unknown, or -ENXIO if it has no driver data.
+ */
+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
+			      size_t count)
+{
+	int domain, bus, slot, func;
+	int err;
+	struct pcistub_device *psdev;
+	struct pciback_dev_data *dev_data;
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (!psdev->dev) {
+		err = -ENODEV;
+		goto release;
+	}
+	dev_data = pci_get_drvdata(psdev->dev);
+	/* the driver data for a device should never be null at this point */
+	if (!dev_data) {
+		err = -ENXIO;
+		goto release;
+	}
+	if (!dev_data->permissive) {
+		dev_data->permissive = 1;
+		/* Let user know that what they're doing could be unsafe */
+		dev_warn(&psdev->dev->dev, "enabling permissive mode "
+			 "configuration space accesses!\n");
+		dev_warn(&psdev->dev->dev,
+			 "permissive mode is potentially unsafe!\n");
+	}
+release:
+	pcistub_device_put(psdev);
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+/* sysfs show for permissive: one line with the PCI name of every seized
+ * device that is in permissive mode, limited to PAGE_SIZE bytes.
+ */
+static ssize_t permissive_show(struct device_driver *drv, char *buf)
+{
+	struct pcistub_device *psdev;
+	struct pciback_dev_data *dev_data;
+	size_t count = 0;
+	unsigned long flags;
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (count >= PAGE_SIZE)
+			break;
+		if (!psdev->dev)
+			continue;
+		dev_data = pci_get_drvdata(psdev->dev);
+		if (!dev_data || !dev_data->permissive)
+			continue;
+		count +=
+		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
+			      pci_name(psdev->dev));
+	}
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return count;
+}
+
+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+
+/* Remove the driver's sysfs attributes and unregister the PCI driver. */
+static void pcistub_exit(void)
+{
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
+	driver_remove_file(&pciback_pci_driver.driver,
+			   &driver_attr_remove_slot);
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+
+	pci_unregister_driver(&pciback_pci_driver);
+}
+
+/* Parse the "hide" parameter, a sequence of "(domain:bus:slot.func)" or
+ * "(bus:slot.func)" entries, into the seize list.  Then register the PCI
+ * driver and create its sysfs attributes.
+ *
+ * Returns 0, -EINVAL if the parameter cannot be parsed, or the error of
+ * the failing registration step.
+ */
+static int __init pcistub_init(void)
+{
+	int pos = 0;
+	int err = 0;
+	int domain, bus, slot, func;
+	int parsed;
+
+	if (pci_devs_to_hide && *pci_devs_to_hide) {
+		do {
+			parsed = 0;
+
+			err = sscanf(pci_devs_to_hide + pos,
+				     " (%x:%x:%x.%x) %n",
+				     &domain, &bus, &slot, &func, &parsed);
+			if (err != 4) {
+				domain = 0;
+				err = sscanf(pci_devs_to_hide + pos,
+					     " (%x:%x.%x) %n",
+					     &bus, &slot, &func, &parsed);
+				if (err != 3)
+					goto parse_error;
+			}
+
+			err = pcistub_device_id_add(domain, bus, slot, func);
+			if (err)
+				goto out;
+
+			/* if parsed<=0, we've reached the end of the string */
+			pos += parsed;
+		} while (parsed > 0 && pci_devs_to_hide[pos]);
+	}
+
+	/* If we're the first PCI Device Driver to register, we're the
+	 * first one to get offered PCI devices as they become
+	 * available (and thus we can be the first to grab them)
+	 */
+	err = pci_register_driver(&pciback_pci_driver);
+	if (err < 0)
+		goto out;
+
+	err = driver_create_file(&pciback_pci_driver.driver,
+				 &driver_attr_new_slot);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_remove_slot);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_slots);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_quirks);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_permissive);
+
+	if (err)
+		pcistub_exit();
+
+out:
+	return err;
+
+parse_error:
+	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
+	       pci_devs_to_hide + pos);
+	return -EINVAL;
+}
+
+#ifndef MODULE
+/*
+ * fs_initcall happens before device_initcall
+ * so pciback *should* get called first (b/c we
+ * want to suck up any device before
other drivers + * get a chance by being the first pci device + * driver to register) + */ +fs_initcall(pcistub_init); +#endif + +static int __init pciback_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = pciback_config_init(); + if (err) + return err; + +#ifdef MODULE + err = pcistub_init(); + if (err < 0) + return err; +#endif + + pcistub_init_devices_late(); + err = pciback_xenbus_register(); + if (err) + pcistub_exit(); + + return err; +} + +static void __exit pciback_cleanup(void) +{ + pciback_xenbus_unregister(); + pcistub_exit(); +} + +module_init(pciback_init); +module_exit(pciback_cleanup); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h new file mode 100644 index 000000000000..98e29127abf9 --- /dev/null +++ b/drivers/xen/xen-pciback/pciback.h @@ -0,0 +1,133 @@ +/* + * PCI Backend Common Data Structures & Function Declarations + * + * Author: Ryan Wilson + */ +#ifndef __XEN_PCIBACK_H__ +#define __XEN_PCIBACK_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct pci_dev_entry { + struct list_head list; + struct pci_dev *dev; +}; + +#define _PDEVF_op_active (0) +#define PDEVF_op_active (1<<(_PDEVF_op_active)) +#define _PCIB_op_pending (1) +#define PCIB_op_pending (1<<(_PCIB_op_pending)) + +struct pciback_device { + void *pci_dev_data; + spinlock_t dev_lock; + + struct xenbus_device *xdev; + + struct xenbus_watch be_watch; + u8 be_watching; + + int evtchn_irq; + + struct xen_pci_sharedinfo *sh_info; + + unsigned long flags; + + struct work_struct op_work; +}; + +struct pciback_dev_data { + struct list_head config_fields; + int permissive; + int warned_on_write; +}; + +/* Used by XenBus and pciback_ops.c */ +extern wait_queue_head_t aer_wait_queue; +extern struct workqueue_struct *pciback_wq; +/* Used by pcistub.c and conf_space_quirks.c */ +extern struct list_head pciback_quirks; + +/* Get/Put PCI Devices that are 
hidden from the PCI Backend Domain */ +struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, + int domain, int bus, + int slot, int func); +struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, + struct pci_dev *dev); +void pcistub_put_pci_dev(struct pci_dev *dev); + +/* Ensure a device is turned off or reset */ +void pciback_reset_device(struct pci_dev *pdev); + +/* Access a virtual configuration space for a PCI device */ +int pciback_config_init(void); +int pciback_config_init_dev(struct pci_dev *dev); +void pciback_config_free_dyn_fields(struct pci_dev *dev); +void pciback_config_reset_dev(struct pci_dev *dev); +void pciback_config_free_dev(struct pci_dev *dev); +int pciback_config_read(struct pci_dev *dev, int offset, int size, + u32 *ret_val); +int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value); + +/* Handle requests for specific devices from the frontend */ +typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn, unsigned int devid); +typedef int (*publish_pci_root_cb) (struct pciback_device *pdev, + unsigned int domain, unsigned int bus); +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb); +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev); +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn); + +/** +* Add for domain0 PCIE-AER handling. 
Get guest domain/bus/devfn in pciback +* before sending the AER request to pcifront, so that the guest can identify the +* device and cooperate with pciback to finish the AER recovery job if the device +* driver has the capability +*/ + +int pciback_get_pcifront_dev(struct pci_dev *pcidev, + struct pciback_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn); +int pciback_init_devices(struct pciback_device *pdev); +int pciback_publish_pci_roots(struct pciback_device *pdev, + publish_pci_root_cb cb); +void pciback_release_devices(struct pciback_device *pdev); + +/* Handles events from front-end */ +irqreturn_t pciback_handle_event(int irq, void *dev_id); +void pciback_do_op(struct work_struct *data); + +int pciback_xenbus_register(void); +void pciback_xenbus_unregister(void); + +#ifdef CONFIG_PCI_MSI +int pciback_enable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op); + +int pciback_disable_msi(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op); + + +int pciback_enable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op); + +int pciback_disable_msix(struct pciback_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op); +#endif +extern int verbose_request; + +void test_and_schedule_op(struct pciback_device *pdev); +#endif + diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c new file mode 100644 index 000000000000..2b9a93e1fdee --- /dev/null +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -0,0 +1,131 @@ +/* + * PCI Backend Operations - respond to PCI requests from Frontend + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include +#include +#include "pciback.h" + +int verbose_request; +module_param(verbose_request, int, 0644); + +/* Ensure a device is "turned off" and ready to be exported. 
+ * (Also see pciback_config_reset to ensure virtual configuration space is + * ready to be re-exported) + */ +void pciback_reset_device(struct pci_dev *dev) +{ + u16 cmd; + + /* Disable devices (but not bridges) */ + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { + pci_disable_device(dev); + + pci_write_config_word(dev, PCI_COMMAND, 0); + + dev->is_busmaster = 0; + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (cmd & (PCI_COMMAND_INVALIDATE)) { + cmd &= ~(PCI_COMMAND_INVALIDATE); + pci_write_config_word(dev, PCI_COMMAND, cmd); + + dev->is_busmaster = 0; + } + } +} +/* +* Now the same evtchn is used for both pcifront conf_read_write request +* as well as pcie aer front end ack. We use a new work_queue to schedule +* pciback conf_read_write service for avoiding conflict with aer_core +* do_recovery job which also uses the system default work_queue +*/ +void test_and_schedule_op(struct pciback_device *pdev) +{ + /* Check that frontend is requesting an operation and that we are not + * already processing a request */ + if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) + && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { + queue_work(pciback_wq, &pdev->op_work); + } + /* _XEN_PCIB_active should have been cleared by pcifront. Also make + * sure pciback is waiting for an ack by checking _PCIB_op_pending. */ + if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) + && test_bit(_PCIB_op_pending, &pdev->flags)) { + wake_up(&aer_wait_queue); + } +} + +/* Performing the configuration space reads/writes must not be done in atomic + * context because some of the pci_* functions can sleep (mostly due to ACPI + * use of semaphores). 
This function is intended to be called from a work + * queue in process context taking a struct pciback_device as a parameter */ + +void pciback_do_op(struct work_struct *data) +{ + struct pciback_device *pdev = + container_of(data, struct pciback_device, op_work); + struct pci_dev *dev; + struct xen_pci_op *op = &pdev->sh_info->op; + + dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); + + if (dev == NULL) + op->err = XEN_PCI_ERR_dev_not_found; + else { + switch (op->cmd) { + case XEN_PCI_OP_conf_read: + op->err = pciback_config_read(dev, + op->offset, op->size, &op->value); + break; + case XEN_PCI_OP_conf_write: + op->err = pciback_config_write(dev, + op->offset, op->size, op->value); + break; +#ifdef CONFIG_PCI_MSI + case XEN_PCI_OP_enable_msi: + op->err = pciback_enable_msi(pdev, dev, op); + break; + case XEN_PCI_OP_disable_msi: + op->err = pciback_disable_msi(pdev, dev, op); + break; + case XEN_PCI_OP_enable_msix: + op->err = pciback_enable_msix(pdev, dev, op); + break; + case XEN_PCI_OP_disable_msix: + op->err = pciback_disable_msix(pdev, dev, op); + break; +#endif + default: + op->err = XEN_PCI_ERR_not_implemented; + break; + } + } + /* Tell the driver domain that we're done. */ + wmb(); + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_irq(pdev->evtchn_irq); + + /* Mark that we're done. */ + smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ + clear_bit(_PDEVF_op_active, &pdev->flags); + smp_mb__after_clear_bit(); /* /before/ final check for work */ + + /* Check to see if the driver domain tried to start another request in + * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. 
+ */ + test_and_schedule_op(pdev); +} + +irqreturn_t pciback_handle_event(int irq, void *dev_id) +{ + struct pciback_device *pdev = dev_id; + + test_and_schedule_op(pdev); + + return IRQ_HANDLED; +} diff --git a/drivers/xen/xen-pciback/slot.c b/drivers/xen/xen-pciback/slot.c new file mode 100644 index 000000000000..efb922d6f78e --- /dev/null +++ b/drivers/xen/xen-pciback/slot.c @@ -0,0 +1,191 @@ +/* + * PCI Backend - Provides a Virtual PCI bus (with real devices) + * to the frontend + * + * Author: Ryan Wilson (vpci.c) + * Author: Tristan Gingold , from vpci.c + */ + +#include +#include +#include +#include +#include "pciback.h" + +/* There are at most 32 slots in a pci bus. */ +#define PCI_SLOT_MAX 32 + +#define PCI_BUS_NBR 2 + +struct slot_dev_data { + /* Access to dev_list must be protected by lock */ + struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX]; + spinlock_t lock; +}; + +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn) +{ + struct pci_dev *dev = NULL; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + unsigned long flags; + + if (domain != 0 || PCI_FUNC(devfn) != 0) + return NULL; + + if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR) + return NULL; + + spin_lock_irqsave(&slot_dev->lock, flags); + dev = slot_dev->slots[bus][PCI_SLOT(devfn)]; + spin_unlock_irqrestore(&slot_dev->lock, flags); + + return dev; +} + +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) +{ + int err = 0, slot, bus; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + unsigned long flags; + + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { + err = -EFAULT; + xenbus_dev_fatal(pdev->xdev, err, + "Can't export bridges on the virtual PCI bus"); + goto out; + } + + spin_lock_irqsave(&slot_dev->lock, flags); + + /* Assign to a new slot on the virtual PCI bus */ + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot 
< PCI_SLOT_MAX; slot++) { + if (slot_dev->slots[bus][slot] == NULL) { + printk(KERN_INFO + "pciback: slot: %s: assign to virtual " + "slot %d, bus %d\n", + pci_name(dev), slot, bus); + slot_dev->slots[bus][slot] = dev; + goto unlock; + } + } + + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "No more space on root virtual PCI bus"); + +unlock: + spin_unlock_irqrestore(&slot_dev->lock, flags); + + /* Publish this device. */ + if (!err) + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid); + +out: + return err; +} + +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +{ + int slot, bus; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&slot_dev->lock, flags); + + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (slot_dev->slots[bus][slot] == dev) { + slot_dev->slots[bus][slot] = NULL; + found_dev = dev; + goto out; + } + } + +out: + spin_unlock_irqrestore(&slot_dev->lock, flags); + + if (found_dev) + pcistub_put_pci_dev(found_dev); +} + +int pciback_init_devices(struct pciback_device *pdev) +{ + int slot, bus; + struct slot_dev_data *slot_dev; + + slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL); + if (!slot_dev) + return -ENOMEM; + + spin_lock_init(&slot_dev->lock); + + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) + slot_dev->slots[bus][slot] = NULL; + + pdev->pci_dev_data = slot_dev; + + return 0; +} + +int pciback_publish_pci_roots(struct pciback_device *pdev, + publish_pci_root_cb publish_cb) +{ + /* The Virtual PCI bus has only one root */ + return publish_cb(pdev, 0, 0); +} + +void pciback_release_devices(struct pciback_device *pdev) +{ + int slot, bus; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + struct pci_dev *dev; + + for (bus = 0; bus < PCI_BUS_NBR; bus++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + dev = 
slot_dev->slots[bus][slot]; + if (dev != NULL) + pcistub_put_pci_dev(dev); + } + + kfree(slot_dev); + pdev->pci_dev_data = NULL; +} + +/* + * Find pcidev in the virtual slot table and report the domain/bus/devfn of + * the slot it occupies. Returns 1 with *domain, *bus and *devfn filled in + * when the device is present, 0 otherwise. + */ +int pciback_get_pcifront_dev(struct pci_dev *pcidev, + struct pciback_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) +{ + int slot, busnr; + struct slot_dev_data *slot_dev = pdev->pci_dev_data; + struct pci_dev *dev; + int found = 0; + unsigned long flags; + + spin_lock_irqsave(&slot_dev->lock, flags); + + /* Step the loop index busnr, not the 'bus' out-pointer. */ + for (busnr = 0; busnr < PCI_BUS_NBR; busnr++) + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + dev = slot_dev->slots[busnr][slot]; + if (dev && dev->bus->number == pcidev->bus->number + && dev->devfn == pcidev->devfn + && pci_domain_nr(dev->bus) == + pci_domain_nr(pcidev->bus)) { + found = 1; + *domain = 0; + *bus = busnr; + *devfn = PCI_DEVFN(slot, 0); + goto out; + } + } +out: + spin_unlock_irqrestore(&slot_dev->lock, flags); + return found; + +} diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c new file mode 100644 index 000000000000..2857ab892f02 --- /dev/null +++ b/drivers/xen/xen-pciback/vpci.c @@ -0,0 +1,244 @@ +/* + * PCI Backend - Provides a Virtual PCI bus (with real devices) + * to the frontend + * + * Author: Ryan Wilson + */ + +#include +#include +#include +#include +#include "pciback.h" + +#define PCI_SLOT_MAX 32 + +struct vpci_dev_data { + /* Access to dev_list must be protected by lock */ + struct list_head dev_list[PCI_SLOT_MAX]; + spinlock_t lock; +}; + +static inline struct list_head *list_first(struct list_head *head) +{ + return head->next; +} + +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn) +{ + struct pci_dev_entry *entry; + struct pci_dev *dev = NULL; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; + + if (domain != 0 || bus != 0) + return NULL; + + if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { + spin_lock_irqsave(&vpci_dev->lock, flags); + + 
list_for_each_entry(entry, + &vpci_dev->dev_list[PCI_SLOT(devfn)], + list) { + if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { + dev = entry->dev; + break; + } + } + + spin_unlock_irqrestore(&vpci_dev->lock, flags); + } + return dev; +} + +static inline int match_slot(struct pci_dev *l, struct pci_dev *r) +{ + if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) + && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) + return 1; + + return 0; +} + +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) +{ + int err = 0, slot, func = -1; + struct pci_dev_entry *t, *dev_entry; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; + + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { + err = -EFAULT; + xenbus_dev_fatal(pdev->xdev, err, + "Can't export bridges on the virtual PCI bus"); + goto out; + } + + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); + if (!dev_entry) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "Error adding entry to virtual PCI bus"); + goto out; + } + + dev_entry->dev = dev; + + spin_lock_irqsave(&vpci_dev->lock, flags); + + /* Keep multi-function devices together on the virtual PCI bus */ + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (!list_empty(&vpci_dev->dev_list[slot])) { + t = list_entry(list_first(&vpci_dev->dev_list[slot]), + struct pci_dev_entry, list); + + if (match_slot(dev, t->dev)) { + pr_info("pciback: vpci: %s: " + "assign to virtual slot %d func %d\n", + pci_name(dev), slot, + PCI_FUNC(dev->devfn)); + list_add_tail(&dev_entry->list, + &vpci_dev->dev_list[slot]); + func = PCI_FUNC(dev->devfn); + goto unlock; + } + } + } + + /* Assign to a new slot on the virtual PCI bus */ + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (list_empty(&vpci_dev->dev_list[slot])) { + printk(KERN_INFO + "pciback: vpci: %s: assign to virtual slot %d\n", + pci_name(dev), slot); + list_add_tail(&dev_entry->list, + 
&vpci_dev->dev_list[slot]); + func = PCI_FUNC(dev->devfn); + goto unlock; + } + } + + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "No more space on root virtual PCI bus"); + +unlock: + spin_unlock_irqrestore(&vpci_dev->lock, flags); + + /* Publish this device. If no slot was found the entry was never + * linked into a list, so free it here instead of leaking it. */ + if (!err) + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); + else + kfree(dev_entry); + +out: + return err; +} + +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +{ + int slot; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&vpci_dev->lock, flags); + + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + struct pci_dev_entry *e, *tmp; + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], + list) { + if (e->dev == dev) { + list_del(&e->list); + found_dev = e->dev; + kfree(e); + goto out; + } + } + } + +out: + spin_unlock_irqrestore(&vpci_dev->lock, flags); + + if (found_dev) + pcistub_put_pci_dev(found_dev); +} + +int pciback_init_devices(struct pciback_device *pdev) +{ + int slot; + struct vpci_dev_data *vpci_dev; + + vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); + if (!vpci_dev) + return -ENOMEM; + + spin_lock_init(&vpci_dev->lock); + + for (slot = 0; slot < PCI_SLOT_MAX; slot++) + INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); + + pdev->pci_dev_data = vpci_dev; + + return 0; +} + +int pciback_publish_pci_roots(struct pciback_device *pdev, + publish_pci_root_cb publish_cb) +{ + /* The Virtual PCI bus has only one root */ + return publish_cb(pdev, 0, 0); +} + +void pciback_release_devices(struct pciback_device *pdev) +{ + int slot; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + struct pci_dev_entry *e, *tmp; + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], + list) { + list_del(&e->list); + pcistub_put_pci_dev(e->dev); + kfree(e); + } + } + + kfree(vpci_dev); + pdev->pci_dev_data = NULL; +} + +int 
pciback_get_pcifront_dev(struct pci_dev *pcidev, + struct pciback_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) +{ + struct pci_dev_entry *entry; + struct pci_dev *dev = NULL; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; + int found = 0, slot; + + spin_lock_irqsave(&vpci_dev->lock, flags); + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + list_for_each_entry(entry, + &vpci_dev->dev_list[slot], + list) { + dev = entry->dev; + if (dev && dev->bus->number == pcidev->bus->number + && pci_domain_nr(dev->bus) == + pci_domain_nr(pcidev->bus) + && dev->devfn == pcidev->devfn) { + found = 1; + *domain = 0; + *bus = 0; + *devfn = PCI_DEVFN(slot, + PCI_FUNC(pcidev->devfn)); + } + } + } + spin_unlock_irqrestore(&vpci_dev->lock, flags); + return found; +} diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c new file mode 100644 index 000000000000..af6c25a1d729 --- /dev/null +++ b/drivers/xen/xen-pciback/xenbus.c @@ -0,0 +1,709 @@ +/* + * PCI Backend Xenbus Setup - handles setup with frontend and xend + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "pciback.h" + +#define INVALID_EVTCHN_IRQ (-1) +struct workqueue_struct *pciback_wq; + +static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) +{ + struct pciback_device *pdev; + + pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); + if (pdev == NULL) + goto out; + dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); + + pdev->xdev = xdev; + dev_set_drvdata(&xdev->dev, pdev); + + spin_lock_init(&pdev->dev_lock); + + pdev->sh_info = NULL; + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + pdev->be_watching = 0; + + INIT_WORK(&pdev->op_work, pciback_do_op); + + if (pciback_init_devices(pdev)) { + kfree(pdev); + pdev = NULL; + } +out: + return pdev; +} + +static void pciback_disconnect(struct pciback_device *pdev) +{ + spin_lock(&pdev->dev_lock); + + /* 
Ensure the guest can't trigger our handler before removing devices */ + if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { + unbind_from_irqhandler(pdev->evtchn_irq, pdev); + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + } + + /* If the driver domain started an op, make sure we complete it + * before releasing the shared memory */ + flush_workqueue(pciback_wq); + + if (pdev->sh_info != NULL) { + xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); + pdev->sh_info = NULL; + } + + spin_unlock(&pdev->dev_lock); +} + +static void free_pdev(struct pciback_device *pdev) +{ + if (pdev->be_watching) + unregister_xenbus_watch(&pdev->be_watch); + + pciback_disconnect(pdev); + + pciback_release_devices(pdev); + + dev_set_drvdata(&pdev->xdev->dev, NULL); + pdev->xdev = NULL; + + kfree(pdev); +} + +static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, + int remote_evtchn) +{ + int err = 0; + void *vaddr; + + dev_dbg(&pdev->xdev->dev, + "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", + gnt_ref, remote_evtchn); + + err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error mapping other domain page in ours."); + goto out; + } + pdev->sh_info = vaddr; + + err = bind_interdomain_evtchn_to_irqhandler( + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, + 0, "pciback", pdev); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error binding event channel to IRQ"); + goto out; + } + pdev->evtchn_irq = err; + err = 0; + + dev_dbg(&pdev->xdev->dev, "Attached!\n"); +out: + return err; +} + +static int pciback_attach(struct pciback_device *pdev) +{ + int err = 0; + int gnt_ref, remote_evtchn; + char *magic = NULL; + + spin_lock(&pdev->dev_lock); + + /* Make sure we only do this setup once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + goto out; + + /* Wait for frontend to state that it has published the configuration */ + if 
(xenbus_read_driver_state(pdev->xdev->otherend) != + XenbusStateInitialised) + goto out; + + dev_dbg(&pdev->xdev->dev, "Reading frontend config\n"); + + err = xenbus_gather(XBT_NIL, pdev->xdev->otherend, + "pci-op-ref", "%u", &gnt_ref, + "event-channel", "%u", &remote_evtchn, + "magic", NULL, &magic, NULL); + if (err) { + /* If configuration didn't get read correctly, wait longer */ + xenbus_dev_fatal(pdev->xdev, err, + "Error reading configuration from frontend"); + goto out; + } + + if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { + xenbus_dev_fatal(pdev->xdev, -EFAULT, + "version mismatch (%s/%s) with pcifront - " + "halting pciback", + magic, XEN_PCI_MAGIC); + goto out; + } + + err = pciback_do_attach(pdev, gnt_ref, remote_evtchn); + if (err) + goto out; + + dev_dbg(&pdev->xdev->dev, "Connecting...\n"); + + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to connected state!"); + + dev_dbg(&pdev->xdev->dev, "Connected? 
%d\n", err); +out: + spin_unlock(&pdev->dev_lock); + + kfree(magic); + + return err; +} + +static int pciback_publish_pci_dev(struct pciback_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn, unsigned int devid) +{ + int err; + int len; + char str[64]; + + len = snprintf(str, sizeof(str), "vdev-%d", devid); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%04x:%02x:%02x.%02x", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + +out: + return err; +} + +static int pciback_export_device(struct pciback_device *pdev, + int domain, int bus, int slot, int func, + int devid) +{ + struct pci_dev *dev; + int err = 0; + + dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n", + domain, bus, slot, func); + + dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func); + if (!dev) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Couldn't locate PCI device " + "(%04x:%02x:%02x.%01x)! " + "perhaps already in-use?", + domain, bus, slot, func); + goto out; + } + + err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev); + if (err) + goto out; + + /* TODO: It'd be nice to export a bridge and have all of its children + * get exported with it. This may be best done in xend (which will + * have to calculate resource usage anyway) but we probably want to + * put something in here to ensure that if a bridge gets given to a + * driver domain, that all devices under that bridge are not given + * to other driver domains (as he who controls the bridge can disable + * it and stop the other devices from working). 
+ */ +out: + return err; +} + +static int pciback_remove_device(struct pciback_device *pdev, + int domain, int bus, int slot, int func) +{ + int err = 0; + struct pci_dev *dev; + + dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", + domain, bus, slot, func); + + dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); + if (!dev) { + err = -EINVAL; + dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " + "(%04x:%02x:%02x.%01x)! not owned by this domain\n", + domain, bus, slot, func); + goto out; + } + + pciback_release_pci_dev(pdev, dev); + +out: + return err; +} + +static int pciback_publish_pci_root(struct pciback_device *pdev, + unsigned int domain, unsigned int bus) +{ + unsigned int d, b; + int i, root_num, len, err; + char str[64]; + + dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", &root_num); + if (err == 0 || err == -ENOENT) + root_num = 0; + else if (err < 0) + goto out; + + /* Verify that we haven't already published this pci root */ + for (i = 0; i < root_num; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + str, "%x:%x", &d, &b); + if (err < 0) + goto out; + if (err != 2) { + err = -EINVAL; + goto out; + } + + if (d == domain && b == bus) { + err = 0; + goto out; + } + } + + len = snprintf(str, sizeof(str), "root-%d", root_num); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", + root_num, domain, bus); + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%04x:%02x", domain, bus); + if (err) + goto out; + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", (root_num + 1)); + +out: + return err; +} + +static int pciback_reconfigure(struct pciback_device *pdev) +{ + int err = 0; + int 
num_devs; + int domain, bus, slot, func; + int substate; + int i, len; + char state_str[64]; + char dev_str[64]; + + spin_lock(&pdev->dev_lock); + + dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + + /* Make sure we only reconfigure once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of devices"); + goto out; + } + + for (i = 0; i < num_devs; i++) { + len = snprintf(state_str, sizeof(state_str), "state-%d", i); + if (unlikely(len >= (sizeof(state_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", &substate); + if (err != 1) + substate = XenbusStateUnknown; + + switch (substate) { + case XenbusStateInitialising: + dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); + + len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); + if (unlikely(len >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while " + "reading configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + dev_str, "%x:%x:%x.%x", + &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device " + "configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = pciback_export_device(pdev, domain, bus, slot, + func, i); + if (err) + goto out; + + /* Publish pci roots. 
*/ + err = pciback_publish_pci_roots(pdev, + pciback_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root" + "buses for frontend"); + goto out; + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + state_str, "%d", + XenbusStateInitialised); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error switching substate of " + "dev-%d\n", i); + goto out; + } + break; + + case XenbusStateClosing: + dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); + + len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); + if (unlikely(len >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while " + "reading configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + dev_str, "%x:%x:%x.%x", + &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device " + "configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = pciback_remove_device(pdev, domain, bus, slot, + func); + if (err) + goto out; + + /* TODO: If at some point we implement support for pci + * root hot-remove on pcifront side, we'll need to + * remove unnecessary xenstore nodes of pci roots here. 
+ */ + + break; + + default: + break; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to reconfigured state!"); + goto out; + } + +out: + spin_unlock(&pdev->dev_lock); + + return 0; +} + +static void pciback_frontend_changed(struct xenbus_device *xdev, + enum xenbus_state fe_state) +{ + struct pciback_device *pdev = dev_get_drvdata(&xdev->dev); + + dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); + + switch (fe_state) { + case XenbusStateInitialised: + pciback_attach(pdev); + break; + + case XenbusStateReconfiguring: + pciback_reconfigure(pdev); + break; + + case XenbusStateConnected: + /* pcifront switched its state from reconfiguring to connected. + * Then switch to connected state. + */ + xenbus_switch_state(xdev, XenbusStateConnected); + break; + + case XenbusStateClosing: + pciback_disconnect(pdev); + xenbus_switch_state(xdev, XenbusStateClosing); + break; + + case XenbusStateClosed: + pciback_disconnect(pdev); + xenbus_switch_state(xdev, XenbusStateClosed); + if (xenbus_dev_is_online(xdev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); + device_unregister(&xdev->dev); + break; + + default: + break; + } +} + +static int pciback_setup_backend(struct pciback_device *pdev) +{ + /* Get configuration from xend (if available now) */ + int domain, bus, slot, func; + int err = 0; + int i, num_devs; + char dev_str[64]; + char state_str[64]; + + spin_lock(&pdev->dev_lock); + + /* It's possible we could get the call to setup twice, so make sure + * we're not already connected. 
+ */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitWait) + goto out; + + dev_dbg(&pdev->xdev->dev, "getting be setup\n"); + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of devices"); + goto out; + } + + for (i = 0; i < num_devs; i++) { + int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); + if (unlikely(l >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = pciback_export_device(pdev, domain, bus, slot, func, i); + if (err) + goto out; + + /* Switch substate of this device. 
*/ + l = snprintf(state_str, sizeof(state_str), "state-%d", i); + if (unlikely(l >= (sizeof(state_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", XenbusStateInitialised); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, "Error switching " + "substate of dev-%d\n", i); + goto out; + } + } + + err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root buses " + "for frontend"); + goto out; + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + if (err) + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to initialised state!"); + +out: + spin_unlock(&pdev->dev_lock); + + if (!err) + /* see if pcifront is already configured (if not, we'll wait) */ + pciback_attach(pdev); + + return err; +} + +static void pciback_be_watch(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct pciback_device *pdev = + container_of(watch, struct pciback_device, be_watch); + + switch (xenbus_read_driver_state(pdev->xdev->nodename)) { + case XenbusStateInitWait: + pciback_setup_backend(pdev); + break; + + default: + break; + } +} + +static int pciback_xenbus_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct pciback_device *pdev = alloc_pdev(dev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, + "Error allocating pciback_device struct"); + goto out; + } + + /* wait for xend to configure us */ + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto out; + + /* watch the backend node for backend configuration information */ + err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, + pciback_be_watch); + if (err) + goto out; + pdev->be_watching = 1; + + /* We need to force a call to our callback here in case + * 
xend already configured us! + */ + pciback_be_watch(&pdev->be_watch, NULL, 0); + +out: + return err; +} + +static int pciback_xenbus_remove(struct xenbus_device *dev) +{ + struct pciback_device *pdev = dev_get_drvdata(&dev->dev); + + if (pdev != NULL) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xenbus_pciback_driver = { + .name = "pciback", + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pciback_xenbus_probe, + .remove = pciback_xenbus_remove, + .otherend_changed = pciback_frontend_changed, +}; + +int __init pciback_xenbus_register(void) +{ + pciback_wq = create_workqueue("pciback_workqueue"); + if (!pciback_wq) { + printk(KERN_ERR "pciback_xenbus_register: create" + "pciback_workqueue failed\n"); + return -EFAULT; + } + return xenbus_register_backend(&xenbus_pciback_driver); +} + +void __exit pciback_xenbus_unregister(void) +{ + destroy_workqueue(pciback_wq); + xenbus_unregister_driver(&xenbus_pciback_driver); +} -- cgit v1.2.3 From 8bfd4e023f5fb5793d7d7483b6e17e04933c53e9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 19 Jul 2011 20:09:43 -0400 Subject: xen/pciback: Cleanup the driver based on checkpatch warnings and errors. Checkpatch found some extra warnings and errors. This mega patch fixes them all in one big swoop. We also spruce up the pcistub_ids to use DEFINE_PCI_DEVICE_TABLE macro (suggested by Jan Beulich). 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/conf_space.c | 4 +-- drivers/xen/xen-pciback/conf_space_header.c | 42 ++++++++++++++--------------- drivers/xen/xen-pciback/pci_stub.c | 6 ++--- drivers/xen/xen-pciback/pciback.h | 6 ++--- drivers/xen/xen-pciback/xenbus.c | 16 +++++------ 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 370c18e58d7a..eb6bba044438 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -18,8 +18,8 @@ static int permissive; module_param(permissive, bool, 0644); -#define DEFINE_PCI_CONFIG(op, size, type) \ -int pciback_##op##_config_##size \ +#define DEFINE_PCI_CONFIG(op, size, type) \ +int pciback_##op##_config_##size \ (struct pci_dev *dev, int offset, type value, void *data) \ { \ return pci_##op##_config_##size(dev, offset, value); \ diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 3ae7da137f7e..40166e04a2b9 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -249,27 +249,27 @@ static const struct config_field header_common[] = { {} }; -#define CFG_FIELD_BAR(reg_offset) \ - { \ - .offset = reg_offset, \ - .size = 4, \ - .init = bar_init, \ - .reset = bar_reset, \ - .release = bar_release, \ - .u.dw.read = bar_read, \ - .u.dw.write = bar_write, \ - } - -#define CFG_FIELD_ROM(reg_offset) \ - { \ - .offset = reg_offset, \ - .size = 4, \ - .init = rom_init, \ - .reset = bar_reset, \ - .release = bar_release, \ - .u.dw.read = bar_read, \ - .u.dw.write = rom_write, \ - } +#define CFG_FIELD_BAR(reg_offset) \ + { \ + .offset = reg_offset, \ + .size = 4, \ + .init = bar_init, \ + .reset = bar_reset, \ + .release = bar_release, \ + .u.dw.read = bar_read, \ + .u.dw.write = bar_write, \ + } + +#define CFG_FIELD_ROM(reg_offset) \ + { \ + .offset = reg_offset, \ + .size = 4, \ + 
.init = rom_init, \ + .reset = bar_reset, \ + .release = bar_release, \ + .u.dw.read = bar_read, \ + .u.dw.write = rom_write, \ + } static const struct config_field header_0[] = { CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 0b5a16b81c8c..e7853c340d7c 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -486,7 +486,7 @@ static void pcistub_remove(struct pci_dev *dev) } } -static const struct pci_device_id pcistub_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = { { .vendor = PCI_ANY_ID, .device = PCI_ANY_ID, @@ -592,7 +592,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, if (test_bit(_XEN_PCIF_active, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in pciback \n"); + "schedule pci_conf service in pciback\n"); test_and_schedule_op(psdev->pdev); } diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 98e29127abf9..c1e95e88ee9e 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include struct pci_dev_entry { @@ -20,8 +20,8 @@ struct pci_dev_entry { struct pci_dev *dev; }; -#define _PDEVF_op_active (0) -#define PDEVF_op_active (1<<(_PDEVF_op_active)) +#define _PDEVF_op_active (0) +#define PDEVF_op_active (1<<(_PDEVF_op_active)) #define _PCIB_op_pending (1) #define PCIB_op_pending (1<<(_PCIB_op_pending)) diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index af6c25a1d729..c0984cb442ca 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -683,20 +683,20 @@ static const struct xenbus_device_id xenpci_ids[] = { }; static struct xenbus_driver xenbus_pciback_driver = { - .name = 
"pciback", - .owner = THIS_MODULE, - .ids = xenpci_ids, - .probe = pciback_xenbus_probe, - .remove = pciback_xenbus_remove, - .otherend_changed = pciback_frontend_changed, + .name = "pciback", + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pciback_xenbus_probe, + .remove = pciback_xenbus_remove, + .otherend_changed = pciback_frontend_changed, }; int __init pciback_xenbus_register(void) { pciback_wq = create_workqueue("pciback_workqueue"); if (!pciback_wq) { - printk(KERN_ERR "pciback_xenbus_register: create" - "pciback_workqueue failed\n"); + printk(KERN_ERR "%s: create" + "pciback_workqueue failed\n", __func__); return -EFAULT; } return xenbus_register_backend(&xenbus_pciback_driver); -- cgit v1.2.3 From 6221a9b2a11acfb54e7b41da425e961b31e88553 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 9 Dec 2009 17:43:15 -0500 Subject: xen/pciback: Register the owner (domain) of the PCI device. When the front-end and back-end start negotiating we register the domain that will use the PCI device. Furthermore during shutdown of guest or unbinding of the PCI device (and unloading of module) from pciback we unregister the domain owner. 
Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xen-pciback/pci_stub.c | 2 ++ drivers/xen/xen-pciback/xenbus.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index e7853c340d7c..ac8396d8206b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -90,6 +90,8 @@ static void pcistub_device_release(struct kref *kref) dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); + xen_unregister_device_domain_owner(psdev->dev); + /* Clean-up the device */ pciback_reset_device(psdev->dev); pciback_config_free_dyn_fields(psdev->dev); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index c0984cb442ca..a0cf7285d320 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "pciback.h" @@ -221,6 +222,15 @@ static int pciback_export_device(struct pciback_device *pdev, if (err) goto out; + dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); + if (xen_register_device_domain_owner(dev, + pdev->xdev->otherend_id) != 0) { + dev_err(&dev->dev, "device has been assigned to another " \ + "domain! Over-writting the ownership, but beware.\n"); + xen_unregister_device_domain_owner(dev); + xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); + } + /* TODO: It'd be nice to export a bridge and have all of its children * get exported with it. 
This may be best done in xend (which will * have to calculate resource usage anyway) but we probably want to @@ -251,6 +261,9 @@ static int pciback_remove_device(struct pciback_device *pdev, goto out; } + dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); + xen_unregister_device_domain_owner(dev); + pciback_release_pci_dev(pdev, dev); out: -- cgit v1.2.3 From fd5b221b348adf63d413de9adad030e4f13664b7 Mon Sep 17 00:00:00 2001 From: "Zhao, Yu" Date: Wed, 3 Mar 2010 13:27:55 -0500 Subject: xen/pciback: guest SR-IOV support for PV guest These changes are for PV guest to use Virtual Function. Because the VF's vendor, device registers in cfg space are 0xffff, which are invalid and ignored by PCI device scan. Values in 'struct pci_dev' are fixed up by SR-IOV code, and using these values will present correct VID and DID to PV guest kernel. And command registers in the cfg space are read only 0, which means we have to emulate MMIO enable bit (VF only uses MMIO resource) so PV kernel can work properly. 
Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/conf_space_header.c | 71 +++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 40166e04a2b9..dcd6dd964e3b 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -18,6 +18,25 @@ struct pci_bar_info { #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) +static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) +{ + int i; + int ret; + + ret = pciback_read_config_word(dev, offset, value, data); + if (!atomic_read(&dev->enable_cnt)) + return ret; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + if (dev->resource[i].flags & IORESOURCE_IO) + *value |= PCI_COMMAND_IO; + if (dev->resource[i].flags & IORESOURCE_MEM) + *value |= PCI_COMMAND_MEMORY; + } + + return ret; +} + static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) { int err; @@ -142,10 +161,26 @@ static inline void read_dev_bar(struct pci_dev *dev, struct pci_bar_info *bar_info, int offset, u32 len_mask) { - pci_read_config_dword(dev, offset, &bar_info->val); - pci_write_config_dword(dev, offset, len_mask); - pci_read_config_dword(dev, offset, &bar_info->len_val); - pci_write_config_dword(dev, offset, bar_info->val); + int pos; + struct resource *res = dev->resource; + + if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) + pos = PCI_ROM_RESOURCE; + else { + pos = (offset - PCI_BASE_ADDRESS_0) / 4; + if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | + PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == + (PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64))) { + bar_info->val = res[pos - 1].start >> 32; + bar_info->len_val = res[pos - 1].end >> 32; + return; + } + } + + bar_info->val = res[pos].start 
| + (res[pos].flags & PCI_REGION_FLAG_MASK); + bar_info->len_val = res[pos].end - res[pos].start + 1; } static void *bar_init(struct pci_dev *dev, int offset) @@ -186,6 +221,22 @@ static void bar_release(struct pci_dev *dev, int offset, void *data) kfree(data); } +static int pciback_read_vendor(struct pci_dev *dev, int offset, + u16 *value, void *data) +{ + *value = dev->vendor; + + return 0; +} + +static int pciback_read_device(struct pci_dev *dev, int offset, + u16 *value, void *data) +{ + *value = dev->device; + + return 0; +} + static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, void *data) { @@ -212,10 +263,20 @@ out: } static const struct config_field header_common[] = { + { + .offset = PCI_VENDOR_ID, + .size = 2, + .u.w.read = pciback_read_vendor, + }, + { + .offset = PCI_DEVICE_ID, + .size = 2, + .u.w.read = pciback_read_device, + }, { .offset = PCI_COMMAND, .size = 2, - .u.w.read = pciback_read_config_word, + .u.w.read = command_read, .u.w.write = command_write, }, { -- cgit v1.2.3 From a2be65fd363831502afdf0babdf48149b3959cde Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 3 Mar 2010 13:38:43 -0500 Subject: xen/pciback: Disable MSI/MSI-X when reseting a device In cases where the guest is abruptly killed and has not disabled MSI/MSI-X interrupts we want to do it for it. Otherwise when the guest is started up and enables MSI, we would get a WARN() that the device already had been enabled. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pciback_ops.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 2b9a93e1fdee..011db675e437 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -23,6 +23,14 @@ void pciback_reset_device(struct pci_dev *dev) /* Disable devices (but not bridges) */ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { +#ifdef CONFIG_PCI_MSI + /* The guest could have been abruptly killed without + * disabling MSI/MSI-X interrupts.*/ + if (dev->msix_enabled) + pci_disable_msix(dev); + if (dev->msi_enabled) + pci_disable_msi(dev); +#endif pci_disable_device(dev); pci_write_config_word(dev, PCI_COMMAND, 0); -- cgit v1.2.3 From 0513fe9e5b54e47e37217ea078dd870e3825e02d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 19 Jul 2011 18:56:39 -0400 Subject: xen/pciback: Allocate IRQ handler for device that is shared with guest. If the device that is to be shared with a guest is a level device and the IRQ is shared with the initial domain we need to take actions. Mainly we install a dummy IRQ handler that will ACK on the interrupt line so as to not have the initial domain disable the interrupt line. This dummy IRQ handler is not enabled when the device MSI/MSI-X lines are set, nor for edge interrupts. And also not for level interrupts that are not shared amongst devices. Lastly, if the user passes to the guest all of the PCI devices on the shared line then we won't install the dummy handler either. There is also SysFS instrumentation to check its state and turn IRQ ACKing on/off if necessary.
Signed-off-by: Konrad Rzeszutek Wilk --- .../xen/xen-pciback/conf_space_capability_msi.c | 17 ++++ drivers/xen/xen-pciback/conf_space_header.c | 6 ++ drivers/xen/xen-pciback/pci_stub.c | 90 +++++++++++++++++- drivers/xen/xen-pciback/pciback.h | 13 ++- drivers/xen/xen-pciback/pciback_ops.c | 102 +++++++++++++++++++++ 5 files changed, 223 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability_msi.c b/drivers/xen/xen-pciback/conf_space_capability_msi.c index 78f74b1852d4..d0d2255b5da9 100644 --- a/drivers/xen/xen-pciback/conf_space_capability_msi.c +++ b/drivers/xen/xen-pciback/conf_space_capability_msi.c @@ -12,6 +12,7 @@ int pciback_enable_msi(struct pciback_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { + struct pciback_dev_data *dev_data; int otherend = pdev->xdev->otherend_id; int status; @@ -28,21 +29,29 @@ int pciback_enable_msi(struct pciback_device *pdev, * the local domain's IRQ number. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 0; return 0; } int pciback_disable_msi(struct pciback_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { + struct pciback_dev_data *dev_data; pci_disable_msi(dev); op->value = dev->irq ? 
xen_pirq_from_irq(dev->irq) : 0; + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; return 0; } int pciback_enable_msix(struct pciback_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { + struct pciback_dev_data *dev_data; int i, result; struct msix_entry *entries; @@ -74,6 +83,9 @@ int pciback_enable_msix(struct pciback_device *pdev, kfree(entries); op->value = result; + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 0; return result; } @@ -81,6 +93,7 @@ int pciback_enable_msix(struct pciback_device *pdev, int pciback_disable_msix(struct pciback_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { + struct pciback_dev_data *dev_data; pci_disable_msix(dev); @@ -89,6 +102,10 @@ int pciback_disable_msix(struct pciback_device *pdev, * an undefined IRQ value of zero. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + return 0; } diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index dcd6dd964e3b..22ad0f560669 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) { + struct pciback_dev_data *dev_data; int err; + dev_data = pci_get_drvdata(dev); if (!pci_is_enabled(dev) && is_enable_cmd(value)) { if (unlikely(verbose_request)) printk(KERN_DEBUG "pciback: %s: enable\n", @@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) err = pci_enable_device(dev); if (err) return err; + if (dev_data) + dev_data->enable_intx = 1; } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { if (unlikely(verbose_request)) printk(KERN_DEBUG "pciback: %s: disable\n", pci_name(dev)); pci_disable_device(dev); + 
if (dev_data) + dev_data->enable_intx = 0; } if (!dev->is_busmaster && is_master_cmd(value)) { diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index ac8396d8206b..c4d1071ebbe6 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,6 +21,8 @@ #include "conf_space.h" #include "conf_space_quirks.h" +#define DRV_NAME "pciback" + static char *pci_devs_to_hide; wait_queue_head_t aer_wait_queue; /*Add sem for sync AER handling and pciback remove/reconfigue ops, @@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) * would need to be called somewhere to free the memory allocated * here and then to call kfree(pci_get_drvdata(psdev->dev)). */ - dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC); + dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]") + + strlen(pci_name(dev)) + 1, GFP_ATOMIC); if (!dev_data) { err = -ENOMEM; goto out; } pci_set_drvdata(dev, dev_data); + /* + * Setup name for fake IRQ handler. It will only be enabled + * once the device is turned on by the guest. 
+ */ + sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev)); + dev_dbg(&dev->dev, "initializing config\n"); init_waitqueue_head(&aer_wait_queue); @@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = { */ static struct pci_driver pciback_pci_driver = { - .name = "pciback", + .name = DRV_NAME, .id_table = pcistub_ids, .probe = pcistub_probe, .remove = pcistub_remove, @@ -1029,6 +1038,72 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); +static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) +{ + struct pcistub_device *psdev; + struct pciback_dev_data *dev_data; + size_t count = 0; + unsigned long flags; + + spin_lock_irqsave(&pcistub_devices_lock, flags); + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (count >= PAGE_SIZE) + break; + if (!psdev->dev) + continue; + dev_data = pci_get_drvdata(psdev->dev); + if (!dev_data) + continue; + count += + scnprintf(buf + count, PAGE_SIZE - count, + "%s:%s:%sing:%ld\n", + pci_name(psdev->dev), + dev_data->isr_on ? "on" : "off", + dev_data->ack_intr ? 
"ack" : "not ack", + dev_data->handled); + } + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return count; +} + +DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); + +static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, + const char *buf, + size_t count) +{ + struct pcistub_device *psdev; + struct pciback_dev_data *dev_data; + int domain, bus, slot, func; + int err = -ENOENT; + + err = str_to_slot(buf, &domain, &bus, &slot, &func); + if (err) + goto out; + + psdev = pcistub_device_find(domain, bus, slot, func); + + if (!psdev) + goto out; + + dev_data = pci_get_drvdata(psdev->dev); + if (!dev_data) + goto out; + + dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n", + dev_data->irq_name, dev_data->isr_on, + !dev_data->isr_on); + + dev_data->isr_on = !(dev_data->isr_on); + if (dev_data->isr_on) + dev_data->ack_intr = 1; +out: + if (!err) + err = count; + return err; +} +DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); + static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, size_t count) { @@ -1168,7 +1243,10 @@ static void pcistub_exit(void) driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots); driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks); driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); - + driver_remove_file(&pciback_pci_driver.driver, + &driver_attr_irq_handlers); + driver_remove_file(&pciback_pci_driver.driver, + &driver_attr_irq_handler_state); pci_unregister_driver(&pciback_pci_driver); } @@ -1227,6 +1305,12 @@ static int __init pcistub_init(void) err = driver_create_file(&pciback_pci_driver.driver, &driver_attr_permissive); + if (!err) + err = driver_create_file(&pciback_pci_driver.driver, + &driver_attr_irq_handlers); + if (!err) + err = driver_create_file(&pciback_pci_driver.driver, + &driver_attr_irq_handler_state); if (err) pcistub_exit(); diff --git a/drivers/xen/xen-pciback/pciback.h 
b/drivers/xen/xen-pciback/pciback.h index c1e95e88ee9e..5c140200a5ea 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -45,8 +45,14 @@ struct pciback_device { struct pciback_dev_data { struct list_head config_fields; - int permissive; - int warned_on_write; + unsigned int permissive:1; + unsigned int warned_on_write:1; + unsigned int enable_intx:1; + unsigned int isr_on:1; /* Whether the IRQ handler is installed. */ + unsigned int ack_intr:1; /* .. and ACK-ing */ + unsigned long handled; + unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ + char irq_name[0]; /* pciback[000:04:00.0] */ }; /* Used by XenBus and pciback_ops.c */ @@ -131,3 +137,6 @@ extern int verbose_request; void test_and_schedule_op(struct pciback_device *pdev); #endif +/* Handles shared IRQs that can to device domain and control domain. */ +void pciback_irq_handler(struct pci_dev *dev, int reset); +irqreturn_t pciback_guest_interrupt(int irq, void *dev_id); diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 011db675e437..6c398fde7a83 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -13,6 +13,77 @@ int verbose_request; module_param(verbose_request, int, 0644); +/* Ensure a device is has the fake IRQ handler "turned on/off" and is + * ready to be exported. This MUST be run after pciback_reset_device + * which does the actual PCI device enable/disable. 
+ */ +void pciback_control_isr(struct pci_dev *dev, int reset) +{ + struct pciback_dev_data *dev_data; + int rc; + int enable = 0; + + dev_data = pci_get_drvdata(dev); + if (!dev_data) + return; + + /* We don't deal with bridges */ + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return; + + if (reset) { + dev_data->enable_intx = 0; + dev_data->ack_intr = 0; + } + enable = dev_data->enable_intx; + + /* Asked to disable, but ISR isn't runnig */ + if (!enable && !dev_data->isr_on) + return; + + /* Squirrel away the IRQs in the dev_data. We need this + * b/c when device transitions to MSI, the dev->irq is + * overwritten with the MSI vector. + */ + if (enable) + dev_data->irq = dev->irq; + + dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", + dev_data->irq_name, + dev_data->irq, + pci_is_enabled(dev) ? "on" : "off", + dev->msi_enabled ? "MSI" : "", + dev->msix_enabled ? "MSI/X" : "", + dev_data->isr_on ? "enable" : "disable", + enable ? "enable" : "disable"); + + if (enable) { + rc = request_irq(dev_data->irq, + pciback_guest_interrupt, IRQF_SHARED, + dev_data->irq_name, dev); + if (rc) { + dev_err(&dev->dev, "%s: failed to install fake IRQ " \ + "handler for IRQ %d! (rc:%d)\n", + dev_data->irq_name, dev_data->irq, rc); + goto out; + } + } else { + free_irq(dev_data->irq, dev); + dev_data->irq = 0; + } + dev_data->isr_on = enable; + dev_data->ack_intr = enable; +out: + dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", + dev_data->irq_name, + dev_data->irq, + pci_is_enabled(dev) ? "on" : "off", + dev->msi_enabled ? "MSI" : "", + dev->msix_enabled ? "MSI/X" : "", + enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : + (dev_data->isr_on ? "failed to disable" : "disabled")); +} + /* Ensure a device is "turned off" and ready to be exported. 
* (Also see pciback_config_reset to ensure virtual configuration space is * ready to be re-exported) @@ -21,6 +92,8 @@ void pciback_reset_device(struct pci_dev *dev) { u16 cmd; + pciback_control_isr(dev, 1 /* reset device */); + /* Disable devices (but not bridges) */ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { #ifdef CONFIG_PCI_MSI @@ -78,13 +151,18 @@ void pciback_do_op(struct work_struct *data) struct pciback_device *pdev = container_of(data, struct pciback_device, op_work); struct pci_dev *dev; + struct pciback_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->sh_info->op; + int test_intx = 0; dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) op->err = XEN_PCI_ERR_dev_not_found; else { + dev_data = pci_get_drvdata(dev); + if (dev_data) + test_intx = dev_data->enable_intx; switch (op->cmd) { case XEN_PCI_OP_conf_read: op->err = pciback_config_read(dev, @@ -113,6 +191,11 @@ void pciback_do_op(struct work_struct *data) break; } } + if (!op->err && dev && dev_data) { + /* Transition detected */ + if ((dev_data->enable_intx != test_intx)) + pciback_control_isr(dev, 0 /* no reset */); + } /* Tell the driver domain that we're done. */ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); @@ -137,3 +220,22 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id) return IRQ_HANDLED; } +irqreturn_t pciback_guest_interrupt(int irq, void *dev_id) +{ + struct pci_dev *dev = (struct pci_dev *)dev_id; + struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + + if (dev_data->isr_on && dev_data->ack_intr) { + dev_data->handled++; + if ((dev_data->handled % 1000) == 0) { + if (xen_test_irq_shared(irq)) { + printk(KERN_INFO "%s IRQ line is not shared " + "with other domains. 
Turning ISR off\n", + dev_data->irq_name); + dev_data->ack_intr = 0; + } + } + return IRQ_HANDLED; + } + return IRQ_NONE; +} -- cgit v1.2.3 From 494ef20db6ea2e2ab1c3a45a1461e6e717fdcf48 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 23 Jul 2010 14:35:47 -0400 Subject: xen/pciback: Fine-grain the spinlocks and fix BUG: scheduling while atomic cases. We were using coarse spinlocks that could end up with a deadlock. This patch fixes that and makes the spinlocks much more fine-grained. We also drop be->watching state spinlocks as they are already guarded by the xenwatch_thread against multiple customers. Without that we would trigger the BUG: scheduling while atomic. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/passthrough.c | 9 ++++++--- drivers/xen/xen-pciback/xenbus.c | 26 +++++++++++++++----------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index 5386bebf7f9a..6e3999b997d4 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ b/drivers/xen/xen-pciback/passthrough.c @@ -113,14 +113,14 @@ int pciback_publish_pci_roots(struct pciback_device *pdev, { int err = 0; struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *e; + struct pci_dev_entry *dev_entry, *e, *tmp; struct pci_dev *dev; int found; unsigned int domain, bus; spin_lock(&dev_data->lock); - list_for_each_entry(dev_entry, &dev_data->dev_list, list) { + list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) { /* Only publish this device as a root if none of its * parent bridges are exported */ @@ -139,13 +139,16 @@ int pciback_publish_pci_roots(struct pciback_device *pdev, bus = (unsigned int)dev_entry->dev->bus->number; if (!found) { + spin_unlock(&dev_data->lock); err = publish_root_cb(pdev, domain, bus); if (err) break; + spin_lock(&dev_data->lock); } } - spin_unlock(&dev_data->lock); + if (!err) +
spin_unlock(&dev_data->lock); return err; } diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index a0cf7285d320..70030c409212 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -54,23 +54,29 @@ static void pciback_disconnect(struct pciback_device *pdev) unbind_from_irqhandler(pdev->evtchn_irq, pdev); pdev->evtchn_irq = INVALID_EVTCHN_IRQ; } + spin_unlock(&pdev->dev_lock); /* If the driver domain started an op, make sure we complete it * before releasing the shared memory */ + + /* Note, the workqueue does not use spinlocks at all.*/ flush_workqueue(pciback_wq); + spin_lock(&pdev->dev_lock); if (pdev->sh_info != NULL) { xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); pdev->sh_info = NULL; } - spin_unlock(&pdev->dev_lock); + } static void free_pdev(struct pciback_device *pdev) { - if (pdev->be_watching) + if (pdev->be_watching) { unregister_xenbus_watch(&pdev->be_watch); + pdev->be_watching = 0; + } pciback_disconnect(pdev); @@ -98,7 +104,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, "Error mapping other domain page in ours."); goto out; } + + spin_lock(&pdev->dev_lock); pdev->sh_info = vaddr; + spin_unlock(&pdev->dev_lock); err = bind_interdomain_evtchn_to_irqhandler( pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, @@ -108,7 +117,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, "Error binding event channel to IRQ"); goto out; } + + spin_lock(&pdev->dev_lock); pdev->evtchn_irq = err; + spin_unlock(&pdev->dev_lock); err = 0; dev_dbg(&pdev->xdev->dev, "Attached!\n"); @@ -122,7 +134,6 @@ static int pciback_attach(struct pciback_device *pdev) int gnt_ref, remote_evtchn; char *magic = NULL; - spin_lock(&pdev->dev_lock); /* Make sure we only do this setup once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != @@ -168,7 +179,6 @@ static int pciback_attach(struct pciback_device *pdev) dev_dbg(&pdev->xdev->dev, "Connected? 
%d\n", err); out: - spin_unlock(&pdev->dev_lock); kfree(magic); @@ -340,7 +350,6 @@ static int pciback_reconfigure(struct pciback_device *pdev) char state_str[64]; char dev_str[64]; - spin_lock(&pdev->dev_lock); dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); @@ -481,8 +490,6 @@ static int pciback_reconfigure(struct pciback_device *pdev) } out: - spin_unlock(&pdev->dev_lock); - return 0; } @@ -539,8 +546,6 @@ static int pciback_setup_backend(struct pciback_device *pdev) char dev_str[64]; char state_str[64]; - spin_lock(&pdev->dev_lock); - /* It's possible we could get the call to setup twice, so make sure * we're not already connected. */ @@ -621,8 +626,6 @@ static int pciback_setup_backend(struct pciback_device *pdev) "Error switching to initialised state!"); out: - spin_unlock(&pdev->dev_lock); - if (!err) /* see if pcifront is already configured (if not, we'll wait) */ pciback_attach(pdev); @@ -669,6 +672,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev, pciback_be_watch); if (err) goto out; + pdev->be_watching = 1; /* We need to force a call to our callback here in case -- cgit v1.2.3 From a0b2fa8ef8fee0493f6c1e2ac08b9e50a629a7c2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 3 Dec 2009 21:56:20 +0000 Subject: xen: rename pciback module to xen-pciback. pciback is rather generic for a modular distro style kernel. 
Signed-off-by: Ian Campbell Cc: Jeremy Fitzhardinge Cc: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xen-pciback/Makefile | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile index 106dae748cdb..38bc123841e7 100644 --- a/drivers/xen/xen-pciback/Makefile +++ b/drivers/xen/xen-pciback/Makefile @@ -1,16 +1,16 @@ -obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o -pciback-y := pci_stub.o pciback_ops.o xenbus.o -pciback-y += conf_space.o conf_space_header.o \ - conf_space_capability.o \ - conf_space_capability_vpd.o \ - conf_space_capability_pm.o \ - conf_space_quirks.o -pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o -pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o -pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o -pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o -pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o +xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o +xen-pciback-y += conf_space.o conf_space_header.o \ + conf_space_capability.o \ + conf_space_capability_vpd.o \ + conf_space_capability_pm.o \ + conf_space_quirks.o +xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o +xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o +xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o +xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o +xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) EXTRA_CFLAGS += -DDEBUG -- cgit v1.2.3 From e17ab35f05cb8b3d19c70454a41ea67207caf484 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Feb 2011 15:43:25 -0500 Subject: xen/pciback: Don't setup an fake IRQ handler for SR-IOV devices. 
If we try to set up a fake IRQ handler for legacy interrupts for devices that only have MSI-X (most if not all SR-IOV cards), we will fail with this: pciback[0000:01:10.0]: failed to install fake IRQ handler for IRQ 0! (rc:-38) Since those cards don't have anything in dev->irq. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pciback_ops.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 6c398fde7a83..28a2a5584831 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -48,6 +48,13 @@ void pciback_control_isr(struct pci_dev *dev, int reset) if (enable) dev_data->irq = dev->irq; + /* + * SR-IOV devices in all use MSI-X and have no legacy + * interrupts, so inhibit creating a fake IRQ handler for them. + */ + if (dev_data->irq == 0) + goto out; + dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", dev_data->irq_name, dev_data->irq, -- cgit v1.2.3 From c288b67b9b4d65790e1a1a1fd982330730b68f46 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 17 Feb 2011 11:18:00 -0500 Subject: xen/pciback: Print out the MSI/MSI-X (PIRQ) values If the verbose_request is set (and loglevel high enough), print out the MSI/MSI-X values that are sent to the guest. This should aid in debugging issues.
Signed-off-by: Konrad Rzeszutek Wilk --- .../xen/xen-pciback/conf_space_capability_msi.c | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/xen/xen-pciback/conf_space_capability_msi.c b/drivers/xen/xen-pciback/conf_space_capability_msi.c index d0d2255b5da9..6e876b600e66 100644 --- a/drivers/xen/xen-pciback/conf_space_capability_msi.c +++ b/drivers/xen/xen-pciback/conf_space_capability_msi.c @@ -16,6 +16,9 @@ int pciback_enable_msi(struct pciback_device *pdev, int otherend = pdev->xdev->otherend_id; int status; + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev)); + status = pci_enable_msi(dev); if (status) { @@ -29,9 +32,14 @@ int pciback_enable_msi(struct pciback_device *pdev, * the local domain's IRQ number. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev), + op->value); + dev_data = pci_get_drvdata(dev); if (dev_data) dev_data->ack_intr = 0; + return 0; } @@ -39,9 +47,16 @@ int pciback_disable_msi(struct pciback_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { struct pciback_dev_data *dev_data; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev)); + pci_disable_msi(dev); op->value = dev->irq ? 
xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev), + op->value); dev_data = pci_get_drvdata(dev); if (dev_data) dev_data->ack_intr = 1; @@ -55,6 +70,10 @@ int pciback_enable_msix(struct pciback_device *pdev, int i, result; struct msix_entry *entries; + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", + pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) return -EINVAL; @@ -75,6 +94,11 @@ int pciback_enable_msix(struct pciback_device *pdev, if (entries[i].vector) op->msix_entries[i].vector = xen_pirq_from_irq(entries[i].vector); + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: " \ + "MSI-X[%d]: %d\n", + pci_name(dev), i, + op->msix_entries[i].vector); } } else { printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n", @@ -95,6 +119,10 @@ int pciback_disable_msix(struct pciback_device *pdev, { struct pciback_dev_data *dev_data; + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", + pci_name(dev)); + pci_disable_msix(dev); /* @@ -102,6 +130,9 @@ int pciback_disable_msix(struct pciback_device *pdev, * an undefined IRQ value of zero. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG "pciback: %s: MSI-X: %d\n", pci_name(dev), + op->value); dev_data = pci_get_drvdata(dev); if (dev_data) dev_data->ack_intr = 1; -- cgit v1.2.3 From a92336a1176b2119eaa990a1e8bf3109665fdbc6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 19 Jul 2011 19:40:51 -0400 Subject: xen/pciback: Drop two backends, squash and cleanup some code. - Remove the slot and controller controller backend as they are not used. - Document the find pciback_[read|write]_config_[byte|word|dword] to make it easier to find. 
- Collapse the code from conf_space_capability_msi into pciback_ops.c - Collapse conf_space_capability_[pm|vpd].c in conf_space_capability.c [and remove the conf_space_capability.h file] - Rename all visible functions from pciback to xen_pcibk. - Rename all the printk/pr_info, etc that use the "pciback" to say "xen-pciback". - Convert functions that are not referenced outside the code to be static to save on name space. - Do the same thing for structures that are internal to the driver. - Run checkpatch.pl after the renames and fixup its warnings and fix any compile errors caused by the variable rename - Cleanup any structs that checkpatch.pl commented about or just look odd. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/Makefile | 5 - drivers/xen/xen-pciback/conf_space.c | 55 +-- drivers/xen/xen-pciback/conf_space.h | 42 +- drivers/xen/xen-pciback/conf_space_capability.c | 161 +++++++- drivers/xen/xen-pciback/conf_space_capability.h | 26 -- .../xen/xen-pciback/conf_space_capability_msi.c | 142 ------- drivers/xen/xen-pciback/conf_space_capability_pm.c | 113 ------ .../xen/xen-pciback/conf_space_capability_vpd.c | 40 -- drivers/xen/xen-pciback/conf_space_header.c | 49 +-- drivers/xen/xen-pciback/conf_space_quirks.c | 50 +-- drivers/xen/xen-pciback/conf_space_quirks.h | 16 +- drivers/xen/xen-pciback/controller.c | 442 --------------------- drivers/xen/xen-pciback/passthrough.c | 30 +- drivers/xen/xen-pciback/pci_stub.c | 217 +++++----- drivers/xen/xen-pciback/pciback.h | 107 ++--- drivers/xen/xen-pciback/pciback_ops.c | 198 +++++++-- drivers/xen/xen-pciback/slot.c | 191 --------- drivers/xen/xen-pciback/vpci.c | 36 +- drivers/xen/xen-pciback/xenbus.c | 128 +++--- 19 files changed, 679 insertions(+), 1369 deletions(-) delete mode 100644 drivers/xen/xen-pciback/conf_space_capability.h delete mode 100644 drivers/xen/xen-pciback/conf_space_capability_msi.c delete mode 100644 drivers/xen/xen-pciback/conf_space_capability_pm.c delete mode 100644
drivers/xen/xen-pciback/conf_space_capability_vpd.c delete mode 100644 drivers/xen/xen-pciback/controller.c delete mode 100644 drivers/xen/xen-pciback/slot.c diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile index 38bc123841e7..e79c518afc9a 100644 --- a/drivers/xen/xen-pciback/Makefile +++ b/drivers/xen/xen-pciback/Makefile @@ -3,14 +3,9 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o xen-pciback-y += conf_space.o conf_space_header.o \ conf_space_capability.o \ - conf_space_capability_vpd.o \ - conf_space_capability_pm.o \ conf_space_quirks.o -xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o -xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o -xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index eb6bba044438..a8031445d94e 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -15,11 +15,14 @@ #include "conf_space.h" #include "conf_space_quirks.h" +#define DRV_NAME "xen-pciback" static int permissive; module_param(permissive, bool, 0644); +/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, + * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. 
*/ #define DEFINE_PCI_CONFIG(op, size, type) \ -int pciback_##op##_config_##size \ +int xen_pcibk_##op##_config_##size \ (struct pci_dev *dev, int offset, type value, void *data) \ { \ return pci_##op##_config_##size(dev, offset, value); \ @@ -138,11 +141,11 @@ static int pcibios_err_to_errno(int err) return err; } -int pciback_config_read(struct pci_dev *dev, int offset, int size, - u32 *ret_val) +int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, + u32 *ret_val) { int err = 0; - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); const struct config_field_entry *cfg_entry; const struct config_field *field; int req_start, req_end, field_start, field_end; @@ -151,7 +154,7 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size, u32 value = 0, tmp_val; if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n", + printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n", pci_name(dev), size, offset); if (!valid_request(offset, size)) { @@ -195,17 +198,17 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size, out: if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n", + printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n", pci_name(dev), size, offset, value); *ret_val = value; return pcibios_err_to_errno(err); } -int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) +int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) { int err = 0, handled = 0; - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); const struct config_field_entry *cfg_entry; const struct config_field *field; u32 tmp_val; @@ -213,7 +216,7 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) if (unlikely(verbose_request)) printk(KERN_DEBUG - "pciback: %s: 
write request %d bytes at 0x%x = %x\n", + DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n", pci_name(dev), size, offset, value); if (!valid_request(offset, size)) @@ -231,7 +234,7 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) || (req_end > field_start && req_end <= field_end)) { tmp_val = 0; - err = pciback_config_read(dev, field_start, + err = xen_pcibk_config_read(dev, field_start, field->size, &tmp_val); if (err) break; @@ -290,9 +293,9 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) return pcibios_err_to_errno(err); } -void pciback_config_free_dyn_fields(struct pci_dev *dev) +void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) { - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); struct config_field_entry *cfg_entry, *t; const struct config_field *field; @@ -316,9 +319,9 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev) } } -void pciback_config_reset_dev(struct pci_dev *dev) +void xen_pcibk_config_reset_dev(struct pci_dev *dev) { - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); const struct config_field_entry *cfg_entry; const struct config_field *field; @@ -334,9 +337,9 @@ void pciback_config_reset_dev(struct pci_dev *dev) } } -void pciback_config_free_dev(struct pci_dev *dev) +void xen_pcibk_config_free_dev(struct pci_dev *dev) { - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); struct config_field_entry *cfg_entry, *t; const struct config_field *field; @@ -356,12 +359,12 @@ void pciback_config_free_dev(struct pci_dev *dev) } } -int pciback_config_add_field_offset(struct pci_dev *dev, +int xen_pcibk_config_add_field_offset(struct pci_dev *dev, const struct config_field *field, unsigned int base_offset) { int err = 0; - struct pciback_dev_data *dev_data = 
pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); struct config_field_entry *cfg_entry; void *tmp; @@ -376,7 +379,7 @@ int pciback_config_add_field_offset(struct pci_dev *dev, cfg_entry->base_offset = base_offset; /* silently ignore duplicate fields */ - err = pciback_field_is_dup(dev, OFFSET(cfg_entry)); + err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry)); if (err) goto out; @@ -406,30 +409,30 @@ out: * certain registers (like the base address registers (BARs) so that we can * keep the client from manipulating them directly. */ -int pciback_config_init_dev(struct pci_dev *dev) +int xen_pcibk_config_init_dev(struct pci_dev *dev) { int err = 0; - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); dev_dbg(&dev->dev, "initializing virtual configuration space\n"); INIT_LIST_HEAD(&dev_data->config_fields); - err = pciback_config_header_add_fields(dev); + err = xen_pcibk_config_header_add_fields(dev); if (err) goto out; - err = pciback_config_capability_add_fields(dev); + err = xen_pcibk_config_capability_add_fields(dev); if (err) goto out; - err = pciback_config_quirks_init(dev); + err = xen_pcibk_config_quirks_init(dev); out: return err; } -int pciback_config_init(void) +int xen_pcibk_config_init(void) { - return pciback_config_capability_init(); + return xen_pcibk_config_capability_init(); } diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h index 50ebef216828..e56c934ad137 100644 --- a/drivers/xen/xen-pciback/conf_space.h +++ b/drivers/xen/xen-pciback/conf_space.h @@ -69,35 +69,35 @@ struct config_field_entry { /* Add fields to a device - the add_fields macro expects to get a pointer to * the first entry in an array (of which the ending is marked by size==0) */ -int pciback_config_add_field_offset(struct pci_dev *dev, +int xen_pcibk_config_add_field_offset(struct pci_dev *dev, const struct config_field *field, unsigned int 
offset); -static inline int pciback_config_add_field(struct pci_dev *dev, +static inline int xen_pcibk_config_add_field(struct pci_dev *dev, const struct config_field *field) { - return pciback_config_add_field_offset(dev, field, 0); + return xen_pcibk_config_add_field_offset(dev, field, 0); } -static inline int pciback_config_add_fields(struct pci_dev *dev, +static inline int xen_pcibk_config_add_fields(struct pci_dev *dev, const struct config_field *field) { int i, err = 0; for (i = 0; field[i].size != 0; i++) { - err = pciback_config_add_field(dev, &field[i]); + err = xen_pcibk_config_add_field(dev, &field[i]); if (err) break; } return err; } -static inline int pciback_config_add_fields_offset(struct pci_dev *dev, +static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev, const struct config_field *field, unsigned int offset) { int i, err = 0; for (i = 0; field[i].size != 0; i++) { - err = pciback_config_add_field_offset(dev, &field[i], offset); + err = xen_pcibk_config_add_field_offset(dev, &field[i], offset); if (err) break; } @@ -105,22 +105,22 @@ static inline int pciback_config_add_fields_offset(struct pci_dev *dev, } /* Read/Write the real configuration space */ -int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value, - void *data); -int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value, - void *data); -int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value, - void *data); -int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value, - void *data); -int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value, - void *data); -int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value, +int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value, void *data); +int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value, + void *data); +int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value, + void 
*data); +int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value, + void *data); +int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value, + void *data); +int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value, + void *data); -int pciback_config_capability_init(void); +int xen_pcibk_config_capability_init(void); -int pciback_config_header_add_fields(struct pci_dev *dev); -int pciback_config_capability_add_fields(struct pci_dev *dev); +int xen_pcibk_config_header_add_fields(struct pci_dev *dev); +int xen_pcibk_config_capability_add_fields(struct pci_dev *dev); #endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 0ea84d6335f4..7f83e9083e9d 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -9,29 +9,36 @@ #include #include "pciback.h" #include "conf_space.h" -#include "conf_space_capability.h" static LIST_HEAD(capabilities); +struct xen_pcibk_config_capability { + struct list_head cap_list; + + int capability; + + /* If the device has the capability found above, add these fields */ + const struct config_field *fields; +}; static const struct config_field caplist_header[] = { { .offset = PCI_CAP_LIST_ID, .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ - .u.w.read = pciback_read_config_word, + .u.w.read = xen_pcibk_read_config_word, .u.w.write = NULL, }, {} }; -static inline void register_capability(struct pciback_config_capability *cap) +static inline void register_capability(struct xen_pcibk_config_capability *cap) { list_add_tail(&cap->cap_list, &capabilities); } -int pciback_config_capability_add_fields(struct pci_dev *dev) +int xen_pcibk_config_capability_add_fields(struct pci_dev *dev) { int err = 0; - struct pciback_config_capability *cap; + struct xen_pcibk_config_capability *cap; int cap_offset; list_for_each_entry(cap, 
&capabilities, cap_list) { @@ -40,12 +47,12 @@ int pciback_config_capability_add_fields(struct pci_dev *dev) dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", cap->capability, cap_offset); - err = pciback_config_add_fields_offset(dev, + err = xen_pcibk_config_add_fields_offset(dev, caplist_header, cap_offset); if (err) goto out; - err = pciback_config_add_fields_offset(dev, + err = xen_pcibk_config_add_fields_offset(dev, cap->fields, cap_offset); if (err) @@ -57,10 +64,144 @@ out: return err; } -int pciback_config_capability_init(void) +static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, + void *data) +{ + /* Disallow writes to the vital product data */ + if (value & PCI_VPD_ADDR_F) + return PCIBIOS_SET_FAILED; + else + return pci_write_config_word(dev, offset, value); +} + +static const struct config_field caplist_vpd[] = { + { + .offset = PCI_VPD_ADDR, + .size = 2, + .u.w.read = xen_pcibk_read_config_word, + .u.w.write = vpd_address_write, + }, + { + .offset = PCI_VPD_DATA, + .size = 4, + .u.dw.read = xen_pcibk_read_config_dword, + .u.dw.write = NULL, + }, + {} +}; + +static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, + void *data) +{ + int err; + u16 real_value; + + err = pci_read_config_word(dev, offset, &real_value); + if (err) + goto out; + + *value = real_value & ~PCI_PM_CAP_PME_MASK; + +out: + return err; +} + +/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. 
+ * Can't allow driver domain to enable PMEs - they're shared */ +#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) + +static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, + void *data) +{ + int err; + u16 old_value; + pci_power_t new_state, old_state; + + err = pci_read_config_word(dev, offset, &old_value); + if (err) + goto out; + + old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); + new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); + + new_value &= PM_OK_BITS; + if ((old_value & PM_OK_BITS) != new_value) { + new_value = (old_value & ~PM_OK_BITS) | new_value; + err = pci_write_config_word(dev, offset, new_value); + if (err) + goto out; + } + + /* Let pci core handle the power management change */ + dev_dbg(&dev->dev, "set power state to %x\n", new_state); + err = pci_set_power_state(dev, new_state); + if (err) { + err = PCIBIOS_SET_FAILED; + goto out; + } + + out: + return err; +} + +/* Ensure PMEs are disabled */ +static void *pm_ctrl_init(struct pci_dev *dev, int offset) +{ + int err; + u16 value; + + err = pci_read_config_word(dev, offset, &value); + if (err) + goto out; + + if (value & PCI_PM_CTRL_PME_ENABLE) { + value &= ~PCI_PM_CTRL_PME_ENABLE; + err = pci_write_config_word(dev, offset, value); + } + +out: + return ERR_PTR(err); +} + +static const struct config_field caplist_pm[] = { + { + .offset = PCI_PM_PMC, + .size = 2, + .u.w.read = pm_caps_read, + }, + { + .offset = PCI_PM_CTRL, + .size = 2, + .init = pm_ctrl_init, + .u.w.read = xen_pcibk_read_config_word, + .u.w.write = pm_ctrl_write, + }, + { + .offset = PCI_PM_PPB_EXTENSIONS, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + }, + { + .offset = PCI_PM_DATA_REGISTER, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + }, + {} +}; + +static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = { + .capability = PCI_CAP_ID_PM, + .fields = caplist_pm, +}; +static struct xen_pcibk_config_capability 
xen_pcibk_config_capability_vpd = { + .capability = PCI_CAP_ID_VPD, + .fields = caplist_vpd, +}; + +int xen_pcibk_config_capability_init(void) { - register_capability(&pciback_config_capability_vpd); - register_capability(&pciback_config_capability_pm); + register_capability(&xen_pcibk_config_capability_vpd); + register_capability(&xen_pcibk_config_capability_pm); return 0; } diff --git a/drivers/xen/xen-pciback/conf_space_capability.h b/drivers/xen/xen-pciback/conf_space_capability.h deleted file mode 100644 index 8da3ac415f29..000000000000 --- a/drivers/xen/xen-pciback/conf_space_capability.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * PCI Backend - Data structures for special overlays for structures on - * the capability list. - * - * Author: Ryan Wilson - */ - -#ifndef __PCIBACK_CONFIG_CAPABILITY_H__ -#define __PCIBACK_CONFIG_CAPABILITY_H__ - -#include -#include - -struct pciback_config_capability { - struct list_head cap_list; - - int capability; - - /* If the device has the capability found above, add these fields */ - const struct config_field *fields; -}; - -extern struct pciback_config_capability pciback_config_capability_vpd; -extern struct pciback_config_capability pciback_config_capability_pm; - -#endif diff --git a/drivers/xen/xen-pciback/conf_space_capability_msi.c b/drivers/xen/xen-pciback/conf_space_capability_msi.c deleted file mode 100644 index 6e876b600e66..000000000000 --- a/drivers/xen/xen-pciback/conf_space_capability_msi.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * PCI Backend -- Configuration overlay for MSI capability - */ -#include -#include -#include "conf_space.h" -#include "conf_space_capability.h" -#include -#include -#include "pciback.h" - -int pciback_enable_msi(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct pciback_dev_data *dev_data; - int otherend = pdev->xdev->otherend_id; - int status; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev)); - - status = 
pci_enable_msi(dev); - - if (status) { - printk(KERN_ERR "error enable msi for guest %x status %x\n", - otherend, status); - op->value = 0; - return XEN_PCI_ERR_op_failed; - } - - /* The value the guest needs is actually the IDT vector, not the - * the local domain's IRQ number. */ - - op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev), - op->value); - - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 0; - - return 0; -} - -int pciback_disable_msi(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct pciback_dev_data *dev_data; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev)); - - pci_disable_msi(dev); - - op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; - return 0; -} - -int pciback_enable_msix(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct pciback_dev_data *dev_data; - int i, result; - struct msix_entry *entries; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", - pci_name(dev)); - - if (op->value > SH_INFO_MAX_VEC) - return -EINVAL; - - entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); - if (entries == NULL) - return -ENOMEM; - - for (i = 0; i < op->value; i++) { - entries[i].entry = op->msix_entries[i].entry; - entries[i].vector = op->msix_entries[i].vector; - } - - result = pci_enable_msix(dev, entries, op->value); - - if (result == 0) { - for (i = 0; i < op->value; i++) { - op->msix_entries[i].entry = entries[i].entry; - if (entries[i].vector) - op->msix_entries[i].vector = - xen_pirq_from_irq(entries[i].vector); - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: " \ 
- "MSI-X[%d]: %d\n", - pci_name(dev), i, - op->msix_entries[i].vector); - } - } else { - printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n", - pci_name(dev), result); - } - kfree(entries); - - op->value = result; - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 0; - - return result; -} - -int pciback_disable_msix(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op) -{ - struct pciback_dev_data *dev_data; - - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", - pci_name(dev)); - - pci_disable_msix(dev); - - /* - * SR-IOV devices (which don't have any legacy IRQ) have - * an undefined IRQ value of zero. - */ - op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; - if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: MSI-X: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; - - return 0; -} - diff --git a/drivers/xen/xen-pciback/conf_space_capability_pm.c b/drivers/xen/xen-pciback/conf_space_capability_pm.c deleted file mode 100644 index 04426165a9e5..000000000000 --- a/drivers/xen/xen-pciback/conf_space_capability_pm.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * PCI Backend - Configuration space overlay for power management - * - * Author: Ryan Wilson - */ - -#include -#include "conf_space.h" -#include "conf_space_capability.h" - -static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, - void *data) -{ - int err; - u16 real_value; - - err = pci_read_config_word(dev, offset, &real_value); - if (err) - goto out; - - *value = real_value & ~PCI_PM_CAP_PME_MASK; - -out: - return err; -} - -/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. 
- * Can't allow driver domain to enable PMEs - they're shared */ -#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) - -static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, - void *data) -{ - int err; - u16 old_value; - pci_power_t new_state, old_state; - - err = pci_read_config_word(dev, offset, &old_value); - if (err) - goto out; - - old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); - new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); - - new_value &= PM_OK_BITS; - if ((old_value & PM_OK_BITS) != new_value) { - new_value = (old_value & ~PM_OK_BITS) | new_value; - err = pci_write_config_word(dev, offset, new_value); - if (err) - goto out; - } - - /* Let pci core handle the power management change */ - dev_dbg(&dev->dev, "set power state to %x\n", new_state); - err = pci_set_power_state(dev, new_state); - if (err) { - err = PCIBIOS_SET_FAILED; - goto out; - } - - out: - return err; -} - -/* Ensure PMEs are disabled */ -static void *pm_ctrl_init(struct pci_dev *dev, int offset) -{ - int err; - u16 value; - - err = pci_read_config_word(dev, offset, &value); - if (err) - goto out; - - if (value & PCI_PM_CTRL_PME_ENABLE) { - value &= ~PCI_PM_CTRL_PME_ENABLE; - err = pci_write_config_word(dev, offset, value); - } - -out: - return ERR_PTR(err); -} - -static const struct config_field caplist_pm[] = { - { - .offset = PCI_PM_PMC, - .size = 2, - .u.w.read = pm_caps_read, - }, - { - .offset = PCI_PM_CTRL, - .size = 2, - .init = pm_ctrl_init, - .u.w.read = pciback_read_config_word, - .u.w.write = pm_ctrl_write, - }, - { - .offset = PCI_PM_PPB_EXTENSIONS, - .size = 1, - .u.b.read = pciback_read_config_byte, - }, - { - .offset = PCI_PM_DATA_REGISTER, - .size = 1, - .u.b.read = pciback_read_config_byte, - }, - {} -}; - -struct pciback_config_capability pciback_config_capability_pm = { - .capability = PCI_CAP_ID_PM, - .fields = caplist_pm, -}; diff --git a/drivers/xen/xen-pciback/conf_space_capability_vpd.c 
b/drivers/xen/xen-pciback/conf_space_capability_vpd.c deleted file mode 100644 index e7b4d662b53d..000000000000 --- a/drivers/xen/xen-pciback/conf_space_capability_vpd.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * PCI Backend - Configuration space overlay for Vital Product Data - * - * Author: Ryan Wilson - */ - -#include -#include "conf_space.h" -#include "conf_space_capability.h" - -static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, - void *data) -{ - /* Disallow writes to the vital product data */ - if (value & PCI_VPD_ADDR_F) - return PCIBIOS_SET_FAILED; - else - return pci_write_config_word(dev, offset, value); -} - -static const struct config_field caplist_vpd[] = { - { - .offset = PCI_VPD_ADDR, - .size = 2, - .u.w.read = pciback_read_config_word, - .u.w.write = vpd_address_write, - }, - { - .offset = PCI_VPD_DATA, - .size = 4, - .u.dw.read = pciback_read_config_dword, - .u.dw.write = NULL, - }, - {} -}; - -struct pciback_config_capability pciback_config_capability_vpd = { - .capability = PCI_CAP_ID_VPD, - .fields = caplist_vpd, -}; diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 22ad0f560669..da3cbdfcb5dc 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -15,6 +15,7 @@ struct pci_bar_info { int which; }; +#define DRV_NAME "xen-pciback" #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) @@ -23,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) int i; int ret; - ret = pciback_read_config_word(dev, offset, value, data); + ret = xen_pcibk_read_config_word(dev, offset, value, data); if (!atomic_read(&dev->enable_cnt)) return ret; @@ -39,13 +40,13 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) static int command_write(struct pci_dev *dev, int offset, u16 value, void 
*data) { - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; int err; dev_data = pci_get_drvdata(dev); if (!pci_is_enabled(dev) && is_enable_cmd(value)) { if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: enable\n", + printk(KERN_DEBUG DRV_NAME ": %s: enable\n", pci_name(dev)); err = pci_enable_device(dev); if (err) @@ -54,7 +55,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) dev_data->enable_intx = 1; } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: disable\n", + printk(KERN_DEBUG DRV_NAME ": %s: disable\n", pci_name(dev)); pci_disable_device(dev); if (dev_data) @@ -63,7 +64,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) if (!dev->is_busmaster && is_master_cmd(value)) { if (unlikely(verbose_request)) - printk(KERN_DEBUG "pciback: %s: set bus master\n", + printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n", pci_name(dev)); pci_set_master(dev); } @@ -71,12 +72,12 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) if (value & PCI_COMMAND_INVALIDATE) { if (unlikely(verbose_request)) printk(KERN_DEBUG - "pciback: %s: enable memory-write-invalidate\n", + DRV_NAME ": %s: enable memory-write-invalidate\n", pci_name(dev)); err = pci_set_mwi(dev); if (err) { printk(KERN_WARNING - "pciback: %s: cannot enable " + DRV_NAME ": %s: cannot enable " "memory-write-invalidate (%d)\n", pci_name(dev), err); value &= ~PCI_COMMAND_INVALIDATE; @@ -91,7 +92,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) struct pci_bar_info *bar = data; if (unlikely(!bar)) { - printk(KERN_WARNING "pciback: driver data not found for %s\n", + printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", pci_name(dev)); return XEN_PCI_ERR_op_failed; } @@ -125,7 +126,7 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) 
struct pci_bar_info *bar = data; if (unlikely(!bar)) { - printk(KERN_WARNING "pciback: driver data not found for %s\n", + printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", pci_name(dev)); return XEN_PCI_ERR_op_failed; } @@ -153,7 +154,7 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) struct pci_bar_info *bar = data; if (unlikely(!bar)) { - printk(KERN_WARNING "pciback: driver data not found for %s\n", + printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", pci_name(dev)); return XEN_PCI_ERR_op_failed; } @@ -227,7 +228,7 @@ static void bar_release(struct pci_dev *dev, int offset, void *data) kfree(data); } -static int pciback_read_vendor(struct pci_dev *dev, int offset, +static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset, u16 *value, void *data) { *value = dev->vendor; @@ -235,7 +236,7 @@ static int pciback_read_vendor(struct pci_dev *dev, int offset, return 0; } -static int pciback_read_device(struct pci_dev *dev, int offset, +static int xen_pcibk_read_device(struct pci_dev *dev, int offset, u16 *value, void *data) { *value = dev->device; @@ -272,12 +273,12 @@ static const struct config_field header_common[] = { { .offset = PCI_VENDOR_ID, .size = 2, - .u.w.read = pciback_read_vendor, + .u.w.read = xen_pcibk_read_vendor, }, { .offset = PCI_DEVICE_ID, .size = 2, - .u.w.read = pciback_read_device, + .u.w.read = xen_pcibk_read_device, }, { .offset = PCI_COMMAND, @@ -293,24 +294,24 @@ static const struct config_field header_common[] = { { .offset = PCI_INTERRUPT_PIN, .size = 1, - .u.b.read = pciback_read_config_byte, + .u.b.read = xen_pcibk_read_config_byte, }, { /* Any side effects of letting driver domain control cache line? 
*/ .offset = PCI_CACHE_LINE_SIZE, .size = 1, - .u.b.read = pciback_read_config_byte, - .u.b.write = pciback_write_config_byte, + .u.b.read = xen_pcibk_read_config_byte, + .u.b.write = xen_pcibk_write_config_byte, }, { .offset = PCI_LATENCY_TIMER, .size = 1, - .u.b.read = pciback_read_config_byte, + .u.b.read = xen_pcibk_read_config_byte, }, { .offset = PCI_BIST, .size = 1, - .u.b.read = pciback_read_config_byte, + .u.b.read = xen_pcibk_read_config_byte, .u.b.write = bist_write, }, {} @@ -356,26 +357,26 @@ static const struct config_field header_1[] = { {} }; -int pciback_config_header_add_fields(struct pci_dev *dev) +int xen_pcibk_config_header_add_fields(struct pci_dev *dev) { int err; - err = pciback_config_add_fields(dev, header_common); + err = xen_pcibk_config_add_fields(dev, header_common); if (err) goto out; switch (dev->hdr_type) { case PCI_HEADER_TYPE_NORMAL: - err = pciback_config_add_fields(dev, header_0); + err = xen_pcibk_config_add_fields(dev, header_0); break; case PCI_HEADER_TYPE_BRIDGE: - err = pciback_config_add_fields(dev, header_1); + err = xen_pcibk_config_add_fields(dev, header_1); break; default: err = -EINVAL; - printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n", + printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n", pci_name(dev), dev->hdr_type); break; } diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c index 45c31fb391ec..921a889e65eb 100644 --- a/drivers/xen/xen-pciback/conf_space_quirks.c +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -11,8 +11,8 @@ #include "conf_space.h" #include "conf_space_quirks.h" -LIST_HEAD(pciback_quirks); - +LIST_HEAD(xen_pcibk_quirks); +#define DRV_NAME "xen-pciback" static inline const struct pci_device_id * match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { @@ -27,29 +27,29 @@ match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) return NULL; } -struct pciback_config_quirk 
*pciback_find_quirk(struct pci_dev *dev) +static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev) { - struct pciback_config_quirk *tmp_quirk; + struct xen_pcibk_config_quirk *tmp_quirk; - list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list) + list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list) if (match_one_device(&tmp_quirk->devid, dev) != NULL) goto out; tmp_quirk = NULL; - printk(KERN_DEBUG - "quirk didn't match any device pciback knows about\n"); + printk(KERN_DEBUG DRV_NAME + ":quirk didn't match any device xen_pciback knows about\n"); out: return tmp_quirk; } -static inline void register_quirk(struct pciback_config_quirk *quirk) +static inline void register_quirk(struct xen_pcibk_config_quirk *quirk) { - list_add_tail(&quirk->quirks_list, &pciback_quirks); + list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks); } -int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) +int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg) { int ret = 0; - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); struct config_field_entry *cfg_entry; list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { @@ -61,38 +61,38 @@ int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) return ret; } -int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field +int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field *field) { int err = 0; switch (field->size) { case 1: - field->u.b.read = pciback_read_config_byte; - field->u.b.write = pciback_write_config_byte; + field->u.b.read = xen_pcibk_read_config_byte; + field->u.b.write = xen_pcibk_write_config_byte; break; case 2: - field->u.w.read = pciback_read_config_word; - field->u.w.write = pciback_write_config_word; + field->u.w.read = xen_pcibk_read_config_word; + field->u.w.write = xen_pcibk_write_config_word; break; case 4: - field->u.dw.read = 
pciback_read_config_dword; - field->u.dw.write = pciback_write_config_dword; + field->u.dw.read = xen_pcibk_read_config_dword; + field->u.dw.write = xen_pcibk_write_config_dword; break; default: err = -EINVAL; goto out; } - pciback_config_add_field(dev, field); + xen_pcibk_config_add_field(dev, field); out: return err; } -int pciback_config_quirks_init(struct pci_dev *dev) +int xen_pcibk_config_quirks_init(struct pci_dev *dev) { - struct pciback_config_quirk *quirk; + struct xen_pcibk_config_quirk *quirk; int ret = 0; quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); @@ -116,17 +116,17 @@ out: return ret; } -void pciback_config_field_free(struct config_field *field) +void xen_pcibk_config_field_free(struct config_field *field) { kfree(field); } -int pciback_config_quirk_release(struct pci_dev *dev) +int xen_pcibk_config_quirk_release(struct pci_dev *dev) { - struct pciback_config_quirk *quirk; + struct xen_pcibk_config_quirk *quirk; int ret = 0; - quirk = pciback_find_quirk(dev); + quirk = xen_pcibk_find_quirk(dev); if (!quirk) { ret = -ENXIO; goto out; diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h index acd0e1ae8fc5..cfcc517e4570 100644 --- a/drivers/xen/xen-pciback/conf_space_quirks.h +++ b/drivers/xen/xen-pciback/conf_space_quirks.h @@ -11,25 +11,23 @@ #include #include -struct pciback_config_quirk { +struct xen_pcibk_config_quirk { struct list_head quirks_list; struct pci_device_id devid; struct pci_dev *pdev; }; -struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev); - -int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field +int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field *field); -int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg); +int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg); -int pciback_config_quirks_init(struct pci_dev *dev); +int xen_pcibk_config_quirks_init(struct pci_dev *dev); -void 
pciback_config_field_free(struct config_field *field); +void xen_pcibk_config_field_free(struct config_field *field); -int pciback_config_quirk_release(struct pci_dev *dev); +int xen_pcibk_config_quirk_release(struct pci_dev *dev); -int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg); +int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg); #endif diff --git a/drivers/xen/xen-pciback/controller.c b/drivers/xen/xen-pciback/controller.c deleted file mode 100644 index 7f04f116daec..000000000000 --- a/drivers/xen/xen-pciback/controller.c +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. - * Alex Williamson - * - * PCI "Controller" Backend - virtualize PCI bus topology based on PCI - * controllers. Devices under the same PCI controller are exposed on the - * same virtual domain:bus. Within a bus, device slots are virtualized - * to compact the bus. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include -#include -#include -#include -#include "pciback.h" - -#define PCI_MAX_BUSSES 255 -#define PCI_MAX_SLOTS 32 - -struct controller_dev_entry { - struct list_head list; - struct pci_dev *dev; - unsigned int devfn; -}; - -struct controller_list_entry { - struct list_head list; - struct pci_controller *controller; - unsigned int domain; - unsigned int bus; - unsigned int next_devfn; - struct list_head dev_list; -}; - -struct controller_dev_data { - struct list_head list; - unsigned int next_domain; - unsigned int next_bus; - spinlock_t lock; -}; - -struct walk_info { - struct pciback_device *pdev; - int resource_count; - int root_num; -}; - -struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn) -{ - struct controller_dev_data *dev_data = pdev->pci_dev_data; - struct controller_dev_entry *dev_entry; - struct controller_list_entry *cntrl_entry; - struct pci_dev *dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_data->lock, flags); - - list_for_each_entry(cntrl_entry, &dev_data->list, list) { - if (cntrl_entry->domain != domain || - cntrl_entry->bus != bus) - continue; - - list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { - if (devfn == dev_entry->devfn) { - dev = dev_entry->dev; - goto found; - } - } - } -found: - spin_unlock_irqrestore(&dev_data->lock, flags); - - return dev; -} - -int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) -{ - struct controller_dev_data *dev_data = pdev->pci_dev_data; - struct controller_dev_entry *dev_entry; - struct controller_list_entry *cntrl_entry; - struct pci_controller 
*dev_controller = PCI_CONTROLLER(dev); - unsigned long flags; - int ret = 0, found = 0; - - spin_lock_irqsave(&dev_data->lock, flags); - - /* Look to see if we already have a domain:bus for this controller */ - list_for_each_entry(cntrl_entry, &dev_data->list, list) { - if (cntrl_entry->controller == dev_controller) { - found = 1; - break; - } - } - - if (!found) { - cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC); - if (!cntrl_entry) { - ret = -ENOMEM; - goto out; - } - - cntrl_entry->controller = dev_controller; - cntrl_entry->next_devfn = PCI_DEVFN(0, 0); - - cntrl_entry->domain = dev_data->next_domain; - cntrl_entry->bus = dev_data->next_bus++; - if (dev_data->next_bus > PCI_MAX_BUSSES) { - dev_data->next_domain++; - dev_data->next_bus = 0; - } - - INIT_LIST_HEAD(&cntrl_entry->dev_list); - - list_add_tail(&cntrl_entry->list, &dev_data->list); - } - - if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) { - /* - * While it seems unlikely, this can actually happen if - * a controller has P2P bridges under it. - */ - xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x " - "is full, no room to export %04x:%02x:%02x.%x", - cntrl_entry->domain, cntrl_entry->bus, - pci_domain_nr(dev->bus), dev->bus->number, - PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); - ret = -ENOSPC; - goto out; - } - - dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC); - if (!dev_entry) { - if (list_empty(&cntrl_entry->dev_list)) { - list_del(&cntrl_entry->list); - kfree(cntrl_entry); - } - ret = -ENOMEM; - goto out; - } - - dev_entry->dev = dev; - dev_entry->devfn = cntrl_entry->next_devfn; - - list_add_tail(&dev_entry->list, &cntrl_entry->dev_list); - - cntrl_entry->next_devfn += PCI_DEVFN(1, 0); - -out: - spin_unlock_irqrestore(&dev_data->lock, flags); - - /* TODO: Publish virtual domain:bus:slot.func here. 
*/ - - return ret; -} - -void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) -{ - struct controller_dev_data *dev_data = pdev->pci_dev_data; - struct controller_list_entry *cntrl_entry; - struct controller_dev_entry *dev_entry = NULL; - struct pci_dev *found_dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_data->lock, flags); - - list_for_each_entry(cntrl_entry, &dev_data->list, list) { - if (cntrl_entry->controller != PCI_CONTROLLER(dev)) - continue; - - list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { - if (dev_entry->dev == dev) { - found_dev = dev_entry->dev; - break; - } - } - } - - if (!found_dev) { - spin_unlock_irqrestore(&dev_data->lock, flags); - return; - } - - list_del(&dev_entry->list); - kfree(dev_entry); - - if (list_empty(&cntrl_entry->dev_list)) { - list_del(&cntrl_entry->list); - kfree(cntrl_entry); - } - - spin_unlock_irqrestore(&dev_data->lock, flags); - pcistub_put_pci_dev(found_dev); -} - -int pciback_init_devices(struct pciback_device *pdev) -{ - struct controller_dev_data *dev_data; - - dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); - if (!dev_data) - return -ENOMEM; - - spin_lock_init(&dev_data->lock); - - INIT_LIST_HEAD(&dev_data->list); - - /* Starting domain:bus numbers */ - dev_data->next_domain = 0; - dev_data->next_bus = 0; - - pdev->pci_dev_data = dev_data; - - return 0; -} - -static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data) -{ - struct walk_info *info = data; - struct acpi_resource_address64 addr; - acpi_status status; - int i, len, err; - char str[32], tmp[3]; - unsigned char *ptr, *buf; - - status = acpi_resource_to_address64(res, &addr); - - /* Do we care about this range? Let's check. 
*/ - if (!ACPI_SUCCESS(status) || - !(addr.resource_type == ACPI_MEMORY_RANGE || - addr.resource_type == ACPI_IO_RANGE) || - !addr.address_length || addr.producer_consumer != ACPI_PRODUCER) - return AE_OK; - - /* - * Furthermore, we really only care to tell the guest about - * address ranges that require address translation of some sort. - */ - if (!(addr.resource_type == ACPI_MEMORY_RANGE && - addr.info.mem.translation) && - !(addr.resource_type == ACPI_IO_RANGE && - addr.info.io.translation)) - return AE_OK; - - /* Store the resource in xenbus for the guest */ - len = snprintf(str, sizeof(str), "root-%d-resource-%d", - info->root_num, info->resource_count); - if (unlikely(len >= (sizeof(str) - 1))) - return AE_OK; - - buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL); - if (!buf) - return AE_OK; - - /* Clean out resource_source */ - res->data.address64.resource_source.index = 0xFF; - res->data.address64.resource_source.string_length = 0; - res->data.address64.resource_source.string_ptr = NULL; - - ptr = (unsigned char *)res; - - /* Turn the acpi_resource into an ASCII byte stream */ - for (i = 0; i < sizeof(*res); i++) { - snprintf(tmp, sizeof(tmp), "%02x", ptr[i]); - strncat(buf, tmp, 2); - } - - err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename, - str, "%s", buf); - - if (!err) - info->resource_count++; - - kfree(buf); - - return AE_OK; -} - -int pciback_publish_pci_roots(struct pciback_device *pdev, - publish_pci_root_cb publish_root_cb) -{ - struct controller_dev_data *dev_data = pdev->pci_dev_data; - struct controller_list_entry *cntrl_entry; - int i, root_num, len, err = 0; - unsigned int domain, bus; - char str[64]; - struct walk_info info; - - spin_lock(&dev_data->lock); - - list_for_each_entry(cntrl_entry, &dev_data->list, list) { - /* First publish all the domain:bus info */ - err = publish_root_cb(pdev, cntrl_entry->domain, - cntrl_entry->bus); - if (err) - goto out; - - /* - * Now figure out which root-%d this belongs to - * so we can associate 
resources with it. - */ - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, - "root_num", "%d", &root_num); - - if (err != 1) - goto out; - - for (i = 0; i < root_num; i++) { - len = snprintf(str, sizeof(str), "root-%d", i); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, - str, "%x:%x", &domain, &bus); - if (err != 2) - goto out; - - /* Is this the one we just published? */ - if (domain == cntrl_entry->domain && - bus == cntrl_entry->bus) - break; - } - - if (i == root_num) - goto out; - - info.pdev = pdev; - info.resource_count = 0; - info.root_num = i; - - /* Let ACPI do the heavy lifting on decoding resources */ - acpi_walk_resources(cntrl_entry->controller->acpi_handle, - METHOD_NAME__CRS, write_xenbus_resource, - &info); - - /* No resouces. OK. On to the next one */ - if (!info.resource_count) - continue; - - /* Store the number of resources we wrote for this root-%d */ - len = snprintf(str, sizeof(str), "root-%d-resources", i); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, - "%d", info.resource_count); - if (err) - goto out; - } - - /* Finally, write some magic to synchronize with the guest. 
*/ - len = snprintf(str, sizeof(str), "root-resource-magic"); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, - "%lx", (sizeof(struct acpi_resource) * 2) + 1); - -out: - spin_unlock(&dev_data->lock); - - return err; -} - -void pciback_release_devices(struct pciback_device *pdev) -{ - struct controller_dev_data *dev_data = pdev->pci_dev_data; - struct controller_list_entry *cntrl_entry, *c; - struct controller_dev_entry *dev_entry, *d; - - list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) { - list_for_each_entry_safe(dev_entry, d, - &cntrl_entry->dev_list, list) { - list_del(&dev_entry->list); - pcistub_put_pci_dev(dev_entry->dev); - kfree(dev_entry); - } - list_del(&cntrl_entry->list); - kfree(cntrl_entry); - } - - kfree(dev_data); - pdev->pci_dev_data = NULL; -} - -int pciback_get_pcifront_dev(struct pci_dev *pcidev, - struct pciback_device *pdev, - unsigned int *domain, unsigned int *bus, unsigned int *devfn) -{ - struct controller_dev_data *dev_data = pdev->pci_dev_data; - struct controller_dev_entry *dev_entry; - struct controller_list_entry *cntrl_entry; - unsigned long flags; - int found = 0; - spin_lock_irqsave(&dev_data->lock, flags); - - list_for_each_entry(cntrl_entry, &dev_data->list, list) { - list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { - if ((dev_entry->dev->bus->number == - pcidev->bus->number) && - (dev_entry->dev->devfn == - pcidev->devfn) && - (pci_domain_nr(dev_entry->dev->bus) == - pci_domain_nr(pcidev->bus))) { - found = 1; - *domain = cntrl_entry->domain; - *bus = cntrl_entry->bus; - *devfn = dev_entry->devfn; - goto out; - } - } - } -out: - spin_unlock_irqrestore(&dev_data->lock, flags); - return found; - -} - diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index 6e3999b997d4..b451cb8dd2ff 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ 
b/drivers/xen/xen-pciback/passthrough.c @@ -16,9 +16,9 @@ struct passthrough_dev_data { spinlock_t lock; }; -struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn) +struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; @@ -41,8 +41,8 @@ struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, return dev; } -int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) +int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; @@ -68,7 +68,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, return err; } -void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; @@ -91,7 +92,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) pcistub_put_pci_dev(found_dev); } -int pciback_init_devices(struct pciback_device *pdev) +int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { struct passthrough_dev_data *dev_data; @@ -108,8 +109,8 @@ int pciback_init_devices(struct pciback_device *pdev) return 0; } -int pciback_publish_pci_roots(struct pciback_device *pdev, - publish_pci_root_cb publish_root_cb) +int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb publish_root_cb) { int err = 0; struct passthrough_dev_data *dev_data = pdev->pci_dev_data; @@ -153,7 +154,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev, 
return err; } -void pciback_release_devices(struct pciback_device *pdev) +void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; @@ -168,11 +169,10 @@ void pciback_release_devices(struct pciback_device *pdev) pdev->pci_dev_data = NULL; } -int pciback_get_pcifront_dev(struct pci_dev *pcidev, - struct pciback_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn) - +int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) { *domain = pci_domain_nr(pcidev->bus); *bus = pcidev->bus->number; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index c4d1071ebbe6..aec214ac0a14 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,12 +21,12 @@ #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "pciback" +#define DRV_NAME "xen-pciback" static char *pci_devs_to_hide; -wait_queue_head_t aer_wait_queue; -/*Add sem for sync AER handling and pciback remove/reconfigue ops, -* We want to avoid in middle of AER ops, pciback devices is being removed +wait_queue_head_t xen_pcibk_aer_wait_queue; +/*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops, +* We want to avoid in middle of AER ops, xen_pcibk devices is being removed */ static DECLARE_RWSEM(pcistub_sem); module_param_named(hide, pci_devs_to_hide, charp, 0444); @@ -46,7 +46,7 @@ struct pcistub_device { spinlock_t lock; struct pci_dev *dev; - struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */ + struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */ }; /* Access to pcistub_devices & seized_devices lists and the initialize_devices @@ -95,9 +95,9 @@ static void pcistub_device_release(struct kref *kref) xen_unregister_device_domain_owner(psdev->dev); /* Clean-up 
the device */ - pciback_reset_device(psdev->dev); - pciback_config_free_dyn_fields(psdev->dev); - pciback_config_free_dev(psdev->dev); + xen_pcibk_reset_device(psdev->dev); + xen_pcibk_config_free_dyn_fields(psdev->dev); + xen_pcibk_config_free_dev(psdev->dev); kfree(pci_get_drvdata(psdev->dev)); pci_set_drvdata(psdev->dev, NULL); @@ -142,7 +142,7 @@ out: return psdev; } -static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev, +static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev, struct pcistub_device *psdev) { struct pci_dev *pci_dev = NULL; @@ -163,7 +163,7 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev, return pci_dev; } -struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, +struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, int domain, int bus, int slot, int func) { @@ -187,7 +187,7 @@ struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, return found_dev; } -struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, +struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev) { struct pcistub_device *psdev; @@ -224,15 +224,15 @@ void pcistub_put_pci_dev(struct pci_dev *dev) spin_unlock_irqrestore(&pcistub_devices_lock, flags); /*hold this lock for avoiding breaking link between - * pcistub and pciback when AER is in processing + * pcistub and xen_pcibk when AER is in processing */ down_write(&pcistub_sem); /* Cleanup our device * (so it's ready for the next domain) */ - pciback_reset_device(found_psdev->dev); - pciback_config_free_dyn_fields(found_psdev->dev); - pciback_config_reset_dev(found_psdev->dev); + xen_pcibk_reset_device(found_psdev->dev); + xen_pcibk_config_free_dyn_fields(found_psdev->dev); + xen_pcibk_config_reset_dev(found_psdev->dev); spin_lock_irqsave(&found_psdev->lock, flags); found_psdev->pdev = NULL; @@ -282,13 +282,13 @@ static int __devinit pcistub_match(struct 
pci_dev *dev) static int __devinit pcistub_init_device(struct pci_dev *dev) { - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; int err = 0; dev_dbg(&dev->dev, "initializing...\n"); /* The PCI backend is not intended to be a module (or to work with - * removable PCI devices (yet). If it were, pciback_config_free() + * removable PCI devices (yet). If it were, xen_pcibk_config_free() * would need to be called somewhere to free the memory allocated * here and then to call kfree(pci_get_drvdata(psdev->dev)). */ @@ -308,8 +308,8 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) dev_dbg(&dev->dev, "initializing config\n"); - init_waitqueue_head(&aer_wait_queue); - err = pciback_config_init_dev(dev); + init_waitqueue_head(&xen_pcibk_aer_wait_queue); + err = xen_pcibk_config_init_dev(dev); if (err) goto out; @@ -329,12 +329,12 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) * data is setup before we export) */ dev_dbg(&dev->dev, "reset device\n"); - pciback_reset_device(dev); + xen_pcibk_reset_device(dev); return 0; config_release: - pciback_config_free_dev(dev); + xen_pcibk_config_free_dev(dev); out: pci_set_drvdata(dev, NULL); @@ -354,7 +354,7 @@ static int __init pcistub_init_devices_late(void) unsigned long flags; int err = 0; - pr_debug("pciback: pcistub_init_devices_late\n"); + pr_debug(DRV_NAME ": pcistub_init_devices_late\n"); spin_lock_irqsave(&pcistub_devices_lock, flags); @@ -458,7 +458,7 @@ static void pcistub_remove(struct pci_dev *dev) spin_lock_irqsave(&pcistub_devices_lock, flags); - pciback_config_quirk_release(dev); + xen_pcibk_config_quirk_release(dev); list_for_each_entry(psdev, &pcistub_devices, dev_list) { if (psdev->dev == dev) { @@ -474,17 +474,17 @@ static void pcistub_remove(struct pci_dev *dev) found_psdev->pdev); if (found_psdev->pdev) { - printk(KERN_WARNING "pciback: ****** removing device " + printk(KERN_WARNING DRV_NAME ": ****** removing device " "%s while still in-use! 
******\n", pci_name(found_psdev->dev)); - printk(KERN_WARNING "pciback: ****** driver domain may " - "still access this device's i/o resources!\n"); - printk(KERN_WARNING "pciback: ****** shutdown driver " + printk(KERN_WARNING DRV_NAME ": ****** driver domain may" + " still access this device's i/o resources!\n"); + printk(KERN_WARNING DRV_NAME ": ****** shutdown driver " "domain before binding device\n"); - printk(KERN_WARNING "pciback: ****** to other drivers " + printk(KERN_WARNING DRV_NAME ": ****** to other drivers " "or domains\n"); - pciback_release_pci_dev(found_psdev->pdev, + xen_pcibk_release_pci_dev(found_psdev->pdev, found_psdev->dev); } @@ -541,11 +541,12 @@ again: } /* For each aer recovery step error_detected, mmio_enabled, etc, front_end and - * backend need to have cooperation. In pciback, those steps will do similar + * backend need to have cooperation. In xen_pcibk, those steps will do similar * jobs: send service request and waiting for front_end response. */ static pci_ers_result_t common_process(struct pcistub_device *psdev, - pci_channel_state_t state, int aer_cmd, pci_ers_result_t result) + pci_channel_state_t state, int aer_cmd, + pci_ers_result_t result) { pci_ers_result_t res = result; struct xen_pcie_aer_op *aer_op; @@ -557,21 +558,21 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, /*useful for error_detected callback*/ aer_op->err = state; /*pcifront_end BDF*/ - ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev, + ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev, &aer_op->domain, &aer_op->bus, &aer_op->devfn); if (!ret) { dev_err(&psdev->dev->dev, - "pciback: failed to get pcifront device\n"); + DRV_NAME ": failed to get pcifront device\n"); return PCI_ERS_RESULT_NONE; } wmb(); dev_dbg(&psdev->dev->dev, - "pciback: aer_op %x dom %x bus %x devfn %x\n", + DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n", aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn); - /*local flag to mark there's aer request, 
pciback callback will use this - * flag to judge whether we need to check pci-front give aer service - * ack signal + /*local flag to mark there's aer request, xen_pcibk callback will use + * this flag to judge whether we need to check pci-front give aer + * service ack signal */ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); @@ -584,8 +585,9 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, wmb(); notify_remote_via_irq(psdev->pdev->evtchn_irq); - ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active, - (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ); + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, + !(test_bit(_XEN_PCIB_active, (unsigned long *) + &psdev->pdev->sh_info->flags)), 300*HZ); if (!ret) { if (test_bit(_XEN_PCIB_active, @@ -603,8 +605,8 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, if (test_bit(_XEN_PCIF_active, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in pciback\n"); - test_and_schedule_op(psdev->pdev); + "schedule pci_conf service in xen_pcibk\n"); + xen_pcibk_test_and_schedule_op(psdev->pdev); } res = (pci_ers_result_t)aer_op->err; @@ -612,19 +614,19 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } /* -* pciback_slot_reset: it will send the slot_reset request to pcifront in case +* xen_pcibk_slot_reset: it will send the slot_reset request to pcifront in case * of the device driver could provide this service, and then wait for pcifront * ack. 
* @dev: pointer to PCI devices * return value is used by aer_core do_recovery policy */ -static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev) +static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) { struct pcistub_device *psdev; pci_ers_result_t result; result = PCI_ERS_RESULT_RECOVERED; - dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n", + dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n", dev->bus->number, dev->devfn); down_write(&pcistub_sem); @@ -635,12 +637,12 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev) if (!psdev || !psdev->pdev) { dev_err(&dev->dev, - "pciback device is not found/assigned\n"); + DRV_NAME " device is not found/assigned\n"); goto end; } if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, "pciback device is not connected or owned" + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); goto release; @@ -669,20 +671,20 @@ end: } -/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront +/*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to pcifront * in case of the device driver could provide this service, and then wait * for pcifront ack * @dev: pointer to PCI devices * return value is used by aer_core do_recovery policy */ -static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev) +static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) { struct pcistub_device *psdev; pci_ers_result_t result; result = PCI_ERS_RESULT_RECOVERED; - dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n", + dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n", dev->bus->number, dev->devfn); down_write(&pcistub_sem); @@ -693,12 +695,12 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev) if (!psdev || !psdev->pdev) { dev_err(&dev->dev, - "pciback device is not found/assigned\n"); + DRV_NAME " device is not found/assigned\n"); goto end; } if 
(!psdev->pdev->sh_info) { - dev_err(&dev->dev, "pciback device is not connected or owned" + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); goto release; @@ -725,7 +727,7 @@ end: return result; } -/*pciback_error_detected: it will send the error_detected request to pcifront +/*xen_pcibk_error_detected: it will send the error_detected request to pcifront * in case of the device driver could provide this service, and then wait * for pcifront ack. * @dev: pointer to PCI devices @@ -733,14 +735,14 @@ end: * return value is used by aer_core do_recovery policy */ -static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, +static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, pci_channel_state_t error) { struct pcistub_device *psdev; pci_ers_result_t result; result = PCI_ERS_RESULT_CAN_RECOVER; - dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n", + dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n", dev->bus->number, dev->devfn); down_write(&pcistub_sem); @@ -751,12 +753,12 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, if (!psdev || !psdev->pdev) { dev_err(&dev->dev, - "pciback device is not found/assigned\n"); + DRV_NAME " device is not found/assigned\n"); goto end; } if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, "pciback device is not connected or owned" + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); goto release; @@ -784,17 +786,17 @@ end: return result; } -/*pciback_error_resume: it will send the error_resume request to pcifront +/*xen_pcibk_error_resume: it will send the error_resume request to pcifront * in case of the device driver could provide this service, and then wait * for pcifront ack. 
* @dev: pointer to PCI devices */ -static void pciback_error_resume(struct pci_dev *dev) +static void xen_pcibk_error_resume(struct pci_dev *dev) { struct pcistub_device *psdev; - dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n", + dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n", dev->bus->number, dev->devfn); down_write(&pcistub_sem); @@ -805,12 +807,12 @@ static void pciback_error_resume(struct pci_dev *dev) if (!psdev || !psdev->pdev) { dev_err(&dev->dev, - "pciback device is not found/assigned\n"); + DRV_NAME " device is not found/assigned\n"); goto end; } if (!psdev->pdev->sh_info) { - dev_err(&dev->dev, "pciback device is not connected or owned" + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); goto release; @@ -832,12 +834,12 @@ end: return; } -/*add pciback AER handling*/ -static struct pci_error_handlers pciback_error_handler = { - .error_detected = pciback_error_detected, - .mmio_enabled = pciback_mmio_enabled, - .slot_reset = pciback_slot_reset, - .resume = pciback_error_resume, +/*add xen_pcibk AER handling*/ +static struct pci_error_handlers xen_pcibk_error_handler = { + .error_detected = xen_pcibk_error_detected, + .mmio_enabled = xen_pcibk_mmio_enabled, + .slot_reset = xen_pcibk_slot_reset, + .resume = xen_pcibk_error_resume, }; /* @@ -845,12 +847,14 @@ static struct pci_error_handlers pciback_error_handler = { * for a normal device. I don't want it to be loaded automatically. */ -static struct pci_driver pciback_pci_driver = { - .name = DRV_NAME, +static struct pci_driver xen_pcibk_pci_driver = { + /* The name should be xen_pciback, but until the tools are updated + * we will keep it as pciback. 
*/ + .name = "pciback", .id_table = pcistub_ids, .probe = pcistub_probe, .remove = pcistub_remove, - .err_handler = &pciback_error_handler, + .err_handler = &xen_pcibk_error_handler, }; static inline int str_to_slot(const char *buf, int *domain, int *bus, @@ -899,7 +903,7 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) pci_dev_id->bus = bus; pci_dev_id->devfn = PCI_DEVFN(slot, func); - pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n", + pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n", domain, bus, slot, func); spin_lock_irqsave(&device_ids_lock, flags); @@ -929,7 +933,7 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func) err = 0; - pr_debug("pciback: removed %04x:%02x:%02x.%01x from " + pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from " "seize list\n", domain, bus, slot, func); } } @@ -965,9 +969,9 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, field->init = NULL; field->reset = NULL; field->release = NULL; - field->clean = pciback_config_field_free; + field->clean = xen_pcibk_config_field_free; - err = pciback_config_quirks_add_field(dev, field); + err = xen_pcibk_config_quirks_add_field(dev, field); if (err) kfree(field); out: @@ -1041,7 +1045,7 @@ DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) { struct pcistub_device *psdev; - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; size_t count = 0; unsigned long flags; @@ -1073,7 +1077,7 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, size_t count) { struct pcistub_device *psdev; - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; int domain, bus, slot, func; int err = -ENOENT; @@ -1127,13 +1131,13 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) { int count = 0; unsigned long flags; - struct pciback_config_quirk *quirk; - 
struct pciback_dev_data *dev_data; + struct xen_pcibk_config_quirk *quirk; + struct xen_pcibk_dev_data *dev_data; const struct config_field *field; const struct config_field_entry *cfg_entry; spin_lock_irqsave(&device_ids_lock, flags); - list_for_each_entry(quirk, &pciback_quirks, quirks_list) { + list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) { if (count >= PAGE_SIZE) goto out; @@ -1175,7 +1179,7 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf, int domain, bus, slot, func; int err; struct pcistub_device *psdev; - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) goto out; @@ -1213,7 +1217,7 @@ out: static ssize_t permissive_show(struct device_driver *drv, char *buf) { struct pcistub_device *psdev; - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; size_t count = 0; unsigned long flags; spin_lock_irqsave(&pcistub_devices_lock, flags); @@ -1237,17 +1241,18 @@ DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); static void pcistub_exit(void) { - driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot); - driver_remove_file(&pciback_pci_driver.driver, + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot); + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_remove_slot); - driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots); - driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks); - driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); - driver_remove_file(&pciback_pci_driver.driver, + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots); + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks); + driver_remove_file(&xen_pcibk_pci_driver.driver, + &driver_attr_permissive); + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_irq_handlers); - 
driver_remove_file(&pciback_pci_driver.driver, + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_irq_handler_state); - pci_unregister_driver(&pciback_pci_driver); + pci_unregister_driver(&xen_pcibk_pci_driver); } static int __init pcistub_init(void) @@ -1286,30 +1291,30 @@ static int __init pcistub_init(void) * first one to get offered PCI devices as they become * available (and thus we can be the first to grab them) */ - err = pci_register_driver(&pciback_pci_driver); + err = pci_register_driver(&xen_pcibk_pci_driver); if (err < 0) goto out; - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot); if (!err) - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_remove_slot); if (!err) - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots); if (!err) - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks); if (!err) - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_permissive); if (!err) - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_irq_handlers); if (!err) - err = driver_create_file(&pciback_pci_driver.driver, + err = driver_create_file(&xen_pcibk_pci_driver.driver, &driver_attr_irq_handler_state); if (err) pcistub_exit(); @@ -1318,7 +1323,7 @@ out: return err; parse_error: - printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n", + printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n", pci_devs_to_hide + pos); return -EINVAL; } @@ -1326,7 +1331,7 @@ parse_error: #ifndef MODULE /* * fs_initcall happens before device_initcall - * so pciback *should* get called first (b/c 
we + * so xen_pcibk *should* get called first (b/c we * want to suck up any device before other drivers * get a chance by being the first pci device * driver to register) @@ -1334,14 +1339,14 @@ parse_error: fs_initcall(pcistub_init); #endif -static int __init pciback_init(void) +static int __init xen_pcibk_init(void) { int err; if (!xen_initial_domain()) return -ENODEV; - err = pciback_config_init(); + err = xen_pcibk_config_init(); if (err) return err; @@ -1352,20 +1357,20 @@ static int __init pciback_init(void) #endif pcistub_init_devices_late(); - err = pciback_xenbus_register(); + err = xen_pcibk_xenbus_register(); if (err) pcistub_exit(); return err; } -static void __exit pciback_cleanup(void) +static void __exit xen_pcibk_cleanup(void) { - pciback_xenbus_unregister(); + xen_pcibk_xenbus_unregister(); pcistub_exit(); } -module_init(pciback_init); -module_exit(pciback_cleanup); +module_init(xen_pcibk_init); +module_exit(xen_pcibk_cleanup); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 5c140200a5ea..427b7fd01356 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -25,25 +25,19 @@ struct pci_dev_entry { #define _PCIB_op_pending (1) #define PCIB_op_pending (1<<(_PCIB_op_pending)) -struct pciback_device { +struct xen_pcibk_device { void *pci_dev_data; spinlock_t dev_lock; - struct xenbus_device *xdev; - struct xenbus_watch be_watch; u8 be_watching; - int evtchn_irq; - struct xen_pci_sharedinfo *sh_info; - unsigned long flags; - struct work_struct op_work; }; -struct pciback_dev_data { +struct xen_pcibk_dev_data { struct list_head config_fields; unsigned int permissive:1; unsigned int warned_on_write:1; @@ -52,91 +46,78 @@ struct pciback_dev_data { unsigned int ack_intr:1; /* .. 
and ACK-ing */ unsigned long handled; unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ - char irq_name[0]; /* pciback[000:04:00.0] */ + char irq_name[0]; /* xen-pcibk[000:04:00.0] */ }; -/* Used by XenBus and pciback_ops.c */ -extern wait_queue_head_t aer_wait_queue; -extern struct workqueue_struct *pciback_wq; +/* Used by XenBus and xen_pcibk_ops.c */ +extern wait_queue_head_t xen_pcibk_aer_wait_queue; +extern struct workqueue_struct *xen_pcibk_wq; /* Used by pcistub.c and conf_space_quirks.c */ -extern struct list_head pciback_quirks; +extern struct list_head xen_pcibk_quirks; /* Get/Put PCI Devices that are hidden from the PCI Backend Domain */ -struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, +struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, int domain, int bus, int slot, int func); -struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, +struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev); void pcistub_put_pci_dev(struct pci_dev *dev); /* Ensure a device is turned off or reset */ -void pciback_reset_device(struct pci_dev *pdev); +void xen_pcibk_reset_device(struct pci_dev *pdev); /* Access a virtual configuration space for a PCI device */ -int pciback_config_init(void); -int pciback_config_init_dev(struct pci_dev *dev); -void pciback_config_free_dyn_fields(struct pci_dev *dev); -void pciback_config_reset_dev(struct pci_dev *dev); -void pciback_config_free_dev(struct pci_dev *dev); -int pciback_config_read(struct pci_dev *dev, int offset, int size, - u32 *ret_val); -int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value); +int xen_pcibk_config_init(void); +int xen_pcibk_config_init_dev(struct pci_dev *dev); +void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev); +void xen_pcibk_config_reset_dev(struct pci_dev *dev); +void xen_pcibk_config_free_dev(struct pci_dev *dev); +int xen_pcibk_config_read(struct pci_dev *dev, 
int offset, int size, + u32 *ret_val); +int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, + u32 value); /* Handle requests for specific devices from the frontend */ -typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev, +typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev, unsigned int domain, unsigned int bus, unsigned int devfn, unsigned int devid); -typedef int (*publish_pci_root_cb) (struct pciback_device *pdev, +typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev, unsigned int domain, unsigned int bus); -int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb); -void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev); -struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn); +int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb); +void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev); +struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn); /** -* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback +* Add for domain0 PCIE-AER handling. 
Get guest domain/bus/devfn in xen_pcibk * before sending aer request to pcifront, so that guest could identify -* device, coopearte with pciback to finish aer recovery job if device driver +* device, coopearte with xen_pcibk to finish aer recovery job if device driver * has the capability */ -int pciback_get_pcifront_dev(struct pci_dev *pcidev, - struct pciback_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn); -int pciback_init_devices(struct pciback_device *pdev); -int pciback_publish_pci_roots(struct pciback_device *pdev, - publish_pci_root_cb cb); -void pciback_release_devices(struct pciback_device *pdev); +int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn); +int xen_pcibk_init_devices(struct xen_pcibk_device *pdev); +int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb cb); +void xen_pcibk_release_devices(struct xen_pcibk_device *pdev); /* Handles events from front-end */ -irqreturn_t pciback_handle_event(int irq, void *dev_id); -void pciback_do_op(struct work_struct *data); +irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); +void xen_pcibk_do_op(struct work_struct *data); -int pciback_xenbus_register(void); -void pciback_xenbus_unregister(void); +int xen_pcibk_xenbus_register(void); +void xen_pcibk_xenbus_unregister(void); -#ifdef CONFIG_PCI_MSI -int pciback_enable_msi(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op); - -int pciback_disable_msi(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op); - - -int pciback_enable_msix(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op); - -int pciback_disable_msix(struct pciback_device *pdev, - struct pci_dev *dev, struct xen_pci_op *op); -#endif extern int verbose_request; -void test_and_schedule_op(struct pciback_device *pdev); +void xen_pcibk_test_and_schedule_op(struct 
xen_pcibk_device *pdev); #endif /* Handles shared IRQs that can to device domain and control domain. */ -void pciback_irq_handler(struct pci_dev *dev, int reset); -irqreturn_t pciback_guest_interrupt(int irq, void *dev_id); +void xen_pcibk_irq_handler(struct pci_dev *dev, int reset); diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 28a2a5584831..8c95c3415b75 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -10,16 +10,19 @@ #include #include "pciback.h" +#define DRV_NAME "xen-pciback" int verbose_request; module_param(verbose_request, int, 0644); +static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); + /* Ensure a device is has the fake IRQ handler "turned on/off" and is - * ready to be exported. This MUST be run after pciback_reset_device + * ready to be exported. This MUST be run after xen_pcibk_reset_device * which does the actual PCI device enable/disable. */ -void pciback_control_isr(struct pci_dev *dev, int reset) +static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) { - struct pciback_dev_data *dev_data; + struct xen_pcibk_dev_data *dev_data; int rc; int enable = 0; @@ -66,7 +69,7 @@ void pciback_control_isr(struct pci_dev *dev, int reset) if (enable) { rc = request_irq(dev_data->irq, - pciback_guest_interrupt, IRQF_SHARED, + xen_pcibk_guest_interrupt, IRQF_SHARED, dev_data->irq_name, dev); if (rc) { dev_err(&dev->dev, "%s: failed to install fake IRQ " \ @@ -92,14 +95,14 @@ out: } /* Ensure a device is "turned off" and ready to be exported. 
- * (Also see pciback_config_reset to ensure virtual configuration space is + * (Also see xen_pcibk_config_reset to ensure virtual configuration space is * ready to be re-exported) */ -void pciback_reset_device(struct pci_dev *dev) +void xen_pcibk_reset_device(struct pci_dev *dev) { u16 cmd; - pciback_control_isr(dev, 1 /* reset device */); + xen_pcibk_control_isr(dev, 1 /* reset device */); /* Disable devices (but not bridges) */ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { @@ -126,43 +129,176 @@ void pciback_reset_device(struct pci_dev *dev) } } } + +#ifdef CONFIG_PCI_MSI +static +int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + struct xen_pcibk_dev_data *dev_data; + int otherend = pdev->xdev->otherend_id; + int status; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); + + status = pci_enable_msi(dev); + + if (status) { + printk(KERN_ERR "error enable msi for guest %x status %x\n", + otherend, status); + op->value = 0; + return XEN_PCI_ERR_op_failed; + } + + /* The value the guest needs is actually the IDT vector, not the + * the local domain's IRQ number. */ + + op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), + op->value); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 0; + + return 0; +} + +static +int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + struct xen_pcibk_dev_data *dev_data; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", + pci_name(dev)); + pci_disable_msi(dev); + + op->value = dev->irq ? 
xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), + op->value); + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + return 0; +} + +static +int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + struct xen_pcibk_dev_data *dev_data; + int i, result; + struct msix_entry *entries; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", + pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) + return -EINVAL; + + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) + return -ENOMEM; + + for (i = 0; i < op->value; i++) { + entries[i].entry = op->msix_entries[i].entry; + entries[i].vector = op->msix_entries[i].vector; + } + + result = pci_enable_msix(dev, entries, op->value); + + if (result == 0) { + for (i = 0; i < op->value; i++) { + op->msix_entries[i].entry = entries[i].entry; + if (entries[i].vector) + op->msix_entries[i].vector = + xen_pirq_from_irq(entries[i].vector); + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: " \ + "MSI-X[%d]: %d\n", + pci_name(dev), i, + op->msix_entries[i].vector); + } + } else { + printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n", + pci_name(dev), result); + } + kfree(entries); + + op->value = result; + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 0; + + return result; +} + +static +int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + struct xen_pcibk_dev_data *dev_data; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", + pci_name(dev)); + pci_disable_msix(dev); + + /* + * SR-IOV devices (which don't have any legacy IRQ) have + * an undefined IRQ value of zero. + */ + op->value = dev->irq ? 
xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), + op->value); + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + return 0; +} +#endif /* * Now the same evtchn is used for both pcifront conf_read_write request * as well as pcie aer front end ack. We use a new work_queue to schedule -* pciback conf_read_write service for avoiding confict with aer_core +* xen_pcibk conf_read_write service for avoiding confict with aer_core * do_recovery job which also use the system default work_queue */ -void test_and_schedule_op(struct pciback_device *pdev) +void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) { /* Check that frontend is requesting an operation and that we are not * already processing a request */ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { - queue_work(pciback_wq, &pdev->op_work); + queue_work(xen_pcibk_wq, &pdev->op_work); } /*_XEN_PCIB_active should have been cleared by pcifront. And also make - sure pciback is waiting for ack by checking _PCIB_op_pending*/ + sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) && test_bit(_PCIB_op_pending, &pdev->flags)) { - wake_up(&aer_wait_queue); + wake_up(&xen_pcibk_aer_wait_queue); } } /* Performing the configuration space reads/writes must not be done in atomic * context because some of the pci_* functions can sleep (mostly due to ACPI * use of semaphores). 
This function is intended to be called from a work - * queue in process context taking a struct pciback_device as a parameter */ + * queue in process context taking a struct xen_pcibk_device as a parameter */ -void pciback_do_op(struct work_struct *data) +void xen_pcibk_do_op(struct work_struct *data) { - struct pciback_device *pdev = - container_of(data, struct pciback_device, op_work); + struct xen_pcibk_device *pdev = + container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; - struct pciback_dev_data *dev_data = NULL; + struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->sh_info->op; int test_intx = 0; - dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); + dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) op->err = XEN_PCI_ERR_dev_not_found; @@ -172,25 +308,25 @@ void pciback_do_op(struct work_struct *data) test_intx = dev_data->enable_intx; switch (op->cmd) { case XEN_PCI_OP_conf_read: - op->err = pciback_config_read(dev, + op->err = xen_pcibk_config_read(dev, op->offset, op->size, &op->value); break; case XEN_PCI_OP_conf_write: - op->err = pciback_config_write(dev, + op->err = xen_pcibk_config_write(dev, op->offset, op->size, op->value); break; #ifdef CONFIG_PCI_MSI case XEN_PCI_OP_enable_msi: - op->err = pciback_enable_msi(pdev, dev, op); + op->err = xen_pcibk_enable_msi(pdev, dev, op); break; case XEN_PCI_OP_disable_msi: - op->err = pciback_disable_msi(pdev, dev, op); + op->err = xen_pcibk_disable_msi(pdev, dev, op); break; case XEN_PCI_OP_enable_msix: - op->err = pciback_enable_msix(pdev, dev, op); + op->err = xen_pcibk_enable_msix(pdev, dev, op); break; case XEN_PCI_OP_disable_msix: - op->err = pciback_disable_msix(pdev, dev, op); + op->err = xen_pcibk_disable_msix(pdev, dev, op); break; #endif default: @@ -201,7 +337,7 @@ void pciback_do_op(struct work_struct *data) if (!op->err && dev && dev_data) { /* Transition detected */ if ((dev_data->enable_intx != test_intx)) 
- pciback_control_isr(dev, 0 /* no reset */); + xen_pcibk_control_isr(dev, 0 /* no reset */); } /* Tell the driver domain that we're done. */ wmb(); @@ -216,21 +352,21 @@ void pciback_do_op(struct work_struct *data) /* Check to see if the driver domain tried to start another request in * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. */ - test_and_schedule_op(pdev); + xen_pcibk_test_and_schedule_op(pdev); } -irqreturn_t pciback_handle_event(int irq, void *dev_id) +irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) { - struct pciback_device *pdev = dev_id; + struct xen_pcibk_device *pdev = dev_id; - test_and_schedule_op(pdev); + xen_pcibk_test_and_schedule_op(pdev); return IRQ_HANDLED; } -irqreturn_t pciback_guest_interrupt(int irq, void *dev_id) +static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) { struct pci_dev *dev = (struct pci_dev *)dev_id; - struct pciback_dev_data *dev_data = pci_get_drvdata(dev); + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); if (dev_data->isr_on && dev_data->ack_intr) { dev_data->handled++; diff --git a/drivers/xen/xen-pciback/slot.c b/drivers/xen/xen-pciback/slot.c deleted file mode 100644 index efb922d6f78e..000000000000 --- a/drivers/xen/xen-pciback/slot.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * PCI Backend - Provides a Virtual PCI bus (with real devices) - * to the frontend - * - * Author: Ryan Wilson (vpci.c) - * Author: Tristan Gingold , from vpci.c - */ - -#include -#include -#include -#include -#include "pciback.h" - -/* There are at most 32 slots in a pci bus. 
*/ -#define PCI_SLOT_MAX 32 - -#define PCI_BUS_NBR 2 - -struct slot_dev_data { - /* Access to dev_list must be protected by lock */ - struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX]; - spinlock_t lock; -}; - -struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn) -{ - struct pci_dev *dev = NULL; - struct slot_dev_data *slot_dev = pdev->pci_dev_data; - unsigned long flags; - - if (domain != 0 || PCI_FUNC(devfn) != 0) - return NULL; - - if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR) - return NULL; - - spin_lock_irqsave(&slot_dev->lock, flags); - dev = slot_dev->slots[bus][PCI_SLOT(devfn)]; - spin_unlock_irqrestore(&slot_dev->lock, flags); - - return dev; -} - -int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) -{ - int err = 0, slot, bus; - struct slot_dev_data *slot_dev = pdev->pci_dev_data; - unsigned long flags; - - if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { - err = -EFAULT; - xenbus_dev_fatal(pdev->xdev, err, - "Can't export bridges on the virtual PCI bus"); - goto out; - } - - spin_lock_irqsave(&slot_dev->lock, flags); - - /* Assign to a new slot on the virtual PCI bus */ - for (bus = 0; bus < PCI_BUS_NBR; bus++) - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - if (slot_dev->slots[bus][slot] == NULL) { - printk(KERN_INFO - "pciback: slot: %s: assign to virtual " - "slot %d, bus %d\n", - pci_name(dev), slot, bus); - slot_dev->slots[bus][slot] = dev; - goto unlock; - } - } - - err = -ENOMEM; - xenbus_dev_fatal(pdev->xdev, err, - "No more space on root virtual PCI bus"); - -unlock: - spin_unlock_irqrestore(&slot_dev->lock, flags); - - /* Publish this device. 
*/ - if (!err) - err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid); - -out: - return err; -} - -void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) -{ - int slot, bus; - struct slot_dev_data *slot_dev = pdev->pci_dev_data; - struct pci_dev *found_dev = NULL; - unsigned long flags; - - spin_lock_irqsave(&slot_dev->lock, flags); - - for (bus = 0; bus < PCI_BUS_NBR; bus++) - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - if (slot_dev->slots[bus][slot] == dev) { - slot_dev->slots[bus][slot] = NULL; - found_dev = dev; - goto out; - } - } - -out: - spin_unlock_irqrestore(&slot_dev->lock, flags); - - if (found_dev) - pcistub_put_pci_dev(found_dev); -} - -int pciback_init_devices(struct pciback_device *pdev) -{ - int slot, bus; - struct slot_dev_data *slot_dev; - - slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL); - if (!slot_dev) - return -ENOMEM; - - spin_lock_init(&slot_dev->lock); - - for (bus = 0; bus < PCI_BUS_NBR; bus++) - for (slot = 0; slot < PCI_SLOT_MAX; slot++) - slot_dev->slots[bus][slot] = NULL; - - pdev->pci_dev_data = slot_dev; - - return 0; -} - -int pciback_publish_pci_roots(struct pciback_device *pdev, - publish_pci_root_cb publish_cb) -{ - /* The Virtual PCI bus has only one root */ - return publish_cb(pdev, 0, 0); -} - -void pciback_release_devices(struct pciback_device *pdev) -{ - int slot, bus; - struct slot_dev_data *slot_dev = pdev->pci_dev_data; - struct pci_dev *dev; - - for (bus = 0; bus < PCI_BUS_NBR; bus++) - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - dev = slot_dev->slots[bus][slot]; - if (dev != NULL) - pcistub_put_pci_dev(dev); - } - - kfree(slot_dev); - pdev->pci_dev_data = NULL; -} - -int pciback_get_pcifront_dev(struct pci_dev *pcidev, - struct pciback_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn) -{ - int slot, busnr; - struct slot_dev_data *slot_dev = pdev->pci_dev_data; - struct pci_dev *dev; - int found = 0; - unsigned long flags; - - 
spin_lock_irqsave(&slot_dev->lock, flags); - - for (busnr = 0; busnr < PCI_BUS_NBR; bus++) - for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - dev = slot_dev->slots[busnr][slot]; - if (dev && dev->bus->number == pcidev->bus->number - && dev->devfn == pcidev->devfn - && pci_domain_nr(dev->bus) == - pci_domain_nr(pcidev->bus)) { - found = 1; - *domain = 0; - *bus = busnr; - *devfn = PCI_DEVFN(slot, 0); - goto out; - } - } -out: - spin_unlock_irqrestore(&slot_dev->lock, flags); - return found; - -} diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 2857ab892f02..7d5c192a1505 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -12,6 +12,7 @@ #include "pciback.h" #define PCI_SLOT_MAX 32 +#define DRV_NAME "xen-pciback" struct vpci_dev_data { /* Access to dev_list must be protected by lock */ @@ -24,9 +25,9 @@ static inline struct list_head *list_first(struct list_head *head) return head->next; } -struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn) +struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn) { struct pci_dev_entry *entry; struct pci_dev *dev = NULL; @@ -62,8 +63,8 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r) return 0; } -int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) +int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) { int err = 0, slot, func = -1; struct pci_dev_entry *t, *dev_entry; @@ -96,7 +97,7 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, struct pci_dev_entry, list); if (match_slot(dev, t->dev)) { - pr_info("pciback: vpci: %s: " + pr_info(DRV_NAME ": vpci: %s: " "assign to virtual slot %d func %d\n", pci_name(dev), slot, PCI_FUNC(dev->devfn)); @@ 
-111,8 +112,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, /* Assign to a new slot on the virtual PCI bus */ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { if (list_empty(&vpci_dev->dev_list[slot])) { - printk(KERN_INFO - "pciback: vpci: %s: assign to virtual slot %d\n", + printk(KERN_INFO DRV_NAME + ": vpci: %s: assign to virtual slot %d\n", pci_name(dev), slot); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); @@ -136,7 +137,8 @@ out: return err; } -void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) +void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) { int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -165,7 +167,7 @@ out: pcistub_put_pci_dev(found_dev); } -int pciback_init_devices(struct pciback_device *pdev) +int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { int slot; struct vpci_dev_data *vpci_dev; @@ -184,14 +186,14 @@ int pciback_init_devices(struct pciback_device *pdev) return 0; } -int pciback_publish_pci_roots(struct pciback_device *pdev, - publish_pci_root_cb publish_cb) +int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb publish_cb) { /* The Virtual PCI bus has only one root */ return publish_cb(pdev, 0, 0); } -void pciback_release_devices(struct pciback_device *pdev) +void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -210,10 +212,10 @@ void pciback_release_devices(struct pciback_device *pdev) pdev->pci_dev_data = NULL; } -int pciback_get_pcifront_dev(struct pci_dev *pcidev, - struct pciback_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn) +int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) { struct pci_dev_entry *entry; struct pci_dev *dev = NULL; diff --git 
a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 70030c409212..1e5ba85c0d33 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -14,14 +14,15 @@ #include #include "pciback.h" +#define DRV_NAME "xen-pciback" #define INVALID_EVTCHN_IRQ (-1) -struct workqueue_struct *pciback_wq; +struct workqueue_struct *xen_pcibk_wq; -static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) +static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) { - struct pciback_device *pdev; + struct xen_pcibk_device *pdev; - pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL); if (pdev == NULL) goto out; dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); @@ -35,9 +36,9 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) pdev->evtchn_irq = INVALID_EVTCHN_IRQ; pdev->be_watching = 0; - INIT_WORK(&pdev->op_work, pciback_do_op); + INIT_WORK(&pdev->op_work, xen_pcibk_do_op); - if (pciback_init_devices(pdev)) { + if (xen_pcibk_init_devices(pdev)) { kfree(pdev); pdev = NULL; } @@ -45,7 +46,7 @@ out: return pdev; } -static void pciback_disconnect(struct pciback_device *pdev) +static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) { spin_lock(&pdev->dev_lock); @@ -60,7 +61,7 @@ static void pciback_disconnect(struct pciback_device *pdev) * before releasing the shared memory */ /* Note, the workqueue does not use spinlocks at all.*/ - flush_workqueue(pciback_wq); + flush_workqueue(xen_pcibk_wq); spin_lock(&pdev->dev_lock); if (pdev->sh_info != NULL) { @@ -71,16 +72,16 @@ static void pciback_disconnect(struct pciback_device *pdev) } -static void free_pdev(struct pciback_device *pdev) +static void free_pdev(struct xen_pcibk_device *pdev) { if (pdev->be_watching) { unregister_xenbus_watch(&pdev->be_watch); pdev->be_watching = 0; } - pciback_disconnect(pdev); + xen_pcibk_disconnect(pdev); - pciback_release_devices(pdev); + 
xen_pcibk_release_devices(pdev); dev_set_drvdata(&pdev->xdev->dev, NULL); pdev->xdev = NULL; @@ -88,7 +89,7 @@ static void free_pdev(struct pciback_device *pdev) kfree(pdev); } -static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, +static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, int remote_evtchn) { int err = 0; @@ -110,8 +111,8 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, spin_unlock(&pdev->dev_lock); err = bind_interdomain_evtchn_to_irqhandler( - pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, - 0, "pciback", pdev); + pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, + 0, DRV_NAME, pdev); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, "Error binding event channel to IRQ"); @@ -128,7 +129,7 @@ out: return err; } -static int pciback_attach(struct pciback_device *pdev) +static int xen_pcibk_attach(struct xen_pcibk_device *pdev) { int err = 0; int gnt_ref, remote_evtchn; @@ -161,12 +162,12 @@ static int pciback_attach(struct pciback_device *pdev) if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { xenbus_dev_fatal(pdev->xdev, -EFAULT, "version mismatch (%s/%s) with pcifront - " - "halting pciback", + "halting xen_pcibk", magic, XEN_PCI_MAGIC); goto out; } - err = pciback_do_attach(pdev, gnt_ref, remote_evtchn); + err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn); if (err) goto out; @@ -185,7 +186,7 @@ out: return err; } -static int pciback_publish_pci_dev(struct pciback_device *pdev, +static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, unsigned int bus, unsigned int devfn, unsigned int devid) { @@ -207,7 +208,7 @@ out: return err; } -static int pciback_export_device(struct pciback_device *pdev, +static int xen_pcibk_export_device(struct xen_pcibk_device *pdev, int domain, int bus, int slot, int func, int devid) { @@ -228,7 +229,8 @@ static int pciback_export_device(struct pciback_device *pdev, goto out; } - err = 
pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev); + err = xen_pcibk_add_pci_dev(pdev, dev, devid, + xen_pcibk_publish_pci_dev); if (err) goto out; @@ -253,7 +255,7 @@ out: return err; } -static int pciback_remove_device(struct pciback_device *pdev, +static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, int domain, int bus, int slot, int func) { int err = 0; @@ -262,7 +264,7 @@ static int pciback_remove_device(struct pciback_device *pdev, dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", domain, bus, slot, func); - dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); + dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); if (!dev) { err = -EINVAL; dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " @@ -274,13 +276,13 @@ static int pciback_remove_device(struct pciback_device *pdev, dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); xen_unregister_device_domain_owner(dev); - pciback_release_pci_dev(pdev, dev); + xen_pcibk_release_pci_dev(pdev, dev); out: return err; } -static int pciback_publish_pci_root(struct pciback_device *pdev, +static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev, unsigned int domain, unsigned int bus) { unsigned int d, b; @@ -340,7 +342,7 @@ out: return err; } -static int pciback_reconfigure(struct pciback_device *pdev) +static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) { int err = 0; int num_devs; @@ -411,14 +413,14 @@ static int pciback_reconfigure(struct pciback_device *pdev) goto out; } - err = pciback_export_device(pdev, domain, bus, slot, + err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i); if (err) goto out; /* Publish pci roots. 
*/ - err = pciback_publish_pci_roots(pdev, - pciback_publish_pci_root); + err = xen_pcibk_publish_pci_roots(pdev, + xen_pcibk_publish_pci_root); if (err) { xenbus_dev_fatal(pdev->xdev, err, "Error while publish PCI root" @@ -465,7 +467,7 @@ static int pciback_reconfigure(struct pciback_device *pdev) goto out; } - err = pciback_remove_device(pdev, domain, bus, slot, + err = xen_pcibk_remove_device(pdev, domain, bus, slot, func); if (err) goto out; @@ -493,20 +495,20 @@ out: return 0; } -static void pciback_frontend_changed(struct xenbus_device *xdev, +static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, enum xenbus_state fe_state) { - struct pciback_device *pdev = dev_get_drvdata(&xdev->dev); + struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev); dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); switch (fe_state) { case XenbusStateInitialised: - pciback_attach(pdev); + xen_pcibk_attach(pdev); break; case XenbusStateReconfiguring: - pciback_reconfigure(pdev); + xen_pcibk_reconfigure(pdev); break; case XenbusStateConnected: @@ -517,12 +519,12 @@ static void pciback_frontend_changed(struct xenbus_device *xdev, break; case XenbusStateClosing: - pciback_disconnect(pdev); + xen_pcibk_disconnect(pdev); xenbus_switch_state(xdev, XenbusStateClosing); break; case XenbusStateClosed: - pciback_disconnect(pdev); + xen_pcibk_disconnect(pdev); xenbus_switch_state(xdev, XenbusStateClosed); if (xenbus_dev_is_online(xdev)) break; @@ -537,7 +539,7 @@ static void pciback_frontend_changed(struct xenbus_device *xdev, } } -static int pciback_setup_backend(struct pciback_device *pdev) +static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) { /* Get configuration from xend (if available now) */ int domain, bus, slot, func; @@ -590,7 +592,7 @@ static int pciback_setup_backend(struct pciback_device *pdev) goto out; } - err = pciback_export_device(pdev, domain, bus, slot, func, i); + err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i); if 
(err) goto out; @@ -612,7 +614,7 @@ static int pciback_setup_backend(struct pciback_device *pdev) } } - err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); + err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root); if (err) { xenbus_dev_fatal(pdev->xdev, err, "Error while publish PCI root buses " @@ -628,20 +630,20 @@ static int pciback_setup_backend(struct pciback_device *pdev) out: if (!err) /* see if pcifront is already configured (if not, we'll wait) */ - pciback_attach(pdev); + xen_pcibk_attach(pdev); return err; } -static void pciback_be_watch(struct xenbus_watch *watch, +static void xen_pcibk_be_watch(struct xenbus_watch *watch, const char **vec, unsigned int len) { - struct pciback_device *pdev = - container_of(watch, struct pciback_device, be_watch); + struct xen_pcibk_device *pdev = + container_of(watch, struct xen_pcibk_device, be_watch); switch (xenbus_read_driver_state(pdev->xdev->nodename)) { case XenbusStateInitWait: - pciback_setup_backend(pdev); + xen_pcibk_setup_backend(pdev); break; default: @@ -649,16 +651,16 @@ static void pciback_be_watch(struct xenbus_watch *watch, } } -static int pciback_xenbus_probe(struct xenbus_device *dev, +static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { int err = 0; - struct pciback_device *pdev = alloc_pdev(dev); + struct xen_pcibk_device *pdev = alloc_pdev(dev); if (pdev == NULL) { err = -ENOMEM; xenbus_dev_fatal(dev, err, - "Error allocating pciback_device struct"); + "Error allocating xen_pcibk_device struct"); goto out; } @@ -669,7 +671,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev, /* watch the backend node for backend configuration information */ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, - pciback_be_watch); + xen_pcibk_be_watch); if (err) goto out; @@ -678,15 +680,15 @@ static int pciback_xenbus_probe(struct xenbus_device *dev, /* We need to force a call to our callback here in case * xend already 
configured us! */ - pciback_be_watch(&pdev->be_watch, NULL, 0); + xen_pcibk_be_watch(&pdev->be_watch, NULL, 0); out: return err; } -static int pciback_xenbus_remove(struct xenbus_device *dev) +static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) { - struct pciback_device *pdev = dev_get_drvdata(&dev->dev); + struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); if (pdev != NULL) free_pdev(pdev); @@ -699,28 +701,28 @@ static const struct xenbus_device_id xenpci_ids[] = { {""}, }; -static struct xenbus_driver xenbus_pciback_driver = { - .name = "pciback", +static struct xenbus_driver xenbus_xen_pcibk_driver = { + .name = DRV_NAME, .owner = THIS_MODULE, .ids = xenpci_ids, - .probe = pciback_xenbus_probe, - .remove = pciback_xenbus_remove, - .otherend_changed = pciback_frontend_changed, + .probe = xen_pcibk_xenbus_probe, + .remove = xen_pcibk_xenbus_remove, + .otherend_changed = xen_pcibk_frontend_changed, }; -int __init pciback_xenbus_register(void) +int __init xen_pcibk_xenbus_register(void) { - pciback_wq = create_workqueue("pciback_workqueue"); - if (!pciback_wq) { + xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); + if (!xen_pcibk_wq) { printk(KERN_ERR "%s: create" - "pciback_workqueue failed\n", __func__); + "xen_pciback_workqueue failed\n", __func__); return -EFAULT; } - return xenbus_register_backend(&xenbus_pciback_driver); + return xenbus_register_backend(&xenbus_xen_pcibk_driver); } -void __exit pciback_xenbus_unregister(void) +void __exit xen_pcibk_xenbus_unregister(void) { - destroy_workqueue(pciback_wq); - xenbus_unregister_driver(&xenbus_pciback_driver); + destroy_workqueue(xen_pcibk_wq); + xenbus_unregister_driver(&xenbus_xen_pcibk_driver); } -- cgit v1.2.3 From 778999703db6d875c22e1a8d02c8296ad4648958 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 12 Jul 2011 15:29:48 -0400 Subject: xen/pciback: Remove the DEBUG option. The latter is easily fixed - by the developer compiling the module with -DDEBUG. 
And during runtime - the loglvl provides quite a lot of useful data. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 9 --------- drivers/xen/xen-pciback/Makefile | 4 ---- 2 files changed, 13 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8af0792dfd67..0b6989f92041 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -139,13 +139,4 @@ config XEN_PCIDEV_BACKEND_PASS endchoice -config XEN_PCIDEV_BE_DEBUG - bool "Xen PCI Backend Debugging" - depends on XEN_PCIDEV_BACKEND - default n - help - Allows to observe all of the traffic from the frontend/backend - when reading and writting to the configuration registers. - If in doubt, say no. - endmenu diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile index e79c518afc9a..290396766f07 100644 --- a/drivers/xen/xen-pciback/Makefile +++ b/drivers/xen/xen-pciback/Makefile @@ -6,7 +6,3 @@ xen-pciback-y += conf_space.o conf_space_header.o \ conf_space_quirks.o xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o - -ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif -- cgit v1.2.3 From 2ebdc4263022e0015341016b123fe7f44f9cf396 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 11 Jul 2011 16:49:41 -0400 Subject: xen/pciback: Have 'passthrough' option instead of XEN_PCIDEV_BACKEND_PASS and XEN_PCIDEV_BACKEND_VPCI .. compile options. This way the user can decide during runtime whether they want the default 'vpci' (virtual pci passthrough) or where the PCI devices are passed in without any BDF renumbering. The option 'passthrough' allows the user to toggle it from 0 (vpci) to 1 (passthrough).
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 32 ++++-------- drivers/xen/xen-pciback/Makefile | 5 +- drivers/xen/xen-pciback/passthrough.c | 43 ++++++++++------ drivers/xen/xen-pciback/pciback.h | 94 ++++++++++++++++++++++++++++------- drivers/xen/xen-pciback/vpci.c | 43 ++++++++++------ drivers/xen/xen-pciback/xenbus.c | 21 ++++++++ 6 files changed, 166 insertions(+), 72 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 0b6989f92041..9b700b4a987a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -109,34 +109,22 @@ config XEN_PCIDEV_BACKEND tristate "Xen PCI-device backend driver" depends on PCI && X86 && XEN depends on XEN_BACKEND + default m help The PCI device backend driver allows the kernel to export arbitrary PCI devices to other guests. If you select this to be a module, you will need to make sure no other driver has bound to the device(s) you want to make visible to other guests. -choice - prompt "PCI Backend Mode" - depends on XEN_PCIDEV_BACKEND - -config XEN_PCIDEV_BACKEND_VPCI - bool "Virtual PCI" - help - This PCI Backend hides the true PCI topology and makes the frontend - think there is a single PCI bus with only the exported devices on it. - For example, a device at 03:05.0 will be re-assigned to 00:00.0. A - second device at 02:1a.1 will be re-assigned to 00:01.1. - -config XEN_PCIDEV_BACKEND_PASS - bool "Passthrough" - help - This PCI Backend provides a real view of the PCI topology to the - frontend (for example, a device at 06:01.b will still appear at - 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed - PCI devices to its driver domains. This may be required for drivers - which depend on finding their hardward in certain bus/slot - locations. + The parameter "passthrough" allows you specify how you want the PCI + devices to appear in the guest. 
You can choose the default (0) where + PCI topology starts at 00.00.0, or (1) for passthrough if you want + the PCI devices topology appear the same as in the host. -endchoice + The "hide" parameter (only applicable if backend driver is compiled + into the kernel) allows you to bind the PCI devices to this module + from the default device drivers. The argument is the list of PCI BDFs: + xen-pciback.hide=(03:00.0)(04:00.0) + If in doubt, say m. endmenu diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile index 290396766f07..ffe0ad3438bd 100644 --- a/drivers/xen/xen-pciback/Makefile +++ b/drivers/xen/xen-pciback/Makefile @@ -3,6 +3,5 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o xen-pciback-y += conf_space.o conf_space_header.o \ conf_space_capability.o \ - conf_space_quirks.o -xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o -xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o + conf_space_quirks.o vpci.o \ + passthrough.o diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index b451cb8dd2ff..1d32a9a42c01 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ b/drivers/xen/xen-pciback/passthrough.c @@ -16,9 +16,10 @@ struct passthrough_dev_data { spinlock_t lock; }; -struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn) +static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, + unsigned int bus, + unsigned int devfn) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; @@ -41,8 +42,9 @@ struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, return dev; } -int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) +static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, + struct 
pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; @@ -68,8 +70,8 @@ int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, return err; } -void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev) +static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; @@ -92,7 +94,7 @@ void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, pcistub_put_pci_dev(found_dev); } -int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) +static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { struct passthrough_dev_data *dev_data; @@ -109,8 +111,8 @@ int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) return 0; } -int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, - publish_pci_root_cb publish_root_cb) +static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb publish_root_cb) { int err = 0; struct passthrough_dev_data *dev_data = pdev->pci_dev_data; @@ -154,7 +156,7 @@ int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, return err; } -void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) +static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; @@ -169,13 +171,24 @@ void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) pdev->pci_dev_data = NULL; } -int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, - struct xen_pcibk_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn) +static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) { *domain = 
pci_domain_nr(pcidev->bus); *bus = pcidev->bus->number; *devfn = pcidev->devfn; return 1; } + +struct xen_pcibk_backend xen_pcibk_passthrough_backend = { + .name = "passthrough", + .init = __xen_pcibk_init_devices, + .free = __xen_pcibk_release_devices, + .find = __xen_pcibk_get_pcifront_dev, + .publish = __xen_pcibk_publish_pci_roots, + .release = __xen_pcibk_release_pci_dev, + .add = __xen_pcibk_add_pci_dev, + .get = __xen_pcibk_get_pci_dev, +}; diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 427b7fd01356..a0e131a81503 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -83,30 +83,90 @@ typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev, unsigned int devfn, unsigned int devid); typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev, unsigned int domain, unsigned int bus); -int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb); -void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev); -struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, - unsigned int domain, unsigned int bus, - unsigned int devfn); +/* Backend registration for the two types of BDF representation: + * vpci - BDFs start at 00 + * passthrough - BDFs are exactly like in the host. 
+ */ +struct xen_pcibk_backend { + char *name; + int (*init)(struct xen_pcibk_device *pdev); + void (*free)(struct xen_pcibk_device *pdev); + int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn); + int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb); + void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev); + int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb); + struct pci_dev *(*get)(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn); +}; + +extern struct xen_pcibk_backend xen_pcibk_vpci_backend; +extern struct xen_pcibk_backend xen_pcibk_passthrough_backend; +extern struct xen_pcibk_backend *xen_pcibk_backend; + +static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev, + int devid, + publish_pci_dev_cb publish_cb) +{ + if (xen_pcibk_backend && xen_pcibk_backend->add) + return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); + return -1; +}; +static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) +{ + if (xen_pcibk_backend && xen_pcibk_backend->free) + return xen_pcibk_backend->release(pdev, dev); +}; + +static inline struct pci_dev * +xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, + unsigned int bus, unsigned int devfn) +{ + if (xen_pcibk_backend && xen_pcibk_backend->get) + return xen_pcibk_backend->get(pdev, domain, bus, devfn); + return NULL; +}; /** * Add for domain0 PCIE-AER handling. 
Get guest domain/bus/devfn in xen_pcibk * before sending aer request to pcifront, so that guest could identify * device, coopearte with xen_pcibk to finish aer recovery job if device driver * has the capability */ - -int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, - struct xen_pcibk_device *pdev, - unsigned int *domain, unsigned int *bus, - unsigned int *devfn); -int xen_pcibk_init_devices(struct xen_pcibk_device *pdev); -int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, - publish_pci_root_cb cb); -void xen_pcibk_release_devices(struct xen_pcibk_device *pdev); - +static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, + unsigned int *bus, + unsigned int *devfn) +{ + if (xen_pcibk_backend && xen_pcibk_backend->find) + return xen_pcibk_backend->find(pcidev, pdev, domain, bus, + devfn); + return -1; +}; +static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) +{ + if (xen_pcibk_backend && xen_pcibk_backend->init) + return xen_pcibk_backend->init(pdev); + return -1; +}; +static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb cb) +{ + if (xen_pcibk_backend && xen_pcibk_backend->publish) + return xen_pcibk_backend->publish(pdev, cb); + return -1; +}; +static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) +{ + if (xen_pcibk_backend && xen_pcibk_backend->free) + return xen_pcibk_backend->free(pdev); +}; /* Handles events from front-end */ irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 7d5c192a1505..4a42cfb0959d 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -25,9 +25,10 @@ static inline struct list_head *list_first(struct list_head *head) return head->next; } -struct pci_dev *xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, 
- unsigned int domain, unsigned int bus, - unsigned int devfn) +static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, + unsigned int bus, + unsigned int devfn) { struct pci_dev_entry *entry; struct pci_dev *dev = NULL; @@ -63,8 +64,9 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r) return 0; } -int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, - int devid, publish_pci_dev_cb publish_cb) +static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev, int devid, + publish_pci_dev_cb publish_cb) { int err = 0, slot, func = -1; struct pci_dev_entry *t, *dev_entry; @@ -137,8 +139,8 @@ out: return err; } -void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, - struct pci_dev *dev) +static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) { int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -167,7 +169,7 @@ out: pcistub_put_pci_dev(found_dev); } -int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) +static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { int slot; struct vpci_dev_data *vpci_dev; @@ -186,14 +188,14 @@ int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) return 0; } -int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, - publish_pci_root_cb publish_cb) +static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb publish_cb) { /* The Virtual PCI bus has only one root */ return publish_cb(pdev, 0, 0); } -void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) +static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -212,10 +214,10 @@ void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) pdev->pci_dev_data = NULL; } -int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, - struct xen_pcibk_device *pdev, - unsigned int 
*domain, unsigned int *bus, - unsigned int *devfn) +static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) { struct pci_dev_entry *entry; struct pci_dev *dev = NULL; @@ -244,3 +246,14 @@ int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, spin_unlock_irqrestore(&vpci_dev->lock, flags); return found; } + +struct xen_pcibk_backend xen_pcibk_vpci_backend = { + .name = "vpci", + .init = __xen_pcibk_init_devices, + .free = __xen_pcibk_release_devices, + .find = __xen_pcibk_get_pcifront_dev, + .publish = __xen_pcibk_publish_pci_roots, + .release = __xen_pcibk_release_pci_dev, + .add = __xen_pcibk_add_pci_dev, + .get = __xen_pcibk_get_pci_dev, +}; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 1e5ba85c0d33..206c4ce030bc 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -18,6 +18,21 @@ #define INVALID_EVTCHN_IRQ (-1) struct workqueue_struct *xen_pcibk_wq; +static int __read_mostly passthrough; +module_param(passthrough, bool, S_IRUGO); +MODULE_PARM_DESC(passthrough, + "Option to specify how to export PCI topology to guest:\n"\ + " 0 - (default) Hide the true PCI topology and makes the frontend\n"\ + " there is a single PCI bus with only the exported devices on it.\n"\ + " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\ + " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\ + " 1 - Passthrough provides a real view of the PCI topology to the\n"\ + " frontend (for example, a device at 06:01.b will still appear at\n"\ + " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\ + " exposed PCI devices to its driver domains. 
This may be required\n"\ + " for drivers which depend on finding their hardward in certain\n"\ + " bus/slot locations."); + static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) { struct xen_pcibk_device *pdev; @@ -710,6 +725,8 @@ static struct xenbus_driver xenbus_xen_pcibk_driver = { .otherend_changed = xen_pcibk_frontend_changed, }; +struct xen_pcibk_backend *xen_pcibk_backend; + int __init xen_pcibk_xenbus_register(void) { xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); @@ -718,6 +735,10 @@ int __init xen_pcibk_xenbus_register(void) "xen_pciback_workqueue failed\n", __func__); return -EFAULT; } + xen_pcibk_backend = &xen_pcibk_vpci_backend; + if (passthrough) + xen_pcibk_backend = &xen_pcibk_passthrough_backend; + pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); return xenbus_register_backend(&xenbus_xen_pcibk_driver); } -- cgit v1.2.3