/* * VFIO PCI I/O Port & MMIO access * * Copyright (C) 2012 Red Hat, Inc. All rights reserved. * Author: Alex Williamson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * Derived from original vfio: * Copyright 2010 Cisco Systems, Inc. All rights reserved. * Author: Tom Lyon, pugs@cisco.com */ #include #include #include #include #include #include #include "vfio_pci_private.h" #ifdef __LITTLE_ENDIAN #define vfio_ioread64 ioread64 #define vfio_iowrite64 iowrite64 #define vfio_ioread32 ioread32 #define vfio_iowrite32 iowrite32 #define vfio_ioread16 ioread16 #define vfio_iowrite16 iowrite16 #else #define vfio_ioread64 ioread64be #define vfio_iowrite64 iowrite64be #define vfio_ioread32 ioread32be #define vfio_iowrite32 iowrite32be #define vfio_ioread16 ioread16be #define vfio_iowrite16 iowrite16be #endif #define vfio_ioread8 ioread8 #define vfio_iowrite8 iowrite8 /* * Read or write from an __iomem region (MMIO or I/O port) with an excluded * range which is inaccessible. The excluded range drops writes and fills * reads with -1. This is intended for handling MSI-X vector tables and * leftover space for ROM BARs. */ static ssize_t do_io_rw(void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, size_t x_end, bool iswrite) { ssize_t done = 0; while (count) { size_t fillable, filled; if (off < x_start) fillable = min(count, (size_t)(x_start - off)); else if (off >= x_end) fillable = count; else fillable = 0; if (fillable >= 4 && !(off % 4)) { u32 val; if (iswrite) { if (copy_from_user(&val, buf, 4)) return -EFAULT; vfio_iowrite32(val, io + off); } else { val = vfio_ioread32(io + off); if (copy_to_user(buf, &val, 4)) return -EFAULT; } filled = 4; } else if (fillable >= 2 && !(off % 2)) { u16 val; if (iswrite) { if (copy_from_user(&val, buf, 2)) return -EFAULT; vfio_iowrite16(val, io + off); } else { val = vfio_ioread16(io + off); if (copy_to_user(buf, &val, 2)) return -EFAULT; } filled = 2; } else if (fillable) { u8 val; if (iswrite) { if (copy_from_user(&val, buf, 1)) return -EFAULT; vfio_iowrite8(val, io + off); } else { val = vfio_ioread8(io + off); if (copy_to_user(buf, &val, 1)) return -EFAULT; } filled = 1; } else { /* Fill reads with -1, drop writes */ filled = min(count, (size_t)(x_end - off)); if (!iswrite) { u8 val = 0xFF; size_t i; for (i = 0; i < filled; i++) if (copy_to_user(buf + i, &val, 1)) return -EFAULT; } } count -= filled; done += filled; off += filled; buf += filled; } return done; } static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar) { struct pci_dev *pdev = vdev->pdev; int ret; void __iomem *io; if (vdev->barmap[bar]) return 0; ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); if (ret) return ret; io = pci_iomap(pdev, bar, 0); if (!io) { pci_release_selected_regions(pdev, 1 << bar); return -ENOMEM; } vdev->barmap[bar] = io; return 0; } ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { struct pci_dev *pdev = vdev->pdev; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); size_t x_start = 0, x_end = 0; resource_size_t end; void __iomem *io; ssize_t done; if (pci_resource_start(pdev, bar)) end = pci_resource_len(pdev, bar); else if (bar == PCI_ROM_RESOURCE && pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW) end = 0x20000; else return -EINVAL; if (pos >= end) return -EINVAL; count = min(count, (size_t)(end - pos)); if (bar == PCI_ROM_RESOURCE) { /* * The ROM can fill less space than the BAR, so we start the * excluded range at the end of the actual ROM. This makes * filling large ROM BARs much faster. */ io = pci_map_rom(pdev, &x_start); if (!io) return -ENOMEM; x_end = end; } else { int ret = vfio_pci_setup_barmap(vdev, bar); if (ret) return ret; io = vdev->barmap[bar]; } if (bar == vdev->msix_bar) { x_start = vdev->msix_offset; x_end = vdev->msix_offset + vdev->msix_size; } done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite); if (done >= 0) *ppos += done; if (bar == PCI_ROM_RESOURCE) pci_unmap_rom(pdev, io); return done; } ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { int ret; loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; void __iomem *iomem = NULL; unsigned int rsrc; bool is_ioport; ssize_t done; if (!vdev->has_vga) return -EINVAL; if (pos > 0xbfffful) return -EINVAL; switch ((u32)pos) { case 0xa0000 ... 0xbffff: count = min(count, (size_t)(0xc0000 - pos)); iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); off = pos - 0xa0000; rsrc = VGA_RSRC_LEGACY_MEM; is_ioport = false; break; case 0x3b0 ... 0x3bb: count = min(count, (size_t)(0x3bc - pos)); iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1); off = pos - 0x3b0; rsrc = VGA_RSRC_LEGACY_IO; is_ioport = true; break; case 0x3c0 ... 0x3df: count = min(count, (size_t)(0x3e0 - pos)); iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1); off = pos - 0x3c0; rsrc = VGA_RSRC_LEGACY_IO; is_ioport = true; break; default: return -EINVAL; } if (!iomem) return -ENOMEM; ret = vga_get_interruptible(vdev->pdev, rsrc); if (ret) { is_ioport ? ioport_unmap(iomem) : iounmap(iomem); return ret; } done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); vga_put(vdev->pdev, rsrc); is_ioport ? ioport_unmap(iomem) : iounmap(iomem); if (done >= 0) *ppos += done; return done; } static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) { struct vfio_pci_ioeventfd *ioeventfd = opaque; switch (ioeventfd->count) { case 1: vfio_iowrite8(ioeventfd->data, ioeventfd->addr); break; case 2: vfio_iowrite16(ioeventfd->data, ioeventfd->addr); break; case 4: vfio_iowrite32(ioeventfd->data, ioeventfd->addr); break; #ifdef iowrite64 case 8: vfio_iowrite64(ioeventfd->data, ioeventfd->addr); break; #endif } return 0; } long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, uint64_t data, int count, int fd) { struct pci_dev *pdev = vdev->pdev; loff_t pos = offset & VFIO_PCI_OFFSET_MASK; int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset); struct vfio_pci_ioeventfd *ioeventfd; /* Only support ioeventfds into BARs */ if (bar > VFIO_PCI_BAR5_REGION_INDEX) return -EINVAL; if (pos + count > pci_resource_len(pdev, bar)) return -EINVAL; /* Disallow ioeventfds working around MSI-X table writes */ if (bar == vdev->msix_bar && !(pos + count <= vdev->msix_offset || pos >= vdev->msix_offset + vdev->msix_size)) return -EINVAL; #ifndef iowrite64 if (count == 8) return -EINVAL; #endif ret = vfio_pci_setup_barmap(vdev, bar); if (ret) return ret; mutex_lock(&vdev->ioeventfds_lock); list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) { if (ioeventfd->pos == pos && ioeventfd->bar == bar && ioeventfd->data == data && ioeventfd->count == count) { if (fd == -1) { vfio_virqfd_disable(&ioeventfd->virqfd); list_del(&ioeventfd->next); vdev->ioeventfds_nr--; kfree(ioeventfd); ret = 0; } else ret = -EEXIST; goto out_unlock; } } if (fd < 0) { ret = -ENODEV; goto out_unlock; } if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) { ret = -ENOSPC; goto out_unlock; } ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL); if (!ioeventfd) { ret = -ENOMEM; goto out_unlock; } ioeventfd->addr = vdev->barmap[bar] + pos; ioeventfd->data = data; ioeventfd->pos = pos; ioeventfd->bar = bar; ioeventfd->count = count; ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler, NULL, NULL, &ioeventfd->virqfd, fd); if (ret) { kfree(ioeventfd); goto out_unlock; } list_add(&ioeventfd->next, &vdev->ioeventfds_list); vdev->ioeventfds_nr++; out_unlock: mutex_unlock(&vdev->ioeventfds_lock); return ret; }