diff options
author | Bjorn Helgaas <bhelgaas@google.com> | 2019-09-23 16:10:12 -0500 |
---|---|---|
committer | Bjorn Helgaas <bhelgaas@google.com> | 2019-09-23 16:10:12 -0500 |
commit | 63fa8437cbae701c163f692bce6ad6e847910e49 (patch) | |
tree | f72faaf618857ad33128c1c3dc3e580f450227d2 /drivers | |
parent | 3ddbff3676bea1fbd6c1b70e12416e17aaf91a66 (diff) | |
parent | 27235b15e4197b3a1014cf0ca0b08dbbf61a692b (diff) | |
download | linux-63fa8437cbae701c163f692bce6ad6e847910e49.tar.bz2 |
Merge branch 'pci/p2pdma'
- Move P2PCMA PCI bus offset from generic dev_pagemap to
pci_p2pdma_pagemap (Logan Gunthorpe)
- Add provider's pci_dev to pci_p2pdma_pagemap (Logan Gunthorpe)
- Apply host bridge whitelist for ACS (Logan Gunthorpe)
- Whitelist some Intel host bridges for P2PDMA (Logan Gunthorpe)
- Add attrs to pci_p2pdma_map_sg() to match dma_map_sg() (Logan
Gunthorpe)
- Add pci_p2pdma_unmap_sg() (Logan Gunthorpe)
- Store P2PDMA mapping method in xarray (Logan Gunthorpe)
- Map requests that traverse a host bridge (Logan Gunthorpe)
- Allow IOMMU for host bridge whitelist (Logan Gunthorpe)
* pci/p2pdma:
PCI/P2PDMA: Update pci_p2pdma_distance_many() documentation
PCI/P2PDMA: Allow IOMMU for host bridge whitelist
PCI/P2PDMA: dma_map() requests that traverse the host bridge
PCI/P2PDMA: Store mapping method in an xarray
PCI/P2PDMA: Factor out __pci_p2pdma_map_sg()
PCI/P2PDMA: Introduce pci_p2pdma_unmap_sg()
PCI/P2PDMA: Add attrs argument to pci_p2pdma_map_sg()
PCI/P2PDMA: Whitelist some Intel host bridges
PCI/P2PDMA: Factor out host_bridge_whitelist()
PCI/P2PDMA: Apply host bridge whitelist for ACS
PCI/P2PDMA: Factor out __upstream_bridge_distance()
PCI/P2PDMA: Add constants for map type results to upstream_bridge_distance()
PCI/P2PDMA: Add provider's pci_dev to pci_p2pdma_pagemap struct
PCI/P2PDMA: Introduce private pagemap structure
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/infiniband/core/rw.c | 6 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 10 | ||||
-rw-r--r-- | drivers/pci/p2pdma.c | 374 |
3 files changed, 276 insertions, 114 deletions
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index dce06108c8c3..5337393d4dfe 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -583,8 +583,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, break; } - /* P2PDMA contexts do not need to be unmapped */ - if (!is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) + pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg, + sg_cnt, dir); + else ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bb970ca82517..2348b15f6bd0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -547,8 +547,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) WARN_ON_ONCE(!iod->nents); - /* P2PDMA requests do not need to be unmapped */ - if (!is_pci_p2pdma_page(sg_page(iod->sg))) + if (is_pci_p2pdma_page(sg_page(iod->sg))) + pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, + rq_dma_dir(req)); + else dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); @@ -832,8 +834,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out; if (is_pci_p2pdma_page(sg_page(iod->sg))) - nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents, - rq_dma_dir(req)); + nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, + iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); else nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 234476226529..0608aae72ccc 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -18,13 +18,32 @@ #include <linux/percpu-refcount.h> #include <linux/random.h> #include <linux/seq_buf.h> -#include <linux/iommu.h> +#include <linux/xarray.h> + +enum pci_p2pdma_map_type { + PCI_P2PDMA_MAP_UNKNOWN = 0, + PCI_P2PDMA_MAP_NOT_SUPPORTED, + PCI_P2PDMA_MAP_BUS_ADDR, + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; struct pci_p2pdma { struct gen_pool *pool; bool p2pmem_published; + struct xarray map_types; }; +struct pci_p2pdma_pagemap { + struct dev_pagemap pgmap; + struct pci_dev *provider; + u64 bus_offset; +}; + +static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap) +{ + return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap); +} + static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -87,6 +106,7 @@ static void pci_p2pdma_release(void *data) gen_pool_destroy(p2pdma->pool); sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); + xa_destroy(&p2pdma->map_types); } static int pci_p2pdma_setup(struct pci_dev *pdev) @@ -98,6 +118,8 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) if (!p2p) return -ENOMEM; + xa_init(&p2p->map_types); + p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); if (!p2p->pool) goto out; @@ -135,6 +157,7 @@ out: int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset) { + struct pci_p2pdma_pagemap *p2p_pgmap; struct dev_pagemap *pgmap; void *addr; int error; @@ -157,14 +180,18 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, return error; } - pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); - if (!pgmap) + p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); + if (!p2p_pgmap) return -ENOMEM; + + pgmap = &p2p_pgmap->pgmap; pgmap->res.start = pci_resource_start(pdev, bar) + offset; pgmap->res.end = pgmap->res.start + size - 1; pgmap->res.flags = pci_resource_flags(pdev, bar); pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; - pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - + + p2p_pgmap->provider = pdev; + p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar); addr = devm_memremap_pages(&pdev->dev, pgmap); @@ -246,19 +273,32 @@ static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev) seq_buf_printf(buf, "%s;", pci_name(pdev)); } -/* - * If we can't find a common upstream bridge take a look at the root - * complex and compare it to a whitelist of known good hardware. - */ -static bool root_complex_whitelist(struct pci_dev *dev) +static const struct pci_p2pdma_whitelist_entry { + unsigned short vendor; + unsigned short device; + enum { + REQ_SAME_HOST_BRIDGE = 1 << 0, + } flags; +} pci_p2pdma_whitelist[] = { + /* AMD ZEN */ + {PCI_VENDOR_ID_AMD, 0x1450, 0}, + + /* Intel Xeon E5/Core i7 */ + {PCI_VENDOR_ID_INTEL, 0x3c00, REQ_SAME_HOST_BRIDGE}, + {PCI_VENDOR_ID_INTEL, 0x3c01, REQ_SAME_HOST_BRIDGE}, + /* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */ + {PCI_VENDOR_ID_INTEL, 0x2f00, REQ_SAME_HOST_BRIDGE}, + {PCI_VENDOR_ID_INTEL, 0x2f01, REQ_SAME_HOST_BRIDGE}, + {} +}; + +static bool __host_bridge_whitelist(struct pci_host_bridge *host, + bool same_host_bridge) { - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0)); + const struct pci_p2pdma_whitelist_entry *entry; unsigned short vendor, device; - if (iommu_present(dev->dev.bus)) - return false; - if (!root) return false; @@ -266,65 +306,49 @@ static bool root_complex_whitelist(struct pci_dev *dev) device = root->device; pci_dev_put(root); - /* AMD ZEN host bridges can do peer to peer */ - if (vendor == PCI_VENDOR_ID_AMD && device == 0x1450) + for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) { + if (vendor != entry->vendor || device != entry->device) + continue; + if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge) + return false; + return true; + } return false; } /* - * Find the distance through the nearest common upstream bridge between - * two PCI devices. - * - * If the two devices are the same device then 0 will be returned. - * - * If there are two virtual functions of the same device behind the same - * bridge port then 2 will be returned (one step down to the PCIe switch, - * then one step back to the same device). - * - * In the case where two devices are connected to the same PCIe switch, the - * value 4 will be returned. This corresponds to the following PCI tree: - * - * -+ Root Port - * \+ Switch Upstream Port - * +-+ Switch Downstream Port - * + \- Device A - * \-+ Switch Downstream Port - * \- Device B - * - * The distance is 4 because we traverse from Device A through the downstream - * port of the switch, to the common upstream port, back up to the second - * downstream port and then to Device B. - * - * Any two devices that don't have a common upstream bridge will return -1. - * In this way devices on separate PCIe root ports will be rejected, which - * is what we want for peer-to-peer seeing each PCIe root port defines a - * separate hierarchy domain and there's no way to determine whether the root - * complex supports forwarding between them. - * - * In the case where two devices are connected to different PCIe switches, - * this function will still return a positive distance as long as both - * switches eventually have a common upstream bridge. Note this covers - * the case of using multiple PCIe switches to achieve a desired level of - * fan-out from a root port. The exact distance will be a function of the - * number of switches between Device A and Device B. - * - * If a bridge which has any ACS redirection bits set is in the path - * then this functions will return -2. This is so we reject any - * cases where the TLPs are forwarded up into the root complex. - * In this case, a list of all infringing bridge addresses will be - * populated in acs_list (assuming it's non-null) for printk purposes. + * If we can't find a common upstream bridge take a look at the root + * complex and compare it to a whitelist of known good hardware. */ -static int upstream_bridge_distance(struct pci_dev *provider, - struct pci_dev *client, - struct seq_buf *acs_list) +static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b) +{ + struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus); + struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus); + + if (host_a == host_b) + return __host_bridge_whitelist(host_a, true); + + if (__host_bridge_whitelist(host_a, false) && + __host_bridge_whitelist(host_b, false)) + return true; + + return false; +} + +static enum pci_p2pdma_map_type +__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, + int *dist, bool *acs_redirects, struct seq_buf *acs_list) { struct pci_dev *a = provider, *b = client, *bb; int dist_a = 0; int dist_b = 0; int acs_cnt = 0; + if (acs_redirects) + *acs_redirects = false; + /* * Note, we don't need to take references to devices returned by * pci_upstream_bridge() seeing we hold a reference to a child @@ -353,15 +377,10 @@ static int upstream_bridge_distance(struct pci_dev *provider, dist_a++; } - /* - * Allow the connection if both devices are on a whitelisted root - * complex, but add an arbitrary large value to the distance. - */ - if (root_complex_whitelist(provider) && - root_complex_whitelist(client)) - return 0x1000 + dist_a + dist_b; + if (dist) + *dist = dist_a + dist_b; - return -1; + return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; check_b_path_acs: bb = b; @@ -378,33 +397,110 @@ check_b_path_acs: bb = pci_upstream_bridge(bb); } - if (acs_cnt) - return -2; + if (dist) + *dist = dist_a + dist_b; + + if (acs_cnt) { + if (acs_redirects) + *acs_redirects = true; + + return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; + } + + return PCI_P2PDMA_MAP_BUS_ADDR; +} + +static unsigned long map_types_idx(struct pci_dev *client) +{ + return (pci_domain_nr(client->bus) << 16) | + (client->bus->number << 8) | client->devfn; +} + +/* + * Find the distance through the nearest common upstream bridge between + * two PCI devices. + * + * If the two devices are the same device then 0 will be returned. + * + * If there are two virtual functions of the same device behind the same + * bridge port then 2 will be returned (one step down to the PCIe switch, + * then one step back to the same device). + * + * In the case where two devices are connected to the same PCIe switch, the + * value 4 will be returned. This corresponds to the following PCI tree: + * + * -+ Root Port + * \+ Switch Upstream Port + * +-+ Switch Downstream Port + * + \- Device A + * \-+ Switch Downstream Port + * \- Device B + * + * The distance is 4 because we traverse from Device A through the downstream + * port of the switch, to the common upstream port, back up to the second + * downstream port and then to Device B. + * + * Any two devices that cannot communicate using p2pdma will return + * PCI_P2PDMA_MAP_NOT_SUPPORTED. + * + * Any two devices that have a data path that goes through the host bridge + * will consult a whitelist. If the host bridges are on the whitelist, + * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE. + * + * If either bridge is not on the whitelist this function returns + * PCI_P2PDMA_MAP_NOT_SUPPORTED. + * + * If a bridge which has any ACS redirection bits set is in the path, + * acs_redirects will be set to true. In this case, a list of all infringing + * bridge addresses will be populated in acs_list (assuming it's non-null) + * for printk purposes. + */ +static enum pci_p2pdma_map_type +upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, + int *dist, bool *acs_redirects, struct seq_buf *acs_list) +{ + enum pci_p2pdma_map_type map_type; + + map_type = __upstream_bridge_distance(provider, client, dist, + acs_redirects, acs_list); + + if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { + if (!host_bridge_whitelist(provider, client)) + map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; + } + + if (provider->p2pdma) + xa_store(&provider->p2pdma->map_types, map_types_idx(client), + xa_mk_value(map_type), GFP_KERNEL); - return dist_a + dist_b; + return map_type; } -static int upstream_bridge_distance_warn(struct pci_dev *provider, - struct pci_dev *client) +static enum pci_p2pdma_map_type +upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, + int *dist) { struct seq_buf acs_list; + bool acs_redirects; int ret; seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); if (!acs_list.buffer) return -ENOMEM; - ret = upstream_bridge_distance(provider, client, &acs_list); - if (ret == -2) { - pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n", + ret = upstream_bridge_distance(provider, client, dist, &acs_redirects, + &acs_list); + if (acs_redirects) { + pci_warn(client, "ACS redirect is set between the client and provider (%s)\n", pci_name(provider)); /* Drop final semicolon */ acs_list.buffer[acs_list.len-1] = 0; pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n", acs_list.buffer); + } - } else if (ret < 0) { - pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n", + if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) { + pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", pci_name(provider)); } @@ -421,22 +517,22 @@ static int upstream_bridge_distance_warn(struct pci_dev *provider, * @num_clients: number of clients in the array * @verbose: if true, print warnings for devices when we return -1 * - * Returns -1 if any of the clients are not compatible (behind the same - * root port as the provider), otherwise returns a positive number where - * a lower number is the preferable choice. (If there's one client - * that's the same as the provider it will return 0, which is best choice). + * Returns -1 if any of the clients are not compatible, otherwise returns a + * positive number where a lower number is the preferable choice. (If there's + * one client that's the same as the provider it will return 0, which is best + * choice). * - * For now, "compatible" means the provider and the clients are all behind - * the same PCI root port. This cuts out cases that may work but is safest - * for the user. Future work can expand this to white-list root complexes that - * can safely forward between each ports. + * "compatible" means the provider and the clients are either all behind + * the same PCI root port or the host bridges connected to each of the devices + * are listed in the 'pci_p2pdma_whitelist'. */ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose) { bool not_supported = false; struct pci_dev *pci_client; - int distance = 0; + int total_dist = 0; + int distance; int i, ret; if (num_clients == 0) @@ -461,26 +557,26 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, if (verbose) ret = upstream_bridge_distance_warn(provider, - pci_client); + pci_client, &distance); else ret = upstream_bridge_distance(provider, pci_client, - NULL); + &distance, NULL, NULL); pci_dev_put(pci_client); - if (ret < 0) + if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) not_supported = true; if (not_supported && !verbose) break; - distance += ret; + total_dist += distance; } if (not_supported) return -1; - return distance; + return total_dist; } EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); @@ -706,21 +802,19 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); -/** - * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA - * @dev: device doing the DMA request - * @sg: scatter list to map - * @nents: elements in the scatterlist - * @dir: DMA direction - * - * Scatterlists mapped with this function should not be unmapped in any way. - * - * Returns the number of SG entries mapped or 0 on error. - */ -int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider, + struct pci_dev *client) +{ + if (!provider->p2pdma) + return PCI_P2PDMA_MAP_NOT_SUPPORTED; + + return xa_to_value(xa_load(&provider->p2pdma->map_types, + map_types_idx(client))); +} + +static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, + struct device *dev, struct scatterlist *sg, int nents) { - struct dev_pagemap *pgmap; struct scatterlist *s; phys_addr_t paddr; int i; @@ -736,16 +830,80 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents, return 0; for_each_sg(sg, s, nents, i) { - pgmap = sg_page(s)->pgmap; paddr = sg_phys(s); - s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset; + s->dma_address = paddr - p2p_pgmap->bus_offset; sg_dma_len(s) = s->length; } return nents; } -EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg); + +/** + * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA + * @dev: device doing the DMA request + * @sg: scatter list to map + * @nents: elements in the scatterlist + * @dir: DMA direction + * @attrs: DMA attributes passed to dma_map_sg() (if called) + * + * Scatterlists mapped with this function should be unmapped using + * pci_p2pdma_unmap_sg_attrs(). + * + * Returns the number of SG entries mapped or 0 on error. + */ +int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct pci_p2pdma_pagemap *p2p_pgmap = + to_p2p_pgmap(sg_page(sg)->pgmap); + struct pci_dev *client; + + if (WARN_ON_ONCE(!dev_is_pci(dev))) + return 0; + + client = to_pci_dev(dev); + + switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) { + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + return dma_map_sg_attrs(dev, sg, nents, dir, attrs); + case PCI_P2PDMA_MAP_BUS_ADDR: + return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents); + default: + WARN_ON_ONCE(1); + return 0; + } +} +EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs); + +/** + * pci_p2pdma_unmap_sg - unmap a PCI peer-to-peer scatterlist that was + * mapped with pci_p2pdma_map_sg() + * @dev: device doing the DMA request + * @sg: scatter list to map + * @nents: number of elements returned by pci_p2pdma_map_sg() + * @dir: DMA direction + * @attrs: DMA attributes passed to dma_unmap_sg() (if called) + */ +void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct pci_p2pdma_pagemap *p2p_pgmap = + to_p2p_pgmap(sg_page(sg)->pgmap); + enum pci_p2pdma_map_type map_type; + struct pci_dev *client; + + if (WARN_ON_ONCE(!dev_is_pci(dev))) + return; + + client = to_pci_dev(dev); + + map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client); + + if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) + dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); +} +EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs); /** * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store |