diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-07 13:55:45 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-07 13:55:45 -0700 |
commit | d691b7e7d1b5186eae62fd32adee65d3316bfdf6 (patch) | |
tree | 3808f7deab74f68267b9fdd6a35dcda9e50142aa /arch/powerpc/platforms/powernv/pci-ioda.c | |
parent | b59eea554f57befa2aa3172fcb63e521bdd850dd (diff) | |
parent | 1e0fc9d1eb2b0241a03e0a02bcdb9b5b641b9d35 (diff) | |
download | linux-d691b7e7d1b5186eae62fd32adee65d3316bfdf6.tar.bz2 |
Merge tag 'powerpc-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Highlights include:
- Support for STRICT_KERNEL_RWX on 64-bit server CPUs.
- Platform support for FSP2 (476fpe) board
- Enable ZONE_DEVICE on 64-bit server CPUs.
- Generic & powerpc spin loop primitives to optimise busy waiting
- Convert VDSO update function to use new update_vsyscall() interface
- Optimisations to hypercall/syscall/context-switch paths
- Improvements to the CPU idle code on Power8 and Power9.
As well as many other fixes and improvements.
Thanks to: Akshay Adiga, Andrew Donnellan, Andrew Jeffery, Anshuman
Khandual, Anton Blanchard, Balbir Singh, Benjamin Herrenschmidt,
Christophe Leroy, Christophe Lombard, Colin Ian King, Dan Carpenter,
Gautham R. Shenoy, Hari Bathini, Ian Munsie, Ivan Mikhaylov, Javier
Martinez Canillas, Madhavan Srinivasan, Masahiro Yamada, Matt Brown,
Michael Neuling, Michal Suchanek, Murilo Opsfelder Araujo, Naveen N.
Rao, Nicholas Piggin, Oliver O'Halloran, Paul Mackerras, Pavel Machek,
Russell Currey, Santosh Sivaraj, Stephen Rothwell, Thiago Jung
Bauermann, Yang Li"
* tag 'powerpc-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (158 commits)
powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs
powerpc/mm/radix: Implement STRICT_RWX/mark_rodata_ro() for Radix
powerpc/mm/hash: Implement mark_rodata_ro() for hash
powerpc/vmlinux.lds: Align __init_begin to 16M
powerpc/lib/code-patching: Use alternate map for patch_instruction()
powerpc/xmon: Add patch_instruction() support for xmon
powerpc/kprobes/optprobes: Use patch_instruction()
powerpc/kprobes: Move kprobes over to patch_instruction()
powerpc/mm/radix: Fix execute permissions for interrupt_vectors
powerpc/pseries: Fix passing of pp0 in updatepp() and updateboltedpp()
powerpc/64s: Blacklist rtas entry/exit from kprobes
powerpc/64s: Blacklist functions invoked on a trap
powerpc/64s: Un-blacklist system_call() from kprobes
powerpc/64s: Move system_call() symbol to just after setting MSR_EE
powerpc/64s: Blacklist system_call() and system_call_common() from kprobes
powerpc/64s: Convert .L__replay_interrupt_return to a local label
powerpc64/elfv1: Only dereference function descriptor for non-text symbols
cxl: Export library to support IBM XSL
powerpc/dts: Use #include "..." to include local DT
powerpc/perf/hv-24x7: Aggregate result elements on POWER9 SMT8
...
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 133 |
1 files changed, 128 insertions, 5 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 283caf1070c9..437613588df1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1718,6 +1718,100 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev */ } +static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe) +{ + unsigned short vendor = 0; + struct pci_dev *pdev; + + if (pe->device_count == 1) + return true; + + /* pe->pdev should be set if it's a single device, pe->pbus if not */ + if (!pe->pbus) + return true; + + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) { + if (!vendor) { + vendor = pdev->vendor; + continue; + } + + if (pdev->vendor != vendor) + return false; + } + + return true; +} + +/* + * Reconfigure TVE#0 to be usable as 64-bit DMA space. + * + * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses. + * Devices can only access more than that if bit 59 of the PCI address is set + * by hardware, which indicates TVE#1 should be used instead of TVE#0. + * Many PCI devices are not capable of addressing that many bits, and as a + * result are limited to the 4GB of virtual memory made available to 32-bit + * devices in TVE#0. + * + * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit + * devices by configuring the virtual memory past the first 4GB inaccessible + * by 64-bit DMAs. This should only be used by devices that want more than + * 4GB, and only on PEs that have no 32-bit devices. + * + * Currently this will only work on PHB3 (POWER8). + */ +static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe) +{ + u64 window_size, table_size, tce_count, addr; + struct page *table_pages; + u64 tce_order = 28; /* 256MB TCEs */ + __be64 *tces; + s64 rc; + + /* + * Window size needs to be a power of two, but needs to account for + * shifting memory by the 4GB offset required to skip 32bit space. + */ + window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32)); + tce_count = window_size >> tce_order; + table_size = tce_count << 3; + + if (table_size < PAGE_SIZE) + table_size = PAGE_SIZE; + + table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL, + get_order(table_size)); + if (!table_pages) + goto err; + + tces = page_address(table_pages); + if (!tces) + goto err; + + memset(tces, 0, table_size); + + for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) { + tces[(addr + (1ULL << 32)) >> tce_order] = + cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE); + } + + rc = opal_pci_map_pe_dma_window(pe->phb->opal_id, + pe->pe_number, + /* reconfigure window 0 */ + (pe->pe_number << 1) + 0, + 1, + __pa(tces), + table_size, + 1 << tce_order); + if (rc == OPAL_SUCCESS) { + pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n"); + return 0; + } +err: + pe_err(pe, "Error configuring 64-bit DMA bypass\n"); + return -EIO; +} + static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); @@ -1726,6 +1820,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) struct pnv_ioda_pe *pe; uint64_t top; bool bypass = false; + s64 rc; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) return -ENODEV;; @@ -1740,8 +1835,27 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); set_dma_ops(&pdev->dev, &dma_direct_ops); } else { - dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); - set_dma_ops(&pdev->dev, &dma_iommu_ops); + /* + * If the device can't set the TCE bypass bit but still wants + * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to + * bypass the 32-bit region and be usable for 64-bit DMAs. + * The device needs to be able to address all of this space. + */ + if (dma_mask >> 32 && + dma_mask > (memory_hotplug_max() + (1ULL << 32)) && + pnv_pci_ioda_pe_single_vendor(pe) && + phb->model == PNV_PHB_MODEL_PHB3) { + /* Configure the bypass mode */ + rc = pnv_pci_ioda_dma_64bit_bypass(pe); + if (rc) + return rc; + /* 4GB offset bypasses 32-bit space */ + set_dma_offset(&pdev->dev, (1ULL << 32)); + set_dma_ops(&pdev->dev, &dma_direct_ops); + } else { + dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); + set_dma_ops(&pdev->dev, &dma_iommu_ops); + } } *pdev->dev.dma_mask = dma_mask; @@ -3123,13 +3237,13 @@ static int pnv_pci_diag_data_set(void *data, u64 val) phb = hose->private_data; /* Retrieve the diag data from firmware */ - ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, - PNV_PCI_DIAG_BUF_SIZE); + ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, + phb->diag_data_size); if (ret != OPAL_SUCCESS) return -EIO; /* Print the diag data to the kernel log */ - pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data); return 0; } @@ -3725,6 +3839,15 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->model = PNV_PHB_MODEL_UNKNOWN; + /* Initialize diagnostic data buffer */ + prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL); + if (prop32) + phb->diag_data_size = be32_to_cpup(prop32); + else + phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; + + phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0); + /* Parse 32-bit and IO ranges (if any) */ pci_process_bridge_OF_ranges(hose, np, !hose->global_number); |