From 9b4fffa14906fce7aabf1f032ddd7efc7a031bba Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 9 Feb 2016 18:17:48 +1100 Subject: powerpc/powernv: new function to access OPAL msglog Currently, the OPAL msglog/console buffer is exposed as a sysfs file, with the sysfs read handler responsible for retrieving the log from the OPAL buffer. We'd like to be able to use it in xmon as well. Refactor the OPAL msglog code to create a new function, opal_msglog_copy(), that copies to an arbitrary buffer. Separate the initialisation code into generic memcons init and sysfs file creation. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 07a99e638449..9d86c6651716 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -248,6 +248,7 @@ extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); extern void opal_msglog_init(void); +extern void opal_msglog_sysfs_init(void); extern int opal_async_comp_init(void); extern int opal_sensor_init(void); extern int opal_hmi_handler_init(void); @@ -273,6 +274,8 @@ void opal_free_sg_list(struct opal_sg_list *sg); extern int opal_error_code(int rc); +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ -- cgit v1.2.3 From ee8222fe95e40ade9f50b852095d4626631ebbbe Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:16 +0800 Subject: powerpc/powernv: use one M64 BAR in Single PE mode for one VF BAR In the current implementation, when a VF BAR is bigger than 64MB, 4 M64 BARs in Single PE mode are used to cover the number of VFs required to be enabled. By doing so, several VFs end up in one VF group, which leads to interference between VFs in the same group. This patch changes the design to use one M64 BAR in Single PE mode for one VF BAR, which gives absolute isolation between VFs. In this patch, m64_wins is also renamed to m64_map, meaning the index number of the M64 BAR used to map the VF BAR. Based on Gavin's comments, the patch also makes sure the VF BAR size is bigger than 32MB when an M64 BAR is used in Single PE mode.
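As a rough editorial sketch (not part of the patch; the helper below is invented for illustration), the window placement this change introduces reduces to: in Single PE mode each M64 BAR covers exactly one VF's BAR, while in shared mode a single window spans the whole expanded IOV BAR.

/* Hypothetical distillation of the start/size logic in pnv_pci_vf_assign_m64() */
static resource_size_t vf_m64_window_start(resource_size_t res_start,
					   resource_size_t vf_bar_size,
					   int j, bool single_mode)
{
	if (single_mode)
		return res_start + vf_bar_size * j;	/* M64 BAR j maps VF j only */
	return res_start;				/* one window for all VFs */
}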
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 5 +- arch/powerpc/platforms/powernv/pci-ioda.c | 177 ++++++++++++------------------ 2 files changed, 75 insertions(+), 107 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 54843ca5fa2b..11d3543a57f2 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -216,10 +216,9 @@ struct pci_dn { u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ int offset; /* PE# for the first VF PE */ -#define M64_PER_IOV 4 - int m64_per_iov; + bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) - int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV]; + int (*m64_map)[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ #endif struct list_head child_list; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 15e6ff18dcd5..4004c0a842ca 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1190,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void) } #ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pci_dn *pdn; int i, j; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; pdn = pci_get_pdn(pdev); + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) { - if (pdn->m64_wins[i][j] == IODA_INVALID_M64) + for (j = 0; j < m64_bars; j++) { + if (pdn->m64_map[j][i] == IODA_INVALID_M64) continue; opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0); - clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc); - pdn->m64_wins[i][j] = IODA_INVALID_M64; + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); + clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); + pdn->m64_map[j][i] = IODA_INVALID_M64; } + kfree(pdn->m64_map); return 0; } @@ -1229,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) int total_vfs; resource_size_t size, start; int pe_num; - int vf_groups; - int vf_per_group; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1238,26 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); total_vfs = pci_sriov_get_totalvfs(pdev); - /* Initialize the m64_wins to IODA_INVALID_M64 */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) - pdn->m64_wins[i][j] = IODA_INVALID_M64; + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + + pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); + if (!pdn->m64_map) + return -ENOMEM; + /* Initialize the m64_map to IODA_INVALID_M64 */ + for (i = 0; i < m64_bars ; i++) + for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) + pdn->m64_map[i][j] = IODA_INVALID_M64; - if (pdn->m64_per_iov == M64_PER_IOV) { - vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV; - vf_per_group = (num_vfs <= M64_PER_IOV)? 
1: - roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - } else { - vf_groups = 1; - vf_per_group = 1; - } for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || !res->parent) continue; - for (j = 0; j < vf_groups; j++) { + for (j = 0; j < m64_bars; j++) { do { win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, phb->ioda.m64_bar_idx + 1, 0); @@ -1266,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - pdn->m64_wins[i][j] = win; + pdn->m64_map[j][i] = win; - if (pdn->m64_per_iov == M64_PER_IOV) { + if (pdn->m64_single_mode) { size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); - size = size * vf_per_group; start = res->start + size * j; } else { size = resource_size(res); @@ -1279,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) } /* Map the M64 here */ - if (pdn->m64_per_iov == M64_PER_IOV) { + if (pdn->m64_single_mode) { pe_num = pdn->offset + j; rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], 0); + pdn->m64_map[j][i], 0); } rc = opal_pci_set_phb_mem_window(phb->opal_id, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], + pdn->m64_map[j][i], start, 0, /* unused */ size); @@ -1300,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } - if (pdn->m64_per_iov == M64_PER_IOV) + if (pdn->m64_single_mode) rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); else rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); if (rc != OPAL_SUCCESS) { dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", @@ -1317,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); return -EBUSY; } @@ -1344,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); } -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pnv_ioda_pe *pe, *pe_n; struct pci_dn *pdn; - u16 vf_index; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1362,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) if (!pdev->is_physfn) return; - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++){ - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_REMOVE_PE_FROM_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { if 
(pe->parent_dev != pdev) continue; @@ -1425,14 +1399,14 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) num_vfs = pdn->num_vfs; /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev, num_vfs); + pnv_ioda_release_vf_PE(pdev); if (phb->type == PNV_PHB_IODA2) { - if (pdn->m64_per_iov == 1) + if (!pdn->m64_single_mode) pnv_pci_vf_resource_shift(pdev, -pdn->offset); /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); /* Release PE numbers */ bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); @@ -1451,7 +1425,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) int pe_num; u16 vf_index; struct pci_dn *pdn; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1496,37 +1469,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pnv_pci_ioda2_setup_dma_pe(phb, pe); } - - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) { - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) { - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++) { - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_ADD_PE_TO_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - } - } } int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) @@ -1549,6 +1491,15 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return -ENOSPC; } + /* + * When M64 BARs functions in Single PE mode, the number of VFs + * could be enabled must be less than the number of M64 BARs. + */ + if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { + dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); + return -EBUSY; + } + /* Calculate available PE for required VFs */ mutex_lock(&phb->ioda.pe_alloc_mutex); pdn->offset = bitmap_find_next_zero_area( @@ -1576,7 +1527,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) * the IOV BAR according to the PE# allocated to the VFs. * Otherwise, the PE# for the VF will conflict with others. 
*/ - if (pdn->m64_per_iov == 1) { + if (!pdn->m64_single_mode) { ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); if (ret) goto m64_failed; } @@ -1609,8 +1560,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Allocate PCI data */ add_dev_pci_data(pdev); - pnv_pci_sriov_enable(pdev, num_vfs); - return 0; + return pnv_pci_sriov_enable(pdev, num_vfs); } #endif /* CONFIG_PCI_IOV */ @@ -2864,9 +2814,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; + pdn->m64_single_mode = false; total_vfs = pci_sriov_get_totalvfs(pdev); - pdn->m64_per_iov = 1; mul = phb->ioda.total_pe; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { @@ -2886,8 +2836,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) if (size > (1 << 26)) { dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", i, res); - pdn->m64_per_iov = M64_PER_IOV; mul = roundup_pow_of_two(total_vfs); + pdn->m64_single_mode = true; break; } } @@ -2897,8 +2847,14 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) if (!res->flags || res->parent) continue; - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + /* + * On PHB3, the minimum size alignment of M64 BAR in single + * mode is 32MB. + */ + if (pdn->m64_single_mode && (size < SZ_32M)) + goto truncate_iov; + dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); res->end = res->start + size * mul - 1; dev_dbg(&pdev->dev, " %pR\n", res); dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", @@ -3128,6 +3084,8 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); resource_size_t align; @@ -3136,12 +3094,23 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, * SR-IOV. While from hardware perspective, the range mapped by M64 * BAR should be size aligned. * + * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra + * powernv-specific hardware restriction is gone. But if just use the + * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with + * in one segment of M64 #15, which introduces the PE conflict between + * PF and VF. Based on this, the minimum alignment of an IOV BAR is + * m64_segsize. + * * This function returns the total IOV BAR size if M64 BAR is in * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. */ align = pci_iov_resource_size(pdev, resno); if (!pdn->vfs_expanded) return align; + if (pdn->m64_single_mode) + return max(align, (resource_size_t)phb->ioda.m64_segsize); return pdn->vfs_expanded * align; } -- cgit v1.2.3 From be283eeb7f6d9165b3c50f5222123ac25cf0d417 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:19 +0800 Subject: powerpc/powernv: allocate sparse PE# when using M64 BAR in Single PE mode When M64 BAR is set to Single PE mode, the PE# assigned to a VF could be sparse. This patch restructures the code to allocate sparse PE# for VFs when M64 BAR is set to Single PE mode. It also renames offset to pe_num_map, to reflect that its content is the PE number.
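A minimal sketch of the resulting lookup (hypothetical helper, not code from the patch): in Single PE mode pe_num_map is a per-VF array holding sparse PE numbers, otherwise it points to a single base PE#.

/* Assumes the pci_dn layout introduced by this patch */
static int vf_pe_number(struct pci_dn *pdn, u16 vf_index)
{
	if (pdn->m64_single_mode)
		return pdn->pe_num_map[vf_index];	/* sparse PE# per VF */
	return *pdn->pe_num_map + vf_index;		/* contiguous block */
}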
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 81 +++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 11d3543a57f2..b0b43f5fbc5f 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -215,7 +215,7 @@ struct pci_dn { #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ - int offset; /* PE# for the first VF PE */ + int *pe_num_map; /* PE# for the first VF PE or array */ bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) int (*m64_map)[PCI_SRIOV_NUM_BARS]; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0c7e6ba80b07..dc868586315d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1285,7 +1285,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) /* Map the M64 here */ if (pdn->m64_single_mode) { - pe_num = pdn->offset + j; + pe_num = pdn->pe_num_map[j]; rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe_num, OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); @@ -1389,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pnv_phb *phb; struct pci_dn *pdn; struct pci_sriov *iov; - u16 num_vfs; + u16 num_vfs, i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1403,14 +1403,21 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) if (phb->type == PNV_PHB_IODA2) { if (!pdn->m64_single_mode) - pnv_pci_vf_resource_shift(pdev, -pdn->offset); + pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); /* Release M64 windows */ pnv_pci_vf_release_m64(pdev, num_vfs); /* Release PE numbers */ - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); } } @@ -1436,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { - pe_num = pdn->offset + vf_index; + if (pdn->m64_single_mode) + pe_num = pdn->pe_num_map[vf_index]; + else + pe_num = *pdn->pe_num_map + vf_index; pe = &phb->ioda.pe_array[pe_num]; pe->pe_number = pe_num; @@ -1478,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) struct pnv_phb *phb; struct pci_dn *pdn; int ret; + u16 i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1500,20 +1511,44 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return -EBUSY; } + /* Allocating pe_num_map */ + if (pdn->m64_single_mode) + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, + GFP_KERNEL); + else + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); + + if (!pdn->pe_num_map) + return -ENOMEM; + + if (pdn->m64_single_mode) + for (i = 0; i < num_vfs; i++) + pdn->pe_num_map[i] = IODA_INVALID_PE; + /* Calculate available PE for required VFs */ - mutex_lock(&phb->ioda.pe_alloc_mutex); - pdn->offset = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, 
phb->ioda.total_pe, - 0, num_vfs, 0); - if (pdn->offset >= phb->ioda.total_pe) { + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb); + if (pdn->pe_num_map[i] == IODA_INVALID_PE) { + ret = -EBUSY; + goto m64_failed; + } + } + } else { + mutex_lock(&phb->ioda.pe_alloc_mutex); + *pdn->pe_num_map = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (*pdn->pe_num_map >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + kfree(pdn->pe_num_map); + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - pdn->offset = 0; - return -EBUSY; } - bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); pdn->num_vfs = num_vfs; - mutex_unlock(&phb->ioda.pe_alloc_mutex); /* Assign M64 window accordingly */ ret = pnv_pci_vf_assign_m64(pdev, num_vfs); @@ -1528,7 +1563,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) * Otherwise, the PE# for the VF will conflict with others. */ if (!pdn->m64_single_mode) { - ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); if (ret) goto m64_failed; } @@ -1540,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); return ret; } -- cgit v1.2.3 From dc53617c4a3f6ca35641dfd4279720365ce9f4da Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 6 Jan 2016 10:08:25 +0800 Subject: powerpc: atomic: Implement atomic{,64}_*_return_* variants On powerpc, acquire and release semantics can be achieved with lightweight barriers ("lwsync" and "ctrl+isync"), which can be used to implement __atomic_op_{acquire,release}. For release semantics, we only need to ensure that all memory accesses issued before the barrier take effect before the -store- part of the atomics, so "lwsync" is all we need. On platforms without "lwsync", "sync" should be used. Therefore in __atomic_op_release() we use PPC_RELEASE_BARRIER. For acquire semantics, "lwsync" is likewise all we need. However, on platforms without "lwsync", we can use "isync" rather than "sync" as an acquire barrier. Therefore in __atomic_op_acquire() we use PPC_ACQUIRE_BARRIER, which is barrier() on UP, "lwsync" if available and "isync" otherwise. Implement atomic{,64}_{add,sub,inc,dec}_return_relaxed, and build the other variants with these helpers.
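As a conceptual expansion (illustration only, not code from this patch), building the acquire variant of an op from its relaxed form via __atomic_op_acquire() amounts to:

/* atomic_add_return_acquire(a, v) expands roughly to: */
static inline int atomic_add_return_acquire(int a, atomic_t *v)
{
	int ret = atomic_add_return_relaxed(a, v);	/* bare ll/sc loop */
	/* PPC_ACQUIRE_BARRIER: barrier() on UP, lwsync if available, else isync */
	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory");
	return ret;
}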
Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/atomic.h | 147 ++++++++++++++++++++++---------------- 1 file changed, 85 insertions(+), 62 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 55f106ed12bf..a35c27709e05 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -12,6 +12,24 @@ #define ATOMIC_INIT(i) { (i) } +/* + * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with + * a "bne-" instruction at the end, so an isync is enough as a acquire barrier + * on the platform without lwsync. + */ +#define __atomic_op_acquire(op, args...) \ +({ \ + typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ + __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory"); \ + __ret; \ +}) + +#define __atomic_op_release(op, args...) \ +({ \ + __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory"); \ + op##_relaxed(args); \ +}) + static __inline__ int atomic_read(const atomic_t *v) { int t; @@ -42,27 +60,27 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \ : "cc"); \ } \ -#define ATOMIC_OP_RETURN(op, asm_op) \ -static __inline__ int atomic_##op##_return(int a, atomic_t *v) \ +#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ +static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ - PPC_ATOMIC_ENTRY_BARRIER \ -"1: lwarx %0,0,%2 # atomic_" #op "_return\n" \ - #asm_op " %0,%1,%0\n" \ - PPC405_ERR77(0,%2) \ -" stwcx. %0,0,%2 \n" \ +"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ + #asm_op " %0,%2,%0\n" \ + PPC405_ERR77(0, %3) \ +" stwcx. %0,0,%3\n" \ " bne- 1b\n" \ - PPC_ATOMIC_EXIT_BARRIER \ - : "=&r" (t) \ + : "=&r" (t), "+m" (v->counter) \ : "r" (a), "r" (&v->counter) \ - : "cc", "memory"); \ + : "cc"); \ \ return t; \ } -#define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op) +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) @@ -71,8 +89,11 @@ ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, xor) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed + #undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) @@ -92,21 +113,19 @@ static __inline__ void atomic_inc(atomic_t *v) : "cc", "xer"); } -static __inline__ int atomic_inc_return(atomic_t *v) +static __inline__ int atomic_inc_return_relaxed(atomic_t *v) { int t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%1 # atomic_inc_return\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" +" addic %0,%0,1\n" + PPC405_ERR77(0, %2) +" stwcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } @@ -136,25 +155,26 @@ static __inline__ void atomic_dec(atomic_t *v) : "cc", "xer"); } -static __inline__ int atomic_dec_return(atomic_t *v) +static __inline__ int atomic_dec_return_relaxed(atomic_t *v) { int t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%1 # atomic_dec_return\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. 
%0,0,%1\n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" +" addic %0,%0,-1\n" + PPC405_ERR77(0, %2) +" stwcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } +#define atomic_inc_return_relaxed atomic_inc_return_relaxed +#define atomic_dec_return_relaxed atomic_dec_return_relaxed + #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -285,26 +305,27 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \ : "cc"); \ } -#define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long a, atomic64_t *v) \ +#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ +static inline long \ +atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ { \ long t; \ \ __asm__ __volatile__( \ - PPC_ATOMIC_ENTRY_BARRIER \ -"1: ldarx %0,0,%2 # atomic64_" #op "_return\n" \ - #asm_op " %0,%1,%0\n" \ -" stdcx. %0,0,%2 \n" \ +"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \ + #asm_op " %0,%2,%0\n" \ +" stdcx. %0,0,%3\n" \ " bne- 1b\n" \ - PPC_ATOMIC_EXIT_BARRIER \ - : "=&r" (t) \ + : "=&r" (t), "+m" (v->counter) \ : "r" (a), "r" (&v->counter) \ - : "cc", "memory"); \ + : "cc"); \ \ return t; \ } -#define ATOMIC64_OPS(op, asm_op) ATOMIC64_OP(op, asm_op) ATOMIC64_OP_RETURN(op, asm_op) +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) @@ -312,8 +333,11 @@ ATOMIC64_OP(and, and) ATOMIC64_OP(or, or) ATOMIC64_OP(xor, xor) -#undef ATOMIC64_OPS -#undef ATOMIC64_OP_RETURN +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed + +#undef ATOPIC64_OPS +#undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) @@ -332,20 +356,18 @@ static __inline__ void atomic64_inc(atomic64_t *v) : "cc", "xer"); } -static __inline__ long atomic64_inc_return(atomic64_t *v) +static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) { long t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: ldarx %0,0,%1 # atomic64_inc_return\n\ - addic %0,%0,1\n\ - stdcx. %0,0,%1 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n" +" addic %0,%0,1\n" +" stdcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } @@ -374,24 +396,25 @@ static __inline__ void atomic64_dec(atomic64_t *v) : "cc", "xer"); } -static __inline__ long atomic64_dec_return(atomic64_t *v) +static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) { long t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: ldarx %0,0,%1 # atomic64_dec_return\n\ - addic %0,%0,-1\n\ - stdcx. %0,0,%1\n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n" +" addic %0,%0,-1\n" +" stdcx. 
%0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed + #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) -- cgit v1.2.3 From 26760fc19a7e663e4f49d586aca6740fb21d887d Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 15 Dec 2015 22:24:16 +0800 Subject: powerpc: atomic: Implement acquire/release/relaxed variants for xchg Implement xchg{,64}_relaxed and atomic{,64}_xchg_relaxed, based on these _relaxed variants, release/acquire variants and fully ordered versions can be built. Note that xchg{,64}_relaxed and atomic_{,64}_xchg_relaxed are not compiler barriers. Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/atomic.h | 2 ++ arch/powerpc/include/asm/cmpxchg.h | 69 +++++++++++++++++--------------------- 2 files changed, 32 insertions(+), 39 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index a35c27709e05..a19fcdc318ee 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -177,6 +177,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) /** * __atomic_add_unless - add unless the number is a given value @@ -444,6 +445,7 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) /** * atomic64_add_unless - add unless the number is a given value diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d1a8d93cccfd..17c7e14b37ca 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -9,21 +9,20 @@ /* * Atomic exchange * - * Changes the memory location '*ptr' to be val and returns + * Changes the memory location '*p' to be val and returns * the previous value stored there. */ + static __always_inline unsigned long -__xchg_u32(volatile void *p, unsigned long val) +__xchg_u32_local(volatile void *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne- 1b" - PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -31,42 +30,34 @@ __xchg_u32(volatile void *p, unsigned long val) return prev; } -/* - * Atomic exchange - * - * Changes the memory location '*ptr' to be val and returns - * the previous value stored there. - */ static __always_inline unsigned long -__xchg_u32_local(volatile void *p, unsigned long val) +__xchg_u32_relaxed(u32 *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( -"1: lwarx %0,0,%2 \n" - PPC405_ERR77(0,%2) -" stwcx. %3,0,%2 \n\ - bne- 1b" - : "=&r" (prev), "+m" (*(volatile unsigned int *)p) +"1: lwarx %0,0,%2\n" + PPC405_ERR77(0, %2) +" stwcx. 
%3,0,%2\n" +" bne- 1b" : "=&r" (prev), "+m" (*p) : "r" (p), "r" (val) : "cc"); return prev; } #ifdef CONFIG_PPC64 static __always_inline unsigned long -__xchg_u64(volatile void *p, unsigned long val) +__xchg_u64_local(volatile void *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne- 1b" - PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -75,18 +66,18 @@ __xchg_u64(volatile void *p, unsigned long val) } static __always_inline unsigned long -__xchg_u64_local(volatile void *p, unsigned long val) +__xchg_u64_relaxed(u64 *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( -"1: ldarx %0,0,%2 \n" - PPC405_ERR77(0,%2) -" stdcx. %3,0,%2 \n\ - bne- 1b" - : "=&r" (prev), "+m" (*(volatile unsigned long *)p) +"1: ldarx %0,0,%2\n" + PPC405_ERR77(0, %2) +" stdcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) : "r" (p), "r" (val) - : "cc", "memory"); + : "cc"); return prev; } @@ -99,14 +90,14 @@ __xchg_u64_local(volatile void *p, unsigned long val) extern void __xchg_called_with_bad_pointer(void); static __always_inline unsigned long -__xchg(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { case 4: - return __xchg_u32(ptr, x); + return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 case 8: - return __xchg_u64(ptr, x); + return __xchg_u64_local(ptr, x); #endif } __xchg_called_with_bad_pointer(); @@ -114,25 +105,19 @@ __xchg(volatile void *ptr, unsigned long x, unsigned int size) } static __always_inline unsigned long -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { case 4: - return __xchg_u32_local(ptr, x); + return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 case 8: - return __xchg_u64_local(ptr, x); + return __xchg_u64_relaxed(ptr, x); #endif } __xchg_called_with_bad_pointer(); return x; } -#define xchg(ptr,x) \ - ({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ - }) - #define xchg_local(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ @@ -140,6 +125,12 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_relaxed(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ + (unsigned long)_x_, sizeof(*(ptr))); \ +}) /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. -- cgit v1.2.3 From 56c08e6d226c860ad097fa6ba109133228c56722 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 15 Dec 2015 22:24:17 +0800 Subject: powerpc: atomic: Implement acquire/release/relaxed variants for cmpxchg Implement cmpxchg{,64}_relaxed and atomic{,64}_cmpxchg_relaxed, based on which _release variants can be built. To avoid superfluous barriers in _acquire variants, we implement these operations with assembly code rather than use __atomic_op_acquire() to build them automatically. For the same reason, we keep the assembly implementation of fully ordered cmpxchg operations. However, we don't do the same for _release, because that would require putting barriers in the middle of ll/sc loops, which is probably a bad idea. Note cmpxchg{,64}_relaxed and atomic{,64}_cmpxchg_relaxed are not compiler barriers.
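The saving can be seen in the control flow of the asm added below: on a failed compare the code branches straight to the exit label and skips the acquire barrier, whereas a generic __atomic_op_acquire() build would always execute it. A hypothetical caller (not from this patch) that benefits:

/* Lock fast path: acquire ordering is only paid when the CAS succeeds */
static bool try_lock(atomic_t *l)
{
	return atomic_cmpxchg_acquire(l, 0, 1) == 0;
}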
Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/atomic.h | 10 +++ arch/powerpc/include/asm/cmpxchg.h | 149 ++++++++++++++++++++++++++++++++++++- 2 files changed, 158 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index a19fcdc318ee..ae0751ef8788 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -176,6 +176,11 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_dec_return_relaxed atomic_dec_return_relaxed #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg_relaxed(v, o, n) \ + cmpxchg_relaxed(&((v)->counter), (o), (n)) +#define atomic_cmpxchg_acquire(v, o, n) \ + cmpxchg_acquire(&((v)->counter), (o), (n)) + #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) @@ -444,6 +449,11 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) } #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_cmpxchg_relaxed(v, o, n) \ + cmpxchg_relaxed(&((v)->counter), (o), (n)) +#define atomic64_cmpxchg_acquire(v, o, n) \ + cmpxchg_acquire(&((v)->counter), (o), (n)) + #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 17c7e14b37ca..cae4fa85250c 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -181,6 +181,56 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, return prev; } +static __always_inline unsigned long +__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" + PPC405_ERR77(0, %2) +" stwcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +/* + * cmpxchg family don't have order guarantee if cmp part fails, therefore we + * can avoid superfluous barriers if we use assembly code to implement + * cmpxchg() and cmpxchg_acquire(), however we don't do the similar for + * cmpxchg_release() because that will result in putting a barrier in the + * middle of a ll/sc loop, which is probably a bad idea. For example, this + * might cause the conditional store more likely to fail. + */ +static __always_inline unsigned long +__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" + PPC405_ERR77(0, %2) +" stwcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER + "\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __always_inline unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -224,6 +274,46 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, return prev; } + +static __always_inline unsigned long +__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64_relaxed\n" +" cmpd 0,%0,%3\n" +" bne- 2f\n" +" stdcx. 
%4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64_acquire\n" +" cmpd 0,%0,%3\n" +" bne- 2f\n" +" stdcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER + "\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} #endif /* This function doesn't exist, so you'll get a linker error @@ -262,6 +352,37 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, return old; } +static __always_inline unsigned long +__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_relaxed(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_relaxed(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +static __always_inline unsigned long +__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_acquire(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_acquire(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} #define cmpxchg(ptr, o, n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -279,6 +400,23 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, (unsigned long)_n_, sizeof(*(ptr))); \ }) +#define cmpxchg_relaxed(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \ + (unsigned long)_o_, (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg_acquire(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \ + (unsigned long)_o_, (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) #ifdef CONFIG_PPC64 #define cmpxchg64(ptr, o, n) \ ({ \ @@ -290,7 +428,16 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg_local((ptr), (o), (n)); \ }) -#define cmpxchg64_relaxed cmpxchg64_local +#define cmpxchg64_relaxed(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_relaxed((ptr), (o), (n)); \ +}) +#define cmpxchg64_acquire(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_acquire((ptr), (o), (n)); \ +}) #else #include #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) -- cgit v1.2.3 From ce5732a28dd09956540f61ba9d902b63e73a1232 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 19 Feb 2016 11:16:22 +1100 Subject: powerpc/powernv: Create separate subcores CPU feature bit Subcores isn't really part of the 2.07 architecture but currently we turn it on using the 2.07 feature bit. Subcores is really a POWER8 specific feature. This adds a new CPU_FTR bit just for subcores and moves the subcore init code over to use this. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 3 ++- arch/powerpc/platforms/powernv/subcore.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index b118072670fb..a47e175992b0 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -196,6 +196,7 @@ enum { #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000) #ifndef __ASSEMBLY__ @@ -443,7 +444,7 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 503a73f59359..0babef11136f 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -407,7 +407,7 @@ static DEVICE_ATTR(subcores_per_core, 0644, static int subcore_init(void) { - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + if (!cpu_has_feature(CPU_FTR_SUBCORE)) return 0; /* -- cgit v1.2.3 From c3ab300ea55541014348561e7690c41c79966ac6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 19 Feb 2016 11:16:24 +1100 Subject: powerpc: Add POWER9 cputable entry Add a cputable entry for POWER9. More code is required to actually boot and run on a POWER9, but this gets the base piece in, which we can start building on. The entry is copied over from POWER8, except that it: - Adds a new CPU_FTR_ARCH_300 bit to start hanging new architecture features from (in subsequent patches). - Advertises the new user feature bits PPC_FEATURE2_ARCH_3_00 & HAS_IEEE128 when on POWER9. - Drops CPU_FTR_SUBCORE. - Drops PMU code and machine check.
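As a sketch of how the entry below gets selected (simplified; the real matching is done by identify_cpu() walking cpu_specs[]), the PVR mask/value pair works like:

/* Illustration only: .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000 */
static bool pvr_is_power9(unsigned int pvr)
{
	return (pvr & 0xffff0000) == 0x004e0000;
}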
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 17 +++++++++++--- arch/powerpc/include/asm/mmu-hash64.h | 1 + arch/powerpc/include/asm/mmu.h | 1 + arch/powerpc/kernel/cpu_setup_power.S | 44 +++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 27 +++++++++++++++++++++ arch/powerpc/kernel/mce_power.c | 17 +++++++------- 6 files changed, 95 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a47e175992b0..94ace9b4c4e1 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -171,7 +171,7 @@ enum { #define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000) #define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000) #define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000) -/* Free LONG_ASM_CONST(0x0000001000000000) */ +#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000) #define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000) @@ -447,6 +447,16 @@ enum { CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) +#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ + CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_COHERENT_ICACHE | \ + CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -465,7 +475,7 @@ enum { (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ - CPU_FTRS_PA6T | CPU_FTR_VSX) + CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9) #endif #else enum { @@ -516,7 +526,8 @@ enum { (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ - CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE) + CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \ + CPU_FTRS_POWER9) #endif #else enum { diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 7352d3f212df..e36dc90c80d0 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -114,6 +114,7 @@ #define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ #define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */ +#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 3d5abfe6ba67..54d46504733d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -97,6 +97,7 @@ #define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | 
MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index cb3e272a659b..5932219f1e79 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -84,6 +84,39 @@ _GLOBAL(__restore_cpu_power8) mtlr r11 blr +_GLOBAL(__setup_cpu_power9) + mflr r11 + bl __init_FSCR + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH + bl __init_LPCR + bl __init_HFSCR + bl __init_tlb_power9 + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power9) + mflr r11 + bl __init_FSCR + mfmsr r3 + rldicl. r0,r3,4,63 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH + bl __init_LPCR + bl __init_HFSCR + bl __init_tlb_power9 + mtlr r11 + blr + __init_hvmode_206: /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ mfmsr r3 @@ -161,6 +194,17 @@ __init_tlb_power8: ptesync 1: blr +__init_tlb_power9: + li r6,POWER9_TLB_SETS_HASH + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr + __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 7d80bfdfb15e..be4d73053bed 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -70,9 +70,12 @@ extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); +extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power9(void); extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned int action); extern void __flush_tlb_power8(unsigned int action); +extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ @@ -116,6 +119,11 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) +#define COMMON_USER_POWER9 COMMON_USER_POWER8 +#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128) + #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) #else @@ -499,6 +507,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power9 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, diff --git 
a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 2c647b1e62e4..ee62b197502d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -54,8 +54,8 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action) } /* - * Generic routine to flush TLB on power7. This routine is used as - * flush_tlb hook in cpu_spec for Power7 processor. + * Generic routines to flush TLB on POWER processors. These routines + * are used as flush_tlb hook in the cpu_spec. * * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. @@ -65,18 +65,17 @@ void __flush_tlb_power7(unsigned int action) flush_tlb_206(POWER7_TLB_SETS, action); } -/* - * Generic routine to flush TLB on power8. This routine is used as - * flush_tlb hook in cpu_spec for power8 processor. - * - * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. - * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. - */ void __flush_tlb_power8(unsigned int action) { flush_tlb_206(POWER8_TLB_SETS, action); } +void __flush_tlb_power9(unsigned int action) +{ + flush_tlb_206(POWER9_TLB_SETS_HASH, action); +} + + /* flush SLBs and reload */ static void flush_and_reload_slb(void) { -- cgit v1.2.3 From 10d8b1480e6966ba03cd3afad3b5d6eb5e341fae Mon Sep 17 00:00:00 2001 From: pan xinhui Date: Tue, 23 Feb 2016 19:05:01 +0800 Subject: powerpc: Use BUILD_BUG_ON_MSG() for unsupported {cmp}xchg sizes __xchg_called_with_bad_pointer() can't tell us which code uses {cmp}xchg with an unsupported size, and no error is reported until the link stage. To make such problems easier to debug, use BUILD_BUG_ON_MSG() instead. Signed-off-by: pan xinhui [mpe: Tweak change log wording & add relaxed/acquire] Signed-off-by: Michael Ellerman fixup --- arch/powerpc/include/asm/cmpxchg.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index cae4fa85250c..44efe739b6b9 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -5,6 +5,7 @@ #include #include #include +#include /* * Atomic exchange @@ -83,12 +84,6 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) } #endif -/* - * This function doesn't exist, so you'll get a linker error - * if something tries to do an invalid xchg(). - */ -extern void __xchg_called_with_bad_pointer(void); - static __always_inline unsigned long __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) { @@ -100,7 +95,7 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) return __xchg_u64_local(ptr, x); #endif } - __xchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg"); return x; } @@ -115,7 +110,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) return __xchg_u64_relaxed(ptr, x); #endif } - __xchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local"); return x; } #define xchg_local(ptr,x) \ @@ -316,10 +311,6 @@ __cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) } #endif -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid cmpxchg(). 
*/ -extern void __cmpxchg_called_with_bad_pointer(void); - static __always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) @@ -332,7 +323,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, return __cmpxchg_u64(ptr, old, new); #endif } - __cmpxchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg"); return old; } @@ -348,7 +339,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, return __cmpxchg_u64_local(ptr, old, new); #endif } - __cmpxchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_local"); return old; } @@ -364,7 +355,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, return __cmpxchg_u64_relaxed(ptr, old, new); #endif } - __cmpxchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_relaxed"); return old; } @@ -380,7 +371,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, return __cmpxchg_u64_acquire(ptr, old, new); #endif } - __cmpxchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire"); return old; } #define cmpxchg(ptr, o, n) \ -- cgit v1.2.3 From 1ec3f937102154b54c3ba97b79d4bd3931bb0eaa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:12 +1100 Subject: powerpc/mm/book3s-64: Clean up some obsolete or misleading comments No code changes. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 13 ++++++------- arch/powerpc/mm/hash64_64k.c | 3 +-- arch/powerpc/mm/hash_utils_64.c | 10 +++++----- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 8d1c8162f0c1..9a0a4ef53b6d 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -4,8 +4,7 @@ /* * Common bits between 4K and 64K pages in a linux-style PTE. - * These match the bits in the (hardware-defined) PowerPC PTE as closely - * as possible. Additional bits may be defined in pgtable-hash64-*.h + * Additional bits may be defined in pgtable-hash64-*.h * * Note: We only support user read/write permissions. Supervisor always * have full read/write to pages above PAGE_OFFSET (pages below that @@ -14,13 +13,13 @@ * We could create separate kernel read-only if we used the 3 PP bits * combinations that newer processors provide but we currently don't. 
*/ -#define _PAGE_PTE 0x00001 +#define _PAGE_PTE 0x00001 /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */ #define _PAGE_BIT_SWAP_TYPE 2 -#define _PAGE_USER 0x00004 /* matches one of the PP bits */ -#define _PAGE_EXEC 0x00008 /* No execute on POWER4 and newer (we invert) */ -#define _PAGE_GUARDED 0x00010 -/* We can derive Memory coherence from _PAGE_NO_CACHE */ +#define _PAGE_USER 0x00004 /* page may be accessed by userspace */ +#define _PAGE_EXEC 0x00008 /* execute permission */ +#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */ +/* M (memory coherence) is always set in the HPTE, so we don't need it here */ #define _PAGE_COHERENT 0x0 #define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */ #define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index edb09912f0c9..ef6fac6d773c 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -249,8 +249,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, return 0; /* * Try to lock the PTE, add ACCESSED and DIRTY if it was - * a write access. Since this is 4K insert of 64K page size - * also add _PAGE_COMBO + * a write access. */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_RW) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ba59d5977f34..47a0bc12cb65 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -168,11 +168,11 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= HPTE_R_N; /* * PP bits: - * Linux use slb key 0 for kernel and 1 for user. - * kernel areas are mapped by PP bits 00 - * and and there is no kernel RO (_PAGE_KERNEL_RO). - * User area mapped by 0x2 and read only use by - * 0x3. + * Linux uses slb key 0 for kernel and 1 for user. + * kernel areas are mapped with PP=00 + * and there is no kernel RO (_PAGE_KERNEL_RO). + * User area is mapped with PP=0x2 for read/write + * or PP=0x3 for read-only (including writeable but clean pages). */ if (pteflags & _PAGE_USER) { rflags |= 0x2; -- cgit v1.2.3 From f1a9ae034a263d93ea408825afe699aebf37010a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:13 +1100 Subject: powerpc/mm/book3s-64: Free up 7 high-order bits in the Linux PTE This frees up bits 57-63 in the Linux PTE on 64-bit Book 3S machines. In the 4k page case, this is done just by reducing the size of the RPN field to 39 bits, giving 51-bit real addresses. In the 64k page case, we had 10 unused bits in the middle of the PTE, so this moves the RPN field down 10 bits to make use of those unused bits. This means the RPN field is now 3 bits larger at 37 bits, giving 53-bit real addresses in the normal case, or 49-bit real addresses for the special 4k PFN case. We are doing this in order to be able to move some other PTE bits into the positions where PowerISA V3.0 processors will expect to find them in radix-tree mode. Ultimately we will be able to move the RPN field to lower bit positions and make it larger. 
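As a back-of-the-envelope check of those numbers, here is a small standalone sketch (illustrative only, not kernel code; the constants are the 64k-page values quoted above):

#include <stdio.h>

/* Post-patch 64k-page values from this commit. */
#define PTE_RPN_SHIFT	20
#define PTE_RPN_SIZE	37
#define PTE_RPN_MASK	(((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)

int main(void)
{
	/* 37 RPN bits + 16 bits of page offset = 53-bit real addresses;
	 * the special 4k PFN case only has 12 offset bits, giving 49. */
	printf("real address bits: %d (64k), %d (4k PFN)\n",
	       PTE_RPN_SIZE + 16, PTE_RPN_SIZE + 12);
	/* Highest mask bit is 20 + 37 - 1 = 56, so bits 57-63 come free. */
	printf("rpn mask: 0x%016lx\n", PTE_RPN_MASK);
	return 0;
}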
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 1 + arch/powerpc/include/asm/book3s/64/hash-64k.h | 10 ++++++---- arch/powerpc/include/asm/book3s/64/hash.h | 6 +++--- arch/powerpc/include/asm/book3s/64/pgtable.h | 6 +++--- arch/powerpc/mm/pgtable_64.c | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index ea0414d6659e..bee3643fa097 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -53,6 +53,7 @@ /* shift to put page number into pte */ #define PTE_RPN_SHIFT (18) +#define PTE_RPN_SIZE (39) /* gives 51-bit real addresses */ #define _PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 849bbec80f7b..a8c4c2a1940b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -39,10 +39,12 @@ /* Shift to put page number into pte. * - * That gives us a max RPN of 34 bits, which means a max of 50 bits - * of addressable physical space, or 46 bits for the special 4k PFNs. + * That gives us a max RPN of 37 bits, which means a max of 53 bits + * of addressable physical space, or 49 bits for the special 4k PFNs. */ -#define PTE_RPN_SHIFT (30) +#define PTE_RPN_SHIFT (20) +#define PTE_RPN_SIZE (37) + /* * we support 16 fragments per PTE page of 64K size. */ @@ -120,7 +122,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) #define remap_4k_pfn(vma, addr, pfn, prot) \ - (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \ + (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? 
-EINVAL : \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 9a0a4ef53b6d..64eff409b027 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -131,7 +131,7 @@ * The mask convered by the RPN must be a ULL on 32-bit platforms with * 64-bit PTEs */ -#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define PTE_RPN_MASK (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT) /* * _PAGE_CHG_MASK masks of bits that are to be preserved across * pgprot changes @@ -412,13 +412,13 @@ static inline int pte_present(pte_t pte) */ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) { - return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) | + return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) | pgprot_val(pgprot)); } static inline unsigned long pte_pfn(pte_t pte) { - return pte_val(pte) >> PTE_RPN_SHIFT; + return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT; } /* Generic modifiers for PTE bits */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ac07a30a7934..c8240b737d11 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -154,10 +154,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) #define SWP_TYPE_BITS 5 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ & ((1UL << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT) +#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << _PAGE_BIT_SWAP_TYPE) \ - | ((offset) << PTE_RPN_SHIFT) }) + ((type) << _PAGE_BIT_SWAP_TYPE) \ + | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)}) /* * swp_entry_t must be independent of pte bits. We build a swp_entry_t from * swap type and offset we get from swap and convert that to pte to find a diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdf2123d46db..a1bbdfd88630 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -749,7 +749,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { unsigned long pmdv; - pmdv = pfn << PTE_RPN_SHIFT; + pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK; return pmd_set_protbits(__pmd(pmdv), pgprot); } -- cgit v1.2.3 From c61a8843124e353f4ba27c073133868da00e0335 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 23 Feb 2016 13:36:17 +1100 Subject: powerpc/mm/book3s-64: Use physical addresses in upper page table tree levels This changes the Linux page tables to store physical addresses rather than kernel virtual addresses in the upper levels of the tree (pgd, pud and pmd) for 64-bit Book 3S machines. This also changes the hugepd pointers used to implement hugepages when the base page size is 4k to store physical addresses rather than virtual addresses (again just for 64-bit Book3S machines). This frees up some high order bits, and will be needed with PowerISA v3.0 machines which read the page table tree in hardware in radix mode. 
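As a rough sketch of the pattern this introduces (illustrative rather than the exact kernel code; pud_set(), pud_val(), __pa()/__va() and PUD_MASKED_BITS are the existing kernel helpers and masks), a populate/walk pair now converts in both directions:

/* Store: the entry holds the physical address of the pmd table. */
static inline void pud_populate_sketch(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
	pud_set(pud, __pa(pmd));	/* was: (unsigned long)pmd */
}

/* Walk: mask off the low flag bits, then map back to a kernel pointer. */
static inline pmd_t *pud_walk_sketch(pud_t pud)
{
	return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
}

The __pgtable_ptr_val() wrapper added below keeps that one-line difference out of the shared pgalloc code, since nohash builds continue to store virtual addresses.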
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 2 +- arch/powerpc/include/asm/book3s/64/hash.h | 13 +++++++------ arch/powerpc/include/asm/hugetlb.h | 2 +- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 +++ arch/powerpc/include/asm/page.h | 7 +++++++ arch/powerpc/include/asm/pgalloc-64.h | 16 ++++++++-------- arch/powerpc/mm/hugetlbpage.c | 3 +-- 7 files changed, 28 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index bee3643fa097..0425d3e1b7ae 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -64,7 +64,7 @@ #define pgd_none(pgd) (!pgd_val(pgd)) #define pgd_bad(pgd) (pgd_val(pgd) == 0) #define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) static inline void pgd_clear(pgd_t *pgdp) { diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 64eff409b027..5b8ba60032e2 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -222,13 +222,14 @@ #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) #ifndef __ASSEMBLY__ -#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ - || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_bad(pmd) (pmd_val(pmd) & PMD_BAD_BITS) +#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) -#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \ - || (pud_val(pud) & PUD_BAD_BITS)) -#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) +#define pud_bad(pud) (pud_val(pud) & PUD_BAD_BITS) +#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) + +/* Pointers in the page table tree are physical addresses */ +#define __pgtable_ptr_val(ptr) __pa(ptr) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 7eac89b9f02e..42814f0567cc 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -19,7 +19,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd) * We have only four bits to encode, MMU page size */ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return (pte_t *)(hpd.pd & ~HUGEPD_SHIFT_MASK); + return __va(hpd.pd & HUGEPD_ADDR_MASK); } static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index b9f734dd5b81..10debb93c4a4 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -108,6 +108,9 @@ #ifndef __ASSEMBLY__ /* pte_clear moved to later in this file */ +/* Pointers in the page table tree are virtual addresses */ +#define __pgtable_ptr_val(ptr) ((unsigned long)(ptr)) + #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e34124f6fbf2..af7a3422a3ef 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -271,6 +271,13 @@ extern long long virt_phys_offset; #else #define PD_HUGE 0x80000000 #endif + +#else /* 
CONFIG_PPC_BOOK3S_64 */ +/* + * Book3S 64 stores real addresses in the hugepd entries to + * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE. + */ +#define HUGEPD_ADDR_MASK (0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK) #endif /* CONFIG_PPC_BOOK3S_64 */ /* diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 69ef28a81733..7ac59a32c66a 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #ifndef CONFIG_PPC_64K_PAGES -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, __pgtable_ptr_val(PUD)) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -68,19 +68,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_set(pud, (unsigned long)pmd); + pud_set(pud, __pgtable_ptr_val(pmd)); } static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, (unsigned long)pte); + pmd_set(pmd, __pgtable_ptr_val(pte)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)page_address(pte_page)); + pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page))); } #define pmd_pgtable(pmd) pmd_page(pmd) @@ -171,23 +171,23 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); extern void __tlb_remove_table(void *_table); #endif -#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) +#define pud_populate(mm, pud, pmd) pud_set(pud, __pgtable_ptr_val(pmd)) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, (unsigned long)pte); + pmd_set(pmd, __pgtable_ptr_val(pte)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)pte_page); + pmd_set(pmd, __pgtable_ptr_val(pte_page)); } static inline pgtable_t pmd_pgtable(pmd_t pmd) { - return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS); + return (pgtable_t)pmd_page_vaddr(pmd); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 744e24bcb85c..6dd272b6196f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -107,8 +107,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, kmem_cache_free(cachep, new); else { #ifdef CONFIG_PPC_BOOK3S_64 - hpdp->pd = (unsigned long)new | - (shift_to_mmu_psize(pshift) << 2); + hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2); #else hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; #endif -- cgit v1.2.3 From 849f86a630e9c84bf4c9d5dcbfe59dc94b2e15ce Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:15 +1100 Subject: powerpc/mm/book3s-64: Move _PAGE_PRESENT to the most significant bit This changes _PAGE_PRESENT for 64-bit Book 3S processors from 0x2 to 0x8000_0000_0000_0000, because that is where PowerISA v3.0 CPUs in radix mode will expect to find it. 
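One consequence is visible in the pte_present() hunk below: with the flag now in bit 63, implicitly truncating the masked value to int would discard it entirely, so the result has to be normalised first. A minimal illustration (not kernel code; behaviour shown is for the usual 64-bit ABI with 32-bit int):

#define _PAGE_PRESENT	(1ul << 63)

static inline int pte_present_truncated(unsigned long pte)
{
	return pte & _PAGE_PRESENT;	/* low 32 bits are zero: always 0 */
}

static inline int pte_present_ok(unsigned long pte)
{
	return !!(pte & _PAGE_PRESENT);	/* normalised to 0 or 1 */
}

The same concern is why map_kernel_page()'s flags argument widens from int to unsigned long below.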
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-64k.h | 10 +++++----- arch/powerpc/include/asm/book3s/64/hash.h | 5 +++-- arch/powerpc/mm/mmu_decl.h | 3 ++- arch/powerpc/mm/pgtable_64.c | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index a8c4c2a1940b..ed390e1915b7 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -210,30 +210,30 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp) /* * The linux hugepage PMD now include the pmd entries followed by the address * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. - * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per + * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. * - * The last three bits are intentionally left to zero. This memory location + * The top three bits are intentionally left as zero. This memory location * are also used as normal page PTE pointers. So if we have any pointers * left around while we collapse a hugepage, we need to make sure * _PAGE_PRESENT bit of that is zero when we look at them */ static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) { - return (hpte_slot_array[index] >> 3) & 0x1; + return hpte_slot_array[index] & 0x1; } static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, int index) { - return hpte_slot_array[index] >> 4; + return hpte_slot_array[index] >> 1; } static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, unsigned int index, unsigned int hidx) { - hpte_slot_array[index] = hidx << 4 | 0x1 << 3; + hpte_slot_array[index] = (hidx << 1) | 0x1; } /* diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 5b8ba60032e2..36ff107b9469 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -14,7 +14,6 @@ * combinations that newer processors provide but we currently don't. 
*/ #define _PAGE_PTE 0x00001 /* distinguishes PTEs from pointers */ -#define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */ #define _PAGE_BIT_SWAP_TYPE 2 #define _PAGE_USER 0x00004 /* page may be accessed by userspace */ #define _PAGE_EXEC 0x00008 /* execute permission */ @@ -39,6 +38,8 @@ #define _PAGE_SOFT_DIRTY 0x00000 #endif +#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ + /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details @@ -402,7 +403,7 @@ static inline int pte_protnone(pte_t pte) static inline int pte_present(pte_t pte) { - return pte_val(pte) & _PAGE_PRESENT; + return !!(pte_val(pte) & _PAGE_PRESENT); } /* Conversion functions: convert a page and protection to a page entry, diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9f58ff44a075..898d63365cdd 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -110,7 +110,8 @@ extern unsigned long Hash_size, Hash_mask; #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 -extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags); +extern int map_kernel_page(unsigned long ea, unsigned long pa, + unsigned long flags); #endif /* CONFIG_PPC64 */ extern unsigned long ioremap_bot; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index a1bbdfd88630..af304e6d5a89 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -88,7 +88,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int map_kernel_page(unsigned long ea, unsigned long pa, int flags) +int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) { pgd_t *pgdp; pud_t *pudp; -- cgit v1.2.3 From 84c957560a7a8f548aaf6b6b2b15e6d03b7249e2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:16 +1100 Subject: powerpc/mm/book3s-64: Move _PAGE_PTE to 2nd most significant bit This changes _PAGE_PTE for 64-bit Book 3S processors from 0x1 to 0x4000_0000_0000_0000, because that bit is used as the L (leaf) bit by PowerISA v3.0 CPUs in radix mode. The "leaf" bit indicates that the PTE points to a page directly rather than another radix level, which is what the _PAGE_PTE bit means. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 36ff107b9469..14cfd49b2ab0 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -13,7 +13,6 @@ * We could create separate kernel read-only if we used the 3 PP bits * combinations that newer processors provide but we currently don't. 
*/ -#define _PAGE_PTE 0x00001 /* distinguishes PTEs from pointers */ #define _PAGE_BIT_SWAP_TYPE 2 #define _PAGE_USER 0x00004 /* page may be accessed by userspace */ #define _PAGE_EXEC 0x00008 /* execute permission */ @@ -38,6 +37,7 @@ #define _PAGE_SOFT_DIRTY 0x00000 #endif +#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ /* -- cgit v1.2.3 From a9d4996df1f49dc7b45e55060436cace87f1e0d4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:17 +1100 Subject: powerpc/mm/book3s-64: Move HPTE-related bits in PTE to upper end This moves the _PAGE_HASHPTE, _PAGE_F_GIX and _PAGE_F_SECOND fields in the Linux PTE on 64-bit Book 3S systems to the most significant byte. Of the 5 bits, one is a software-use bit and the other four are reserved bit positions in the PowerISA v3.0 radix PTE format. Using these bits is OK because these bits are all to do with tracking the HPTE(s) associated with the Linux PTE, and therefore won't be needed in radix mode. This frees up bit positions in the lower two bytes. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 8 ++++---- arch/powerpc/mm/hugetlbpage-hash64.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 14cfd49b2ab0..a59cfae7b9c2 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -24,11 +24,7 @@ #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ #define _PAGE_RW 0x00200 /* software: user write access allowed */ -#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */ #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ -#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */ -#define _PAGE_F_GIX_SHIFT 12 -#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */ #define _PAGE_SPECIAL 0x10000 /* software: special page */ #ifdef CONFIG_MEM_SOFT_DIRTY @@ -37,6 +33,10 @@ #define _PAGE_SOFT_DIRTY 0x00000 #endif +#define _PAGE_F_GIX_SHIFT 57 +#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */ +#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */ +#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ #define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index e2138c7ae70f..8555fce902fe 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -76,7 +76,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & _PAGE_F_GIX) >> 12; + slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, mmu_psize, ssize, flags) == -1) @@ -105,7 +105,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); + new_pte |= (slot << _PAGE_F_GIX_SHIFT) & + (_PAGE_F_SECOND | _PAGE_F_GIX); } /* -- cgit v1.2.3 From c915df162d09f0957baf56fd3e2f03b8992699a6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras 
Date: Mon, 22 Feb 2016 13:41:18 +1100 Subject: powerpc/mm/book3s-64: Shuffle read, write, execute and user bits in PTE This moves the _PAGE_EXEC, _PAGE_RW and _PAGE_USER bits around in the Linux PTE on 64-bit Book 3S systems to correspond with the bit positions used in radix mode by PowerISA v3.0 CPUs. This also adds a _PAGE_READ bit corresponding to the read permission bit in the radix PTE. _PAGE_READ is currently unused but could possibly be used in future to improve pte_protnone(). Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index a59cfae7b9c2..cd4bf957af5a 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -13,9 +13,12 @@ * We could create separate kernel read-only if we used the 3 PP bits * combinations that newer processors provide but we currently don't. */ -#define _PAGE_BIT_SWAP_TYPE 2 -#define _PAGE_USER 0x00004 /* page may be accessed by userspace */ -#define _PAGE_EXEC 0x00008 /* execute permission */ +#define _PAGE_BIT_SWAP_TYPE 0 + +#define _PAGE_EXEC 0x00001 /* execute permission */ +#define _PAGE_RW 0x00002 /* read & write access allowed */ +#define _PAGE_READ 0x00004 /* read access allowed */ +#define _PAGE_USER 0x00008 /* page may be accessed by userspace */ #define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */ /* M (memory coherence) is always set in the HPTE, so we don't need it here */ #define _PAGE_COHERENT 0x0 @@ -23,7 +26,6 @@ #define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ -#define _PAGE_RW 0x00200 /* software: user write access allowed */ #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ #define _PAGE_SPECIAL 0x10000 /* software: special page */ -- cgit v1.2.3 From e726202f06cbfda4e42d2b32138cf288445ec80d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:19 +1100 Subject: powerpc/mm/book3s-64: Move software-used bits in PTE This moves the _PAGE_SPECIAL and _PAGE_SOFT_DIRTY bits in the Linux PTE on 64-bit Book 3S systems to bit positions which are designated for software use in the radix PTE format used by PowerISA v3.0 CPUs in radix mode. 
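Since 0x200 was only just vacated by the previous patch's move of _PAGE_RW, a compile-time cross-check of the new low-word layout is cheap. A sketch using the post-patch values from the hunk below (BUILD_BUG_ON() is the usual kernel assertion from <linux/bug.h>; _PAGE_SOFT_DIRTY is 0x200 only under CONFIG_MEM_SOFT_DIRTY):

#define _PAGE_ACCESSED		0x00100
#define _PAGE_SOFT_DIRTY	0x00200
#define _PAGE_SPECIAL		0x00400
#define _PAGE_BUSY		0x00800

static inline void check_pte_sw_bits(void)
{
	/* The relocated software bits must not collide with each other
	 * or with the neighbouring referenced/busy bits. */
	BUILD_BUG_ON(_PAGE_SOFT_DIRTY &
		     (_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_BUSY));
	BUILD_BUG_ON(_PAGE_SPECIAL & (_PAGE_ACCESSED | _PAGE_BUSY));
}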
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index cd4bf957af5a..ef9bd68f7e6d 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -26,13 +26,13 @@ #define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ +#define _PAGE_SPECIAL 0x00400 /* software: special page */ #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ -#define _PAGE_SPECIAL 0x10000 /* software: special page */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */ +#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */ #else -#define _PAGE_SOFT_DIRTY 0x00000 +#define _PAGE_SOFT_DIRTY 0x000 #endif #define _PAGE_F_GIX_SHIFT 57 -- cgit v1.2.3 From 8daf51f55febbc755fe20e74320a194e175811a9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 22 Feb 2016 13:41:20 +1100 Subject: powerpc/mm/book3s-64: Expand the real page number field of the Linux PTE Now that other PTE fields have been moved out of the way, we can expand the RPN field of the PTE on 64-bit Book 3S systems and align it with the RPN field in the radix PTE format used by PowerISA v3.0 CPUs in radix mode. For 64k page size, this means we need to move the _PAGE_COMBO and _PAGE_4K_PFN bits. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 4 ++-- arch/powerpc/include/asm/book3s/64/hash-64k.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 0425d3e1b7ae..7f60f7e814d4 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -52,8 +52,8 @@ _PAGE_F_SECOND | _PAGE_F_GIX) /* shift to put page number into pte */ -#define PTE_RPN_SHIFT (18) -#define PTE_RPN_SIZE (39) /* gives 51-bit real addresses */ +#define PTE_RPN_SHIFT (12) +#define PTE_RPN_SIZE (45) /* gives 57-bit real addresses */ #define _PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index ed390e1915b7..8bb03251f34c 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -25,8 +25,8 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define _PAGE_COMBO 0x00040000 /* this is a combo 4k page */ -#define _PAGE_4K_PFN 0x00080000 /* PFN is for a single 4k page */ +#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */ +#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* * Used to track subpage group valid if _PAGE_COMBO is set * This overloads _PAGE_F_GIX and _PAGE_F_SECOND @@ -39,11 +39,11 @@ /* Shift to put page number into pte. * - * That gives us a max RPN of 37 bits, which means a max of 53 bits - * of addressable physical space, or 49 bits for the special 4k PFNs. + * That gives us a max RPN of 41 bits, which means a max of 57 bits + * of addressable physical space, or 53 bits for the special 4k PFNs. 
*/ -#define PTE_RPN_SHIFT (20) -#define PTE_RPN_SIZE (37) +#define PTE_RPN_SHIFT (16) +#define PTE_RPN_SIZE (41) /* * we support 16 fragments per PTE page of 64K size. -- cgit v1.2.3 From 446957ba5127141ee007fc61509e24a9e60853d9 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Wed, 24 Feb 2016 10:51:11 -0800 Subject: powerpc: Fix misspellings in comments. Signed-off-by: Adam Buchbinder Signed-off-by: Michael Ellerman --- arch/powerpc/boot/rs6000.h | 2 +- arch/powerpc/boot/treeboot-akebono.c | 2 +- arch/powerpc/boot/treeboot-currituck.c | 2 +- arch/powerpc/boot/treeboot-iss4xx.c | 2 +- arch/powerpc/crypto/aes-spe-core.S | 4 ++-- arch/powerpc/crypto/aes-spe-glue.c | 2 +- arch/powerpc/include/asm/hydra.h | 2 +- arch/powerpc/include/asm/io.h | 2 +- arch/powerpc/include/asm/machdep.h | 4 ++-- arch/powerpc/include/asm/module.h | 2 +- arch/powerpc/include/asm/pmac_feature.h | 2 +- arch/powerpc/include/asm/reg.h | 8 ++++---- arch/powerpc/include/asm/reg_booke.h | 2 +- arch/powerpc/include/asm/smu.h | 2 +- arch/powerpc/include/asm/uninorth.h | 2 +- arch/powerpc/include/asm/xics.h | 2 +- arch/powerpc/include/uapi/asm/epapr_hcalls.h | 4 ++-- arch/powerpc/kernel/head_44x.S | 2 +- arch/powerpc/kernel/signal.c | 4 ++-- arch/powerpc/kernel/signal.h | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/e500mc.c | 2 +- arch/powerpc/mm/tlb_low_64e.S | 2 +- arch/powerpc/mm/tlb_nohash_low.S | 4 ++-- arch/powerpc/oprofile/op_model_cell.c | 4 ++-- arch/powerpc/perf/hv-24x7.h | 2 +- arch/powerpc/perf/power8-pmu.c | 2 +- arch/powerpc/platforms/52xx/mpc52xx_pci.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_cds.c | 2 +- arch/powerpc/platforms/powermac/cache.S | 2 +- arch/powerpc/platforms/powermac/feature.c | 6 +++--- arch/powerpc/platforms/powernv/idle.c | 6 +++--- arch/powerpc/platforms/powernv/npu-dma.c | 2 +- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/pseries/hvconsole.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/powerpc/sysdev/fsl_rmu.c | 2 +- arch/powerpc/sysdev/i8259.c | 2 +- arch/powerpc/sysdev/mpic.c | 4 ++-- 42 files changed, 56 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h index 433f45084e41..d70517ccc0f7 100644 --- a/arch/powerpc/boot/rs6000.h +++ b/arch/powerpc/boot/rs6000.h @@ -239,5 +239,5 @@ struct external_reloc { #define DEFAULT_DATA_SECTION_ALIGNMENT 4 #define DEFAULT_BSS_SECTION_ALIGNMENT 4 #define DEFAULT_TEXT_SECTION_ALIGNMENT 4 -/* For new sections we havn't heard of before */ +/* For new sections we haven't heard of before */ #define DEFAULT_SECTION_ALIGNMENT 4 diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c index b73174c34fe4..bcc5902f8462 100644 --- a/arch/powerpc/boot/treeboot-akebono.c +++ b/arch/powerpc/boot/treeboot-akebono.c @@ -38,7 +38,7 @@ BSS_STACK(4096); -#define SPRN_PIR 0x11E /* Processor Indentification Register */ +#define SPRN_PIR 0x11E /* Processor Identification Register */ #define USERDATA_LEN 256 /* Length of userdata passed in by PIBS */ #define MAX_RANKS 0x4 #define DDR3_MR0CF 0x80010011U diff --git a/arch/powerpc/boot/treeboot-currituck.c b/arch/powerpc/boot/treeboot-currituck.c index 925ae43b7467..303d2074ee56 100644 --- a/arch/powerpc/boot/treeboot-currituck.c +++ b/arch/powerpc/boot/treeboot-currituck.c @@ -80,7 +80,7 @@ static void ibm_currituck_fixups(void) } } -#define SPRN_PIR 
0x11E /* Processor Indentification Register */ +#define SPRN_PIR 0x11E /* Processor Identification Register */ void platform_init(void) { unsigned long end_of_ram, avail_ram; diff --git a/arch/powerpc/boot/treeboot-iss4xx.c b/arch/powerpc/boot/treeboot-iss4xx.c index 329e710feda2..733f8bf25184 100644 --- a/arch/powerpc/boot/treeboot-iss4xx.c +++ b/arch/powerpc/boot/treeboot-iss4xx.c @@ -59,7 +59,7 @@ static void *iss_4xx_vmlinux_alloc(unsigned long size) return (void *)ibm4xx_memstart; } -#define SPRN_PIR 0x11E /* Processor Indentification Register */ +#define SPRN_PIR 0x11E /* Processor Identification Register */ void platform_init(void) { unsigned long end_of_ram = 0x08000000; diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S index 5dc6bce90a77..bc6ff43a9889 100644 --- a/arch/powerpc/crypto/aes-spe-core.S +++ b/arch/powerpc/crypto/aes-spe-core.S @@ -61,7 +61,7 @@ * via bl/blr. It expects that caller has pre-xored input data with first * 4 words of encryption key into rD0-rD3. Pointer/counter registers must * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3 - * and rW0-rW3 and caller must execute a final xor on the ouput registers. + * and rW0-rW3 and caller must execute a final xor on the output registers. * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing. * */ @@ -209,7 +209,7 @@ ppc_encrypt_block_loop: * via bl/blr. It expects that caller has pre-xored input data with first * 4 words of encryption key into rD0-rD3. Pointer/counter registers must * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3 - * and rW0-rW3 and caller must execute a final xor on the ouput registers. + * and rW0-rW3 and caller must execute a final xor on the output registers. * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing. * */ diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 93ee046d12cd..6d99ebf2ea15 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -32,7 +32,7 @@ * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data * will need an estimated maximum of 20,000 cycles. Headroom for cache misses * included. Even with the low end model clocked at 667 MHz this equals to a - * critical time window of less than 30us. The value has been choosen to + * critical time window of less than 30us. The value has been chosen to * process a 512 byte disk block in one or a large 1400 bytes IPsec network * packet in two runs. * diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 1cb39c96d155..b3b0f2d020f0 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -89,7 +89,7 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_EXT2 13 /* PCI IRQX */ #define HYDRA_INT_EXT3 14 /* PCI IRQY */ #define HYDRA_INT_EXT4 15 /* PCI IRQZ */ -#define HYDRA_INT_EXT5 16 /* IDE Primay/Secondary */ +#define HYDRA_INT_EXT5 16 /* IDE Primary/Secondary */ #define HYDRA_INT_EXT6 17 /* IDE Secondary */ #define HYDRA_INT_EXT7 18 /* Power Off Request */ #define HYDRA_INT_SPARE 19 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 6c1297ec374c..2fd1690b79d2 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -300,7 +300,7 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * When CONFIG_PPC_INDIRECT_MMIO is set, the platform can provide hooks * on all MMIOs. 
(Note that this is all 64 bits only for now) * - * To help platforms who may need to differenciate MMIO addresses in + * To help platforms who may need to differentiate MMIO addresses in * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 3f191f573d4f..5c38e49ddd42 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -174,11 +174,11 @@ struct machdep_calls { platform, called once per cpu. */ void (*enable_pmcs)(void); - /* Set DABR for this platform, leave empty for default implemenation */ + /* Set DABR for this platform, leave empty for default implementation */ int (*set_dabr)(unsigned long dabr, unsigned long dabrx); - /* Set DAWR for this platform, leave empty for default implemenation */ + /* Set DAWR for this platform, leave empty for default implementation */ int (*set_dawr)(unsigned long dawr, unsigned long dawrx); diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index dcfcad139bcc..5f1526fddccf 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -19,7 +19,7 @@ * Thanks to Paul M for explaining this. * * PPC can only do rel jumps += 32MB, and often the kernel and other - * modules are furthur away than this. So, we jump to a table of + * modules are further away than this. So, we jump to a table of * trampolines attached to the module (the Procedure Linkage Table) * whenever that happens. */ diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 10902c9375d0..925697968946 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -46,7 +46,7 @@ /* PowerSurge are the first generation of PCI Pmacs. This include * all of the Grand-Central based machines. We currently don't - * differenciate most of them. + * differentiate most of them. */ #define PMAC_TYPE_PSURGE 0x10 /* PowerSurge */ #define PMAC_TYPE_ANS 0x11 /* Apple Network Server */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c4cb2ffc624e..11a81bd5dabd 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -376,7 +376,7 @@ #define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ -#define SPRN_DER 0x095 /* Debug Enable Regsiter */ +#define SPRN_DER 0x095 /* Debug Enable Register */ #define DER_RSTE 0x40000000 /* Reset Interrupt */ #define DER_CHSTPE 0x20000000 /* Check Stop */ #define DER_MCIE 0x10000000 /* Machine Check Interrupt */ @@ -401,7 +401,7 @@ #define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */ #define SPRN_EAR 0x11A /* External Address Register */ #define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ -#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ +#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Register */ #define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ #define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */ #define HID0_EMCP (1<<31) /* Enable Machine Check pin */ @@ -514,7 +514,7 @@ #define ICTRL_EICP 0x00000100 /* enable icache par. 
check */ #define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */ #define SPRN_IMMR 0x27E /* Internal Memory Map Register */ -#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */ +#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Register */ #define SPRN_L2CR2 0x3f8 #define L2CR_L2E 0x80000000 /* L2 enable */ #define L2CR_L2PE 0x40000000 /* L2 parity enable */ @@ -549,7 +549,7 @@ #define L2CR_L2DO_745x 0x00010000 /* L2 data only (745x) */ #define L2CR_L2REP_745x 0x00001000 /* L2 repl. algorithm (745x) */ #define L2CR_L2HWF_745x 0x00000800 /* L2 hardware flush (745x) */ -#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */ +#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Register */ #define L3CR_L3E 0x80000000 /* L3 enable */ #define L3CR_L3PE 0x40000000 /* L3 data parity enable */ #define L3CR_L3APE 0x20000000 /* L3 addr parity enable */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2fef74b474f0..737e012ef56e 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -681,7 +681,7 @@ #define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */ #define SPRN_TBHI 0x3DC /* Time Base High */ #define SPRN_TBLO 0x3DD /* Time Base Low */ -#define SPRN_DBCR 0x3F2 /* Debug Control Regsiter */ +#define SPRN_DBCR 0x3F2 /* Debug Control Register */ #define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */ #define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */ #define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 37d2da6feabf..f280dd11243f 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -154,7 +154,7 @@ * * The Darwin I2C driver is less subtle though. On any non-success status * from the response command, it waits 5ms and tries again up to 20 times, - * it doesn't differenciate between fatal errors or "busy" status. + * it doesn't differentiate between fatal errors or "busy" status. * * This driver provides an asynchronous paramblock based i2c command * interface to be used either directly by low level code or by a higher diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h index d12b11d7641e..a1d112979fd2 100644 --- a/arch/powerpc/include/asm/uninorth.h +++ b/arch/powerpc/include/asm/uninorth.h @@ -132,7 +132,7 @@ /* This one _might_ return the CPU number of the CPU reading it; * the bootROM decides whether to boot or to sleep/spinloop depending - * on this register beeing 0 or not + * on this register being 0 or not */ #define UNI_N_CPU_NUMBER 0x0050 diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0e25bdb190bb..5d61bbced6a1 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -1,5 +1,5 @@ /* - * Common definitions accross all variants of ICP and ICS interrupt + * Common definitions across all variants of ICP and ICS interrupt * controllers. 
*/ diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h index 7f9c74b46704..b4504f394427 100644 --- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h +++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h @@ -78,7 +78,7 @@ #define EV_SUCCESS 0 #define EV_EPERM 1 /* Operation not permitted */ #define EV_ENOENT 2 /* Entry Not Found */ -#define EV_EIO 3 /* I/O error occured */ +#define EV_EIO 3 /* I/O error occurred */ #define EV_EAGAIN 4 /* The operation had insufficient * resources to complete and should be * retried @@ -89,7 +89,7 @@ #define EV_ENODEV 7 /* No such device */ #define EV_EINVAL 8 /* An argument supplied to the hcall was out of range or invalid */ -#define EV_INTERNAL 9 /* An internal error occured */ +#define EV_INTERNAL 9 /* An internal error occurred */ #define EV_CONFIG 10 /* A configuration error was detected */ #define EV_INVALID_STATE 11 /* The object is in an invalid state */ #define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b5061abbd2e0..9cdf5c71e426 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -806,7 +806,7 @@ _GLOBAL(set_context) _GLOBAL(init_cpu_state) mflr r22 #ifdef CONFIG_PPC_47x - /* We use the PVR to differenciate 44x cores from 476 */ + /* We use the PVR to differentiate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 cmplwi cr0,r3,PVR_476FPE@h diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf8c7e4e0b21..cb64d6feb45a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -1,7 +1,7 @@ /* * Common signal handling code for both 32 and 64 bits * - * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c * * This file is subject to the terms and conditions of the GNU General @@ -178,7 +178,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) * need to use the stack pointer from the checkpointed state, rather * than the speculated state. This ensures that the signal context * (written tm suspended) will be written below the stack required for - * the rollback. The transaction is aborted becuase of the treclaim, + * the rollback. The transaction is aborted because of the treclaim, * so any memory written between the tbegin and the signal will be * rolled back anyway. * diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 51b274199dd9..be305c858e51 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c * * This file is subject to the terms and conditions of the GNU General diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4e5c11d4d19d..88414dde7e35 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1402,7 +1402,7 @@ void facility_unavailable_exception(struct pt_regs *regs) * is a read DSCR attempt through a mfspr instruction, we * just emulate the instruction instead. This code path will * always emulate all the mfspr instructions till the user - * has attempted atleast one mtspr instruction. This way it + * has attempted at least one mtspr instruction. 
This way it * preserves the same behaviour when the user is accessing * the DSCR through privilege level only SPR number (0x11) * which is emulated through illegal instruction exception. diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 905e94a1370f..46871d554057 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -432,7 +432,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * the whole masked_pending business which is about not * losing interrupts that occur while masked. * - * I don't differenciate normal deliveries and resends, this + * I don't differentiate normal deliveries and resends, this * implementation will differ from PAPR and not lose such * interrupts. */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 778ef86e187e..4d66f44a1657 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -992,7 +992,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_restart_interrupt(vcpu, exit_nr); /* - * get last instruction before beeing preempted + * get last instruction before being preempted * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA */ switch (exit_nr) { diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index cda695de8aa7..f48a0c22e8f9 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -182,7 +182,7 @@ int kvmppc_core_check_processor_compat(void) r = 0; #ifdef CONFIG_ALTIVEC /* - * Since guests have the priviledge to enable AltiVec, we need AltiVec + * Since guests have the privilege to enable AltiVec, we need AltiVec * support in the host to save/restore their context. * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit * because it's cleared in the absence of CONFIG_ALTIVEC! 
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 29d6987c37ba..eb82d787d99a 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -895,7 +895,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) BEGIN_MMU_FTR_SECTION virt_page_table_tlb_miss_done: - /* We have overriden MAS2:EPN but currently our primary TLB miss + /* We have overridden MAS2:EPN but currently our primary TLB miss * handler will always restore it so that should not be an issue, * if we ever optimize the primary handler to not write MAS2 on * some cases, we'll have to restore MAS2:EPN here based on the diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 68c477592e43..eabecfcaef7c 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -108,7 +108,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) blr 2: #ifdef CONFIG_PPC_47x - oris r7,r6,0x8000 /* specify way explicitely */ + oris r7,r6,0x8000 /* specify way explicitly */ clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ ori r4,r4,PPC47x_TLBE_SIZE tlbwe r4,r7,0 /* write it */ @@ -149,7 +149,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) li r3,-1 /* Current set */ lis r10,tlb_47x_boltmap@h ori r10,r10,tlb_47x_boltmap@l - lis r7,0x8000 /* Specify way explicitely */ + lis r7,0x8000 /* Specify way explicitly */ b 9f /* For each set */ diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 863d89386f60..c82497a31c54 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -208,7 +208,7 @@ static void pm_rtas_reset_signals(u32 node) /* * The debug bus is being set to the passthru disable state. - * However, the FW still expects atleast one legal signal routing + * However, the FW still expects at least one legal signal routing * entry or it will return an error on the arguments. If we don't * supply a valid entry, we must ignore all return values. Ignoring * all return values means we might miss an error we should be @@ -1008,7 +1008,7 @@ static int initial_lfsr[] = { * * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit * LFSR sequence is broken into four ranges. The spacing of the precomputed - * values is adjusted in each range so the error between the user specifed + * values is adjusted in each range so the error between the user specified * number (N) of events between samples and the actual number of events based * on the precomputed value will be les then about 6.2%. Note, if the user * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. 
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index 0f9fa21a29f2..c57d67dc9f3f 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -80,7 +80,7 @@ struct hv_24x7_result { __u8 results_complete; __be16 num_elements_returned; - /* This is a copy of @data_size from the coresponding hv_24x7_request */ + /* This is a copy of @data_size from the corresponding hv_24x7_request */ __be16 result_element_data_size; __u8 reserved[0x2]; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 9958ba8bf0d2..be9b7aec216f 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -415,7 +415,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - /* In continous sampling mode, update SDAR on TLB miss */ + /* In continuous sampling mode, update SDAR on TLB miss */ mmcra = MMCRA_SDAR_MODE_TLB; mmcr1 = mmcr2 = 0; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c index 6eb3b2abae90..00282c2b0cae 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -319,7 +319,7 @@ mpc52xx_pci_setup(struct pci_controller *hose, tmp = in_be32(&pci_regs->gscr); #if 0 - /* Reset the exteral bus ( internal PCI controller is NOT resetted ) */ + /* Reset the external bus ( internal PCI controller is NOT reset ) */ /* Not necessary and can be a bad thing if for example the bootloader is displaying a splash screen or ... Just left here for documentation purpose if anyone need it */ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 5ac70de3e48a..d7e87ff912d7 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -99,7 +99,7 @@ static void mpc85xx_cds_restart(char *cmd) pci_read_config_byte(dev, 0x47, &tmp); /* - * At this point, the harware reset should have triggered. + * At this point, the hardware reset should have triggered. * However, if it doesn't work for some mysterious reason, * just fall through to the default reset below. */ diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index 6be1a4af3359..cc5347eb1662 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -23,7 +23,7 @@ * when going to sleep, when doing a PMU based cpufreq transition, * or when "offlining" a CPU on SMP machines. This code is over * paranoid, but I've had enough issues with various CPU revs and - * bugs that I decided it was worth beeing over cautious + * bugs that I decided it was worth being over cautious */ _GLOBAL(flush_disable_caches) diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 4882bfd90e27..1e02328c3f2d 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -198,7 +198,7 @@ static long ohare_htw_scc_enable(struct device_node *node, long param, if (htw) { /* Side effect: this will also power up the * modem, but it's too messy to figure out on which - * ports this controls the tranceiver and on which + * ports this controls the transceiver and on which * it controls the modem */ if (trans) @@ -463,7 +463,7 @@ static long heathrow_sound_enable(struct device_node *node, long param, unsigned long flags; /* B&W G3 and Yikes don't support that properly (the - * sound appear to never come back after beeing shut down). 
+ * sound appear to never come back after being shut down). */ if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE || pmac_mb.model_id == PMAC_TYPE_YIKES) @@ -2770,7 +2770,7 @@ set_initial_features(void) * but I'm not too sure it was audited for side-effects on other * ohare based machines... * Since I still have difficulties figuring the right way to - * differenciate them all and since that hack was there for a long + * differentiate them all and since that hack was there for a long * time, I'll keep it around */ if (macio_chips[0].type == macio_ohare) { diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 15bfbcd5debc..fcc8b6861b63 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -35,9 +35,9 @@ int pnv_save_sprs_for_winkle(void) int rc; /* - * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across * all cpus at boot. Get these reg values of current cpu and use the - * same accross all cpus. + * same across all cpus. */ uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; uint64_t hid0_val = mfspr(SPRN_HID0); @@ -185,7 +185,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, * fastsleep workaround needs to be left in 'applied' state on all * the cores. Do this by- * 1. Patching out the call to 'undo' workaround in fastsleep exit path - * 2. Sending ipi to all the cores which have atleast one online thread + * 2. Sending ipi to all the cores which have at least one online thread * 3. Patching out the call to 'apply' workaround in fastsleep entry * path * There is no need to send ipi to cores which have all threads diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index e85aa900f5c0..7229acd9bb3a 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -278,7 +278,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) /* * Enable/disable bypass mode on the NPU. The NPU only supports one - * window per link, so bypass needs to be explicity enabled or + * window per link, so bypass needs to be explicitly enabled or * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be * active at the same time. */ diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 638c4060938e..b831638e6f4a 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -78,7 +78,7 @@ struct ps3_bmp { /** * struct ps3_private - a per cpu data structure * @bmp: ps3_bmp structure - * @bmp_lock: Syncronize access to bmp. + * @bmp_lock: Synchronize access to bmp. * @ipi_debug_brk_mask: Mask for debug break IPIs * @ppe_id: HV logical_ppe_id * @thread_id: HV thread_id diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 849b29b3e9ae..74da18de853a 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -31,7 +31,7 @@ #include /** - * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper + * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter * @vtermno: The vtermno or unit_address of the adapter from which to fetch the * data. 
* @buf: The character buffer into which to put the character data fetched from diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 36df46eaba24..6e944fc6e5f9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -515,7 +515,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); - /* By default, only probe PCI (can be overriden by rtas_pci) */ + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); /* Find and initialize PCI host bridges */ diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c69e88e91459..85729f49764f 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -575,7 +575,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { /* use fsl_indirect_read_config for PCIe */ hose->ops = &fsl_indirect_pcie_ops; - /* For PCIE read HEADER_TYPE to identify controler mode */ + /* For PCIE read HEADER_TYPE to identify controller mode */ early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type); if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) goto no_bridge; diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index b48197ae44d0..ffe0ee832768 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -570,7 +570,7 @@ int fsl_rio_port_write_init(struct fsl_rio_pw *pw) out_be32(&pw->pw_regs->pwsr, (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD)); - /* Configure port write contoller for snooping enable all reporting, + /* Configure port write controller for snooping enable all reporting, clear queue full */ out_be32(&pw->pw_regs->pwmr, RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 6f99ed3967fd..aa2c186d3115 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -238,7 +238,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr) /* init master interrupt controller */ outb(0x11, 0x20); /* Start init sequence */ outb(0x00, 0x21); /* Vector base */ - outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */ + outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */ outb(0x01, 0x21); /* Select 8086 mode */ /* init slave interrupt controller */ diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 2a0452e364ba..afe3c7cd395d 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -2,7 +2,7 @@ * arch/powerpc/kernel/mpic.c * * Driver for interrupt controllers following the OpenPIC standard, the - * common implementation beeing IBM's MPIC. This driver also can deal + * common implementation being IBM's MPIC. This driver also can deal * with various broken implementations of this HW. * * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. 
@@ -1657,7 +1657,7 @@ void __init mpic_init(struct mpic *mpic) } } - /* FSL mpic error interrupt intialization */ + /* FSL mpic error interrupt initialization */ if (mpic->flags & MPIC_FSL_HAS_EIMR) mpic_err_int_init(mpic, MPIC_FSL_ERR_INT); } -- cgit v1.2.3 From 27828f98a0522ad4a745a80407d051e5874c8d93 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 9 Feb 2016 13:32:41 +1000 Subject: powerpc/mm: Handle removing maybe-present bolted HPTEs At the moment the hpte_removebolted callback in ppc_md returns void and will BUG_ON() if the hpte it's asked to remove doesn't exist in the first place. This is awkward for the case of cleaning up a mapping which was partially made before failing. So, we add a return value to hpte_removebolted, and have it return ENOENT in the case that the HPTE to remove didn't exist in the first place. In the (sole) caller, we propagate errors in hpte_removebolted to its caller to handle. However, we handle ENOENT specially, continuing to complete the unmapping over the specified range before returning the error to the caller. This means that htab_remove_mapping() will work sanely on a partially present mapping, removing any HPTEs which are present, while also returning ENOENT to its caller in case it's important there. There are two callers of htab_remove_mapping(): - In remove_section_mapping() we already WARN_ON() any error return, which is reasonable - in this case the mapping should be fully present - In vmemmap_remove_mapping() we BUG_ON() any error. We change that to just a WARN_ON() in the case of ENOENT, since failing to remove a mapping that wasn't there in the first place probably shouldn't be fatal. Signed-off-by: David Gibson Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/mm/hash_utils_64.c | 15 ++++++++++++--- arch/powerpc/mm/init_64.c | 9 +++++---- arch/powerpc/platforms/pseries/lpar.c | 9 ++++++--- 4 files changed, 24 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5c38e49ddd42..fd22442d30a9 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -54,7 +54,7 @@ struct machdep_calls { int psize, int apsize, int ssize); long (*hpte_remove)(unsigned long hpte_group); - void (*hpte_removebolted)(unsigned long ea, + int (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); void (*hugepage_invalidate)(unsigned long vsid, diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 70490c41e14e..44f145a66578 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -269,6 +269,8 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, { unsigned long vaddr; unsigned int step, shift; + int rc; + int ret = 0; shift = mmu_psize_defs[psize].shift; step = 1 << shift; @@ -276,10 +278,17 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, if (!ppc_md.hpte_removebolted) return -ENODEV; - for (vaddr = vstart; vaddr < vend; vaddr += step) - ppc_md.hpte_removebolted(vaddr, psize, ssize); + for (vaddr = vstart; vaddr < vend; vaddr += step) { + rc = ppc_md.hpte_removebolted(vaddr, psize, ssize); + if (rc == -ENOENT) { + ret = -ENOENT; + continue; + } + if (rc < 0) + return rc; + } - return 0; + return ret; } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/init_64.c 
b/arch/powerpc/mm/init_64.c index 379a6a90644b..baa1a23488d3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -232,10 +232,11 @@ static void __meminit vmemmap_create_mapping(unsigned long start, static void vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { - int mapped = htab_remove_mapping(start, start + page_size, - mmu_vmemmap_psize, - mmu_kernel_ssize); - BUG_ON(mapped < 0); + int rc = htab_remove_mapping(start, start + page_size, + mmu_vmemmap_psize, + mmu_kernel_ssize); + BUG_ON((rc < 0) && (rc != -ENOENT)); + WARN_ON(rc == -ENOENT); } #endif diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 477290ad855e..2415a0d31f8f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -505,8 +505,8 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, } #endif -static void pSeries_lpar_hpte_removebolted(unsigned long ea, - int psize, int ssize) +static int pSeries_lpar_hpte_removebolted(unsigned long ea, + int psize, int ssize) { unsigned long vpn; unsigned long slot, vsid; @@ -515,11 +515,14 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, vpn = hpt_vpn(ea, vsid, ssize); slot = pSeries_lpar_hpte_find(vpn, psize, ssize); - BUG_ON(slot == -1); + if (slot == -1) + return -ENOENT; + /* * lpar doesn't use the passed actual page size */ pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); + return 0; } /* -- cgit v1.2.3 From 5c3c7ede2bdcb85fa2fd51c8147cdf70ebc17fcb Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 9 Feb 2016 13:32:43 +1000 Subject: powerpc/mm: Split hash page table sizing heuristic into a helper htab_get_table_size() either retrieves the size of the hash page table (HPT) from the device tree - if the HPT size is determined by firmware - or uses a heuristic to determine a good size based on RAM size if the kernel is responsible for allocating the HPT. To support a PAPR extension allowing resizing of the HPT, we're going to want the memory size -> HPT size logic elsewhere, so split it out into a helper function.
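To see what the heuristic does in practice, here is a minimal userspace re-derivation of the helper; the 4K base page size and the local ilog2 stand-in are assumptions of this sketch, not kernel API:

    #include <stdio.h>

    /* Local stand-in for the kernel's __ilog2(); sketch only. */
    static unsigned ilog2_ul(unsigned long x)
    {
            unsigned shift = 0;
            while ((1UL << (shift + 1)) <= x)
                    shift++;
            return shift;
    }

    /* Mirrors the heuristic: aim for 2 pages per 128-byte PTEG,
     * with a floor of 2^18 bytes (the architectural minimum). */
    static unsigned htab_shift_for_mem_size(unsigned long mem_size)
    {
            unsigned memshift = ilog2_ul(mem_size);
            unsigned pshift = 12;   /* assumed 4K base page size */
            unsigned pteg_shift;

            if ((1UL << memshift) < mem_size)
                    memshift += 1;  /* round up to a power of 2 */
            pteg_shift = memshift - (pshift + 1);
            return (pteg_shift + 7) > 18 ? pteg_shift + 7 : 18;
    }

    int main(void)
    {
            /* 8GB of RAM -> shift 27, i.e. a 128MB hash table. */
            printf("%u\n", htab_shift_for_mem_size(8UL << 30));
            return 0;
    }

Under these assumptions a machine with 8GB of RAM gets a 2^27 = 128MB hash table, while anything at or below 8MB of RAM is clamped to the 256KB floor.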
Signed-off-by: David Gibson Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-hash64.h | 3 +++ arch/powerpc/mm/hash_utils_64.c | 34 +++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index e36dc90c80d0..0cea4807e26f 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -608,6 +608,9 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; return get_vsid(context, ea, ssize); } + +unsigned htab_shift_for_mem_size(unsigned long mem_size); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 335cd4115646..90dd9280894f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -609,10 +609,28 @@ static int __init htab_dt_scan_pftsize(unsigned long node, return 0; } -static unsigned long __init htab_get_table_size(void) +unsigned htab_shift_for_mem_size(unsigned long mem_size) { - unsigned long mem_size, rnd_mem_size, pteg_count, psize; + unsigned memshift = __ilog2(mem_size); + unsigned pshift = mmu_psize_defs[mmu_virtual_psize].shift; + unsigned pteg_shift; + + /* round mem_size up to next power of 2 */ + if ((1UL << memshift) < mem_size) + memshift += 1; + + /* aim for 2 pages / pteg */ + pteg_shift = memshift - (pshift + 1); + /* + * 2^11 PTEGS of 128 bytes each, ie. 2^18 bytes is the minimum htab + * size permitted by the architecture. + */ + return max(pteg_shift + 7, 18U); +} + +static unsigned long __init htab_get_table_size(void) +{ /* If hash size isn't already provided by the platform, we try to * retrieve it from the device-tree. If it's not there neither, we * calculate it now based on the total RAM size @@ -622,17 +640,7 @@ static unsigned long __init htab_get_table_size(void) if (ppc64_pft_size) return 1UL << ppc64_pft_size; - /* round mem_size up to next power of 2 */ - mem_size = memblock_phys_mem_size(); - rnd_mem_size = 1UL << __ilog2(mem_size); - if (rnd_mem_size < mem_size) - rnd_mem_size <<= 1; - - /* # pages / 2 */ - psize = mmu_psize_defs[mmu_virtual_psize].shift; - pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11); - - return pteg_count << 7; + return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size()); } #ifdef CONFIG_MEMORY_HOTPLUG -- cgit v1.2.3 From 70fe3d980f5f14d8125869125ba9a0ea95e09c6b Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:47 +1100 Subject: powerpc: Restore FPU/VEC/VSX if previously used Currently the FPU, VEC and VSX facilities are lazily loaded. This is not a problem unless a process is using these facilities. Modern versions of GCC are very good at automatically vectorising code, new and modernised workloads make use of floating point and vector facilities, even the kernel makes use of vectorised memcpy. All this combined greatly increases the cost of a syscall since the kernel uses the facilities sometimes even in syscall fast-path making it increasingly common for a thread to take an *_unavailable exception soon after a syscall, not to mention potentially taking all three. The obvious overcompensation to this problem is to simply always load all the facilities on every exit to userspace. 
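The patch instead takes a middle road, described in the next paragraph: keep restoring eagerly only while a small per-facility usage counter is non-zero. A toy userspace model of just that counter (names and widths here are illustrative only, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    struct thread_model { uint8_t load_fp; };

    /* Restore eagerly while the counter is non-zero; each eager
     * restore bumps it, so it eventually wraps to 0 and the thread
     * falls back to lazy loading. */
    static int should_restore_fp(struct thread_model *t)
    {
            if (!t->load_fp)
                    return 0;
            t->load_fp++;   /* uint8_t: wraps back to 0 after 255 */
            return 1;
    }

    int main(void)
    {
            struct thread_model t = { .load_fp = 1 };
            unsigned eager = 0;
            while (should_restore_fp(&t))
                    eager++;
            printf("eager restores before wrapping: %u\n", eager);
            return 0;
    }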
Loading up all FPU, VEC and VSX registers every time can be expensive and if a workload does avoid using them, it should not be forced to incur this penalty. An 8-bit counter is used to detect if the registers have been used in the past and the registers are always loaded until the value wraps back to zero. Several versions of the assembly in entry_64.S were tested: 1. Always calling C. 2. Performing a common case check and then calling C. 3. A complex check in asm. After some benchmarking it was determined that avoiding C in the common case is a performance benefit (option 2). The full check in asm (option 3) greatly complicated that codepath for a negligible performance gain and the trade-off was deemed not worth it. Signed-off-by: Cyril Bur [mpe: Move load_vec in the struct to fill an existing hole, reword change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 + arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/entry_64.S | 21 +++++++-- arch/powerpc/kernel/fpu.S | 4 ++ arch/powerpc/kernel/process.c | 88 +++++++++++++++++++++++++++++++----- arch/powerpc/kernel/vector.S | 4 ++ 6 files changed, 107 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ac2330820b9a..8ab8a1a9610a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -236,7 +236,9 @@ struct thread_struct { #endif struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ + u8 load_fp; #ifdef CONFIG_ALTIVEC + u8 load_vec; struct thread_vr_state vr_state; struct thread_vr_state *vr_save_area; unsigned long vrsave; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 07cebc3514f3..10d5eab19458 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -95,12 +95,14 @@ int main(void) DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); + DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp)); #ifdef CONFIG_ALTIVEC DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); + DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec)); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce3717f..038e0a1425e7 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -210,7 +210,20 @@ system_call: /* label this so stack traces look sane */ li r11,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work - cmpld r3,r11 + + andi. r0,r8,MSR_FP + beq 2f +#ifdef CONFIG_ALTIVEC + andis.
r0,r8,MSR_VEC@h + bne 3f +#endif +2: addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO + +3: cmpld r3,r11 ld r5,_CCR(r1) bge- syscall_error .Lsyscall_error_cont: @@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite) /* Check current_thread_info()->flags */ andi. r0,r4,_TIF_USER_WORK_MASK -#ifdef CONFIG_PPC_BOOK3E bne 1f +#ifdef CONFIG_PPC_BOOK3E /* * Check to see if the dbcr0 register is set up to debug. * Use the internal debug mode bit to do this. @@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite) mtspr SPRN_DBSR,r10 b restore #else - beq restore + addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + b restore #endif 1: andi. r0,r4,_TIF_NEED_RESCHED beq 2f diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2117eaca3d28..b06352474ad0 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -130,6 +130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_FP(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e0c3d2dc7ca3..55c1eb0465af 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -187,9 +187,22 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); + +static int restore_fp(struct task_struct *tsk) { + if (tsk->thread.load_fp) { + load_fp_state(¤t->thread.fp_state); + current->thread.load_fp++; + return 1; + } + return 0; +} +#else +static int restore_fp(struct task_struct *tsk) { return 0; } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC +#define loadvec(thr) ((thr).load_vec) + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -229,6 +242,21 @@ void flush_altivec_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_altivec_to_thread); + +static int restore_altivec(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) { + load_vr_state(&tsk->thread.vr_state); + tsk->thread.used_vr = 1; + tsk->thread.load_vec++; + + return 1; + } + return 0; +} +#else +#define loadvec(thr) 0 +static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -275,6 +303,18 @@ void flush_vsx_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_vsx_to_thread); + +static int restore_vsx(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_VSX)) { + tsk->thread.used_vsr = 1; + return 1; + } + + return 0; +} +#else +static inline int restore_vsx(struct task_struct *tsk) { return 0; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -374,6 +414,36 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +void restore_math(struct pt_regs *regs) +{ + unsigned long msr; + + if (!current->thread.load_fp && !loadvec(current->thread)) + return; + + msr = regs->msr; + msr_check_and_set(msr_all_available); + + /* + * Only reload if the bit is not set in the user MSR, the bit BEING set + * indicates that the registers are hot + */ + if ((!(msr & MSR_FP)) && restore_fp(current)) + msr |= MSR_FP | current->thread.fpexc_mode; + + if ((!(msr & MSR_VEC)) && restore_altivec(current)) + msr |= MSR_VEC; + + if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && + restore_vsx(current)) { + msr |= MSR_VSX; + } + + msr_check_and_clear(msr_all_available); + + regs->msr = msr; +} + void 
flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { @@ -832,17 +902,9 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; - if (msr_diff & MSR_FP) { - msr_check_and_set(MSR_FP); - load_fp_state(&current->thread.fp_state); - msr_check_and_clear(MSR_FP); - regs->msr |= current->thread.fpexc_mode; - } - if (msr_diff & MSR_VEC) { - msr_check_and_set(MSR_VEC); - load_vr_state(&current->thread.vr_state); - msr_check_and_clear(MSR_VEC); - } + + restore_math(regs); + regs->msr |= msr_diff; } @@ -1006,6 +1068,10 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } + + if (current_thread_info()->task->thread.regs) + restore_math(current_thread_info()->task->thread.regs); + #endif /* CONFIG_PPC_BOOK3S_64 */ return last; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 162d0f714941..038cff8cf5f2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_VEC(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 li r10,VRSTATE_VSCR -- cgit v1.2.3 From de2a20aa7237b45d3c14a2505804a8daa95a8f53 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:48 +1100 Subject: powerpc: Prepare for splitting giveup_{fpu, altivec, vsx} in two This prepares for the decoupling of saving {fpu,altivec,vsx} registers and marking {fpu,altivec,vsx} as being unused by a thread. Currently giveup_{fpu,altivec,vsx}() does both; however, optimisations to task switching can be made if these two operations are decoupled. save_all() will permit the saving of registers to thread structs and leave the thread's MSR bits enabled. This patch introduces no functional change.
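The decoupled semantics are easiest to see side by side; a toy model (the MSR bit value and structure layout are stand-ins, not the real definitions):

    #include <stdint.h>

    #define MSR_FP_MODEL (1u << 13)  /* stand-in for the real MSR_FP */

    struct task_model {
            uint32_t msr;
            double fp_shadow[32];    /* stand-in for thread.fp_state */
    };

    /* save: write the registers back but leave the facility enabled,
     * so the thread keeps its hot registers on return to userspace. */
    static void save_fpu_model(struct task_model *t, const double *hw)
    {
            for (int i = 0; i < 32; i++)
                    t->fp_shadow[i] = hw[i];
    }

    /* giveup: save, then also disable the facility so the next use
     * faults and reloads lazily. */
    static void giveup_fpu_model(struct task_model *t, const double *hw)
    {
            save_fpu_model(t, hw);
            t->msr &= ~MSR_FP_MODEL;
    }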
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 8 ++++++++ arch/powerpc/include/asm/switch_to.h | 7 +++++++ arch/powerpc/kernel/process.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 11a81bd5dabd..52ed654d01ba 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -75,6 +75,14 @@ #define MSR_HV 0 #endif +/* + * To be used in shared book E/book S, this avoids needing to worry about + * book S/book E in shared code + */ +#ifndef MSR_SPE +#define MSR_SPE 0 +#endif + #define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */ #define MSR_VSX __MASK(MSR_VSX_LG) /* Enable VSX */ #define MSR_POW __MASK(MSR_POW_LG) /* Enable Power Management */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 5b268b6be74c..3690041c126a 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -34,6 +34,7 @@ static inline void disable_kernel_fp(void) msr_check_and_clear(MSR_FP); } #else +static inline void __giveup_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif @@ -46,6 +47,8 @@ static inline void disable_kernel_altivec(void) { msr_check_and_clear(MSR_VEC); } +#else +static inline void __giveup_altivec(struct task_struct *t) { } #endif #ifdef CONFIG_VSX @@ -57,6 +60,8 @@ static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } +#else +static inline void __giveup_vsx(struct task_struct *t) { } #endif #ifdef CONFIG_SPE @@ -68,6 +73,8 @@ static inline void disable_kernel_spe(void) { msr_check_and_clear(MSR_SPE); } +#else +static inline void __giveup_spe(struct task_struct *t) { } #endif static inline void clear_task_ebb(struct task_struct *t) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 55c1eb0465af..29da07fb3b4a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -444,12 +444,41 @@ void restore_math(struct pt_regs *regs) regs->msr = msr; } +void save_all(struct task_struct *tsk) +{ + unsigned long usermsr; + + if (!tsk->thread.regs) + return; + + usermsr = tsk->thread.regs->msr; + + if ((usermsr & msr_all_available) == 0) + return; + + msr_check_and_set(msr_all_available); + + if (usermsr & MSR_FP) + __giveup_fpu(tsk); + + if (usermsr & MSR_VEC) + __giveup_altivec(tsk); + + if (usermsr & MSR_VSX) + __giveup_vsx(tsk); + + if (usermsr & MSR_SPE) + __giveup_spe(tsk); + + msr_check_and_clear(msr_all_available); +} + void flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - giveup_all(tsk); + save_all(tsk); #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) -- cgit v1.2.3 From 8792468da5e12e77e76e1edf081acf0392abb331 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:49 +1100 Subject: powerpc: Add the ability to save FPU without giving it up This patch adds the ability to be able to save the FPU registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch optimises the thread copy path (as a result of a fork() or clone()) so that the parent thread can return to userspace with hot registers avoiding a possibly pointless reload of FPU register state. 
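Building on the toy model above, the win is in the thread-copy path: the parent can be snapshotted for the child while keeping its own registers live (hypothetical helper, illustration only):

    /* On fork: snapshot the parent's FP state for the child without
     * disabling FP for the parent, so the parent returns to userspace
     * with hot registers and takes no FP-unavailable exception. */
    static void copy_fp_on_fork_model(struct task_model *parent,
                                      struct task_model *child,
                                      const double *hw)
    {
            save_fpu_model(parent, hw);   /* not giveup_fpu_model() */
            for (int i = 0; i < 32; i++)
                    child->fp_shadow[i] = parent->fp_shadow[i];
    }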
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 3 ++- arch/powerpc/kernel/fpu.S | 21 ++++----------------- arch/powerpc/kernel/process.c | 12 +++++++++++- 3 files changed, 17 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 3690041c126a..6a201e8dc947 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -28,13 +28,14 @@ extern void giveup_all(struct task_struct *); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); -extern void __giveup_fpu(struct task_struct *); +extern void save_fpu(struct task_struct *); static inline void disable_kernel_fp(void) { msr_check_and_clear(MSR_FP); } #else static inline void __giveup_fpu(struct task_struct *t) { } +static inline void save_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index b06352474ad0..15da2b5df85e 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -143,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) blr /* - * __giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. + * save_fpu(tsk) + * Save the floating-point registers in its thread_struct. * Enables the FPU for use in the kernel on return. */ -_GLOBAL(__giveup_fpu) +_GLOBAL(save_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r6,0 bne 2f addi r6,r3,THREAD_FPSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, R4, R6) +2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 29da07fb3b4a..a7e5061187e8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU +void __giveup_fpu(struct task_struct *tsk) +{ + save_fpu(tsk); + tsk->thread.regs->msr &= ~MSR_FP; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_fpu(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -459,7 +469,7 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); if (usermsr & MSR_FP) - __giveup_fpu(tsk); + save_fpu(tsk); if (usermsr & MSR_VEC) __giveup_altivec(tsk); -- cgit v1.2.3 From 6f515d842e8e1b205e54f44b9013bf14870b97a7 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:50 +1100 Subject: powerpc: Add the ability to save Altivec without giving it up This patch adds the ability to be able to save the VEC registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch builds on a previous optimisation for the FPU registers in the thread copy path to avoid a possibly pointless reload of VEC state. 
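One subtlety, visible in __giveup_altivec() in the diff below: since the VSX register space overlaps the FP and VMX register files, MSR_VSX may only remain set while both MSR_FP and MSR_VEC are set, so giving up Altivec must clear MSR_VSX too. A toy assertion of that invariant (bit positions are stand-ins):

    #include <assert.h>
    #include <stdint.h>

    #define MSR_FP_BIT  (1u << 13)   /* stand-ins, not the real bits */
    #define MSR_VEC_BIT (1u << 25)
    #define MSR_VSX_BIT (1u << 23)

    /* MSR_VSX implies MSR_FP and MSR_VEC: the 64 VSX registers are
     * the FP and VMX register files viewed as one space. */
    static void check_msr_invariant(uint32_t msr)
    {
            if (msr & MSR_VSX_BIT)
                    assert((msr & MSR_FP_BIT) && (msr & MSR_VEC_BIT));
    }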
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 3 ++- arch/powerpc/kernel/process.c | 12 +++++++++++- arch/powerpc/kernel/vector.S | 24 ++++-------------------- 3 files changed, 17 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 6a201e8dc947..9028822bb73f 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -43,12 +43,13 @@ static inline void flush_fp_to_thread(struct task_struct *t) { } extern void enable_kernel_altivec(void); extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); -extern void __giveup_altivec(struct task_struct *); +extern void save_altivec(struct task_struct *); static inline void disable_kernel_altivec(void) { msr_check_and_clear(MSR_VEC); } #else +static inline void save_altivec(struct task_struct *t) { } static inline void __giveup_altivec(struct task_struct *t) { } #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a7e5061187e8..14c09d25de98 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -213,6 +213,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; } #ifdef CONFIG_ALTIVEC #define loadvec(thr) ((thr).load_vec) +static void __giveup_altivec(struct task_struct *tsk) +{ + save_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VEC; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -472,7 +482,7 @@ void save_all(struct task_struct *tsk) save_fpu(tsk); if (usermsr & MSR_VEC) - __giveup_altivec(tsk); + save_altivec(tsk); if (usermsr & MSR_VSX) __giveup_vsx(tsk); diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 038cff8cf5f2..51b0c175ea8c 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -106,36 +106,20 @@ _GLOBAL(load_up_altivec) blr /* - * __giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. + * save_altivec(tsk) + * Save the vector registers to its thread_struct */ -_GLOBAL(__giveup_altivec) +_GLOBAL(save_altivec) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r7,0 bne 2f addi r7,r3,THREAD_VRSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32VRS(0,r4,r7) +2: SAVE_32VRS(0,r4,r7) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - lis r3,(MSR_VEC|MSR_VSX)@h -FTR_SECTION_ELSE - lis r3,MSR_VEC@h -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#else - lis r3,MSR_VEC@h -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr #ifdef CONFIG_VSX -- cgit v1.2.3 From bf6a4d5b75d1ea87897fe68d0e45d35a2996c678 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:51 +1100 Subject: powerpc: Add the ability to save VSX without giving it up This patch adds the ability to be able to save the VSX registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch builds on a previous optimisation for the FPU and VEC registers in the thread copy path to avoid a possibly pointless reload of VSX state. 
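Continuing the toy model: because of that register overlap there is no separate VSX register file to copy, so a VSX save decomposes into the two underlying saves, which is what save_vsx() does in the diff below (save_altivec_model() here is a hypothetical VMX analogue of save_fpu_model() above):

    /* Saving FP and VMX state captures all 64 VSX registers as a
     * side effect; there is nothing VSX-specific left to copy. */
    static void save_vsx_model(struct task_model *t,
                               const double *fp_hw, const double *vmx_hw)
    {
            save_fpu_model(t, fp_hw);
            save_altivec_model(t, vmx_hw);
    }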
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 4 ---- arch/powerpc/kernel/ppc_ksyms.c | 4 ---- arch/powerpc/kernel/process.c | 42 +++++++++++++++++++++++++----------- arch/powerpc/kernel/vector.S | 17 --------------- 4 files changed, 30 insertions(+), 37 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9028822bb73f..17c8380673a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -56,14 +56,10 @@ static inline void __giveup_altivec(struct task_struct *t) { } #ifdef CONFIG_VSX extern void enable_kernel_vsx(void); extern void flush_vsx_to_thread(struct task_struct *); -extern void giveup_vsx(struct task_struct *); -extern void __giveup_vsx(struct task_struct *); static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -#else -static inline void __giveup_vsx(struct task_struct *t) { } #endif #ifdef CONFIG_SPE diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 41e1607e800c..ef7024dacff7 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif -#ifdef CONFIG_VSX -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 14c09d25de98..d7a9df51b974 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -280,19 +280,31 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -void giveup_vsx(struct task_struct *tsk) +static void __giveup_vsx(struct task_struct *tsk) { - check_if_tm_restore_required(tsk); - - msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (tsk->thread.regs->msr & MSR_FP) __giveup_fpu(tsk); if (tsk->thread.regs->msr & MSR_VEC) __giveup_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VSX; +} + +static void giveup_vsx(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); __giveup_vsx(tsk); msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -EXPORT_SYMBOL(giveup_vsx); + +static void save_vsx(struct task_struct *tsk) +{ + if (tsk->thread.regs->msr & MSR_FP) + save_fpu(tsk); + if (tsk->thread.regs->msr & MSR_VEC) + save_altivec(tsk); +} void enable_kernel_vsx(void) { @@ -335,6 +347,7 @@ static int restore_vsx(struct task_struct *tsk) } #else static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static inline void save_vsx(struct task_struct *tsk) { } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -478,14 +491,19 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); - if (usermsr & MSR_FP) - save_fpu(tsk); - - if (usermsr & MSR_VEC) - save_altivec(tsk); + /* + * Saving the way the register space is in hardware, save_vsx boils + * down to a save_fpu() and save_altivec() + */ + if (usermsr & MSR_VSX) { + save_vsx(tsk); + } else { + if (usermsr & MSR_FP) + save_fpu(tsk); - if (usermsr & MSR_VSX) - __giveup_vsx(tsk); + if (usermsr & MSR_VEC) + save_altivec(tsk); + } if (usermsr & MSR_SPE) __giveup_spe(tsk); diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 51b0c175ea8c..1c2e7a343bf5 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -151,23 +151,6 @@ 
_GLOBAL(load_up_vsx) std r12,_MSR(r1) b fast_exception_return -/* - * __giveup_vsx(tsk) - * Disable VSX for the task given as the argument. - * Does NOT save vsx registers. - */ -_GLOBAL(__giveup_vsx) - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VSX@h - andc r4,r4,r3 /* disable VSX for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: - blr - #endif /* CONFIG_VSX */ -- cgit v1.2.3 From 2bf59916ef033edb9f8e968ee27e486ccb8ba1ce Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Mar 2016 09:45:11 +0530 Subject: powerpc/mm: Split pgtable types to separate header We move the page table accessors into a separate header. We will later add a big endian variant of the table which is needed for radix. No functionality change only code movement. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 104 +---------------------------- arch/powerpc/include/asm/pgtable-types.h | 108 +++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 103 deletions(-) create mode 100644 arch/powerpc/include/asm/pgtable-types.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index af7a3422a3ef..ab3d8977bacd 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -288,109 +288,7 @@ extern long long virt_phys_offset; #ifndef __ASSEMBLY__ -#ifdef CONFIG_STRICT_MM_TYPECHECKS -/* These are used to make use of C type-checking. */ - -/* PTE level */ -typedef struct { pte_basic_t pte; } pte_t; -#define __pte(x) ((pte_t) { (x) }) -static inline pte_basic_t pte_val(pte_t x) -{ - return x.pte; -} - -/* 64k pages additionally define a bigger "real PTE" type that gathers - * the "second half" part of the PTE for pseudo 64k pages - */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef struct { pte_t pte; } real_pte_t; -#endif - -/* PMD level */ -#ifdef CONFIG_PPC64 -typedef struct { unsigned long pmd; } pmd_t; -#define __pmd(x) ((pmd_t) { (x) }) -static inline unsigned long pmd_val(pmd_t x) -{ - return x.pmd; -} - -/* PUD level exusts only on 4k pages */ -#ifndef CONFIG_PPC_64K_PAGES -typedef struct { unsigned long pud; } pud_t; -#define __pud(x) ((pud_t) { (x) }) -static inline unsigned long pud_val(pud_t x) -{ - return x.pud; -} -#endif /* !CONFIG_PPC_64K_PAGES */ -#endif /* CONFIG_PPC64 */ - -/* PGD level */ -typedef struct { unsigned long pgd; } pgd_t; -#define __pgd(x) ((pgd_t) { (x) }) -static inline unsigned long pgd_val(pgd_t x) -{ - return x.pgd; -} - -/* Page protection bits */ -typedef struct { unsigned long pgprot; } pgprot_t; -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) }) - -#else - -/* - * .. 
while these make it easier on the compiler - */ - -typedef pte_basic_t pte_t; -#define __pte(x) (x) -static inline pte_basic_t pte_val(pte_t pte) -{ - return pte; -} - -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef pte_t real_pte_t; -#endif - - -#ifdef CONFIG_PPC64 -typedef unsigned long pmd_t; -#define __pmd(x) (x) -static inline unsigned long pmd_val(pmd_t pmd) -{ - return pmd; -} - -#ifndef CONFIG_PPC_64K_PAGES -typedef unsigned long pud_t; -#define __pud(x) (x) -static inline unsigned long pud_val(pud_t pud) -{ - return pud; -} -#endif /* !CONFIG_PPC_64K_PAGES */ -#endif /* CONFIG_PPC64 */ - -typedef unsigned long pgd_t; -#define __pgd(x) (x) -static inline unsigned long pgd_val(pgd_t pgd) -{ - return pgd; -} - -typedef unsigned long pgprot_t; -#define pgprot_val(x) (x) -#define __pgprot(x) (x) - -#endif +#include typedef struct { signed long pd; } hugepd_t; diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h new file mode 100644 index 000000000000..2fac0c4acfa4 --- /dev/null +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -0,0 +1,108 @@ +#ifndef _ASM_POWERPC_PGTABLE_TYPES_H +#define _ASM_POWERPC_PGTABLE_TYPES_H + +#ifdef CONFIG_STRICT_MM_TYPECHECKS +/* These are used to make use of C type-checking. */ + +/* PTE level */ +typedef struct { pte_basic_t pte; } pte_t; +#define __pte(x) ((pte_t) { (x) }) +static inline pte_basic_t pte_val(pte_t x) +{ + return x.pte; +} + +/* 64k pages additionally define a bigger "real PTE" type that gathers + * the "second half" part of the PTE for pseudo 64k pages + */ +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef struct { pte_t pte; } real_pte_t; +#endif + +/* PMD level */ +#ifdef CONFIG_PPC64 +typedef struct { unsigned long pmd; } pmd_t; +#define __pmd(x) ((pmd_t) { (x) }) +static inline unsigned long pmd_val(pmd_t x) +{ + return x.pmd; +} + +/* PUD level exusts only on 4k pages */ +#ifndef CONFIG_PPC_64K_PAGES +typedef struct { unsigned long pud; } pud_t; +#define __pud(x) ((pud_t) { (x) }) +static inline unsigned long pud_val(pud_t x) +{ + return x.pud; +} +#endif /* !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC64 */ + +/* PGD level */ +typedef struct { unsigned long pgd; } pgd_t; +#define __pgd(x) ((pgd_t) { (x) }) +static inline unsigned long pgd_val(pgd_t x) +{ + return x.pgd; +} + +/* Page protection bits */ +typedef struct { unsigned long pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) + +#else + +/* + * .. 
while these make it easier on the compiler + */ + +typedef pte_basic_t pte_t; +#define __pte(x) (x) +static inline pte_basic_t pte_val(pte_t pte) +{ + return pte; +} + +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef pte_t real_pte_t; +#endif + + +#ifdef CONFIG_PPC64 +typedef unsigned long pmd_t; +#define __pmd(x) (x) +static inline unsigned long pmd_val(pmd_t pmd) +{ + return pmd; +} + +#ifndef CONFIG_PPC_64K_PAGES +typedef unsigned long pud_t; +#define __pud(x) (x) +static inline unsigned long pud_val(pud_t pud) +{ + return pud; +} +#endif /* !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC64 */ + +typedef unsigned long pgd_t; +#define __pgd(x) (x) +static inline unsigned long pgd_val(pgd_t pgd) +{ + return pgd; +} + +typedef unsigned long pgprot_t; +#define pgprot_val(x) (x) +#define __pgprot(x) (x) + +#endif + +#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */ -- cgit v1.2.3 From ae9a71afa4d7cf29a816fbc387bfd59ad7c292b6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Mar 2016 09:45:12 +0530 Subject: powerpc/mm: Don't have conditional defines for real_pte_t We remove real_pte_t out of STRICT_MM_TYPESCHECK. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 5 ----- arch/powerpc/include/asm/pgtable-types.h | 26 +++++++++----------------- 2 files changed, 9 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index c8240b737d11..7482f69117b6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -43,13 +43,8 @@ */ #ifndef __real_pte -#ifdef CONFIG_STRICT_MM_TYPECHECKS #define __real_pte(e,p) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) -#else -#define __real_pte(e,p) (e) -#define __rpte_to_pte(r) (__pte(r)) -#endif #define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT) #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 2fac0c4acfa4..71487e1ca638 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -12,15 +12,6 @@ static inline pte_basic_t pte_val(pte_t x) return x.pte; } -/* 64k pages additionally define a bigger "real PTE" type that gathers - * the "second half" part of the PTE for pseudo 64k pages - */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef struct { pte_t pte; } real_pte_t; -#endif - /* PMD level */ #ifdef CONFIG_PPC64 typedef struct { unsigned long pmd; } pmd_t; @@ -67,13 +58,6 @@ static inline pte_basic_t pte_val(pte_t pte) return pte; } -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef pte_t real_pte_t; -#endif - - #ifdef CONFIG_PPC64 typedef unsigned long pmd_t; #define __pmd(x) (x) @@ -103,6 +87,14 @@ typedef unsigned long pgprot_t; #define pgprot_val(x) (x) #define __pgprot(x) (x) +#endif /* CONFIG_STRICT_MM_TYPECHECKS */ +/* + * With hash config 64k pages additionally define a bigger "real PTE" type that + * gathers the "second half" part of the PTE for pseudo 64k pages + */ +#if defined(CONFIG_PPC_64K_PAGES) 
&& defined(CONFIG_PPC_STD_MMU_64) +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef struct { pte_t pte; } real_pte_t; #endif - #endif /* _ASM_POWERPC_PGTABLE_TYPES_H */ -- cgit v1.2.3 From 368ced78e6ed3d72c2acc61233b58487071ec289 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Mar 2016 09:45:13 +0530 Subject: powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table This is needed so that we can support both hash and radix page table using single kernel. Radix kernel uses a 4 level table. We now use physical address in upper page table tree levels. Even though they are aligned to their size, for the masked bits we use the bit positions as per PowerISA 3.0. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/64/hash-4k.h | 33 +-------------------------- arch/powerpc/include/asm/book3s/64/hash-64k.h | 33 ++++++++++++++------------- arch/powerpc/include/asm/book3s/64/hash.h | 11 +++++++++ arch/powerpc/include/asm/book3s/64/pgtable.h | 25 +++++++++++++++++++- arch/powerpc/include/asm/pgalloc-64.h | 28 ++++++++++++++++++++--- arch/powerpc/include/asm/pgtable-types.h | 13 +++++++---- arch/powerpc/mm/init_64.c | 21 ++++++++++++----- 8 files changed, 102 insertions(+), 64 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9faa18c4f3f7..7efddd18d700 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -303,7 +303,7 @@ config ZONE_DMA32 config PGTABLE_LEVELS int default 2 if !PPC64 - default 3 if PPC_64K_PAGES + default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64 default 4 source "init/Kconfig" diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 7f60f7e814d4..5f08a0832238 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -58,39 +58,8 @@ #define _PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ /* - * 4-level page tables related bits + * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ - -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) - -static inline void pgd_clear(pgd_t *pgdp) -{ - *pgdp = __pgd(0); -} - -static inline pte_t pgd_pte(pgd_t pgd) -{ - return __pte(pgd_val(pgd)); -} - -static inline pgd_t pte_pgd(pte_t pte) -{ - return __pgd(pte_val(pte)); -} -extern struct page *pgd_page(pgd_t pgd); - -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ - (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) - -#define pud_ERROR(e) \ - pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) - -/* - * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ #define remap_4k_pfn(vma, addr, pfn, prot) \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 8bb03251f34c..0a7956a80a08 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -1,15 +1,14 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#include - #define PTE_INDEX_SIZE 8 -#define PMD_INDEX_SIZE 10 -#define PUD_INDEX_SIZE 0 +#define PMD_INDEX_SIZE 5 +#define PUD_INDEX_SIZE 5 #define PGD_INDEX_SIZE 12 #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) 
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* With 4k base page size, hugepage PTEs go at the PMD level */ @@ -20,8 +19,13 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -56,13 +60,12 @@ #define PTE_FRAG_SIZE_SHIFT 12 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) -/* - * Bits to mask out from a PMD to get to the PTE page - * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned. - */ -#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) -/* Bits to mask out from a PGD/PUD to get to the PMD page */ -#define PUD_MASKED_BITS 0x1ff +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0xc0000000000000ffUL #ifndef __ASSEMBLY__ @@ -132,11 +135,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); #else #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) -#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) -#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) - #ifdef CONFIG_HUGETLB_PAGE /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index ef9bd68f7e6d..d0ee6fcef823 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -235,6 +235,7 @@ #define __pgtable_ptr_val(ptr) __pa(ptr) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) @@ -363,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) :"cc"); } +static inline int pgd_bad(pgd_t pgd) +{ + return (pgd_val(pgd) == 0); +} + #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) +static inline unsigned long pgd_page_vaddr(pgd_t pgd) +{ + return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS); +} + /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 7482f69117b6..77d3ce05798e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -106,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) *pgdp = __pgd(val); } +static inline void pgd_clear(pgd_t *pgdp) +{ + *pgdp = __pgd(0); +} + +#define pgd_none(pgd) 
(!pgd_val(pgd)) +#define pgd_present(pgd) (!pgd_none(pgd)) + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} + +extern struct page *pgd_page(pgd_t pgd); + /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. @@ -113,9 +133,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) #define pmd_offset(pudp,addr) \ (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) - #define pte_offset_kernel(dir,addr) \ (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) @@ -130,6 +151,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 7ac59a32c66a..8d5fc3ac43da 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -171,7 +171,29 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); extern void __tlb_remove_table(void *_table); #endif -#define pud_populate(mm, pud, pmd) pud_set(pud, __pgtable_ptr_val(pmd)) +#ifndef __PAGETABLE_PUD_FOLDED +/* book3s 64 is 4 level page table */ +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, __pgtable_ptr_val(pud)); +} + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); +} +#endif + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, __pgtable_ptr_val(pmd)); +} static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -233,11 +255,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) -#ifndef CONFIG_PPC_64K_PAGES +#ifndef __PAGETABLE_PUD_FOLDED #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#endif /* CONFIG_PPC_64K_PAGES */ +#endif /* __PAGETABLE_PUD_FOLDED */ #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 71487e1ca638..43140f8b0592 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -21,15 +21,18 @@ static inline unsigned long pmd_val(pmd_t x) return x.pmd; } -/* PUD level exusts only on 4k pages */ -#ifndef CONFIG_PPC_64K_PAGES +/* + * 64 bit hash always use 4 level table. Everybody else use 4 level + * only for 4K page size. 
+ */ +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) typedef struct { unsigned long pud; } pud_t; #define __pud(x) ((pud_t) { (x) }) static inline unsigned long pud_val(pud_t x) { return x.pud; } -#endif /* !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd) return pmd; } -#ifndef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) typedef unsigned long pud_t; #define __pud(x) (x) static inline unsigned long pud_val(pud_t pud) { return pud; } -#endif /* !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ typedef unsigned long pgd_t; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index fbc9448cca63..ba655666186d 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr) memset(addr, 0, PGD_TABLE_SIZE); } +static void pud_ctor(void *addr) +{ + memset(addr, 0, PUD_TABLE_SIZE); +} + static void pmd_ctor(void *addr) { memset(addr, 0, PMD_TABLE_SIZE); @@ -138,14 +143,18 @@ void pgtable_cache_init(void) { pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + /* + * In all current configs, when the PUD index exists it's the + * same size as either the pgd or pmd index except with THP enabled + * on book3s 64 + */ + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable caches"); - /* In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index. Verify that the - * initialization above has also created a PUD cache. This - * will need re-examiniation if we add new possibilities for - * the pagetable layout. */ - BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + panic("Couldn't allocate pud pgtable caches"); } #ifdef CONFIG_SPARSEMEM_VMEMMAP -- cgit v1.2.3 From f64e8084c94bb0449177364856d8117e2f14c4c0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Mar 2016 12:59:20 +0530 Subject: powerpc/mm: Move hash related mmu-*.h headers to book3s/ No code changes. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 93 ++++ arch/powerpc/include/asm/book3s/64/mmu-hash.h | 616 ++++++++++++++++++++++++++ arch/powerpc/include/asm/mmu-hash32.h | 93 ---- arch/powerpc/include/asm/mmu-hash64.h | 616 -------------------------- arch/powerpc/include/asm/mmu.h | 4 +- arch/powerpc/kernel/cpu_setup_power.S | 2 +- arch/powerpc/kernel/idle_power7.S | 2 +- arch/powerpc/kvm/book3s_32_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_64_mmu.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_64_vio.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- 15 files changed, 721 insertions(+), 721 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/32/mmu-hash.h create mode 100644 arch/powerpc/include/asm/book3s/64/mmu-hash.h delete mode 100644 arch/powerpc/include/asm/mmu-hash32.h delete mode 100644 arch/powerpc/include/asm/mmu-hash64.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h new file mode 100644 index 000000000000..16f513e5cbd7 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -0,0 +1,93 @@ +#ifndef _ASM_POWERPC_MMU_HASH32_H_ +#define _ASM_POWERPC_MMU_HASH32_H_ +/* + * 32-bit hash table MMU support + */ + +/* + * BATs + */ + +/* Block size masks */ +#define BL_128K 0x000 +#define BL_256K 0x001 +#define BL_512K 0x003 +#define BL_1M 0x007 +#define BL_2M 0x00F +#define BL_4M 0x01F +#define BL_8M 0x03F +#define BL_16M 0x07F +#define BL_32M 0x0FF +#define BL_64M 0x1FF +#define BL_128M 0x3FF +#define BL_256M 0x7FF + +/* BAT Access Protection */ +#define BPP_XX 0x00 /* No access */ +#define BPP_RX 0x01 /* Read only */ +#define BPP_RW 0x02 /* Read/write */ + +#ifndef __ASSEMBLY__ +/* Contort a phys_addr_t into the right format/bits for a BAT */ +#ifdef CONFIG_PHYS_64BIT +#define BAT_PHYS_ADDR(x) ((u32)((x & 0x00000000fffe0000ULL) | \ + ((x & 0x0000000e00000000ULL) >> 24) | \ + ((x & 0x0000000100000000ULL) >> 30))) +#else +#define BAT_PHYS_ADDR(x) (x) +#endif + +struct ppc_bat { + u32 batu; + u32 batl; +}; +#endif /* !__ASSEMBLY__ */ + +/* + * Hash table + */ + +/* Values for PP (assumes Ks=0, Kp=1) */ +#define PP_RWXX 0 /* Supervisor read/write, User none */ +#define PP_RWRX 1 /* Supervisor read/write, User read */ +#define PP_RWRW 2 /* Supervisor read/write, User read/write */ +#define PP_RXRX 3 /* Supervisor read, User read */ + +#ifndef __ASSEMBLY__ + +/* + * Hardware Page Table Entry + * Note that the xpn and x bitfields are used only by processors that + * support extended addressing; otherwise, those bits are reserved. 
+ */ +struct hash_pte { + unsigned long v:1; /* Entry is valid */ + unsigned long vsid:24; /* Virtual segment identifier */ + unsigned long h:1; /* Hash algorithm indicator */ + unsigned long api:6; /* Abbreviated page index */ + unsigned long rpn:20; /* Real (physical) page number */ + unsigned long xpn:3; /* Real page number bits 0-2, optional */ + unsigned long r:1; /* Referenced */ + unsigned long c:1; /* Changed */ + unsigned long w:1; /* Write-thru cache mode */ + unsigned long i:1; /* Cache inhibited */ + unsigned long m:1; /* Memory coherence */ + unsigned long g:1; /* Guarded */ + unsigned long x:1; /* Real page number bit 3, optional */ + unsigned long pp:2; /* Page protection */ +}; + +typedef struct { + unsigned long id; + unsigned long vdso_base; +} mm_context_t; + +#endif /* !__ASSEMBLY__ */ + +/* We happily ignore the smaller BATs on 601, we don't actually use + * those definitions on hash32 at the moment anyway + */ +#define mmu_virtual_psize MMU_PAGE_4K +#define mmu_linear_psize MMU_PAGE_256M + +#endif /* _ASM_POWERPC_MMU_HASH32_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h new file mode 100644 index 000000000000..0cea4807e26f --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -0,0 +1,616 @@ +#ifndef _ASM_POWERPC_MMU_HASH64_H_ +#define _ASM_POWERPC_MMU_HASH64_H_ +/* + * PowerPC64 memory management structures + * + * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com> + * PPC64 rework. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +/* + * This is necessary to get the definition of PGTABLE_RANGE which we + * need for various slices related matters. Note that this isn't the + * complete pgtable.h but only a portion of it. 
+ */ +#include +#include +#include + +/* + * SLB + */ + +#define SLB_NUM_BOLTED 3 +#define SLB_CACHE_ENTRIES 8 +#define SLB_MIN_SIZE 32 + +/* Bits in the SLB ESID word */ +#define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */ + +/* Bits in the SLB VSID word */ +#define SLB_VSID_SHIFT 12 +#define SLB_VSID_SHIFT_1T 24 +#define SLB_VSID_SSIZE_SHIFT 62 +#define SLB_VSID_B ASM_CONST(0xc000000000000000) +#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) +#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) +#define SLB_VSID_KS ASM_CONST(0x0000000000000800) +#define SLB_VSID_KP ASM_CONST(0x0000000000000400) +#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ +#define SLB_VSID_L ASM_CONST(0x0000000000000100) +#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ +#define SLB_VSID_LP ASM_CONST(0x0000000000000030) +#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000) +#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010) +#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020) +#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030) +#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP) + +#define SLB_VSID_KERNEL (SLB_VSID_KP) +#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) + +#define SLBIE_C (0x08000000) +#define SLBIE_SSIZE_SHIFT 25 + +/* + * Hash table + */ + +#define HPTES_PER_GROUP 8 + +#define HPTE_V_SSIZE_SHIFT 62 +#define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) +#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) +#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) +#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) +#define HPTE_V_LOCK ASM_CONST(0x0000000000000008) +#define HPTE_V_LARGE ASM_CONST(0x0000000000000004) +#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) +#define HPTE_V_VALID ASM_CONST(0x0000000000000001) + +#define HPTE_R_PP0 ASM_CONST(0x8000000000000000) +#define HPTE_R_TS ASM_CONST(0x4000000000000000) +#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) +#define HPTE_R_RPN_SHIFT 12 +#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) +#define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_N ASM_CONST(0x0000000000000004) +#define HPTE_R_G ASM_CONST(0x0000000000000008) +#define HPTE_R_M ASM_CONST(0x0000000000000010) +#define HPTE_R_I ASM_CONST(0x0000000000000020) +#define HPTE_R_W ASM_CONST(0x0000000000000040) +#define HPTE_R_WIMG ASM_CONST(0x0000000000000078) +#define HPTE_R_C ASM_CONST(0x0000000000000080) +#define HPTE_R_R ASM_CONST(0x0000000000000100) +#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) + +#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) +#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) + +/* Values for PP (assumes Ks=0, Kp=1) */ +#define PP_RWXX 0 /* Supervisor read/write, User none */ +#define PP_RWRX 1 /* Supervisor read/write, User read */ +#define PP_RWRW 2 /* Supervisor read/write, User read/write */ +#define PP_RXRX 3 /* Supervisor read, User read */ +#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ + +/* Fields for tlbiel instruction in architecture 2.06 */ +#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */ +#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */ +#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */ +#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */ +#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. 
*/ +#define TLBIEL_INVAL_SET_SHIFT 12 + +#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ +#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */ +#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */ + +#ifndef __ASSEMBLY__ + +struct hash_pte { + __be64 v; + __be64 r; +}; + +extern struct hash_pte *htab_address; +extern unsigned long htab_size_bytes; +extern unsigned long htab_hash_mask; + +/* + * Page size definition + * + * shift : is the "PAGE_SHIFT" value for that page size + * sllp : is a bit mask with the value of SLB L || LP to be or'ed + * directly to a slbmte "vsid" value + * penc : is the HPTE encoding mask for the "LP" field: + * + */ +struct mmu_psize_def +{ + unsigned int shift; /* number of bits */ + int penc[MMU_PAGE_COUNT]; /* HPTE encoding */ + unsigned int tlbiel; /* tlbiel supported for that page size */ + unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ + unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ +}; +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +static inline int shift_to_mmu_psize(unsigned int shift) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) + if (mmu_psize_defs[psize].shift == shift) + return psize; + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + +#endif /* __ASSEMBLY__ */ + +/* + * Segment sizes. + * These are the values used by hardware in the B field of + * SLB entries and the first dword of MMU hashtable entries. + * The B field is 2 bits; the values 2 and 3 are unused and reserved. + */ +#define MMU_SEGSIZE_256M 0 +#define MMU_SEGSIZE_1T 1 + +/* + * encode page number shift. + * in order to fit the 78 bit va in a 64 bit variable we shift the va by + * 12 bits. This enable us to address upto 76 bit va. + * For hpt hash from a va we can ignore the page size bits of va and for + * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure + * we work in all cases including 4k page size. + */ +#define VPN_SHIFT 12 + +/* + * HPTE Large Page (LP) details + */ +#define LP_SHIFT 12 +#define LP_BITS 8 +#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) + +#ifndef __ASSEMBLY__ + +static inline int slb_vsid_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SLB_VSID_SHIFT; + return SLB_VSID_SHIFT_1T; +} + +static inline int segment_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SID_SHIFT; + return SID_SHIFT_1T; +} + +/* + * The current system page and segment sizes + */ +extern int mmu_linear_psize; +extern int mmu_virtual_psize; +extern int mmu_vmalloc_psize; +extern int mmu_vmemmap_psize; +extern int mmu_io_psize; +extern int mmu_kernel_ssize; +extern int mmu_highuser_ssize; +extern u16 mmu_slb_size; +extern unsigned long tce_alloc_start, tce_alloc_end; + +/* + * If the processor supports 64k normal pages but not 64k cache + * inhibited pages, we have to be prepared to switch processes + * to use 4k pages when they create cache-inhibited mappings. + * If this is the case, mmu_ci_restrictions will be set to 1. + */ +extern int mmu_ci_restrictions; + +/* + * This computes the AVPN and B fields of the first dword of a HPTE, + * for use when we want to match an existing PTE. The bottom 7 bits + * of the returned value are zero. 
+ */ +static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, + int ssize) +{ + unsigned long v; + /* + * The AVA field omits the low-order 23 bits of the 78 bits VA. + * These bits are not needed in the PTE, because the + * low-order b of these bits are part of the byte offset + * into the virtual page and, if b < 23, the high-order + * 23-b of these bits are always used in selecting the + * PTEGs to be searched + */ + v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); + v <<= HPTE_V_AVPN_SHIFT; + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + return v; +} + +/* + * This function sets the AVPN and L fields of the HPTE appropriately + * using the base page size and actual page size. + */ +static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, + int actual_psize, int ssize) +{ + unsigned long v; + v = hpte_encode_avpn(vpn, base_psize, ssize); + if (actual_psize != MMU_PAGE_4K) + v |= HPTE_V_LARGE; + return v; +} + +/* + * This function sets the ARPN, and LP fields of the HPTE appropriately + * for the page size. We assume the pa is already "clean" that is properly + * aligned for the requested page size + */ +static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, + int actual_psize) +{ + /* A 4K page needs no special encoding */ + if (actual_psize == MMU_PAGE_4K) + return pa & HPTE_R_RPN; + else { + unsigned int penc = mmu_psize_defs[base_psize].penc[actual_psize]; + unsigned int shift = mmu_psize_defs[actual_psize].shift; + return (pa & ~((1ul << shift) - 1)) | (penc << LP_SHIFT); + } +} + +/* + * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size. + */ +static inline unsigned long hpt_vpn(unsigned long ea, + unsigned long vsid, int ssize) +{ + unsigned long mask; + int s_shift = segment_shift(ssize); + + mask = (1ul << (s_shift - VPN_SHIFT)) - 1; + return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask); +} + +/* + * This hashes a virtual address + */ +static inline unsigned long hpt_hash(unsigned long vpn, + unsigned int shift, int ssize) +{ + int mask; + unsigned long hash, vsid; + + /* VPN_SHIFT can be atmost 12 */ + if (ssize == MMU_SEGSIZE_256M) { + mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1; + hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^ + ((vpn & mask) >> (shift - VPN_SHIFT)); + } else { + mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1; + vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT); + hash = vsid ^ (vsid << 25) ^ + ((vpn & mask) >> (shift - VPN_SHIFT)) ; + } + return hash & 0x7fffffffffUL; +} + +#define HPTE_LOCAL_UPDATE 0x1 +#define HPTE_NOHPTE_UPDATE 0x2 + +extern int __hash_page_4K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned long flags, int ssize, int subpage_prot); +extern int __hash_page_64K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned long flags, int ssize); +struct mm_struct; +unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags); +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr); +int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern int __hash_page_thp(unsigned 
long ea, unsigned long access, + unsigned long vsid, pmd_t *pmdp, unsigned long trap, + unsigned long flags, int ssize, unsigned int psize); +#else +static inline int __hash_page_thp(unsigned long ea, unsigned long access, + unsigned long vsid, pmd_t *pmdp, + unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) +{ + BUG(); + return -1; +} +#endif +extern void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, int lpsize, + unsigned long pte); +extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, + unsigned long pstart, unsigned long prot, + int psize, int ssize); +int htab_remove_mapping(unsigned long vstart, unsigned long vend, + int psize, int ssize); +extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); +extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); + +extern void hpte_init_native(void); +extern void hpte_init_lpar(void); +extern void hpte_init_beat(void); +extern void hpte_init_beat_v3(void); + +extern void slb_initialize(void); +extern void slb_flush_and_rebolt(void); + +extern void slb_vmalloc_update(void); +extern void slb_set_size(u16 size); +#endif /* __ASSEMBLY__ */ + +/* + * VSID allocation (256MB segment) + * + * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated + * from mmu context id and effective segment id of the address. + * + * For user processes max context id is limited to ((1ul << 19) - 5) + * for kernel space, we use the top 4 context ids to map address as below + * NOTE: each context only support 64TB now. + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] + * + * The proto-VSIDs are then scrambled into real VSIDs with the + * multiplicative hash: + * + * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS + * + * VSID_MULTIPLIER is prime, so in particular it is + * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. + * Because the modulus is 2^n-1 we can compute it efficiently without + * a divide or extra multiply (see below). The scramble function gives + * robust scattering in the hash table (at least based on some initial + * results). + * + * We also consider VSID 0 special. We use VSID 0 for slb entries mapping + * bad address. This enables us to consolidate bad address handling in + * hash_page. + * + * We also need to avoid the last segment of the last context, because that + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 + * because of the modulo operation in vsid scramble. But the vmemmap + * (which is what uses region 0xf) will never be close to 64TB in size + * (it's 56 bytes per page of system memory). + */ + +#define CONTEXT_BITS 19 +#define ESID_BITS 18 +#define ESID_BITS_1T 6 + +/* + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments + * available for user + kernel mapping. The top 4 contexts are used for + * kernel mapping. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). + */ +#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) + +/* + * This should be computed such that protovosid * vsid_mulitplier + * doesn't overflow 64 bits. 
It should also be co-prime to vsid_modulus + */ +#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ +#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) +#define VSID_MODULUS_256M ((1UL<= \ + * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ + * the bit clear, r3 already has the answer we want, if it \ + * doesn't, the answer is the low 36 bits of r3+1. So in all \ + * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ + add rt,rt,rx + +/* 4 bits per slice and we have one slice per 1TB */ +#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PPC_SUBPAGE_PROT +/* + * For the sub-page protection option, we extend the PGD with one of + * these. Basically we have a 3-level tree, with the top level being + * the protptrs array. To optimize speed and memory consumption when + * only addresses < 4GB are being protected, pointers to the first + * four pages of sub-page protection words are stored in the low_prot + * array. + * Each page of sub-page protection words protects 1GB (4 bytes + * protects 64k). For the 3-level tree, each page of pointers then + * protects 8TB. + */ +struct subpage_prot_table { + unsigned long maxaddr; /* only addresses < this are protected */ + unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; + unsigned int *low_prot[4]; +}; + +#define SBP_L1_BITS (PAGE_SHIFT - 2) +#define SBP_L2_BITS (PAGE_SHIFT - 3) +#define SBP_L1_COUNT (1 << SBP_L1_BITS) +#define SBP_L2_COUNT (1 << SBP_L2_BITS) +#define SBP_L2_SHIFT (PAGE_SHIFT + SBP_L1_BITS) +#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) + +extern void subpage_prot_free(struct mm_struct *mm); +extern void subpage_prot_init_new_context(struct mm_struct *mm); +#else +static inline void subpage_prot_free(struct mm_struct *mm) {} +static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } +#endif /* CONFIG_PPC_SUBPAGE_PROT */ + +typedef unsigned long mm_context_id_t; +struct spinlock; + +typedef struct { + mm_context_id_t id; + u16 user_psize; /* page size index */ + +#ifdef CONFIG_PPC_MM_SLICES + u64 low_slices_psize; /* SLB page size encodings */ + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; +#else + u16 sllp; /* SLB page size encoding */ +#endif + unsigned long vdso_base; +#ifdef CONFIG_PPC_SUBPAGE_PROT + struct subpage_prot_table spt; +#endif /* CONFIG_PPC_SUBPAGE_PROT */ +#ifdef CONFIG_PPC_ICSWX + struct spinlock *cop_lockp; /* guard acop and cop_pid */ + unsigned long acop; /* mask of enabled coprocessor types */ + unsigned int cop_pid; /* pid value used with coprocessors */ +#endif /* CONFIG_PPC_ICSWX */ +#ifdef CONFIG_PPC_64K_PAGES + /* for 4K PTE fragment support */ + void *pte_frag; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + struct list_head iommu_group_mem_list; +#endif +} mm_context_t; + + +#if 0 +/* + * The code below is equivalent to this function for arguments + * < 2^VSID_BITS, which is all this should ever be called + * with. However gcc is not clever enough to compute the + * modulus (2^n-1) without a second multiply. 
+ */ +#define vsid_scramble(protovsid, size) \ + ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size)) + +#else /* 1 */ +#define vsid_scramble(protovsid, size) \ + ({ \ + unsigned long x; \ + x = (protovsid) * VSID_MULTIPLIER_##size; \ + x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \ + (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \ + }) +#endif /* 1 */ + +/* Returns the segment size indicator for a user address */ +static inline int user_segment_size(unsigned long addr) +{ + /* Use 1T segments if possible for addresses >= 1T */ + if (addr >= (1UL << SID_SHIFT_1T)) + return mmu_highuser_ssize; + return MMU_SEGSIZE_256M; +} + +static inline unsigned long get_vsid(unsigned long context, unsigned long ea, + int ssize) +{ + /* + * Bad address. We return VSID 0 for that + */ + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 0; + + if (ssize == MMU_SEGSIZE_256M) + return vsid_scramble((context << ESID_BITS) + | (ea >> SID_SHIFT), 256M); + return vsid_scramble((context << ESID_BITS_1T) + | (ea >> SID_SHIFT_1T), 1T); +} + +/* + * This is only valid for addresses >= PAGE_OFFSET + * + * For kernel space, we use the top 4 context ids to map address as below + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] + */ +static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) +{ + unsigned long context; + + /* + * kernel take the top 4 context from the available range + */ + context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; + return get_vsid(context, ea, ssize); +} + +unsigned htab_shift_for_mem_size(unsigned long mem_size); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/include/asm/mmu-hash32.h b/arch/powerpc/include/asm/mmu-hash32.h deleted file mode 100644 index 16f513e5cbd7..000000000000 --- a/arch/powerpc/include/asm/mmu-hash32.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef _ASM_POWERPC_MMU_HASH32_H_ -#define _ASM_POWERPC_MMU_HASH32_H_ -/* - * 32-bit hash table MMU support - */ - -/* - * BATs - */ - -/* Block size masks */ -#define BL_128K 0x000 -#define BL_256K 0x001 -#define BL_512K 0x003 -#define BL_1M 0x007 -#define BL_2M 0x00F -#define BL_4M 0x01F -#define BL_8M 0x03F -#define BL_16M 0x07F -#define BL_32M 0x0FF -#define BL_64M 0x1FF -#define BL_128M 0x3FF -#define BL_256M 0x7FF - -/* BAT Access Protection */ -#define BPP_XX 0x00 /* No access */ -#define BPP_RX 0x01 /* Read only */ -#define BPP_RW 0x02 /* Read/write */ - -#ifndef __ASSEMBLY__ -/* Contort a phys_addr_t into the right format/bits for a BAT */ -#ifdef CONFIG_PHYS_64BIT -#define BAT_PHYS_ADDR(x) ((u32)((x & 0x00000000fffe0000ULL) | \ - ((x & 0x0000000e00000000ULL) >> 24) | \ - ((x & 0x0000000100000000ULL) >> 30))) -#else -#define BAT_PHYS_ADDR(x) (x) -#endif - -struct ppc_bat { - u32 batu; - u32 batl; -}; -#endif /* !__ASSEMBLY__ */ - -/* - * Hash table - */ - -/* Values for PP (assumes Ks=0, Kp=1) */ -#define PP_RWXX 0 /* Supervisor read/write, User none */ -#define PP_RWRX 1 /* Supervisor read/write, User read */ -#define PP_RWRW 2 /* Supervisor read/write, User read/write */ -#define PP_RXRX 3 /* Supervisor read, User read */ - -#ifndef __ASSEMBLY__ - -/* - * Hardware Page Table Entry - * Note that the xpn and x bitfields are used only by processors that - * support extended addressing; otherwise, those bits are reserved. 
- */ -struct hash_pte { - unsigned long v:1; /* Entry is valid */ - unsigned long vsid:24; /* Virtual segment identifier */ - unsigned long h:1; /* Hash algorithm indicator */ - unsigned long api:6; /* Abbreviated page index */ - unsigned long rpn:20; /* Real (physical) page number */ - unsigned long xpn:3; /* Real page number bits 0-2, optional */ - unsigned long r:1; /* Referenced */ - unsigned long c:1; /* Changed */ - unsigned long w:1; /* Write-thru cache mode */ - unsigned long i:1; /* Cache inhibited */ - unsigned long m:1; /* Memory coherence */ - unsigned long g:1; /* Guarded */ - unsigned long x:1; /* Real page number bit 3, optional */ - unsigned long pp:2; /* Page protection */ -}; - -typedef struct { - unsigned long id; - unsigned long vdso_base; -} mm_context_t; - -#endif /* !__ASSEMBLY__ */ - -/* We happily ignore the smaller BATs on 601, we don't actually use - * those definitions on hash32 at the moment anyway - */ -#define mmu_virtual_psize MMU_PAGE_4K -#define mmu_linear_psize MMU_PAGE_256M - -#endif /* _ASM_POWERPC_MMU_HASH32_H_ */ diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h deleted file mode 100644 index 0cea4807e26f..000000000000 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ /dev/null @@ -1,616 +0,0 @@ -#ifndef _ASM_POWERPC_MMU_HASH64_H_ -#define _ASM_POWERPC_MMU_HASH64_H_ -/* - * PowerPC64 memory management structures - * - * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com> - * PPC64 rework. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include - -/* - * This is necessary to get the definition of PGTABLE_RANGE which we - * need for various slices related matters. Note that this isn't the - * complete pgtable.h but only a portion of it. 
- */ -#include -#include -#include - -/* - * SLB - */ - -#define SLB_NUM_BOLTED 3 -#define SLB_CACHE_ENTRIES 8 -#define SLB_MIN_SIZE 32 - -/* Bits in the SLB ESID word */ -#define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */ - -/* Bits in the SLB VSID word */ -#define SLB_VSID_SHIFT 12 -#define SLB_VSID_SHIFT_1T 24 -#define SLB_VSID_SSIZE_SHIFT 62 -#define SLB_VSID_B ASM_CONST(0xc000000000000000) -#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) -#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) -#define SLB_VSID_KS ASM_CONST(0x0000000000000800) -#define SLB_VSID_KP ASM_CONST(0x0000000000000400) -#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ -#define SLB_VSID_L ASM_CONST(0x0000000000000100) -#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ -#define SLB_VSID_LP ASM_CONST(0x0000000000000030) -#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000) -#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010) -#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020) -#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030) -#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP) - -#define SLB_VSID_KERNEL (SLB_VSID_KP) -#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) - -#define SLBIE_C (0x08000000) -#define SLBIE_SSIZE_SHIFT 25 - -/* - * Hash table - */ - -#define HPTES_PER_GROUP 8 - -#define HPTE_V_SSIZE_SHIFT 62 -#define HPTE_V_AVPN_SHIFT 7 -#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) -#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) -#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) -#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) -#define HPTE_V_LOCK ASM_CONST(0x0000000000000008) -#define HPTE_V_LARGE ASM_CONST(0x0000000000000004) -#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) -#define HPTE_V_VALID ASM_CONST(0x0000000000000001) - -#define HPTE_R_PP0 ASM_CONST(0x8000000000000000) -#define HPTE_R_TS ASM_CONST(0x4000000000000000) -#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) -#define HPTE_R_RPN_SHIFT 12 -#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) -#define HPTE_R_PP ASM_CONST(0x0000000000000003) -#define HPTE_R_N ASM_CONST(0x0000000000000004) -#define HPTE_R_G ASM_CONST(0x0000000000000008) -#define HPTE_R_M ASM_CONST(0x0000000000000010) -#define HPTE_R_I ASM_CONST(0x0000000000000020) -#define HPTE_R_W ASM_CONST(0x0000000000000040) -#define HPTE_R_WIMG ASM_CONST(0x0000000000000078) -#define HPTE_R_C ASM_CONST(0x0000000000000080) -#define HPTE_R_R ASM_CONST(0x0000000000000100) -#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) - -#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) -#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) - -/* Values for PP (assumes Ks=0, Kp=1) */ -#define PP_RWXX 0 /* Supervisor read/write, User none */ -#define PP_RWRX 1 /* Supervisor read/write, User read */ -#define PP_RWRW 2 /* Supervisor read/write, User read/write */ -#define PP_RXRX 3 /* Supervisor read, User read */ -#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ - -/* Fields for tlbiel instruction in architecture 2.06 */ -#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */ -#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */ -#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */ -#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */ -#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. 
*/ -#define TLBIEL_INVAL_SET_SHIFT 12 - -#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ -#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */ -#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */ - -#ifndef __ASSEMBLY__ - -struct hash_pte { - __be64 v; - __be64 r; -}; - -extern struct hash_pte *htab_address; -extern unsigned long htab_size_bytes; -extern unsigned long htab_hash_mask; - -/* - * Page size definition - * - * shift : is the "PAGE_SHIFT" value for that page size - * sllp : is a bit mask with the value of SLB L || LP to be or'ed - * directly to a slbmte "vsid" value - * penc : is the HPTE encoding mask for the "LP" field: - * - */ -struct mmu_psize_def -{ - unsigned int shift; /* number of bits */ - int penc[MMU_PAGE_COUNT]; /* HPTE encoding */ - unsigned int tlbiel; /* tlbiel supported for that page size */ - unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ - unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ -}; -extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; - -static inline int shift_to_mmu_psize(unsigned int shift) -{ - int psize; - - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) - if (mmu_psize_defs[psize].shift == shift) - return psize; - return -1; -} - -static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) -{ - if (mmu_psize_defs[mmu_psize].shift) - return mmu_psize_defs[mmu_psize].shift; - BUG(); -} - -#endif /* __ASSEMBLY__ */ - -/* - * Segment sizes. - * These are the values used by hardware in the B field of - * SLB entries and the first dword of MMU hashtable entries. - * The B field is 2 bits; the values 2 and 3 are unused and reserved. - */ -#define MMU_SEGSIZE_256M 0 -#define MMU_SEGSIZE_1T 1 - -/* - * encode page number shift. - * in order to fit the 78 bit va in a 64 bit variable we shift the va by - * 12 bits. This enable us to address upto 76 bit va. - * For hpt hash from a va we can ignore the page size bits of va and for - * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure - * we work in all cases including 4k page size. - */ -#define VPN_SHIFT 12 - -/* - * HPTE Large Page (LP) details - */ -#define LP_SHIFT 12 -#define LP_BITS 8 -#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) - -#ifndef __ASSEMBLY__ - -static inline int slb_vsid_shift(int ssize) -{ - if (ssize == MMU_SEGSIZE_256M) - return SLB_VSID_SHIFT; - return SLB_VSID_SHIFT_1T; -} - -static inline int segment_shift(int ssize) -{ - if (ssize == MMU_SEGSIZE_256M) - return SID_SHIFT; - return SID_SHIFT_1T; -} - -/* - * The current system page and segment sizes - */ -extern int mmu_linear_psize; -extern int mmu_virtual_psize; -extern int mmu_vmalloc_psize; -extern int mmu_vmemmap_psize; -extern int mmu_io_psize; -extern int mmu_kernel_ssize; -extern int mmu_highuser_ssize; -extern u16 mmu_slb_size; -extern unsigned long tce_alloc_start, tce_alloc_end; - -/* - * If the processor supports 64k normal pages but not 64k cache - * inhibited pages, we have to be prepared to switch processes - * to use 4k pages when they create cache-inhibited mappings. - * If this is the case, mmu_ci_restrictions will be set to 1. - */ -extern int mmu_ci_restrictions; - -/* - * This computes the AVPN and B fields of the first dword of a HPTE, - * for use when we want to match an existing PTE. The bottom 7 bits - * of the returned value are zero. 
- */ -static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, - int ssize) -{ - unsigned long v; - /* - * The AVA field omits the low-order 23 bits of the 78 bits VA. - * These bits are not needed in the PTE, because the - * low-order b of these bits are part of the byte offset - * into the virtual page and, if b < 23, the high-order - * 23-b of these bits are always used in selecting the - * PTEGs to be searched - */ - v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); - v <<= HPTE_V_AVPN_SHIFT; - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; - return v; -} - -/* - * This function sets the AVPN and L fields of the HPTE appropriately - * using the base page size and actual page size. - */ -static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, - int actual_psize, int ssize) -{ - unsigned long v; - v = hpte_encode_avpn(vpn, base_psize, ssize); - if (actual_psize != MMU_PAGE_4K) - v |= HPTE_V_LARGE; - return v; -} - -/* - * This function sets the ARPN, and LP fields of the HPTE appropriately - * for the page size. We assume the pa is already "clean" that is properly - * aligned for the requested page size - */ -static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize) -{ - /* A 4K page needs no special encoding */ - if (actual_psize == MMU_PAGE_4K) - return pa & HPTE_R_RPN; - else { - unsigned int penc = mmu_psize_defs[base_psize].penc[actual_psize]; - unsigned int shift = mmu_psize_defs[actual_psize].shift; - return (pa & ~((1ul << shift) - 1)) | (penc << LP_SHIFT); - } -} - -/* - * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size. - */ -static inline unsigned long hpt_vpn(unsigned long ea, - unsigned long vsid, int ssize) -{ - unsigned long mask; - int s_shift = segment_shift(ssize); - - mask = (1ul << (s_shift - VPN_SHIFT)) - 1; - return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask); -} - -/* - * This hashes a virtual address - */ -static inline unsigned long hpt_hash(unsigned long vpn, - unsigned int shift, int ssize) -{ - int mask; - unsigned long hash, vsid; - - /* VPN_SHIFT can be atmost 12 */ - if (ssize == MMU_SEGSIZE_256M) { - mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1; - hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^ - ((vpn & mask) >> (shift - VPN_SHIFT)); - } else { - mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1; - vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT); - hash = vsid ^ (vsid << 25) ^ - ((vpn & mask) >> (shift - VPN_SHIFT)) ; - } - return hash & 0x7fffffffffUL; -} - -#define HPTE_LOCAL_UPDATE 0x1 -#define HPTE_NOHPTE_UPDATE 0x2 - -extern int __hash_page_4K(unsigned long ea, unsigned long access, - unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned long flags, int ssize, int subpage_prot); -extern int __hash_page_64K(unsigned long ea, unsigned long access, - unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned long flags, int ssize); -struct mm_struct; -unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); -extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, - unsigned long access, unsigned long trap, - unsigned long flags); -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, - unsigned long dsisr); -int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, unsigned long flags, - int ssize, unsigned int shift, unsigned int mmu_psize); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern int __hash_page_thp(unsigned 
long ea, unsigned long access, - unsigned long vsid, pmd_t *pmdp, unsigned long trap, - unsigned long flags, int ssize, unsigned int psize); -#else -static inline int __hash_page_thp(unsigned long ea, unsigned long access, - unsigned long vsid, pmd_t *pmdp, - unsigned long trap, unsigned long flags, - int ssize, unsigned int psize) -{ - BUG(); - return -1; -} -#endif -extern void hash_failure_debug(unsigned long ea, unsigned long access, - unsigned long vsid, unsigned long trap, - int ssize, int psize, int lpsize, - unsigned long pte); -extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, - unsigned long pstart, unsigned long prot, - int psize, int ssize); -int htab_remove_mapping(unsigned long vstart, unsigned long vend, - int psize, int ssize); -extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); -extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); - -extern void hpte_init_native(void); -extern void hpte_init_lpar(void); -extern void hpte_init_beat(void); -extern void hpte_init_beat_v3(void); - -extern void slb_initialize(void); -extern void slb_flush_and_rebolt(void); - -extern void slb_vmalloc_update(void); -extern void slb_set_size(u16 size); -#endif /* __ASSEMBLY__ */ - -/* - * VSID allocation (256MB segment) - * - * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated - * from mmu context id and effective segment id of the address. - * - * For user processes max context id is limited to ((1ul << 19) - 5) - * for kernel space, we use the top 4 context ids to map address as below - * NOTE: each context only support 64TB now. - * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] - * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] - * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] - * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] - * - * The proto-VSIDs are then scrambled into real VSIDs with the - * multiplicative hash: - * - * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS - * - * VSID_MULTIPLIER is prime, so in particular it is - * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. - * Because the modulus is 2^n-1 we can compute it efficiently without - * a divide or extra multiply (see below). The scramble function gives - * robust scattering in the hash table (at least based on some initial - * results). - * - * We also consider VSID 0 special. We use VSID 0 for slb entries mapping - * bad address. This enables us to consolidate bad address handling in - * hash_page. - * - * We also need to avoid the last segment of the last context, because that - * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 - * because of the modulo operation in vsid scramble. But the vmemmap - * (which is what uses region 0xf) will never be close to 64TB in size - * (it's 56 bytes per page of system memory). - */ - -#define CONTEXT_BITS 19 -#define ESID_BITS 18 -#define ESID_BITS_1T 6 - -/* - * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments - * available for user + kernel mapping. The top 4 contexts are used for - * kernel mapping. Each segment contains 2^28 bytes. Each - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts - * (19 == 37 + 28 - 46). - */ -#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) - -/* - * This should be computed such that protovosid * vsid_mulitplier - * doesn't overflow 64 bits. 
It should also be co-prime to vsid_modulus - */ -#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) -#define VSID_MODULUS_256M ((1UL<= \ - * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ - * the bit clear, r3 already has the answer we want, if it \ - * doesn't, the answer is the low 36 bits of r3+1. So in all \ - * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\ - addi rx,rt,1; \ - srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ - add rt,rt,rx - -/* 4 bits per slice and we have one slice per 1TB */ -#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) - -#ifndef __ASSEMBLY__ - -#ifdef CONFIG_PPC_SUBPAGE_PROT -/* - * For the sub-page protection option, we extend the PGD with one of - * these. Basically we have a 3-level tree, with the top level being - * the protptrs array. To optimize speed and memory consumption when - * only addresses < 4GB are being protected, pointers to the first - * four pages of sub-page protection words are stored in the low_prot - * array. - * Each page of sub-page protection words protects 1GB (4 bytes - * protects 64k). For the 3-level tree, each page of pointers then - * protects 8TB. - */ -struct subpage_prot_table { - unsigned long maxaddr; /* only addresses < this are protected */ - unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; - unsigned int *low_prot[4]; -}; - -#define SBP_L1_BITS (PAGE_SHIFT - 2) -#define SBP_L2_BITS (PAGE_SHIFT - 3) -#define SBP_L1_COUNT (1 << SBP_L1_BITS) -#define SBP_L2_COUNT (1 << SBP_L2_BITS) -#define SBP_L2_SHIFT (PAGE_SHIFT + SBP_L1_BITS) -#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) - -extern void subpage_prot_free(struct mm_struct *mm); -extern void subpage_prot_init_new_context(struct mm_struct *mm); -#else -static inline void subpage_prot_free(struct mm_struct *mm) {} -static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } -#endif /* CONFIG_PPC_SUBPAGE_PROT */ - -typedef unsigned long mm_context_id_t; -struct spinlock; - -typedef struct { - mm_context_id_t id; - u16 user_psize; /* page size index */ - -#ifdef CONFIG_PPC_MM_SLICES - u64 low_slices_psize; /* SLB page size encodings */ - unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; -#else - u16 sllp; /* SLB page size encoding */ -#endif - unsigned long vdso_base; -#ifdef CONFIG_PPC_SUBPAGE_PROT - struct subpage_prot_table spt; -#endif /* CONFIG_PPC_SUBPAGE_PROT */ -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard acop and cop_pid */ - unsigned long acop; /* mask of enabled coprocessor types */ - unsigned int cop_pid; /* pid value used with coprocessors */ -#endif /* CONFIG_PPC_ICSWX */ -#ifdef CONFIG_PPC_64K_PAGES - /* for 4K PTE fragment support */ - void *pte_frag; -#endif -#ifdef CONFIG_SPAPR_TCE_IOMMU - struct list_head iommu_group_mem_list; -#endif -} mm_context_t; - - -#if 0 -/* - * The code below is equivalent to this function for arguments - * < 2^VSID_BITS, which is all this should ever be called - * with. However gcc is not clever enough to compute the - * modulus (2^n-1) without a second multiply. 
- */ -#define vsid_scramble(protovsid, size) \ - ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size)) - -#else /* 1 */ -#define vsid_scramble(protovsid, size) \ - ({ \ - unsigned long x; \ - x = (protovsid) * VSID_MULTIPLIER_##size; \ - x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \ - (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \ - }) -#endif /* 1 */ - -/* Returns the segment size indicator for a user address */ -static inline int user_segment_size(unsigned long addr) -{ - /* Use 1T segments if possible for addresses >= 1T */ - if (addr >= (1UL << SID_SHIFT_1T)) - return mmu_highuser_ssize; - return MMU_SEGSIZE_256M; -} - -static inline unsigned long get_vsid(unsigned long context, unsigned long ea, - int ssize) -{ - /* - * Bad address. We return VSID 0 for that - */ - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) - return 0; - - if (ssize == MMU_SEGSIZE_256M) - return vsid_scramble((context << ESID_BITS) - | (ea >> SID_SHIFT), 256M); - return vsid_scramble((context << ESID_BITS_1T) - | (ea >> SID_SHIFT_1T), 1T); -} - -/* - * This is only valid for addresses >= PAGE_OFFSET - * - * For kernel space, we use the top 4 context ids to map address as below - * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] - * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] - * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] - * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] - */ -static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) -{ - unsigned long context; - - /* - * kernel take the top 4 context from the available range - */ - context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; - return get_vsid(context, ea, ssize); -} - -unsigned htab_shift_for_mem_size(unsigned long mem_size); - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 54d46504733d..8ca1c983bf6c 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -183,10 +183,10 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) #if defined(CONFIG_PPC_STD_MMU_64) /* 64-bit classic hash table MMU */ -# include +#include #elif defined(CONFIG_PPC_STD_MMU_32) /* 32-bit classic hash table MMU */ -# include +#include #elif defined(CONFIG_40x) /* 40x-style software loaded TLB */ # include diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 5932219f1e79..584e119fa8b0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -15,7 +15,7 @@ #include #include #include -#include +#include /* Entry: r3 = crap, r4 = ptr to cputable entry * diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index cf4fb5429cf1..470ceebd2d23 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -19,7 +19,7 @@ #include #include #include -#include +#include #undef DEBUG diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 55c4d51ea3e2..999106991a76 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 9bf7031a67ff..b9131aa1aedf 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -26,7 +26,7 @@ #include #include #include 
-#include +#include /* #define DEBUG_MMU */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 913cd2198fa6..114edace6cdd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index fb37290a57b4..c7b78d8336b2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 54cf9bc94dad..9c3b76bb69d9 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 89e96b3e0039..039028d3ccb5 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 91700518bbf3..4cb8db05f3e5 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6ee26de9a1de..c613fee0b9f7 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) -- cgit v1.2.3 From ee3b93ebfbed6279f7a329001433c75c50ddfcc9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Mar 2016 12:59:21 +0530 Subject: powerpc/mm: Move hash64 tlbflush code into a new header No code changes. 
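The interface being moved is the hash-MMU TLB batching API. As a hedged sketch of its intended use (editorial illustration, not part of the patch; all names are taken from the header added below):

	arch_enter_lazy_mmu_mode();	/* marks this CPU's ppc64_tlb_batch active */
	/* ... PTE updates queue (vpn, pte) pairs into the per-CPU batch ... */
	arch_leave_lazy_mmu_mode();	/* __flush_tlb_pending() drains any queued entries */

Consumers keep including asm/tlbflush.h, which now pulls in asm/book3s/64/tlbflush-hash.h for CONFIG_PPC_STD_MMU_64 instead of carrying the code inline.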
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 94 ++++++++++++++++++++++ arch/powerpc/include/asm/tlbflush.h | 92 +-------------------- 2 files changed, 95 insertions(+), 91 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/64/tlbflush-hash.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h new file mode 100644 index 000000000000..1b753f96b374 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -0,0 +1,94 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H +#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H + +#define MMU_NO_CONTEXT 0 + +/* + * TLB flushing for 64-bit hash-MMU CPUs + */ + +#include +#include + +#define PPC64_TLB_BATCH_NR 192 + +struct ppc64_tlb_batch { + int active; + unsigned long index; + struct mm_struct *mm; + real_pte_t pte[PPC64_TLB_BATCH_NR]; + unsigned long vpn[PPC64_TLB_BATCH_NR]; + unsigned int psize; + int ssize; +}; +DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); + +extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE + +static inline void arch_enter_lazy_mmu_mode(void) +{ + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + + batch->active = 1; +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + + if (batch->index) + __flush_tlb_pending(batch); + batch->active = 0; +} + +#define arch_flush_lazy_mmu_mode() do {} while (0) + + +extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, + int ssize, unsigned long flags); +extern void flush_hash_range(unsigned long number, int local); +extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags); + +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ +} + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ +} + +/* Private function for use by PCI IO mapping code */ +extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, + unsigned long end); +extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr); +#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */ diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 23d351ca0303..9f77f85e3e99 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -78,97 +78,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) } #elif defined(CONFIG_PPC_STD_MMU_64) - -#define MMU_NO_CONTEXT 0 - -/* - * TLB flushing for 64-bit hash-MMU CPUs - */ - -#include -#include - -#define PPC64_TLB_BATCH_NR 192 - -struct ppc64_tlb_batch { - int active; - unsigned long index; - struct mm_struct *mm; - real_pte_t pte[PPC64_TLB_BATCH_NR]; - unsigned long vpn[PPC64_TLB_BATCH_NR]; - unsigned int psize; - 
int ssize; -}; -DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); - -extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); - -#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE - -static inline void arch_enter_lazy_mmu_mode(void) -{ - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); - - batch->active = 1; -} - -static inline void arch_leave_lazy_mmu_mode(void) -{ - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); - - if (batch->index) - __flush_tlb_pending(batch); - batch->active = 0; -} - -#define arch_flush_lazy_mmu_mode() do {} while (0) - - -extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, unsigned long flags); -extern void flush_hash_range(unsigned long number, int local); -extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, - unsigned long flags); - -static inline void local_flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ -} - -/* Private function for use by PCI IO mapping code */ -extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end); -extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr); +#include #else #error Unsupported MMU type #endif -- cgit v1.2.3 From 11dfbf588ae697bc5362c24294c2605f09c2d3d4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:21 +0200 Subject: powerpc: mark xer clobbered in csum_add() addc uses the carry bit (XER[CA]), so xer must be listed as clobbered in csum_add(); without it the compiler may assume the carry field is preserved across the asm block. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index e8d9ef4755a4..d2ca07bb8837 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -141,7 +141,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #else asm("addc %0,%0,%1;" "addze %0,%0;" - : "+r" (csum) : "r" (addend)); + : "+r" (csum) : "r" (addend) : "xer"); return csum; #endif } -- cgit v1.2.3 From 03bc8b0fc87616c75c6dd060a2191e7fc8faacb6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:23 +0200 Subject: powerpc32: checksum_wrappers_64 becomes checksum_wrappers The powerpc64 checksum wrappers add csum_and_copy_to_user(), which is otherwise implemented in include/net/checksum.h using csum_partial() followed by copy_to_user(). Those two wrapper functions are also applicable to powerpc32, as they are based on csum_partial_copy_generic(), which also exists on powerpc32. This patch renames arch/powerpc/lib/checksum_wrappers_64.c to arch/powerpc/lib/checksum_wrappers.c and builds it unconditionally, instead of only for CONFIG_PPC64. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 9 --- arch/powerpc/lib/Makefile | 3 +- 
arch/powerpc/lib/checksum_wrappers.c | 102 ++++++++++++++++++++++++++++++++ arch/powerpc/lib/checksum_wrappers_64.c | 102 -------------------------------- 4 files changed, 103 insertions(+), 113 deletions(-) create mode 100644 arch/powerpc/lib/checksum_wrappers.c delete mode 100644 arch/powerpc/lib/checksum_wrappers_64.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index d2ca07bb8837..afa6722cb7d2 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -47,21 +47,12 @@ extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err, int *dst_err); -#ifdef __powerpc64__ #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr); #define HAVE_CSUM_COPY_USER extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum sum, int *err_ptr); -#else -/* - * the same as csum_partial, but copies from src to dst while it - * checksums. - */ -#define csum_partial_copy_from_user(src, dst, len, sum, errp) \ - csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL) -#endif #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index a47e14277fd8..e46b06822bea 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -22,8 +22,7 @@ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o ifeq ($(CONFIG_GENERIC_CSUM),) -obj-y += checksum_$(CONFIG_WORD_SIZE).o -obj-$(CONFIG_PPC64) += checksum_wrappers_64.o +obj-y += checksum_$(CONFIG_WORD_SIZE).o checksum_wrappers.o endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c new file mode 100644 index 000000000000..08e3a3356c40 --- /dev/null +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2010 + * + * Author: Anton Blanchard + */ +#include +#include +#include +#include +#include + +__wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { + *err_ptr = -EFAULT; + csum = (__force unsigned int)sum; + goto out; + } + + csum = csum_partial_copy_generic((void __force *)src, dst, + len, sum, err_ptr, NULL); + + if (unlikely(*err_ptr)) { + int missing = __copy_from_user(dst, src, len); + + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } else { + *err_ptr = 0; + } + + csum = csum_partial(dst, len, sum); + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, + __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + goto out; + } + + csum = csum_partial_copy_generic(src, (void __force *)dst, + len, sum, NULL, err_ptr); + + if (unlikely(*err_ptr)) { + csum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + } + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c deleted file mode 100644 index 08e3a3356c40..000000000000 --- a/arch/powerpc/lib/checksum_wrappers_64.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2010 - * - * Author: Anton Blanchard - */ -#include -#include -#include -#include -#include - -__wsum csum_and_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) -{ - unsigned int csum; - - might_sleep(); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { - *err_ptr = -EFAULT; - csum = (__force unsigned int)sum; - goto out; - } - - csum = csum_partial_copy_generic((void __force *)src, dst, - len, sum, err_ptr, NULL); - - if (unlikely(*err_ptr)) { - int missing = __copy_from_user(dst, src, len); - - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } else { - *err_ptr = 0; - } - - csum = csum_partial(dst, len, sum); - } - -out: - return (__force __wsum)csum; -} -EXPORT_SYMBOL(csum_and_copy_from_user); - -__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, - __wsum sum, int *err_ptr) -{ - unsigned int csum; - - might_sleep(); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - goto out; - } - - csum = csum_partial_copy_generic(src, (void __force *)dst, - len, sum, NULL, err_ptr); - - if (unlikely(*err_ptr)) { - csum = csum_partial(src, len, sum); - - if (copy_to_user(dst, src, len)) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - } - } - -out: - return (__force __wsum)csum; -} -EXPORT_SYMBOL(csum_and_copy_to_user); -- cgit v1.2.3 From 37e08cad8f177f7bf6226a9b3724234ac3d3c81d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:25 +0200 Subject: powerpc: inline ip_fast_csum() In several architectures, ip_fast_csum() is inlined There are functions like ip_send_check() which do nothing much more than calling ip_fast_csum(). Inlining ip_fast_csum() allows the compiler to optimise better Suggested-by: Eric Dumazet Signed-off-by: Christophe Leroy [scottwood: whitespace and cast fixes] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 45 +++++++++++++++++++++++++++++++------ arch/powerpc/lib/checksum_32.S | 21 ----------------- arch/powerpc/lib/checksum_64.S | 27 ---------------------- arch/powerpc/lib/ppc_ksyms.c | 1 - 4 files changed, 38 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index afa6722cb7d2..1778f75bf769 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -9,16 +9,9 @@ * 2 of the License, or (at your option) any later version. */ -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. ihl is the number - * of 32-bit words and is always >= 5. - */ #ifdef CONFIG_GENERIC_CSUM #include #else -extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); - /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) @@ -137,6 +130,44 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #endif } +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. ihl is the number + * of 32-bit words and is always >= 5. 
+ */ +static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl) +{ + const u32 *ptr = (const u32 *)iph + 1; +#ifdef __powerpc64__ + unsigned int i; + u64 s = *(const u32 *)iph; + + for (i = 0; i < ihl - 1; i++, ptr++) + s += *ptr; + s += (s >> 32); + return (__force __wsum)s; +#else + __wsum sum, tmp; + + asm("mtctr %3;" + "addc %0,%4,%5;" + "1: lwzu %1, 4(%2);" + "adde %0,%0,%1;" + "bdnz 1b;" + "addze %0,%0;" + : "=r" (sum), "=r" (tmp), "+b" (ptr) + : "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr) + : "ctr", "xer", "memory"); + + return sum; +#endif +} + +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return csum_fold(ip_fast_csum_nofold(iph, ihl)); +} + #endif #endif /* __KERNEL__ */ #endif diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 6d67e057f15e..0d7eba31d93f 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -19,27 +19,6 @@ .text -/* - * ip_fast_csum(buf, len) -- Optimized for IP header - * len is in words and is always >= 5. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index f3ef35436612..f53f4abb9282 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -17,33 +17,6 @@ #include #include -/* - * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header - * len is in words and is always >= 5. - * - * In practice len == 5, but this is not guaranteed. So this code does not - * attempt to use doubleword instructions. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rldicl r4,r0,32,0 /* fold two 32-bit halves together */ - add r0,r0,r4 - srdi r0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - /* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). 
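For reference, the usual caller pattern for ip_fast_csum() mirrors ip_send_check(): the IPv4 check field is zeroed and then recomputed over the header. A minimal sketch (assuming a valid struct iphdr *iph):

	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

With ip_fast_csum() now a static inline, sequences like this can be folded into the caller instead of branching into checksum_32.S/checksum_64.S.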
diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c index f5e427e7bc0d..8cd5c0b2986c 100644 --- a/arch/powerpc/lib/ppc_ksyms.c +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -19,7 +19,6 @@ EXPORT_SYMBOL(strncmp); #ifndef CONFIG_GENERIC_CSUM EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); #endif EXPORT_SYMBOL(__copy_tofrom_user); -- cgit v1.2.3 From 5a8847c83ce6072d6fdf0d15d9aa060c0b83537f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:34 +0200 Subject: powerpc: simplify csum_add(a, b) in case a or b is constant 0 Simplify csum_add(a, b) when a or b is a constant 0, so the compiler can elide the addition entirely. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1778f75bf769..74cd8d82628a 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -119,7 +119,13 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) { #ifdef __powerpc64__ u64 res = (__force u64)csum; +#endif + if (__builtin_constant_p(csum) && csum == 0) + return addend; + if (__builtin_constant_p(addend) && addend == 0) + return csum; +#ifdef __powerpc64__ res += (__force u64)addend; return (__force __wsum)((u32)res + (res >> 32)); #else -- cgit v1.2.3 From ebb9d30a6a74f4ced5b7d35446ebb6362a724393 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Thu, 24 Dec 2015 08:39:57 +0800 Subject: powerpc/mm: any thread in one core can be the first to setup TLB1 On e6500, in the case of cpu hotplug, either thread in a core may be the first to initialize TLB1. The subsequent threads must not set it up again. The code is derived from a review comment by Scott Wood. Signed-off-by: Chenhui Zhao Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputhreads.h | 8 ++++++++ arch/powerpc/mm/tlb_nohash.c | 4 +--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index ba42e46ea58e..ea9623147b87 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -94,6 +94,14 @@ static inline int cpu_last_thread_sibling(int cpu) return cpu | (threads_per_core - 1); } +static inline u32 get_tensr(void) +{ +#ifdef CONFIG_BOOKE + if (cpu_has_feature(CPU_FTR_SMT)) + return mfspr(SPRN_TENSR); +#endif + return 1; +} #endif /* _ASM_POWERPC_CPUTHREADS_H */ diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bb04e4df3100..f4668488512c 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -640,9 +640,7 @@ static void early_init_this_mmu(void) * transient mapping would cause problems. */ #ifdef CONFIG_SMP - if (cpu != boot_cpuid && - (cpu != cpu_first_thread_sibling(cpu) || - cpu == cpu_first_thread_sibling(boot_cpuid))) + if (hweight32(get_tensr()) > 1) map = false; #endif -- cgit v1.2.3 From e7affb1dba0e9068aeb3978e858f39753e0dc20a Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:58 +0800 Subject: powerpc/cache: add cache flush operation for various e500 The various e500 cores have different cache architectures, so they need different cache flush operations. Therefore, add a callback function cpu_flush_caches to the struct cpu_spec. The cache flush operation for the specific kind of e500 is selected at init time.
The callback function will flush all caches inside the current cpu. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cacheflush.h | 2 - arch/powerpc/include/asm/cputable.h | 8 +++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/cpu_setup_fsl_booke.S | 112 ++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 4 ++ arch/powerpc/kernel/head_fsl_booke.S | 74 -------------------- arch/powerpc/platforms/85xx/smp.c | 5 +- 7 files changed, 128 insertions(+), 78 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 6229e6b6037b..47add2e2364a 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,8 +30,6 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -extern void __flush_disable_L1(void); - extern void flush_icache_range(unsigned long, unsigned long); extern void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 94ace9b4c4e1..df4fb5faba43 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -43,6 +43,11 @@ extern int machine_check_e500(struct pt_regs *regs); extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); +extern void cpu_down_flush_e500v2(void); +extern void cpu_down_flush_e500mc(void); +extern void cpu_down_flush_e5500(void); +extern void cpu_down_flush_e6500(void); + /* NOTE WELL: Update identify_cpu() if fields are added or removed! */ struct cpu_spec { /* CPU is matched via (PVR & pvr_mask) == pvr_value */ @@ -59,6 +64,9 @@ struct cpu_spec { unsigned int icache_bsize; unsigned int dcache_bsize; + /* flush caches inside the current cpu */ + void (*cpu_down_flush)(void); + /* number of performance monitor counters */ unsigned int num_pmcs; enum powerpc_pmc_type pmc_type; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 10d5eab19458..0d0183d3180a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -376,6 +376,7 @@ int main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); + DEFINE(CPU_DOWN_FLUSH, offsetof(struct cpu_spec, cpu_down_flush)); DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index dddba3e94260..462aed9bcf51 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -13,11 +13,13 @@ * */ +#include #include #include #include #include #include +#include _GLOBAL(__e500_icache_setup) mfspr r0, SPRN_L1CSR1 @@ -233,3 +235,113 @@ _GLOBAL(__setup_cpu_e5500) mtlr r5 blr #endif + +/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */ +_GLOBAL(flush_dcache_L1) + mfmsr r10 + wrteei 0 + + mfspr r3,SPRN_L1CFG0 + rlwinm r5,r3,9,3 /* Extract cache block size */ + twlgti r5,1 /* Only 32 and 64 byte cache blocks + * are currently defined. 
+ */ + li r4,32 + subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - + * log2(number of ways) + */ + slw r5,r4,r5 /* r5 = cache block size */ + + rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ + mulli r7,r7,13 /* An 8-way cache will require 13 + * loads per set. + */ + slw r7,r7,r6 + + /* save off HID0 and set DCFA */ + mfspr r8,SPRN_HID0 + ori r9,r8,HID0_DCFA@l + mtspr SPRN_HID0,r9 + isync + + LOAD_REG_IMMEDIATE(r6, KERNELBASE) + mr r4, r6 + mtctr r7 + +1: lwz r3,0(r4) /* Load... */ + add r4,r4,r5 + bdnz 1b + + msync + mr r4, r6 + mtctr r7 + +1: dcbf 0,r4 /* ...and flush. */ + add r4,r4,r5 + bdnz 1b + + /* restore HID0 */ + mtspr SPRN_HID0,r8 + isync + + wrtee r10 + + blr + +has_L2_cache: + /* skip L2 cache on P2040/P2040E as they have no L2 cache */ + mfspr r3, SPRN_SVR + /* shift right by 8 bits and clear E bit of SVR */ + rlwinm r4, r3, 24, ~0x800 + + lis r3, SVR_P2040@h + ori r3, r3, SVR_P2040@l + cmpw r4, r3 + beq 1f + + li r3, 1 + blr +1: + li r3, 0 + blr + +/* flush backside L2 cache */ +flush_backside_L2_cache: + mflr r10 + bl has_L2_cache + mtlr r10 + cmpwi r3, 0 + beq 2f + + /* Flush the L2 cache */ + mfspr r3, SPRN_L2CSR0 + ori r3, r3, L2CSR0_L2FL@l + msync + isync + mtspr SPRN_L2CSR0,r3 + isync + + /* check if it is complete */ +1: mfspr r3,SPRN_L2CSR0 + andi. r3, r3, L2CSR0_L2FL@l + bne 1b +2: + blr + +_GLOBAL(cpu_down_flush_e500v2) + mflr r0 + bl flush_dcache_L1 + mtlr r0 + blr + +_GLOBAL(cpu_down_flush_e500mc) +_GLOBAL(cpu_down_flush_e5500) + mflr r0 + bl flush_dcache_L1 + bl flush_backside_L2_cache + mtlr r0 + blr + +/* L1 Data Cache of e6500 contains no modified data, no flush is required */ +_GLOBAL(cpu_down_flush_e6500) + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index be4d73053bed..6c662b8de90d 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2050,6 +2050,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", + .cpu_down_flush = cpu_down_flush_e500v2, }, #else { /* e500mc */ @@ -2069,6 +2070,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", + .cpu_down_flush = cpu_down_flush_e500mc, }, #endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ @@ -2093,6 +2095,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif .machine_check = machine_check_e500mc, .platform = "ppce5500", + .cpu_down_flush = cpu_down_flush_e5500, }, { /* e6500 */ .pvr_mask = 0xffff0000, @@ -2115,6 +2118,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif .machine_check = machine_check_e500mc, .platform = "ppce6500", + .cpu_down_flush = cpu_down_flush_e6500, }, #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f705171b924b..3bfa3150911f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1037,80 +1037,6 @@ _GLOBAL(set_context) isync /* Force context change */ blr -_GLOBAL(flush_dcache_L1) - mfspr r3,SPRN_L1CFG0 - - rlwinm r5,r3,9,3 /* Extract cache block size */ - twlgti r5,1 /* Only 32 and 64 byte cache blocks - * are currently defined. 
- */ - li r4,32 - subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - - * log2(number of ways) - */ - slw r5,r4,r5 /* r5 = cache block size */ - - rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ - mulli r7,r7,13 /* An 8-way cache will require 13 - * loads per set. - */ - slw r7,r7,r6 - - /* save off HID0 and set DCFA */ - mfspr r8,SPRN_HID0 - ori r9,r8,HID0_DCFA@l - mtspr SPRN_HID0,r9 - isync - - lis r4,KERNELBASE@h - mtctr r7 - -1: lwz r3,0(r4) /* Load... */ - add r4,r4,r5 - bdnz 1b - - msync - lis r4,KERNELBASE@h - mtctr r7 - -1: dcbf 0,r4 /* ...and flush. */ - add r4,r4,r5 - bdnz 1b - - /* restore HID0 */ - mtspr SPRN_HID0,r8 - isync - - blr - -/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */ -_GLOBAL(__flush_disable_L1) - mflr r10 - bl flush_dcache_L1 /* Flush L1 d-cache */ - mtlr r10 - - mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */ - li r5, 2 - rlwimi r4, r5, 0, 3 - - msync - isync - mtspr SPRN_L1CSR0, r4 - isync - -1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */ - andi. r4, r4, 2 - bne 1b - - mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */ - li r5, 2 - rlwimi r4, r5, 0, 3 - - mtspr SPRN_L1CSR1, r4 - isync - - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 6b107cea1c08..4a7841616c7f 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -139,7 +139,8 @@ static void smp_85xx_mach_cpu_die(void) mtspr(SPRN_TCR, 0); - __flush_disable_L1(); + cur_cpu_spec->cpu_down_flush(); + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; mtspr(SPRN_HID0, tmp); isync(); @@ -359,7 +360,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) local_irq_disable(); if (secondary) { - __flush_disable_L1(); + cur_cpu_spec->cpu_down_flush(); atomic_inc(&kexec_down_cpus); /* loop forever */ while (1); -- cgit v1.2.3 From d17799f9c10e283cccd4d598d3416e6fac336ab9 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:59 +0800 Subject: powerpc/rcpm: add RCPM driver There is an RCPM (Run Control/Power Management) block in Freescale QorIQ series processors. It performs tasks associated with device run control and power management. The driver implements several features: masking/unmasking IRQs, entering/exiting low power states, freezing the time base, etc.
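As a rough illustration of how a platform is expected to consume the ops table, a hypothetical caller (not part of this patch) might do:

	#include <asm/fsl_pm.h>

	static int example_enter_sleep(void)
	{
		if (!qoriq_pm_ops)	/* no RCPM node was found at init */
			return -ENODEV;

		/* only attempt sleep if the hardware advertises it */
		if (!(qoriq_pm_ops->get_pm_modes() & FSL_PM_SLEEP))
			return -EINVAL;

		return qoriq_pm_ops->plat_enter_sleep();
	}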
Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: remove __KERNEL__ ifdef] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputhreads.h | 1 + arch/powerpc/include/asm/fsl_pm.h | 51 +++++ arch/powerpc/kernel/head_64.S | 19 ++ arch/powerpc/platforms/85xx/Kconfig | 1 + arch/powerpc/platforms/85xx/common.c | 3 + arch/powerpc/sysdev/Kconfig | 5 + arch/powerpc/sysdev/Makefile | 1 + arch/powerpc/sysdev/fsl_rcpm.c | 385 ++++++++++++++++++++++++++++++++++ include/linux/fsl/guts.h | 105 ++++++++++ 9 files changed, 571 insertions(+) create mode 100644 arch/powerpc/include/asm/fsl_pm.h create mode 100644 arch/powerpc/sysdev/fsl_rcpm.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index ea9623147b87..81f5b338c76d 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -103,6 +103,7 @@ static inline u32 get_tensr(void) return 1; } +void book3e_stop_thread(int thread); #endif /* _ASM_POWERPC_CPUTHREADS_H */ diff --git a/arch/powerpc/include/asm/fsl_pm.h b/arch/powerpc/include/asm/fsl_pm.h new file mode 100644 index 000000000000..47df55e36d4f --- /dev/null +++ b/arch/powerpc/include/asm/fsl_pm.h @@ -0,0 +1,51 @@ +/* + * Support Power Management + * + * Copyright 2014-2015 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#ifndef __PPC_FSL_PM_H +#define __PPC_FSL_PM_H + +#define E500_PM_PH10 1 +#define E500_PM_PH15 2 +#define E500_PM_PH20 3 +#define E500_PM_PH30 4 +#define E500_PM_DOZE E500_PM_PH10 +#define E500_PM_NAP E500_PM_PH15 + +#define PLAT_PM_SLEEP 20 +#define PLAT_PM_LPM20 30 + +#define FSL_PM_SLEEP (1 << 0) +#define FSL_PM_DEEP_SLEEP (1 << 1) + +struct fsl_pm_ops { + /* mask pending interrupts to the RCPM from MPIC */ + void (*irq_mask)(int cpu); + + /* unmask pending interrupts to the RCPM from MPIC */ + void (*irq_unmask)(int cpu); + void (*cpu_enter_state)(int cpu, int state); + void (*cpu_exit_state)(int cpu, int state); + void (*cpu_up_prepare)(int cpu); + void (*cpu_die)(int cpu); + int (*plat_enter_sleep)(void); + void (*freeze_time_base)(bool freeze); + + /* keep the power of IP blocks during sleep/deep sleep */ + void (*set_ip_power)(bool enable, u32 mask); + + /* get platform supported power management modes */ + unsigned int (*get_pm_modes)(void); +}; + +extern const struct fsl_pm_ops *qoriq_pm_ops; + +int __init fsl_rcpm_init(void); + +#endif /* __PPC_FSL_PM_H */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1b779560728f..603625368ceb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -181,6 +181,25 @@ exception_marker: #endif #ifdef CONFIG_PPC_BOOK3E +/* + * stop a thread in the same core + * input parameter: + * r3 = the thread physical id + */ +_GLOBAL(book3e_stop_thread) + cmpi 0, r3, 0 + beq 10f + cmpi 0, r3, 1 + beq 10f + /* If the thread id is invalid, just exit. 
*/ + b 13f +10: + li r4, 1 + sld r4, r4, r3 + mtspr SPRN_TENC, r4 +13: + blr + _GLOBAL(fsl_secondary_thread_init) mfspr r4,SPRN_BUCSR diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 97915feffd42..e626461a63bd 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -8,6 +8,7 @@ menuconfig FSL_SOC_BOOKE select FSL_PCI if PCI select SERIAL_8250_EXTENDED if SERIAL_8250 select SERIAL_8250_SHARE_IRQ if SERIAL_8250 + select FSL_CORENET_RCPM if PPC_E500MC default y if FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 949f22c86e61..28720a4ded7b 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -9,11 +9,14 @@ #include #include +#include #include #include #include "mpc85xx.h" +const struct fsl_pm_ops *qoriq_pm_ops; + static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index a19332a38715..52dc165c0efb 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -40,3 +40,8 @@ config SCOM_DEBUGFS config GE_FPGA bool default n + +config FSL_CORENET_RCPM + bool + help + This option enables support for RCPM (Run Control/Power Management). diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index bd6bd729969c..a254824719f1 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_MMIO_NVRAM) += mmio_nvram.o obj-$(CONFIG_FSL_SOC) += fsl_soc.o fsl_mpic_err.o obj-$(CONFIG_FSL_PCI) += fsl_pci.o $(fsl-msi-obj-y) obj-$(CONFIG_FSL_PMC) += fsl_pmc.o +obj-$(CONFIG_FSL_CORENET_RCPM) += fsl_rcpm.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o diff --git a/arch/powerpc/sysdev/fsl_rcpm.c b/arch/powerpc/sysdev/fsl_rcpm.c new file mode 100644 index 000000000000..656d9ca057e5 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_rcpm.c @@ -0,0 +1,385 @@ +/* + * RCPM(Run Control/Power Management) support + * + * Copyright 2012-2015 Freescale Semiconductor Inc. + * + * Author: Chenhui Zhao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include + +#include +#include +#include +#include + +static struct ccsr_rcpm_v1 __iomem *rcpm_v1_regs; +static struct ccsr_rcpm_v2 __iomem *rcpm_v2_regs; +static unsigned int fsl_supported_pm_modes; + +static void rcpm_v1_irq_mask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + setbits32(&rcpm_v1_regs->cpmimr, mask); + setbits32(&rcpm_v1_regs->cpmcimr, mask); + setbits32(&rcpm_v1_regs->cpmmcmr, mask); + setbits32(&rcpm_v1_regs->cpmnmimr, mask); +} + +static void rcpm_v2_irq_mask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + setbits32(&rcpm_v2_regs->tpmimr0, mask); + setbits32(&rcpm_v2_regs->tpmcimr0, mask); + setbits32(&rcpm_v2_regs->tpmmcmr0, mask); + setbits32(&rcpm_v2_regs->tpmnmimr0, mask); +} + +static void rcpm_v1_irq_unmask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + clrbits32(&rcpm_v1_regs->cpmimr, mask); + clrbits32(&rcpm_v1_regs->cpmcimr, mask); + clrbits32(&rcpm_v1_regs->cpmmcmr, mask); + clrbits32(&rcpm_v1_regs->cpmnmimr, mask); +} + +static void rcpm_v2_irq_unmask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + clrbits32(&rcpm_v2_regs->tpmimr0, mask); + clrbits32(&rcpm_v2_regs->tpmcimr0, mask); + clrbits32(&rcpm_v2_regs->tpmmcmr0, mask); + clrbits32(&rcpm_v2_regs->tpmnmimr0, mask); +} + +static void rcpm_v1_set_ip_power(bool enable, u32 mask) +{ + if (enable) + setbits32(&rcpm_v1_regs->ippdexpcr, mask); + else + clrbits32(&rcpm_v1_regs->ippdexpcr, mask); +} + +static void rcpm_v2_set_ip_power(bool enable, u32 mask) +{ + if (enable) + setbits32(&rcpm_v2_regs->ippdexpcr[0], mask); + else + clrbits32(&rcpm_v2_regs->ippdexpcr[0], mask); +} + +static void rcpm_v1_cpu_enter_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + switch (state) { + case E500_PM_PH10: + setbits32(&rcpm_v1_regs->cdozcr, mask); + break; + case E500_PM_PH15: + setbits32(&rcpm_v1_regs->cnapcr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + break; + } +} + +static void rcpm_v2_cpu_enter_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + u32 mask = 1 << cpu_core_index_of_thread(cpu); + + switch (state) { + case E500_PM_PH10: + /* one bit corresponds to one thread for PH10 of 6500 */ + setbits32(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu); + break; + case E500_PM_PH15: + setbits32(&rcpm_v2_regs->pcph15setr, mask); + break; + case E500_PM_PH20: + setbits32(&rcpm_v2_regs->pcph20setr, mask); + break; + case E500_PM_PH30: + setbits32(&rcpm_v2_regs->pcph30setr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + } +} + +static void rcpm_v1_cpu_die(int cpu) +{ + rcpm_v1_cpu_enter_state(cpu, E500_PM_PH15); +} + +#ifdef CONFIG_PPC64 +static void qoriq_disable_thread(int cpu) +{ + int thread = cpu_thread_in_core(cpu); + + book3e_stop_thread(thread); +} +#endif + +static void rcpm_v2_cpu_die(int cpu) +{ +#ifdef CONFIG_PPC64 + int primary; + + if (threads_per_core == 2) { + primary = cpu_first_thread_sibling(cpu); + if (cpu_is_offline(primary) && cpu_is_offline(primary + 1)) { + /* if both threads are offline, put the cpu in PH20 */ + rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20); + } else { + /* if only one thread is offline, disable the thread */ + qoriq_disable_thread(cpu); + } + } +#endif + + if 
(threads_per_core == 1) + rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20); +} + +static void rcpm_v1_cpu_exit_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + switch (state) { + case E500_PM_PH10: + clrbits32(&rcpm_v1_regs->cdozcr, mask); + break; + case E500_PM_PH15: + clrbits32(&rcpm_v1_regs->cnapcr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + break; + } +} + +static void rcpm_v1_cpu_up_prepare(int cpu) +{ + rcpm_v1_cpu_exit_state(cpu, E500_PM_PH15); + rcpm_v1_irq_unmask(cpu); +} + +static void rcpm_v2_cpu_exit_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + u32 mask = 1 << cpu_core_index_of_thread(cpu); + + switch (state) { + case E500_PM_PH10: + setbits32(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu); + break; + case E500_PM_PH15: + setbits32(&rcpm_v2_regs->pcph15clrr, mask); + break; + case E500_PM_PH20: + setbits32(&rcpm_v2_regs->pcph20clrr, mask); + break; + case E500_PM_PH30: + setbits32(&rcpm_v2_regs->pcph30clrr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + } +} + +static void rcpm_v2_cpu_up_prepare(int cpu) +{ + rcpm_v2_cpu_exit_state(cpu, E500_PM_PH20); + rcpm_v2_irq_unmask(cpu); +} + +static int rcpm_v1_plat_enter_state(int state) +{ + u32 *pmcsr_reg = &rcpm_v1_regs->powmgtcsr; + int ret = 0; + int result; + + switch (state) { + case PLAT_PM_SLEEP: + setbits32(pmcsr_reg, RCPM_POWMGTCSR_SLP); + + /* Upon resume, wait for RCPM_POWMGTCSR_SLP bit to be clear. */ + result = spin_event_timeout( + !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_SLP), 10000, 10); + if (!result) { + pr_err("timeout waiting for SLP bit to be cleared\n"); + ret = -ETIMEDOUT; + } + break; + default: + pr_warn("Unknown platform PM state (%d)", state); + ret = -EINVAL; + } + + return ret; +} + +static int rcpm_v2_plat_enter_state(int state) +{ + u32 *pmcsr_reg = &rcpm_v2_regs->powmgtcsr; + int ret = 0; + int result; + + switch (state) { + case PLAT_PM_LPM20: + /* clear previous LPM20 status */ + setbits32(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST); + /* enter LPM20 status */ + setbits32(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ); + + /* At this point, the device is in LPM20 status. */ + + /* resume ... 
*/ + result = spin_event_timeout( + !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_LPM20_ST), 10000, 10); + if (!result) { + pr_err("timeout waiting for LPM20 bit to be cleared\n"); + ret = -ETIMEDOUT; + } + break; + default: + pr_warn("Unknown platform PM state (%d)\n", state); + ret = -EINVAL; + } + + return ret; +} + +static int rcpm_v1_plat_enter_sleep(void) +{ + return rcpm_v1_plat_enter_state(PLAT_PM_SLEEP); +} + +static int rcpm_v2_plat_enter_sleep(void) +{ + return rcpm_v2_plat_enter_state(PLAT_PM_LPM20); +} + +static void rcpm_common_freeze_time_base(u32 *tben_reg, int freeze) +{ + static u32 mask; + + if (freeze) { + mask = in_be32(tben_reg); + clrbits32(tben_reg, mask); + } else { + setbits32(tben_reg, mask); + } + + /* read back to push the previous write */ + in_be32(tben_reg); +} + +static void rcpm_v1_freeze_time_base(bool freeze) +{ + rcpm_common_freeze_time_base(&rcpm_v1_regs->ctbenr, freeze); +} + +static void rcpm_v2_freeze_time_base(bool freeze) +{ + rcpm_common_freeze_time_base(&rcpm_v2_regs->pctbenr, freeze); +} + +static unsigned int rcpm_get_pm_modes(void) +{ + return fsl_supported_pm_modes; +} + +static const struct fsl_pm_ops qoriq_rcpm_v1_ops = { + .irq_mask = rcpm_v1_irq_mask, + .irq_unmask = rcpm_v1_irq_unmask, + .cpu_enter_state = rcpm_v1_cpu_enter_state, + .cpu_exit_state = rcpm_v1_cpu_exit_state, + .cpu_up_prepare = rcpm_v1_cpu_up_prepare, + .cpu_die = rcpm_v1_cpu_die, + .plat_enter_sleep = rcpm_v1_plat_enter_sleep, + .set_ip_power = rcpm_v1_set_ip_power, + .freeze_time_base = rcpm_v1_freeze_time_base, + .get_pm_modes = rcpm_get_pm_modes, +}; + +static const struct fsl_pm_ops qoriq_rcpm_v2_ops = { + .irq_mask = rcpm_v2_irq_mask, + .irq_unmask = rcpm_v2_irq_unmask, + .cpu_enter_state = rcpm_v2_cpu_enter_state, + .cpu_exit_state = rcpm_v2_cpu_exit_state, + .cpu_up_prepare = rcpm_v2_cpu_up_prepare, + .cpu_die = rcpm_v2_cpu_die, + .plat_enter_sleep = rcpm_v2_plat_enter_sleep, + .set_ip_power = rcpm_v2_set_ip_power, + .freeze_time_base = rcpm_v2_freeze_time_base, + .get_pm_modes = rcpm_get_pm_modes, +}; + +static const struct of_device_id rcpm_matches[] = { + { + .compatible = "fsl,qoriq-rcpm-1.0", + .data = &qoriq_rcpm_v1_ops, + }, + { + .compatible = "fsl,qoriq-rcpm-2.0", + .data = &qoriq_rcpm_v2_ops, + }, + { + .compatible = "fsl,qoriq-rcpm-2.1", + .data = &qoriq_rcpm_v2_ops, + }, + {}, +}; + +int __init fsl_rcpm_init(void) +{ + struct device_node *np; + const struct of_device_id *match; + void __iomem *base; + + np = of_find_matching_node_and_match(NULL, rcpm_matches, &match); + if (!np) + return 0; + + base = of_iomap(np, 0); + of_node_put(np); + if (!base) { + pr_err("of_iomap() error.\n"); + return -ENOMEM; + } + + rcpm_v1_regs = base; + rcpm_v2_regs = base; + + /* support sleep by default */ + fsl_supported_pm_modes = FSL_PM_SLEEP; + + qoriq_pm_ops = match->data; + + return 0; +} diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h index 84d971ff3fba..649e9171a9b3 100644 --- a/include/linux/fsl/guts.h +++ b/include/linux/fsl/guts.h @@ -189,4 +189,109 @@ static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts, #endif +struct ccsr_rcpm_v1 { + u8 res0000[4]; + __be32 cdozsr; /* 0x0004 Core Doze Status Register */ + u8 res0008[4]; + __be32 cdozcr; /* 0x000c Core Doze Control Register */ + u8 res0010[4]; + __be32 cnapsr; /* 0x0014 Core Nap Status Register */ + u8 res0018[4]; + __be32 cnapcr; /* 0x001c Core Nap Control Register */ + u8 res0020[4]; + __be32 cdozpsr; /* 0x0024 Core Doze Previous Status Register */ + u8 res0028[4]; + __be32 
cnappsr; /* 0x002c Core Nap Previous Status Register */ + u8 res0030[4]; + __be32 cwaitsr; /* 0x0034 Core Wait Status Register */ + u8 res0038[4]; + __be32 cwdtdsr; /* 0x003c Core Watchdog Detect Status Register */ + __be32 powmgtcsr; /* 0x0040 PM Control&Status Register */ +#define RCPM_POWMGTCSR_SLP 0x00020000 + u8 res0044[12]; + __be32 ippdexpcr; /* 0x0050 IP Powerdown Exception Control Register */ + u8 res0054[16]; + __be32 cpmimr; /* 0x0064 Core PM IRQ Mask Register */ + u8 res0068[4]; + __be32 cpmcimr; /* 0x006c Core PM Critical IRQ Mask Register */ + u8 res0070[4]; + __be32 cpmmcmr; /* 0x0074 Core PM Machine Check Mask Register */ + u8 res0078[4]; + __be32 cpmnmimr; /* 0x007c Core PM NMI Mask Register */ + u8 res0080[4]; + __be32 ctbenr; /* 0x0084 Core Time Base Enable Register */ + u8 res0088[4]; + __be32 ctbckselr; /* 0x008c Core Time Base Clock Select Register */ + u8 res0090[4]; + __be32 ctbhltcr; /* 0x0094 Core Time Base Halt Control Register */ + u8 res0098[4]; + __be32 cmcpmaskcr; /* 0x00a4 Core Machine Check Mask Register */ +}; + +struct ccsr_rcpm_v2 { + u8 res_00[12]; + __be32 tph10sr0; /* Thread PH10 Status Register */ + u8 res_10[12]; + __be32 tph10setr0; /* Thread PH10 Set Control Register */ + u8 res_20[12]; + __be32 tph10clrr0; /* Thread PH10 Clear Control Register */ + u8 res_30[12]; + __be32 tph10psr0; /* Thread PH10 Previous Status Register */ + u8 res_40[12]; + __be32 twaitsr0; /* Thread Wait Status Register */ + u8 res_50[96]; + __be32 pcph15sr; /* Physical Core PH15 Status Register */ + __be32 pcph15setr; /* Physical Core PH15 Set Control Register */ + __be32 pcph15clrr; /* Physical Core PH15 Clear Control Register */ + __be32 pcph15psr; /* Physical Core PH15 Prev Status Register */ + u8 res_c0[16]; + __be32 pcph20sr; /* Physical Core PH20 Status Register */ + __be32 pcph20setr; /* Physical Core PH20 Set Control Register */ + __be32 pcph20clrr; /* Physical Core PH20 Clear Control Register */ + __be32 pcph20psr; /* Physical Core PH20 Prev Status Register */ + __be32 pcpw20sr; /* Physical Core PW20 Status Register */ + u8 res_e0[12]; + __be32 pcph30sr; /* Physical Core PH30 Status Register */ + __be32 pcph30setr; /* Physical Core PH30 Set Control Register */ + __be32 pcph30clrr; /* Physical Core PH30 Clear Control Register */ + __be32 pcph30psr; /* Physical Core PH30 Prev Status Register */ + u8 res_100[32]; + __be32 ippwrgatecr; /* IP Power Gating Control Register */ + u8 res_124[12]; + __be32 powmgtcsr; /* Power Management Control & Status Reg */ +#define RCPM_POWMGTCSR_LPM20_RQ 0x00100000 +#define RCPM_POWMGTCSR_LPM20_ST 0x00000200 +#define RCPM_POWMGTCSR_P_LPM20_ST 0x00000100 + u8 res_134[12]; + __be32 ippdexpcr[4]; /* IP Powerdown Exception Control Reg */ + u8 res_150[12]; + __be32 tpmimr0; /* Thread PM Interrupt Mask Reg */ + u8 res_160[12]; + __be32 tpmcimr0; /* Thread PM Crit Interrupt Mask Reg */ + u8 res_170[12]; + __be32 tpmmcmr0; /* Thread PM Machine Check Interrupt Mask Reg */ + u8 res_180[12]; + __be32 tpmnmimr0; /* Thread PM NMI Mask Reg */ + u8 res_190[12]; + __be32 tmcpmaskcr0; /* Thread Machine Check Mask Control Reg */ + __be32 pctbenr; /* Physical Core Time Base Enable Reg */ + __be32 pctbclkselr; /* Physical Core Time Base Clock Select */ + __be32 tbclkdivr; /* Time Base Clock Divider Register */ + u8 res_1ac[4]; + __be32 ttbhltcr[4]; /* Thread Time Base Halt Control Register */ + __be32 clpcl10sr; /* Cluster PCL10 Status Register */ + __be32 clpcl10setr; /* Cluster PCL30 Set Control Register */ + __be32 clpcl10clrr; /* Cluster PCL30 Clear 
Control Register */ + __be32 clpcl10psr; /* Cluster PCL30 Prev Status Register */ + __be32 cddslpsetr; /* Core Domain Deep Sleep Set Register */ + __be32 cddslpclrr; /* Core Domain Deep Sleep Clear Register */ + __be32 cdpwroksetr; /* Core Domain Power OK Set Register */ + __be32 cdpwrokclrr; /* Core Domain Power OK Clear Register */ + __be32 cdpwrensr; /* Core Domain Power Enable Status Register */ + __be32 cddslsr; /* Core Domain Deep Sleep Status Register */ + u8 res_1e8[8]; + __be32 dslpcntcr[8]; /* Deep Sleep Counter Cfg Register */ + u8 res_300[3568]; +}; + #endif -- cgit v1.2.3 From 2f4f1f815bc6d03ea42d4f67dd1e284525e7524e Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:14:01 +0800 Subject: powerpc/mpc85xx: Add hotplug support on E5500 and E500MC cores Freescale E500MC and E5500 core-based platforms, such as the P4080 and T1040, can disable/enable CPUs dynamically. This patch adds that capability for those platforms. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: removed unused pr_fmt] Signed-off-by: Scott Wood --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/smp.h | 3 + arch/powerpc/kernel/smp.c | 7 +- arch/powerpc/platforms/85xx/smp.c | 196 +++++++++++++++++++++----------------- 4 files changed, 118 insertions(+), 90 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7efddd18d700..d500772a1d9b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -389,7 +389,7 @@ config SWIOTLB config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ - PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC)) + PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE) ---help--- Say Y here to be able to disable and re-enable individual CPUs at runtime on SMP machines.
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 825663c30945..bdb811116d85 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -67,6 +67,9 @@ void generic_cpu_die(unsigned int cpu); void generic_set_cpu_dead(unsigned int cpu); void generic_set_cpu_up(unsigned int cpu); int generic_check_cpu_restart(unsigned int cpu); +int is_cpu_dead(unsigned int cpu); +#else +#define generic_set_cpu_up(i) do { } while (0) #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ec9ec2058d2d..8575d044775d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -427,7 +427,7 @@ void generic_cpu_die(unsigned int cpu) for (i = 0; i < 100; i++) { smp_rmb(); - if (per_cpu(cpu_state, cpu) == CPU_DEAD) + if (is_cpu_dead(cpu)) return; msleep(100); } @@ -454,6 +454,11 @@ int generic_check_cpu_restart(unsigned int cpu) return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; } +int is_cpu_dead(unsigned int cpu) +{ + return per_cpu(cpu_state, cpu) == CPU_DEAD; +} + static bool secondaries_inhibited(void) { return kvm_hv_mode_active(); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index ab0459d904ff..d7cc538ca9dd 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -53,6 +53,7 @@ static void mpc85xx_give_timebase(void) unsigned long flags; local_irq_save(flags); + hard_irq_disable(); while (!tb_req) barrier(); @@ -101,6 +102,7 @@ static void mpc85xx_take_timebase(void) unsigned long flags; local_irq_save(flags); + hard_irq_disable(); tb_req = 1; while (!tb_valid) @@ -136,8 +138,31 @@ static void smp_85xx_mach_cpu_die(void) while (1) ; } + +static void qoriq_cpu_kill(unsigned int cpu) +{ + int i; + + for (i = 0; i < 500; i++) { + if (is_cpu_dead(cpu)) { +#ifdef CONFIG_PPC64 + paca[cpu].cpu_start = 0; +#endif + return; + } + msleep(20); + } + pr_err("CPU%d didn't die...\n", cpu); +} #endif +/* + * To keep it compatible with old boot program which uses + * cache-inhibit spin table, we need to flush the cache + * before accessing spin table to invalidate any staled data. + * We also need to flush the cache after writing to spin + * table to push data out. 
+ */ static inline void flush_spin_table(void *spin_table) { flush_dcache_range((ulong)spin_table, @@ -176,57 +201,20 @@ static void wake_hw_thread(void *info) } #endif -static int smp_85xx_kick_cpu(int nr) +static int smp_85xx_start_cpu(int cpu) { - unsigned long flags; - const u64 *cpu_rel_addr; - __iomem struct epapr_spin_table *spin_table; + int ret = 0; struct device_node *np; - int hw_cpu = get_hard_smp_processor_id(nr); + const u64 *cpu_rel_addr; + unsigned long flags; int ioremappable; - int ret = 0; + int hw_cpu = get_hard_smp_processor_id(cpu); + struct epapr_spin_table __iomem *spin_table; - WARN_ON(nr < 0 || nr >= NR_CPUS); - WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS); - - pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr); - -#ifdef CONFIG_PPC64 - /* Threads don't use the spin table */ - if (cpu_thread_in_core(nr) != 0) { - int primary = cpu_first_thread_sibling(nr); - - if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) - return -ENOENT; - - if (cpu_thread_in_core(nr) != 1) { - pr_err("%s: cpu %d: invalid hw thread %d\n", - __func__, nr, cpu_thread_in_core(nr)); - return -ENOENT; - } - - if (!cpu_online(primary)) { - pr_err("%s: cpu %d: primary %d not online\n", - __func__, nr, primary); - return -ENOENT; - } - - smp_call_function_single(primary, wake_hw_thread, &nr, 0); - return 0; - } else if (cpu_thread_in_core(boot_cpuid) != 0 && - cpu_first_thread_sibling(boot_cpuid) == nr) { - if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) - return -ENOENT; - - smp_call_function_single(boot_cpuid, wake_hw_thread, &nr, 0); - } -#endif - - np = of_get_cpu_node(nr, NULL); + np = of_get_cpu_node(cpu, NULL); cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL); - - if (cpu_rel_addr == NULL) { - printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr); + if (!cpu_rel_addr) { + pr_err("No cpu-release-addr for cpu %d\n", cpu); return -ENOENT; } @@ -246,28 +234,18 @@ static int smp_85xx_kick_cpu(int nr) spin_table = phys_to_virt(*cpu_rel_addr); local_irq_save(flags); -#ifdef CONFIG_PPC32 -#ifdef CONFIG_HOTPLUG_CPU - /* Corresponding to generic_set_cpu_dead() */ - generic_set_cpu_up(nr); + hard_irq_disable(); - if (system_state == SYSTEM_RUNNING) { - /* - * To keep it compatible with old boot program which uses - * cache-inhibit spin table, we need to flush the cache - * before accessing spin table to invalidate any staled data. - * We also need to flush the cache after writing to spin - * table to push data out. - */ - flush_spin_table(spin_table); - out_be32(&spin_table->addr_l, 0); - flush_spin_table(spin_table); + if (qoriq_pm_ops) + qoriq_pm_ops->cpu_up_prepare(cpu); + /* if cpu is not spinning, reset it */ + if (read_spin_table_addr_l(spin_table) != 1) { /* * We don't set the BPTR register here since it already points * to the boot page properly. */ - mpic_reset_core(nr); + mpic_reset_core(cpu); /* * wait until core is ready... @@ -277,40 +255,23 @@ static int smp_85xx_kick_cpu(int nr) if (!spin_event_timeout( read_spin_table_addr_l(spin_table) == 1, 10000, 100)) { - pr_err("%s: timeout waiting for core %d to reset\n", - __func__, hw_cpu); - ret = -ENOENT; - goto out; + pr_err("timeout waiting for cpu %d to reset\n", + hw_cpu); + ret = -EAGAIN; + goto err; } - - /* clear the acknowledge status */ - __secondary_hold_acknowledge = -1; - } -#endif - flush_spin_table(spin_table); - out_be32(&spin_table->pir, hw_cpu); - out_be32(&spin_table->addr_l, __pa(__early_start)); - flush_spin_table(spin_table); - - /* Wait a bit for the CPU to ack. 
*/ - if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu, - 10000, 100)) { - pr_err("%s: timeout waiting for core %d to ack\n", - __func__, hw_cpu); - ret = -ENOENT; - goto out; - } -out: -#else - smp_generic_kick_cpu(nr); flush_spin_table(spin_table); out_be32(&spin_table->pir, hw_cpu); +#ifdef CONFIG_PPC64 out_be64((u64 *)(&spin_table->addr_h), __pa(ppc_function_entry(generic_secondary_smp_init))); - flush_spin_table(spin_table); +#else + out_be32(&spin_table->addr_l, __pa(__early_start)); #endif - + flush_spin_table(spin_table); +err: local_irq_restore(flags); if (ioremappable) @@ -319,6 +280,60 @@ out: return ret; } +static int smp_85xx_kick_cpu(int nr) +{ + int ret = 0; +#ifdef CONFIG_PPC64 + int primary = nr; +#endif + + WARN_ON(nr < 0 || nr >= num_possible_cpus()); + + pr_debug("kick CPU #%d\n", nr); + +#ifdef CONFIG_PPC64 + /* Threads don't use the spin table */ + if (cpu_thread_in_core(nr) != 0) { + int primary = cpu_first_thread_sibling(nr); + + if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) + return -ENOENT; + + if (cpu_thread_in_core(nr) != 1) { + pr_err("%s: cpu %d: invalid hw thread %d\n", + __func__, nr, cpu_thread_in_core(nr)); + return -ENOENT; + } + + if (!cpu_online(primary)) { + pr_err("%s: cpu %d: primary %d not online\n", + __func__, nr, primary); + return -ENOENT; + } + + smp_call_function_single(primary, wake_hw_thread, &nr, 0); + return 0; + } + + ret = smp_85xx_start_cpu(primary); + if (ret) + return ret; + + paca[nr].cpu_start = 1; + generic_set_cpu_up(nr); + + return ret; +#else + ret = smp_85xx_start_cpu(nr); + if (ret) + return ret; + + generic_set_cpu_up(nr); + + return ret; +#endif +} + struct smp_ops_t smp_85xx_ops = { .kick_cpu = smp_85xx_kick_cpu, .cpu_bootable = smp_generic_cpu_bootable, @@ -473,6 +488,10 @@ void __init mpc85xx_smp_init(void) } #ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_FSL_CORENET_RCPM + fsl_rcpm_init(); +#endif + #ifdef CONFIG_FSL_PMC mpc85xx_setup_pmc(); #endif @@ -480,6 +499,7 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; ppc_md.cpu_die = smp_85xx_mach_cpu_die; + smp_85xx_ops.cpu_die = qoriq_cpu_kill; } #endif smp_ops = &smp_85xx_ops; -- cgit v1.2.3 From 6becef7ea04a695f64299238fe13d41e41607469 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:14:02 +0800 Subject: powerpc/mpc85xx: Add CPU hotplug support for E6500 Support Freescale E6500 core-based platforms, such as the T4240, and support disabling/enabling individual CPU threads dynamically.
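For illustration, waking the sibling hardware thread now reduces to the new book3e_start_thread() helper; a condensed sketch based on wake_hw_thread() below, where cpu is the logical CPU to bring up:

	void fsl_secondary_thread_init(void);
	unsigned long inia = *(unsigned long *)fsl_secondary_thread_init;

	/* program IMSR/INIA for the target thread and set its TENS bit */
	book3e_start_thread(cpu_thread_in_core(cpu), inia);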
Signed-off-by: Chenhui Zhao --- arch/powerpc/include/asm/cputhreads.h | 6 +++ arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/head_64.S | 78 +++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/smp.c | 70 +++++++++++++++++-------------- 4 files changed, 124 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 81f5b338c76d..666bef4ebfae 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_CPUTHREADS_H #define _ASM_POWERPC_CPUTHREADS_H +#ifndef __ASSEMBLY__ #include /* @@ -103,7 +104,12 @@ static inline u32 get_tensr(void) return 1; } +void book3e_start_thread(int thread, unsigned long addr); void book3e_stop_thread(int thread); +#endif /* __ASSEMBLY__ */ + +#define INVALID_THREAD_HWID 0x0fff + #endif /* _ASM_POWERPC_CPUTHREADS_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index bdb811116d85..174271ef2767 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -200,6 +200,7 @@ extern void generic_secondary_thread_init(void); extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; +extern unsigned int booting_thread_hwid; extern void __early_start(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 603625368ceb..291628320fbe 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,6 +40,7 @@ #include #include #include +#include /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -181,6 +182,45 @@ exception_marker: #endif #ifdef CONFIG_PPC_BOOK3E +/* + * The booting_thread_hwid holds the thread id we want to boot in cpu + * hotplug case. It is set by cpu hotplug code, and is invalid by default. + * The thread id is the same as the initial value of SPRN_PIR[THREAD_ID] + * bit field. + */ + .globl booting_thread_hwid +booting_thread_hwid: + .long INVALID_THREAD_HWID + .align 3 +/* + * start a thread in the same core + * input parameters: + * r3 = the thread physical id + * r4 = the entry point where thread starts + */ +_GLOBAL(book3e_start_thread) + LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) + cmpi 0, r3, 0 + beq 10f + cmpi 0, r3, 1 + beq 11f + /* If the thread id is invalid, just exit. */ + b 13f +10: + mttmr TMRN_IMSR0, r5 + mttmr TMRN_INIA0, r4 + b 12f +11: + mttmr TMRN_IMSR1, r5 + mttmr TMRN_INIA1, r4 +12: + isync + li r6, 1 + sld r6, r6, r3 + mtspr SPRN_TENS, r6 +13: + blr + /* * stop a thread in the same core * input parameter: @@ -280,6 +320,44 @@ _GLOBAL(generic_secondary_smp_init) mr r3,r24 mr r4,r25 bl book3e_secondary_core_init + +/* + * After common core init has finished, check if the current thread is the + * one we wanted to boot. If not, start the specified thread and stop the + * current thread. + */ + LOAD_REG_ADDR(r4, booting_thread_hwid) + lwz r3, 0(r4) + li r5, INVALID_THREAD_HWID + cmpw r3, r5 + beq 20f + + /* + * The value of booting_thread_hwid has been stored in r3, + * so make it invalid. + */ + stw r5, 0(r4) + + /* + * Get the current thread id and check if it is the one we wanted. + * If not, start the one specified in booting_thread_hwid and stop + * the current thread. 
+ */ + mfspr r8, SPRN_TIR + cmpw r3, r8 + beq 20f + + /* start the specified thread */ + LOAD_REG_ADDR(r5, fsl_secondary_thread_init) + ld r4, 0(r5) + bl book3e_start_thread + + /* stop the current thread */ + mr r3, r8 + bl book3e_stop_thread +10: + b 10b +20: #endif generic_secondary_common_init: diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index d7cc538ca9dd..fe9f19e5e935 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -180,24 +180,11 @@ static inline u32 read_spin_table_addr_l(void *spin_table) static void wake_hw_thread(void *info) { void fsl_secondary_thread_init(void); - unsigned long imsr, inia; - int nr = *(const int *)info; + unsigned long inia; + int cpu = *(const int *)info; - imsr = MSR_KERNEL; inia = *(unsigned long *)fsl_secondary_thread_init; - - if (cpu_thread_in_core(nr) == 0) { - /* For when we boot on a secondary thread with kdump */ - mttmr(TMRN_IMSR0, imsr); - mttmr(TMRN_INIA0, inia); - mtspr(SPRN_TENS, TEN_THREAD(0)); - } else { - mttmr(TMRN_IMSR1, imsr); - mttmr(TMRN_INIA1, inia); - mtspr(SPRN_TENS, TEN_THREAD(1)); - } - - smp_generic_kick_cpu(nr); + book3e_start_thread(cpu_thread_in_core(cpu), inia); } #endif @@ -292,33 +279,54 @@ static int smp_85xx_kick_cpu(int nr) pr_debug("kick CPU #%d\n", nr); #ifdef CONFIG_PPC64 - /* Threads don't use the spin table */ - if (cpu_thread_in_core(nr) != 0) { - int primary = cpu_first_thread_sibling(nr); - + if (threads_per_core == 2) { if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) return -ENOENT; - if (cpu_thread_in_core(nr) != 1) { - pr_err("%s: cpu %d: invalid hw thread %d\n", - __func__, nr, cpu_thread_in_core(nr)); - return -ENOENT; - } + booting_thread_hwid = cpu_thread_in_core(nr); + primary = cpu_first_thread_sibling(nr); - if (!cpu_online(primary)) { - pr_err("%s: cpu %d: primary %d not online\n", - __func__, nr, primary); - return -ENOENT; + if (qoriq_pm_ops) + qoriq_pm_ops->cpu_up_prepare(nr); + + /* + * If either thread in the core is online, use it to start + * the other. + */ + if (cpu_online(primary)) { + smp_call_function_single(primary, + wake_hw_thread, &nr, 1); + goto done; + } else if (cpu_online(primary + 1)) { + smp_call_function_single(primary + 1, + wake_hw_thread, &nr, 1); + goto done; } - smp_call_function_single(primary, wake_hw_thread, &nr, 0); - return 0; + /* + * If getting here, it means both threads in the core are + * offline. So start the primary thread, then it will start + * the thread specified in booting_thread_hwid, the one + * corresponding to nr. + */ + + } else if (threads_per_core == 1) { + /* + * If one core has only one thread, set booting_thread_hwid to + * an invalid value. + */ + booting_thread_hwid = INVALID_THREAD_HWID; + + } else if (threads_per_core > 2) { + pr_err("Do not support more than 2 threads per CPU."); + return -EINVAL; } ret = smp_85xx_start_cpu(primary); if (ret) return ret; +done: paca[nr].cpu_start = 1; generic_set_cpu_up(nr); -- cgit v1.2.3 From a5cab83cd3d2d75d3893276cb5f27e163484ef04 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:53 +1100 Subject: powerpc: Create a helper for getting the kernel toc value Move the logic to work out the kernel toc pointer into a header. This is a good cleanup, and also means we can use it elsewhere in future. 
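The 0x8000 bias is what makes the whole TOC reachable with one instruction: a 16-bit signed displacement spans [-0x8000, 0x7fff], so with r2 set to kernel_toc_addr() a single access covers the full 64kB window:

	r2 = __toc_start + 0x8000	/* kernel_toc_addr() */
	r2 - 0x8000 = __toc_start	/* lowest reachable address */
	r2 + 0x7fff = __toc_start + 0xffff	/* highest reachable address */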
Reviewed-by: Kamalesh Babulal Reviewed-by: Torsten Duwe Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman Tested-by: Kamalesh Babulal --- arch/powerpc/include/asm/sections.h | 12 ++++++++++++ arch/powerpc/kernel/paca.c | 11 +---------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index a5e930aca804..abf5866e08c6 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -22,6 +22,18 @@ static inline int in_kernel_text(unsigned long addr) return 0; } +static inline unsigned long kernel_toc_addr(void) +{ + /* Defined by the linker, see vmlinux.lds.S */ + extern unsigned long __toc_start; + + /* + * The TOC register (r2) points 32kB into the TOC, so that 64kB of + * the TOC can be addressed using a single machine instruction. + */ + return (unsigned long)(&__toc_start) + 0x8000UL; +} + static inline int overlaps_interrupt_vector_text(unsigned long start, unsigned long end) { diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 01ea0edf0579..93dae296b6be 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -17,10 +17,6 @@ #include #include -/* This symbol is provided by the linker - let it fill in the paca - * field correctly */ -extern unsigned long __toc_start; - #ifdef CONFIG_PPC_BOOK3S /* @@ -149,11 +145,6 @@ EXPORT_SYMBOL(paca); void __init initialise_paca(struct paca_struct *new_paca, int cpu) { - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ - unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - #ifdef CONFIG_PPC_BOOK3S new_paca->lppaca_ptr = new_lppaca(cpu); #else @@ -161,7 +152,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; + new_paca->kernel_toc = kernel_toc_addr(); new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); -- cgit v1.2.3 From 136cd3450af8092f30d0e289806f08ac2aeee38f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:54 +1100 Subject: powerpc/module: Only try to generate the ftrace_caller() stub once Currently we generate the module stub for ftrace_caller() at the bottom of apply_relocate_add(). However apply_relocate_add() is potentially called more than once per module, which means we will try to generate the ftrace_caller() stub multiple times. Although the current code deals with that correctly, ie. it only generates a stub the first time, it would be clearer to only try to generate the stub once. Note also on first reading it may appear that we generate a different stub for each section that requires relocation, but that is not the case. The code in stub_for_addr() that searches for an existing stub uses sechdrs[me->arch.stubs_section], ie. the single stub section for this module. A cleaner approach is to only generate the ftrace_caller() stub once, from module_finalize(). Although the original code didn't check to see if the stub was actually generated correctly, it seems prudent to add a check, so do that. And an additional benefit is we can clean the ifdefs up a little. Finally we must propagate the const'ness of some of the pointers passed to module_finalize(), but that is also an improvement. 
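
With the stub generation pulled out of apply_relocate_add(), the resulting
flow is roughly the following sketch (condensed from the module.c hunk
below; the feature-fixup handling that follows is elided). When
CONFIG_DYNAMIC_FTRACE is disabled, module_finalize_ftrace() is a static
inline that returns 0, so the call compiles away:

    int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
                        struct module *me)
    {
        int rc;

        /* Generate the ftrace_caller() stub exactly once per module */
        rc = module_finalize_ftrace(me, sechdrs);
        if (rc)
            return rc;

        /* ... apply feature fixups as before ... */
        return 0;
    }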
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/module.h | 9 +++++++++ arch/powerpc/kernel/module.c | 5 +++++ arch/powerpc/kernel/module_32.c | 20 ++++++++++++++------ arch/powerpc/kernel/module_64.c | 22 ++++++++++++++-------- 4 files changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index dcfcad139bcc..74d25a749018 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -82,6 +82,15 @@ bool is_module_trampoline(u32 *insns); int module_trampoline_target(struct module *mod, u32 *trampoline, unsigned long *target); +#ifdef CONFIG_DYNAMIC_FTRACE +int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs); +#else +static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) +{ + return 0; +} +#endif + struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 9547381b631a..d1f1b35bf0c7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -47,6 +47,11 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; + int rc; + + rc = module_finalize_ftrace(me, sechdrs); + if (rc) + return rc; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2c01665eb410..5a7a78f12562 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -181,7 +181,7 @@ static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) /* Set up a trampoline in the PLT to bounce us to the distant function */ static uint32_t do_plt_call(void *location, Elf32_Addr val, - Elf32_Shdr *sechdrs, + const Elf32_Shdr *sechdrs, struct module *mod) { struct ppc_plt_entry *entry; @@ -294,11 +294,19 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } + + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - module->arch.tramp = - do_plt_call(module->core_layout.base, - (unsigned long)ftrace_caller, - sechdrs, module); -#endif +int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) +{ + module->arch.tramp = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_caller, + sechdrs, module); + if (!module->arch.tramp) + return -ENOENT; + return 0; } +#endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ac64ffdb52c8..599c753c7960 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -413,7 +413,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the value maximum span in an instruction which uses a signed offset) */ -static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) +static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { return sechdrs[me->arch.toc_section].sh_addr + 0x8000; } @@ -426,7 +426,7 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) #define PPC_HA(v) PPC_HI ((v) + 0x8000) /* Patch stub to reference function and correct r2 value. 
 */
-static inline int create_stub(Elf64_Shdr *sechdrs,
+static inline int create_stub(const Elf64_Shdr *sechdrs,
 			      struct ppc64_stub_entry *entry,
 			      unsigned long addr,
 			      struct module *me)
@@ -452,7 +452,7 @@ static inline int create_stub(Elf64_Shdr *sechdrs,
 
 /* Create stub to jump to function described in this OPD/ptr: we need the
    stub to set up the TOC ptr (r2) for the function. */
-static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
 				   unsigned long addr,
 				   struct module *me)
@@ -693,12 +693,18 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		}
 	}
 
+	return 0;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
-	me->arch.toc = my_r2(sechdrs, me);
-	me->arch.tramp = stub_for_addr(sechdrs,
-				       (unsigned long)ftrace_caller,
-				       me);
-#endif
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+	mod->arch.toc = my_r2(sechdrs, mod);
+	mod->arch.tramp = stub_for_addr(sechdrs, (unsigned long)ftrace_caller, mod);
+
+	if (!mod->arch.tramp)
+		return -ENOENT;
 
 	return 0;
 }
+#endif
--
cgit v1.2.3


From f17c4e01e906c568fb03a2e6c2b99e4ee4587fbf Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 3 Mar 2016 15:26:55 +1100
Subject: powerpc/module: Mark module stubs with a magic value

When a module is loaded, calls out to the kernel go via a stub which is
generated at runtime. One of these stubs is used to call _mcount(), which
is the default target of tracing calls generated by the compiler with -pg.

If dynamic ftrace is enabled (which it typically is), another stub is used
to call ftrace_caller(), which is the target of tracing calls when ftrace
is actually active.

ftrace then wants to disable the calls to _mcount() at module startup, and
enable/disable the calls to ftrace_caller() when enabling/disabling
tracing - all of these it does by patching the code.

As part of that code patching, the ftrace code wants to confirm that the
branch it is about to modify is in fact a call to a module stub which
calls _mcount() or ftrace_caller(). Currently it does that by inspecting
the instructions and confirming they are what it expects. Although that
works, the code to do it is pretty intricate because it requires lots of
knowledge about the exact format of the stub.

We can make that process easier by marking the generated stubs with a
magic value, and then looking for that magic value. Although this is not
as rigorous as the current method, I believe it is sufficient in practice.
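
For illustration, the new check reduces to something like this sketch
(stub_is_ours() is a made-up name; the real logic lives in
module_trampoline_target() in the module_64.c hunk below):

    #define STUB_MAGIC 0x73747562                  /* "stub" in ASCII */

    static bool stub_is_ours(struct ppc64_stub_entry *stub)
    {
        u32 magic;

        /* create_stub() sets entry->magic = STUB_MAGIC when it generates
         * a stub, so one safe read and compare replaces the old
         * instruction-by-instruction mask and match. */
        if (probe_kernel_read(&magic, &stub->magic, sizeof(magic)))
            return false;

        return magic == STUB_MAGIC;
    }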
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/module.h | 3 +- arch/powerpc/kernel/ftrace.c | 14 ++----- arch/powerpc/kernel/module_64.c | 78 +++++++++++++-------------------------- 3 files changed, 31 insertions(+), 64 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 74d25a749018..5b6b5a427b54 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -78,8 +78,7 @@ struct mod_arch_specific { # endif /* MODULE */ #endif -bool is_module_trampoline(u32 *insns); -int module_trampoline_target(struct module *mod, u32 *trampoline, +int module_trampoline_target(struct module *mod, unsigned long trampoline, unsigned long *target); #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 44d4d8eb3c85..4505cbfd0e13 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -106,10 +106,9 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; - unsigned long entry, ptr; + unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - void *tramp; + unsigned int op; /* read where this goes */ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) @@ -122,14 +121,9 @@ __ftrace_make_nop(struct module *mod, } /* lets find where the pointer goes */ - tramp = (void *)find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %p", ip, tramp); + tramp = find_bl_target(ip, op); - if (!is_module_trampoline(tramp)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } + pr_devel("ip:%lx jumps to %lx", ip, tramp); if (module_trampoline_target(mod, tramp, &ptr)) { pr_err("Failed to get trampoline target\n"); diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 599c753c7960..9629966e614b 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -96,6 +96,8 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #endif +#define STUB_MAGIC 0x73747562 /* stub */ + /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ @@ -105,7 +107,8 @@ struct ppc64_stub_entry * need 6 instructions on ABIv2 but we always allocate 7 so * so we don't have to modify the trampoline load instruction. 
*/ u32 jump[7]; - u32 unused; + /* Used by ftrace to identify stubs */ + u32 magic; /* Data for the above code */ func_desc_t funcdata; }; @@ -139,70 +142,39 @@ static u32 ppc64_stub_insns[] = { }; #ifdef CONFIG_DYNAMIC_FTRACE - -static u32 ppc64_stub_mask[] = { - 0xffff0000, - 0xffff0000, - 0xffffffff, - 0xffffffff, -#if !defined(_CALL_ELF) || _CALL_ELF != 2 - 0xffffffff, -#endif - 0xffffffff, - 0xffffffff -}; - -bool is_module_trampoline(u32 *p) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - unsigned int i; - u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; - - BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - if (probe_kernel_read(insns, p, sizeof(insns))) + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); return -EFAULT; - - for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { - u32 insna = insns[i]; - u32 insnb = ppc64_stub_insns[i]; - u32 mask = ppc64_stub_mask[i]; - - if ((insna & mask) != (insnb & mask)) - return false; } - return true; -} + stub = (struct ppc64_stub_entry *)addr; -int module_trampoline_target(struct module *mod, u32 *trampoline, - unsigned long *target) -{ - u32 buf[2]; - u16 upper, lower; - long offset; - void *toc_entry; - - if (probe_kernel_read(buf, trampoline, sizeof(buf))) + if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; + } - upper = buf[0] & 0xffff; - lower = buf[1] & 0xffff; - - /* perform the addis/addi, both signed */ - offset = ((short)upper << 16) + (short)lower; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* - * Now get the address this trampoline jumps to. This - * is always 32 bytes into our trampoline stub. - */ - toc_entry = (void *)mod->arch.toc + offset + 32; + if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - if (probe_kernel_read(target, toc_entry, sizeof(*target))) - return -EFAULT; + *target = stub_func_addr(funcdata); return 0; } - #endif /* Count how many different 24-bit relocations (different symbol, @@ -447,6 +419,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + return 1; } -- cgit v1.2.3 From 153086644fd1fb07fb3af84d9f11542a19b1e8b6 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:59 +1100 Subject: powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI The gcc switch -mprofile-kernel defines a new ABI for calling _mcount() very early in the function with minimal overhead. Although mprofile-kernel has been available since GCC 3.4, there were bugs which were only fixed recently. Currently it is known to work in GCC 4.9, 5 and 6. Additionally there are two possible code sequences generated by the flag, the first uses mflr/std/bl and the second is optimised to omit the std. Currently only gcc 6 has the optimised sequence. This patch supports both sequences. Initial work started by Vojtech Pavlik, used with permission. Key changes: - rework _mcount() to work for both the old and new ABIs. 
- implement new versions of ftrace_caller() and ftrace_graph_caller() which deal with the new ABI. - updates to __ftrace_make_nop() to recognise the new mcount calling sequence. - updates to __ftrace_make_call() to recognise the nop'ed sequence. - implement ftrace_modify_call(). - updates to the module loader to surpress the toc save in the module stub when calling mcount with the new ABI. Reviewed-by: Balbir Singh Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/code-patching.h | 21 ++++ arch/powerpc/include/asm/ftrace.h | 5 + arch/powerpc/kernel/entry_64.S | 166 ++++++++++++++++++++++++++++++- arch/powerpc/kernel/ftrace.c | 103 ++++++++++++++++--- arch/powerpc/kernel/module_64.c | 49 ++++++++- 5 files changed, 324 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 840a5509b3f1..994c60a857ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func) #endif } +#ifdef CONFIG_PPC64 +/* + * Some instruction encodings commonly used in dynamic ftracing + * and function live patching. + */ + +/* This must match the definition of STK_GOT in */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define R2_STACK_OFFSET 24 +#else +#define R2_STACK_OFFSET 40 +#endif + +#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ + ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) + +/* usually preceded by a mflr r0 */ +#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ + ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) +#endif /* CONFIG_PPC64 */ + #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef89b1465573..50ca7585abe2 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -46,6 +46,8 @@ extern void _mcount(void); #ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ADDR ((unsigned long)ftrace_caller) +# define FTRACE_REGS_ADDR FTRACE_ADDR static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ @@ -58,6 +60,9 @@ struct dyn_arch_ftrace { #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif #endif #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce3717f..ec7f8aada697 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - blr + mflr r12 + mtctr r12 + mtlr r0 + bctr +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflr r3 @@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub) ld r0, 128(r1) mtlr r0 addi r1, r1, 112 + +#else /* CC_USING_MPROFILE_KERNEL */ +/* + * + * ftrace_caller() is the function that replaces _mcount() when ftrace is + * active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. 
+ * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(0,r1) + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip & pt_regs->link */ + std r7, _NIP(r1) + std r7, _LINK(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2,PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3,r2,function_trace_op@toc@ha + addi r3,r3,function_trace_op@toc@l + ld r5,0(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + std r8, _MSR(r1) + std r9, _CTR(r1) + std r10, _XER(r1) + std r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + ld r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + + /* Restore original LR for return to B */ + ld r0, LRSAVE(r1) + mtlr r0 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stdu r1, -112(r1) +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) + addi r1, r1, 112 +#endif + + ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ + mtlr r0 + bctr /* jump after _mcount site */ +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(ftrace_stub) blr #else @@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL(ftrace_graph_caller) /* load r4 with local address */ ld r4, 128(r1) @@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller) addi r1, r1, 112 blr +#else /* CC_USING_MPROFILE_KERNEL */ +_GLOBAL(ftrace_graph_caller) + /* with -mprofile-kernel, parameter regs are still alive at _mcount */ + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std r7, 80(r1) + std r6, 72(r1) + std r5, 64(r1) + std r4, 56(r1) + std r3, 48(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + mfctr r4 /* ftrace_caller has moved local addr here */ + std r4, 40(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ + subi r4, r4, MCOUNT_INSN_SIZE + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR to this. 
+ */ + mtlr r3 + + ld r0, 40(r1) + mtctr r0 + ld r10, 104(r1) + ld r9, 96(r1) + ld r8, 88(r1) + ld r7, 80(r1) + ld r6, 72(r1) + ld r5, 64(r1) + ld r4, 56(r1) + ld r3, 48(r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + addi r1, r1, 112 + mflr r0 + std r0, LRSAVE(r1) + bctr +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(return_to_handler) /* need to save return values */ std r4, -32(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 62899fbae703..9dac18dabd03 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) + if (replaced != old) { + pr_err("%p: replaced (%#x) != old (%#x)", + (void *)ip, replaced, old); return -EINVAL; + } /* replace the text with the new text */ if (patch_instruction((unsigned int *)ip, new)) @@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op; + unsigned int op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); return -EFAULT; + } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { @@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod, * * Use a b +8 to jump over the load. */ - op = 0x48000008; /* b +8 */ - if (patch_instruction((unsigned int *)ip, op)) + pop = PPC_INST_BRANCH | 8; /* b +8 */ + + /* + * Check what is in the next instruction. We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (op != PPC_INST_LD_TOC) { + unsigned int inst; + + if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ + if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { + pr_err("Unexpected instructions around bl _mcount\n" + "when enabling dynamic ftrace!\t" + "(%08x,bl,%08x)\n", inst, op); + return -EINVAL; + } + + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + } + + if (patch_instruction((unsigned int *)ip, pop)) { + pr_err("Patching NOP failed.\n"); return -EPERM; + } return 0; } @@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod, #ifdef CONFIG_MODULES #ifdef CONFIG_PPC64 +/* + * Examine the existing instructions for __ftrace_make_call. + * They should effectively be a NOP, and follow formal constraints, + * depending on the ABI. Return false if they don't. + */ +#ifndef CC_USING_MPROFILE_KERNEL static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) { - unsigned int op[2]; - void *ip = (void *)rec->ip; - - /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) - return -EFAULT; - /* * We expect to see: * @@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. 
*/ - if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { - pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); + if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + return 0; + return 1; +} +#else +static int +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +{ + /* look for patched "NOP" on ppc64 with -mprofile-kernel */ + if (op0 != PPC_INST_NOP) + return 0; + return 1; +} +#endif + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op[2]; + void *ip = (void *)rec->ip; + + /* read where this goes */ + if (probe_kernel_read(op, ip, sizeof(op))) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %x %x\n", + ip, op[0], op[1]); return -EINVAL; } @@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return 0; } -#else + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return ftrace_make_call(rec, addr); +} +#endif + +#else /* !CONFIG_PPC64: */ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 76c0963572f5..848b47499a27 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -42,7 +42,6 @@ --RR. */ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define R2_STACK_OFFSET 24 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -74,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); } #else -#define R2_STACK_OFFSET 40 /* An address is address of the OPD entry, which contains address of fn. */ typedef struct ppc64_opd_entry func_desc_t; @@ -451,17 +449,60 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } +#ifdef CC_USING_MPROFILE_KERNEL +static bool is_early_mcount_callsite(u32 *instruction) +{ + /* + * Check if this is one of the -mprofile-kernel sequences. + */ + if (instruction[-1] == PPC_INST_STD_LR && + instruction[-2] == PPC_INST_MFLR) + return true; + + if (instruction[-1] == PPC_INST_MFLR) + return true; + + return false; +} + +/* + * In case of _mcount calls, do not save the current callee's TOC (in r2) into + * the original caller's stack frame. If we did we would clobber the saved TOC + * value of the original caller. + */ +static void squash_toc_save_inst(const char *name, unsigned long addr) +{ + struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; + + /* Only for calls to _mcount */ + if (strcmp("_mcount", name) != 0) + return; + + stub->jump[2] = PPC_INST_NOP; +} +#else +static void squash_toc_save_inst(const char *name, unsigned long addr) { } + +/* without -mprofile-kernel, mcount calls are never early */ +static bool is_early_mcount_callsite(u32 *instruction) +{ + return false; +} +#endif + /* We expect a noop next: if it is, replace it with instruction to restore r2. 
*/ static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = 0xe8410000 | R2_STACK_OFFSET; + *instruction = PPC_INST_LD_TOC; return 1; } @@ -586,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOENT; if (!restore_r2((u32 *)location + 1, me)) return -ENOEXEC; + + squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); -- cgit v1.2.3 From 39218cd00ebf08b16edf015adc363de42d9ad612 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:07 +1100 Subject: powerpc/eeh: EEH device for VF VFs and their corresponding pdn are created and released dynamically when their PF's SRIOV capability is enabled and disabled. This creates and releases EEH devices for VFs when creating and releasing their pdn instances, which means EEH devices and pdn instances have same life cycle. Also, VF's EEH device is identified by (struct eeh_dev::physfn). Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/pci_dn.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 867c39b45df6..574ed49a4a0d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -141,6 +141,7 @@ struct eeh_dev { struct pci_controller *phb; /* Associated PHB */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ + struct pci_dev *physfn; /* Associated SRIOV PF */ struct pci_bus *bus; /* PCI bus for partial hotplug */ }; diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index b3b4df91b792..e23bdf786dab 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -179,6 +179,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; + struct eeh_dev *edev; int i; /* Only support IOV for now */ @@ -204,6 +205,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) __func__, i); return NULL; } + + /* Create the EEH device for the VF */ + eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); + edev = pdn_to_eeh_dev(pdn); + BUG_ON(!edev); + edev->physfn = pdev; } #endif /* CONFIG_PCI_IOV */ @@ -215,6 +222,7 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; + struct eeh_dev *edev; int i; /* @@ -256,6 +264,13 @@ void remove_dev_pci_data(struct pci_dev *pdev) pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; + /* Release EEH device for the VF */ + edev = pdn_to_eeh_dev(pdn); + if (edev) { + pdn->edev = NULL; + kfree(edev); + } + if (!list_empty(&pdn->list)) list_del(&pdn->list); -- cgit v1.2.3 From c29fa27d26e3189009ba42786f0c0dce14a90940 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:08 +1100 Subject: powerpc/eeh: Create PE for VFs This creates PEs for VFs in the weak function pcibios_bus_add_device(). Those PEs for VFs are identified with newly introduced flag EEH_PE_VF so that we treat them differently during EEH recovery. 
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh_pe.c | 10 ++++++++-- arch/powerpc/platforms/powernv/eeh-powernv.c | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 574ed49a4a0d..0c551a27e37d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -72,6 +72,7 @@ struct pci_dn; #define EEH_PE_PHB (1 << 1) /* PHB PE */ #define EEH_PE_DEVICE (1 << 2) /* Device PE */ #define EEH_PE_BUS (1 << 3) /* Bus PE */ +#define EEH_PE_VF (1 << 4) /* VF PE */ #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 9066afc2b9db..eea48d8baf49 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) * EEH device already having associated PE, but * the direct parent EEH device doesn't have yet. */ - pdn = pdn ? pdn->parent : NULL; + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; while (pdn) { /* We're poking out of PCI territory */ parent = pdn_to_eeh_dev(pdn); @@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) } /* Create a new EEH PE */ - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (edev->physfn) + pe = eeh_pe_alloc(edev->phb, EEH_PE_VF); + else + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 811917219bf1..830526e72cce 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1503,6 +1503,22 @@ static struct eeh_ops pnv_eeh_ops = { .restore_config = pnv_eeh_restore_config }; +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + /** * eeh_powernv_init - Register platform dependent EEH operations * -- cgit v1.2.3 From 9312bc5bab5907937db20c9f8c094d0c02dd78db Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:09 +1100 Subject: powerpc/powernv: Support EEH reset for VF PE PEs for VFs don't have primary bus. So they have to have their own reset backend, which is used during EEH recovery. The patch implements the reset backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained in the PE. 
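
For reference, the hot-reset leg of the new backend is essentially the
following (condensed from pnv_eeh_do_flr() below; the capability checks,
the AF FLR fallback and the deactivate leg are elided, and reg is a u32):

    /* Wait for any outstanding transactions, then pulse the FLR bit */
    pnv_eeh_wait_for_pending(pdn, "", edev->pcie_cap + PCI_EXP_DEVSTA,
                             PCI_EXP_DEVSTA_TRPND);
    eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 4, &reg);
    reg |= PCI_EXP_DEVCTL_BCR_FLR;
    eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg);
    msleep(EEH_PE_RST_HOLD_TIME);      /* hold, then clear to deassert */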
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 9 +- arch/powerpc/platforms/powernv/eeh-powernv.c | 127 ++++++++++++++++++++++++++- 3 files changed, 133 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 0c551a27e37d..b5b5f45dfb0a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -137,6 +137,7 @@ struct eeh_dev { int pcix_cap; /* Saved PCIx capability */ int pcie_cap; /* Saved PCIe capability */ int aer_cap; /* Saved AER capability */ + int af_cap; /* Saved AF capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 8c6005cf1583..0d724625662f 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -761,7 +761,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); eeh_unfreeze_pe(pe, false); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; @@ -769,14 +770,16 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 830526e72cce..e26256b462c8 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -371,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -879,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) } } +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, + int pos, u16 mask) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int i, status = 0; + + /* Wait for Transaction Pending bit to be cleared */ + for (i = 0; i < 4; i++) { + eeh_ops->read_config(pdn, pos, 2, &status); + if (!(status & mask)) + return; + + msleep((1 << i) * 100); + } + + pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", + __func__, type, + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); +} + +static int 
pnv_eeh_do_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 reg = 0; + + if (WARN_ON(!edev->pcie_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + if (!(reg & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + pnv_eeh_wait_for_pending(pdn, "", + edev->pcie_cap + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_TRPND); + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg |= PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 cap = 0; + + if (WARN_ON(!edev->af_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Wait for Transaction Pending bit to clear. A word-aligned + * test is used, so we use the conrol offset rather than status + * and shift the test bit to match. + */ + pnv_eeh_wait_for_pending(pdn, "AF", + edev->af_cap + PCI_AF_CTRL, + PCI_AF_STATUS_TP << 8); + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + 1, PCI_AF_CTRL_FLR); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + int ret; + + /* The VF PE should have only one child device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdn = eeh_dev_to_pdn(edev); + if (!pdn) + return -ENXIO; + + ret = pnv_eeh_do_flr(pdn, option); + if (!ret) + return ret; + + return pnv_eeh_do_af_flr(pdn, option); +} + /** * pnv_eeh_reset - Reset the specified PE * @pe: EEH PE @@ -940,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || + if (pe->type & EEH_PE_VF) + ret = pnv_eeh_reset_vf_pe(pe, option); + else if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) ret = pnv_eeh_root_reset(hose, option); else @@ -1079,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) if (!edev || !edev->pe) return false; + /* + * We will issue FLR or AF FLR to all VFs, which are contained + * in VF PE. It relies on the EEH PCI config accessors. So we + * can't block them during the window. + */ + if (edev->physfn && (edev->pe->state & EEH_PE_RESET)) + return false; + if (edev->pe->state & EEH_PE_CFG_BLOCKED) return true; -- cgit v1.2.3 From 0dc2830e0a48d520c7db7cc55dbbd0edefac02f5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:10 +1100 Subject: powerpc/powernv: Support PCI config restore for VFs After PE reset, OPAL API opal_pci_reinit() is called on all devices contained in the PE to reinitialize them. While skiboot is not aware of VFs, we have to implement the function in kernel to reinitialize VFs after reset on PE for VFs. 
In this patch, two functions pnv_pci_fixup_vf_mps() and pnv_eeh_restore_vf_config() both manipulate the MPS of the VF, since for a VF it has three cases. 1. Normal creation for a VF In this case, pnv_pci_fixup_vf_mps() is called to make the MPS a proper value compared with its parent. 2. EEH recovery without VF removed In this case, MPS is stored in pci_dn and pnv_eeh_restore_vf_config() is called to restore it and reinitialize other part. 3. EEH recovery with VF removed In this case, VF will be removed then re-created. Both functions are called. First pnv_pci_fixup_vf_mps() is called to store the proper MPS to pci_dn and then pnv_eeh_restore_vf_config() is called to do proper thing. This introduces two functions: pnv_pci_fixup_vf_mps() to fixup the VF's MPS to make sure it is equal to parent's and store this value in pci_dn for future use. pnv_eeh_restore_vf_config() to re-initialize on VF by restoring MPS, disabling completion timeout, enabling SERR, etc. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 1 + arch/powerpc/platforms/powernv/eeh-powernv.c | 95 +++++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b0b43f5fbc5f..f4d17587fa31 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -220,6 +220,7 @@ struct pci_dn { #define IODA_INVALID_M64 (-1) int (*m64_map)[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ + int mps; /* Maximum Payload Size */ #endif struct list_head child_list; struct list_head list; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index e26256b462c8..950b3e539057 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1588,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } +static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, + 4, &cap2); + if (cap2 & 0x10) { + eeh_ops->read_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, &cap2); + cap2 |= 0x10; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking */ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, &aer_capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | 
+			       PCI_ERR_CAP_ECRC_CHKE);
+		eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+				      4, aer_capctl);
+	}
+
+	return 0;
+}
+
 static int pnv_eeh_restore_config(struct pci_dn *pdn)
 {
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -1597,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
 	if (!edev)
 		return -EEXIST;
 
-	phb = edev->phb->private_data;
-	ret = opal_pci_reinit(phb->opal_id,
-			      OPAL_REINIT_PCI_DEV, edev->config_addr);
+	/*
+	 * We have to restore the PCI config space after reset since the
+	 * firmware can't see SRIOV VFs.
+	 *
+	 * FIXME: The MPS, error routing rules, timeout setting are worthy
+	 * to be exported by firmware in extendible way.
+	 */
+	if (edev->physfn) {
+		ret = pnv_eeh_restore_vf_config(pdn);
+	} else {
+		phb = edev->phb->private_data;
+		ret = opal_pci_reinit(phb->opal_id,
+				      OPAL_REINIT_PCI_DEV, edev->config_addr);
+	}
+
 	if (ret) {
 		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
 			__func__, edev->config_addr, ret);
@@ -1644,6 +1715,24 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
 	eeh_sysfs_add_device(pdev);
 }
 
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	int parent_mps;
+
+	if (!pdev->is_virtfn)
+		return;
+
+	/* Synchronize MPS for VF and PF */
+	parent_mps = pcie_get_mps(pdev->physfn);
+	if ((128 << pdev->pcie_mpss) >= parent_mps)
+		pcie_set_mps(pdev, parent_mps);
+	pdn->mps = pcie_get_mps(pdev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
+#endif /* CONFIG_PCI_IOV */
+
 /**
  * eeh_powernv_init - Register platform dependent EEH operations
  *
--
cgit v1.2.3


From 67086e32b56481531ab1292b284e074b1a8d764c Mon Sep 17 00:00:00 2001
From: Wei Yang
Date: Fri, 4 Mar 2016 10:53:11 +1100
Subject: powerpc/eeh: Support error recovery for VF PE

PFs are enumerated on the PCI bus, while VFs are created by the PF's
driver. In EEH recovery, there are two cases:

1. The device and driver are EEH aware: the error handlers are called.
2. The device and driver are not EEH aware: the device is un-plugged and
   then plugged again by enumerating it.

The special handling happens in the second case. For a PF, we can use the
original PCI core to enumerate the bus, while for a VF we need to record
which VFs were un-plugged and then plug them back again.

The patch also caches the VF index in pci_dn, which can be used to
calculate the VF's bus, device and function number. That information
helps to locate the VF's PCI device instance when doing hotplug during
EEH recovery if necessary.
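
In other words, the cached index lets the recovery code unplug and re-plug
exactly the affected VF rather than tearing down the whole bus (condensed
from the eeh_driver.c hunks below):

    /* Removal: take the VF itself off instead of the PCI bus */
    pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
    pdn->pe_number = IODA_INVALID_PE;  /* required to plug the VF again */

    /* Recovery: re-create the same VF once the PF has recovered */
    pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0);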
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 + arch/powerpc/include/asm/pci-bridge.h | 1 + arch/powerpc/kernel/eeh.c | 8 ++ arch/powerpc/kernel/eeh_dev.c | 1 + arch/powerpc/kernel/eeh_driver.c | 137 +++++++++++++++++++++++++++------- arch/powerpc/kernel/pci_dn.c | 4 +- 6 files changed, 127 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b5b5f45dfb0a..fb9f376ae27b 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -140,9 +140,11 @@ struct eeh_dev { int af_cap; /* Saved AF capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ + struct list_head rmv_list; /* Record the removed edevs */ struct pci_controller *phb; /* Associated PHB */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ + bool in_error; /* Error flag for edev */ struct pci_dev *physfn; /* Associated SRIOV PF */ struct pci_bus *bus; /* PCI bus for partial hotplug */ }; diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index f4d17587fa31..9f165e8a77bf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -212,6 +212,7 @@ struct pci_dn { #define IODA_INVALID_PE (-1) #ifdef CONFIG_PPC_POWERNV int pe_number; + int vf_index; /* VF index in the PF */ #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0d724625662f..b7338a9426df 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1246,6 +1246,14 @@ void eeh_remove_device(struct pci_dev *dev) * from the parent PE during the BAR resotre. */ edev->pdev = NULL; + + /* + * The flag "in_error" is used to trace EEH devices for VFs + * in error state or not. It's set in eeh_report_error(). If + * it's not set, eeh_report_{reset,resume}() won't be called + * for the VF EEH device. 
+ */ + edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) eeh_rmv_from_parent_pe(edev); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index aabba94ff9cb..7815095fe3d8 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) edev->pdn = pdn; edev->phb = phb; INIT_LIST_HEAD(&edev->list); + INIT_LIST_HEAD(&edev->rmv_list); return NULL; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 650cfb31ea3d..c0fe7a6be2c9 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -34,6 +34,11 @@ #include #include +struct eeh_rmv_data { + struct list_head edev_list; + int removed; +}; + /** * eeh_pcid_name - Retrieve name of PCI device driver * @pdev: PCI device @@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + edev->in_error = true; eeh_pcid_put(dev); return NULL; } @@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata) if (!driver->err_handler || !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || + (!edev->in_error)) { eeh_pcid_put(dev); return NULL; } @@ -326,6 +333,7 @@ static void *eeh_report_resume(void *data, void *userdata) { struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + bool was_in_error; struct pci_driver *driver; if (!dev || eeh_dev_removed(edev)) @@ -335,11 +343,13 @@ static void *eeh_report_resume(void *data, void *userdata) driver = eeh_pcid_get(dev); if (!driver) return NULL; + was_in_error = edev->in_error; + edev->in_error = false; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; @@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_add_virt_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!(edev->physfn)) { + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", + __func__, edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + return NULL; + } + + driver = eeh_pcid_get(dev); + if (driver) { + eeh_pcid_put(dev); + if (driver->err_handler) + return NULL; + } + +#ifdef CONFIG_PPC_POWERNV + pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0); +#endif + return NULL; +} + static void *eeh_rmv_device(void *data, void *userdata) { struct pci_driver *driver; struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - int *removed = (int *)userdata; + struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; + int *removed = rmv_data ? &rmv_data->removed : NULL; /* * Actually, we should remove the PCI bridges as well. 
@@ -416,7 +454,8 @@ static void *eeh_rmv_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); - if (driver->err_handler && + if (removed && + driver->err_handler && driver->err_handler->error_detected && driver->err_handler->slot_reset) return NULL; @@ -427,11 +466,29 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_name(dev)); edev->bus = dev->bus; edev->mode |= EEH_DEV_DISCONNECTED; - (*removed)++; + if (removed) + (*removed)++; - pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(dev); - pci_unlock_rescan_remove(); + if (edev->physfn) { +#ifdef CONFIG_PPC_POWERNV + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); + edev->pdev = NULL; + + /* + * We have to set the VF PE number to invalid one, which is + * required to plug the VF successfully. + */ + pdn->pe_number = IODA_INVALID_PE; +#endif + if (rmv_data) + list_add(&edev->rmv_list, &rmv_data->edev_list); + } else { + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); + } return NULL; } @@ -545,11 +602,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + struct eeh_rmv_data *rmv_data) { struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc, removed = 0; + int cnt, rc; + struct eeh_dev *edev; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -563,12 +622,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(bus); + pci_unlock_rescan_remove(); + } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); } /* @@ -610,14 +673,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * PE. We should disconnect it so the binding can be * rebuilt when adding PCI devices. 
*/ + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(bus); - } else if (frozen_bus && removed) { + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(bus); + } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(frozen_bus); + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -636,8 +707,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) static void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; + struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { @@ -692,7 +765,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus, NULL); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -744,7 +817,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL); + rc = eeh_reset_device(pe, NULL, &rmv_data); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -764,6 +837,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) goto hard_fail; } + /* + * For those hot removed VFs, we should add back them after PF get + * recovered properly. + */ + list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { + eeh_add_virt_device(edev, NULL); + list_del(&edev->rmv_list); + } + /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); @@ -803,12 +885,17 @@ perm_error: * the their PCI config any more. 
*/ if (frozen_bus) { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(frozen_bus); - pci_unlock_rescan_remove(); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e23bdf786dab..38102cb9baa9 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, struct pci_dev *pdev, + int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, pdn->busno = busno; pdn->devfn = devfn; #ifdef CONFIG_PPC_POWERNV + pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; #endif INIT_LIST_HEAD(&pdn->child_list); @@ -197,7 +199,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { - pdn = add_one_dev_pci_data(parent, NULL, + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { -- cgit v1.2.3 From c0efa9aee8504ce16250320c1e5464bdb7e24400 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Fri, 4 Mar 2016 12:26:33 +0100 Subject: powerpc: New possible return value from hcall The hcalls introduced for cxl use a possible new value: H_STATE (invalid state). Co-authored-by: Frederic Barrat Signed-off-by: Frederic Barrat Signed-off-by: Christophe Lombard Reviewed-by: Manoj Kumar Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e3b54dd4f730..0bc9c284aa10 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -94,6 +94,7 @@ #define H_SG_LIST -72 #define H_OP_MODE -73 #define H_COP_HW -74 +#define H_STATE -75 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 -- cgit v1.2.3 From 7e393220b6e1ecfa5520d1b2ca31150b7588f458 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 7 Mar 2016 18:44:37 +0100 Subject: powerpc: optimise csum_partial() call when len is constant csum_partial is often called for small fixed length packets for which it is suboptimal to use the generic csum_partial() function. 
For instance, in my configuration, I got: * One place calling it with constant len 4 * Seven places calling it with constant len 8 * Three places calling it with constant len 14 * One place calling it with constant len 20 * One place calling it with constant len 24 * One place calling it with constant len 32 This patch renames csum_partial() to __csum_partial() and implements csum_partial() as an inline wrapper function which * uses csum_add() for small constant lengths that are a multiple of 16 bits * uses ip_fast_csum() for other constant lengths that are a multiple of 32 bits * uses __csum_partial() in all other cases Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 79 ++++++++++++++++++++++++++----------- arch/powerpc/lib/checksum_32.S | 4 +- arch/powerpc/lib/checksum_64.S | 4 +- arch/powerpc/lib/ppc_ksyms.c | 2 +- 4 files changed, 61 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 74cd8d82628a..ee655ed1ff1b 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -12,20 +12,6 @@ #ifdef CONFIG_GENERIC_CSUM #include #else -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -extern __wsum csum_partial(const void *buff, int len, __wsum sum); - /* * Computes the checksum of a memory block at src, length len, * and adds in "sum" (32-bit), while copying the block to dst. @@ -67,15 +53,6 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ -static inline __sum16 ip_compute_csum(const void *buff, int len) -{ - return csum_fold(csum_partial(buff, len, 0)); -} - static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, @@ -174,6 +151,62 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) return csum_fold(ip_fast_csum_nofold(iph, ihl)); } +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum __csum_partial(const void *buff, int len, __wsum sum); + +static inline __wsum csum_partial(const void *buff, int len, __wsum sum) +{ + if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) { + if (len == 2) + sum = csum_add(sum, (__force __wsum)*(const u16 *)buff); + if (len >= 4) + sum = csum_add(sum, (__force __wsum)*(const u32 *)buff); + if (len == 6) + sum = csum_add(sum, (__force __wsum) + *(const u16 *)(buff + 4)); + if (len >= 8) + sum = csum_add(sum, (__force __wsum) + *(const u32 *)(buff + 4)); + if (len == 10) + sum = csum_add(sum, (__force __wsum) + *(const u16 *)(buff + 8)); + if (len >= 12) + sum = csum_add(sum, (__force __wsum) + *(const u32 *)(buff + 8)); + if (len == 14) + sum = csum_add(sum, (__force __wsum) + *(const u16 *)(buff + 12)); + if (len >= 16) + sum = csum_add(sum, (__force __wsum) + 
*(const u32 *)(buff + 12)); + } else if (__builtin_constant_p(len) && (len & 3) == 0) { + sum = csum_add(sum, ip_fast_csum_nofold(buff, len >> 2)); + } else { + sum = __csum_partial(buff, len, sum); + } + return sum; +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + #endif #endif /* __KERNEL__ */ #endif diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 0d34f47c8a5e..d90870a66b60 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -24,9 +24,9 @@ * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) * - * csum_partial(buff, len, sum) + * __csum_partial(buff, len, sum) */ -_GLOBAL(csum_partial) +_GLOBAL(__csum_partial) subi r3,r3,4 srawi. r6,r4,2 /* Divide len by 4 and also clear carry */ beq 3f /* if we're doing < 4 bytes */ diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index f53f4abb9282..8e6e51016cc5 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -21,9 +21,9 @@ * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). * - * csum_partial(r3=buff, r4=len, r5=sum) + * __csum_partial(r3=buff, r4=len, r5=sum) */ -_GLOBAL(csum_partial) +_GLOBAL(__csum_partial) addic r0,r5,0 /* clear carry */ srdi. r6,r4,3 /* less than 8 bytes? */ diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c index 8cd5c0b2986c..c422812f7405 100644 --- a/arch/powerpc/lib/ppc_ksyms.c +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -17,7 +17,7 @@ EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); #ifndef CONFIG_GENERIC_CSUM -EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); #endif -- cgit v1.2.3 From d4969e2459c6e852a6862256cf8e869aaa3e8adf Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 11 Jan 2016 14:55:25 -0800 Subject: powerpc/perf: Remove PME_ prefix for power7 events We used the PME_ prefix earlier to avoid some macro/variable name collisions. We have since changed the way we define/use the event macros so we no longer need the prefix. By dropping the prefix, we keep the event macros consistent with their official names. Reported-by: Michael Ellerman Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/perf_event_server.h | 2 +- arch/powerpc/perf/power7-pmu.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 814622146d5a..069108717163 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -141,7 +141,7 @@ extern ssize_t power_events_sysfs_show(struct device *dev, #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr #define EVENT_ATTR(_name, _id, _suffix) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id, \ power_events_sysfs_show) #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 5b62f2389290..a383c23a9070 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -54,7 +54,7 @@ * Power7 event codes. 
*/ #define EVENT(_name, _code) \ - PME_##_name = _code, + _name = _code, enum { #include "power7-events-list.h" }; @@ -318,14 +318,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) } static int power7_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL, - [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL, - [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1, - [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN, - [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED, + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_GCT_NOSLOT_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED, }; #define C(x) PERF_COUNT_HW_CACHE_##x -- cgit v1.2.3 From e0728b50d480da6be228dd160a43b37e4c0b1636 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 11 Jan 2016 14:55:26 -0800 Subject: powerpc/perf: Export Power8 generic and cache events to sysfs Power8 supports a large number of events in each subsystem so when a user runs: perf stat -e branch-instructions sleep 1 perf stat -e L1-dcache-loads sleep 1 it is not clear which PMU events were monitored. Export the generic hardware and cache perf events for Power8 to sysfs, so users can precisely determine the PMU event monitored by the generic event. Eg: cat /sys/bus/event_source/devices/cpu/events/branch-instructions event=0x10068 $ cat /sys/bus/event_source/devices/cpu/events/L1-dcache-loads event=0x100ee Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/perf_event_server.h | 8 ++ arch/powerpc/perf/power8-events-list.h | 51 +++++++++++++ arch/powerpc/perf/power8-pmu.c | 110 ++++++++++++++++++--------- 3 files changed, 131 insertions(+), 38 deletions(-) create mode 100644 arch/powerpc/perf/power8-events-list.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 069108717163..e157489ee7a1 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -136,6 +136,11 @@ extern ssize_t power_events_sysfs_show(struct device *dev, * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and * 'PM_CYC' where the latter is the name by which the event is known in * POWER CPU specification. + * + * Similarly, some hardware and cache events use the same event code. Eg. + * on POWER8, both "cache-references" and "L1-dcache-loads" events refer + * to the same event, PM_LD_REF_L1. The suffix, allows us to have two + * sysfs objects for the same event and thus two entries/aliases in sysfs. 
*/ #define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr @@ -147,5 +152,8 @@ extern ssize_t power_events_sysfs_show(struct device *dev, #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) #define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g) +#define CACHE_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _c) +#define CACHE_EVENT_PTR(_id) EVENT_PTR(_id, _c) + #define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p) #define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p) diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h new file mode 100644 index 000000000000..741b77edd03e --- /dev/null +++ b/arch/powerpc/perf/power8-events-list.h @@ -0,0 +1,51 @@ +/* + * Performance counter support for POWER8 processors. + * + * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Power8 event codes. + */ +EVENT(PM_CYC, 0x0001e) +EVENT(PM_GCT_NOSLOT_CYC, 0x100f8) +EVENT(PM_CMPLU_STALL, 0x4000a) +EVENT(PM_INST_CMPL, 0x00002) +EVENT(PM_BRU_FIN, 0x10068) +EVENT(PM_BR_MPRED_CMPL, 0x400f6) + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100ee) +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054) +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0) +/* L1 cache data prefetches */ +EVENT(PM_L1_PREF, 0x0d8b8) +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080) +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fd) +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_L1_DEMAND_WRITE, 0x0408c) +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_WRITE, 0x0408e) +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x4c042) +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe) +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x17080) +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x17082) +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PREF_ALL, 0x4e052) +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc) +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index be9b7aec216f..690d9186a855 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -17,48 +17,16 @@ #include #include - /* * Some power8 event codes. 
*/ -#define PM_CYC 0x0001e -#define PM_GCT_NOSLOT_CYC 0x100f8 -#define PM_CMPLU_STALL 0x4000a -#define PM_INST_CMPL 0x00002 -#define PM_BRU_FIN 0x10068 -#define PM_BR_MPRED_CMPL 0x400f6 - -/* All L1 D cache load references counted at finish, gated by reject */ -#define PM_LD_REF_L1 0x100ee -/* Load Missed L1 */ -#define PM_LD_MISS_L1 0x3e054 -/* Store Missed L1 */ -#define PM_ST_MISS_L1 0x300f0 -/* L1 cache data prefetches */ -#define PM_L1_PREF 0x0d8b8 -/* Instruction fetches from L1 */ -#define PM_INST_FROM_L1 0x04080 -/* Demand iCache Miss */ -#define PM_L1_ICACHE_MISS 0x200fd -/* Instruction Demand sectors wriittent into IL1 */ -#define PM_L1_DEMAND_WRITE 0x0408c -/* Instruction prefetch written into IL1 */ -#define PM_IC_PREF_WRITE 0x0408e -/* The data cache was reloaded from local core's L3 due to a demand load */ -#define PM_DATA_FROM_L3 0x4c042 -/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ -#define PM_DATA_FROM_L3MISS 0x300fe -/* All successful D-side store dispatches for this thread */ -#define PM_L2_ST 0x17080 -/* All successful D-side store dispatches for this thread that were L2 Miss */ -#define PM_L2_ST_MISS 0x17082 -/* Total HW L3 prefetches(Load+store) */ -#define PM_L3_PREF_ALL 0x4e052 -/* Data PTEG reload */ -#define PM_DTLB_MISS 0x300fc -/* ITLB Reloaded */ -#define PM_ITLB_MISS 0x400fc +#define EVENT(_name, _code) _name = _code, + +enum { +#include "power8-events-list.h" +}; +#undef EVENT /* * Raw event encoding for POWER8: @@ -604,6 +572,71 @@ static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[]) mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); + +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE); + +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); + +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BRU_FIN); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power8_events_attr[] = { + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC), + GENERIC_EVENT_PTR(PM_CMPLU_STALL), + GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_BRU_FIN), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_L1_PREF), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_WRITE), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + 
CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PREF_ALL), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), + + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BRU_FIN), + + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power8_pmu_events_group = { + .name = "events", + .attrs = power8_events_attr, +}; + PMU_FORMAT_ATTR(event, "config:0-49"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); @@ -640,6 +673,7 @@ struct attribute_group power8_pmu_format_group = { static const struct attribute_group *power8_pmu_attr_groups[] = { &power8_pmu_format_group, + &power8_pmu_events_group, NULL, }; -- cgit v1.2.3 From be00ed728c32ed8311d5191637067c9eaf0fca4b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:07:56 +0100 Subject: powerpc32: Fix pte_offset_kernel() to return NULL for bad pages The fixmap related functions try to map kernel pages that are already mapped through Large TLBs. pte_offset_kernel() has to return NULL for LTLBs, otherwise the caller will try to access the level 2 table, which doesn't exist. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index c82cbf52d19e..e2016007c04e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -309,7 +309,8 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, addr) \ - ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) + (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \ + pte_index(addr)) #define pte_offset_map(dir, addr) \ ((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr)) #define pte_unmap(pte) kunmap_atomic(pte) -- cgit v1.2.3 From e974cd4be0be8de0d370ee4dbf181d614c0de386 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:10 +0100 Subject: powerpc32: remove ioremap_base ioremap_base is not initialised and is never used, so remove it. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/mm/mmu_decl.h | 1 - arch/powerpc/mm/pgtable_32.c | 3 +-- arch/powerpc/platforms/embedded6xx/mpc10x.h | 10 ---------- 4 files changed, 2 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index e2016007c04e..780847597514 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -86,7 +86,7 @@ extern int icache_44x_need_flush; * We no longer map larger than phys RAM with the BATs so we don't have * to worry about the VMALLOC_OFFSET causing problems. We do have to worry * about clashes between our early calls to ioremap() that start growing down - * from ioremap_base being run into the VM area allocations (growing upwards + * from IOREMAP_TOP being run into the VM area allocations (growing upwards * from VMALLOC_START). For this reason we have ioremap_bot to check when * we actually run into our mappings setup in the early boot with the VM * system. 
This really does become a problem for machines with good amounts diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 4b85077d4828..bfb7c0bcabd5 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -100,7 +100,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys, extern int __map_without_bats; extern int __allow_ioremap_reserved; -extern unsigned long ioremap_base; extern unsigned int rtas_data, rtas_size; struct hash_pte; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index db0d35e0169b..815ccd780ac2 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -37,7 +37,6 @@ #include "mmu_decl.h" -unsigned long ioremap_base; unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ @@ -173,7 +172,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, /* * Choose an address to map it to. * Once the vmalloc system is running, we use it. - * Before then, we use space going down from ioremap_base + * Before then, we use space going down from IOREMAP_TOP * (ioremap_bot records where we're up to). */ p = addr & PAGE_MASK; diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h index b290b63661f1..5ad12023e562 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc10x.h +++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h @@ -24,13 +24,11 @@ * Processor: 0x80000000 - 0x807fffff -> PCI I/O: 0x00000000 - 0x007fffff * Processor: 0xc0000000 - 0xdfffffff -> PCI MEM: 0x00000000 - 0x1fffffff * PCI MEM: 0x80000000 -> Processor System Memory: 0x00000000 - * EUMB mapped to: ioremap_base - 0x00100000 (ioremap_base - 1 MB) * * MAP B (CHRP Map) * Processor: 0xfe000000 - 0xfebfffff -> PCI I/O: 0x00000000 - 0x00bfffff * Processor: 0x80000000 - 0xbfffffff -> PCI MEM: 0x80000000 - 0xbfffffff * PCI MEM: 0x00000000 -> Processor System Memory: 0x00000000 - * EUMB mapped to: ioremap_base - 0x00100000 (ioremap_base - 1 MB) */ /* @@ -138,14 +136,6 @@ #define MPC10X_EUMB_WP_OFFSET 0x000ff000 /* Data path diagnostic, watchpoint reg offset */ #define MPC10X_EUMB_WP_SIZE 0x00001000 /* Data path diagnostic, watchpoint reg size */ -/* - * Define some recommended places to put the EUMB regs. - * For both maps, recommend putting the EUMB from 0xeff00000 to 0xefffffff. 
- */ -extern unsigned long ioremap_base; -#define MPC10X_MAPA_EUMB_BASE (ioremap_base - MPC10X_EUMB_SIZE) -#define MPC10X_MAPB_EUMB_BASE MPC10X_MAPA_EUMB_BASE - enum ppc_sys_devices { MPC10X_IIC1, MPC10X_DMA0, -- cgit v1.2.3 From 7ee5cf6bfad7990cc33d10888d869c1eb7d4a927 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:12 +0100 Subject: powerpc/8xx: Add missing SPRN defines into reg_8xx.h Add missing SPRN defines into reg_8xx.h. Some of them are defined in mmu-8xx.h, so we include mmu-8xx.h in reg_8xx.h. For that, we remove the references to PAGE_SHIFT in mmu-8xx.h to make it self-sufficient, as includers of reg_8xx.h don't all include asm/page.h. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/mmu-8xx.h | 4 ++-- arch/powerpc/include/asm/reg_8xx.h | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index f05500a29a60..0a566f15f985 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -171,9 +171,9 @@ typedef struct { } mm_context_t; #endif /* !__ASSEMBLY__ */ -#if (PAGE_SHIFT == 12) +#if defined(CONFIG_PPC_4K_PAGES) #define mmu_virtual_psize MMU_PAGE_4K -#elif (PAGE_SHIFT == 14) +#elif defined(CONFIG_PPC_16K_PAGES) #define mmu_virtual_psize MMU_PAGE_16K #else #error "Unsupported PAGE_SIZE" diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index e8ea346b21d3..0f71c811a841 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -4,6 +4,8 @@ #ifndef _ASM_POWERPC_REG_8xx_H #define _ASM_POWERPC_REG_8xx_H +#include + /* Cache control on the MPC8xx is provided through some additional * special purpose registers. */ @@ -14,6 +16,15 @@ #define SPRN_DC_ADR 569 /* Address needed for some commands */ #define SPRN_DC_DAT 570 /* Read-only data register */ +/* Misc Debug */ +#define SPRN_DPDR 630 +#define SPRN_MI_CAM 816 +#define SPRN_MI_RAM0 817 +#define SPRN_MI_RAM1 818 +#define SPRN_MD_CAM 824 +#define SPRN_MD_RAM0 825 +#define SPRN_MD_RAM1 826 + /* Commands. Only the first few are available to the instruction cache. 
*/ #define IDC_ENABLE 0x02000000 /* Cache enable */ -- cgit v1.2.3 From 1458dd951f7cc0127508f928497c9ab06b5e557d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:14 +0100 Subject: powerpc/8xx: Handle CPU6 ERRATA directly in mtspr() macro MPC8xx has an erratum on the use of mtspr() for some registers. This patch includes the erratum handling directly in the mtspr() macro so that mtspr() users don't need to bother about it. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/reg.h | 2 + arch/powerpc/include/asm/reg_8xx.h | 82 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 52ed654d01ba..f5f4c66bbbc9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1219,9 +1219,11 @@ static inline void mtmsr_isync(unsigned long val) #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval;}) +#ifndef mtspr #define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ : "r" ((unsigned long)(v)) \ : "memory") +#endif extern void msr_check_and_set(unsigned long bits); extern bool strict_msr_control; diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 0f71c811a841..d41412c7748b 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -50,4 +50,86 @@ #define DC_DFWT 0x40000000 /* Data cache is forced write through */ #define DC_LES 0x20000000 /* Caches are little endian mode */ +#ifdef CONFIG_8xx_CPU6 +#define do_mtspr_cpu6(rn, rn_addr, v) \ + do { \ + int _reg_cpu6 = rn_addr, _tmp_cpu6[1]; \ + asm volatile("stw %0, %1;" \ + "lwz %0, %1;" \ + "mtspr " __stringify(rn) ",%2" : \ + : "r" (_reg_cpu6), "m"(_tmp_cpu6), \ + "r" ((unsigned long)(v)) \ + : "memory"); \ + } while (0) + +#define do_mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ + : "r" ((unsigned long)(v)) \ + : "memory") +#define mtspr(rn, v) \ + do { \ + if (rn == SPRN_IMMR) \ + do_mtspr_cpu6(rn, 0x3d30, v); \ + else if (rn == SPRN_IC_CST) \ + do_mtspr_cpu6(rn, 0x2110, v); \ + else if (rn == SPRN_IC_ADR) \ + do_mtspr_cpu6(rn, 0x2310, v); \ + else if (rn == SPRN_IC_DAT) \ + do_mtspr_cpu6(rn, 0x2510, v); \ + else if (rn == SPRN_DC_CST) \ + do_mtspr_cpu6(rn, 0x3110, v); \ + else if (rn == SPRN_DC_ADR) \ + do_mtspr_cpu6(rn, 0x3310, v); \ + else if (rn == SPRN_DC_DAT) \ + do_mtspr_cpu6(rn, 0x3510, v); \ + else if (rn == SPRN_MI_CTR) \ + do_mtspr_cpu6(rn, 0x2180, v); \ + else if (rn == SPRN_MI_AP) \ + do_mtspr_cpu6(rn, 0x2580, v); \ + else if (rn == SPRN_MI_EPN) \ + do_mtspr_cpu6(rn, 0x2780, v); \ + else if (rn == SPRN_MI_TWC) \ + do_mtspr_cpu6(rn, 0x2b80, v); \ + else if (rn == SPRN_MI_RPN) \ + do_mtspr_cpu6(rn, 0x2d80, v); \ + else if (rn == SPRN_MI_CAM) \ + do_mtspr_cpu6(rn, 0x2190, v); \ + else if (rn == SPRN_MI_RAM0) \ + do_mtspr_cpu6(rn, 0x2390, v); \ + else if (rn == SPRN_MI_RAM1) \ + do_mtspr_cpu6(rn, 0x2590, v); \ + else if (rn == SPRN_MD_CTR) \ + do_mtspr_cpu6(rn, 0x3180, v); \ + else if (rn == SPRN_M_CASID) \ + do_mtspr_cpu6(rn, 0x3380, v); \ + else if (rn == SPRN_MD_AP) \ + do_mtspr_cpu6(rn, 0x3580, v); \ + else if (rn == SPRN_MD_EPN) \ + do_mtspr_cpu6(rn, 0x3780, v); \ + else if (rn == SPRN_M_TWB) \ + do_mtspr_cpu6(rn, 0x3980, v); \ + else if (rn == SPRN_MD_TWC) \ + do_mtspr_cpu6(rn, 0x3b80, v); \ + else if (rn == SPRN_MD_RPN) \ + do_mtspr_cpu6(rn, 0x3d80, v); \ + 
else if (rn == SPRN_M_TW) \ + do_mtspr_cpu6(rn, 0x3f80, v); \ + else if (rn == SPRN_MD_CAM) \ + do_mtspr_cpu6(rn, 0x3190, v); \ + else if (rn == SPRN_MD_RAM0) \ + do_mtspr_cpu6(rn, 0x3390, v); \ + else if (rn == SPRN_MD_RAM1) \ + do_mtspr_cpu6(rn, 0x3590, v); \ + else if (rn == SPRN_DEC) \ + do_mtspr_cpu6(rn, 0x2c00, v); \ + else if (rn == SPRN_TBWL) \ + do_mtspr_cpu6(rn, 0x3880, v); \ + else if (rn == SPRN_TBWU) \ + do_mtspr_cpu6(rn, 0x3a80, v); \ + else if (rn == SPRN_DPDR) \ + do_mtspr_cpu6(rn, 0x2d30, v); \ + else \ + do_mtspr(rn, v); \ + } while (0) +#endif + #endif /* _ASM_POWERPC_REG_8xx_H */ -- cgit v1.2.3 From 63e9e1c28fa6fe714364c7817e60dbaed3cec95e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:16 +0100 Subject: powerpc/8xx: remove special handling of CPU6 errata in set_dec() The CPU6 errata is now handled directly in mtspr(), so we can use the standard set_dec() function in all cases. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/time.h | 6 +----- arch/powerpc/kernel/head_8xx.S | 18 ------------------ 2 files changed, 1 insertion(+), 23 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2d7109a8d296..1092fdd7e737 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -31,8 +31,6 @@ extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); -extern void set_dec_cpu6(unsigned int val); - /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; #define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) @@ -166,14 +164,12 @@ static inline void set_dec(int val) { #if defined(CONFIG_40x) mtspr(SPRN_PIT, val); -#elif defined(CONFIG_8xx_CPU6) - set_dec_cpu6(val - 1); #else #ifndef CONFIG_BOOKE --val; #endif mtspr(SPRN_DEC, val); -#endif /* not 40x or 8xx_CPU6 */ +#endif /* not 40x */ } static inline unsigned long tb_ticks_since(unsigned long tstamp) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 87d1f5f0d808..4a5e56d98bd6 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -926,24 +926,6 @@ _GLOBAL(set_context) SYNC blr -#ifdef CONFIG_8xx_CPU6 -/* It's here because it is unique to the 8xx. - * It is important we get called with interrupts disabled. I used to - * do that, but it appears that all code that calls this already had - * interrupt disabled. - */ - .globl set_dec_cpu6 -set_dec_cpu6: - lis r7, cpu6_errata_word@h - ori r7, r7, cpu6_errata_word@l - li r4, 0x2c00 - stw r4, 8(r7) - lwz r4, 8(r7) - mtspr 22, r3 /* Update Decrementer */ - SYNC - blr -#endif - /* * We put a few things here that have to be page-aligned. 
* This stuff goes at the beginning of the data segment, -- cgit v1.2.3 From d6bfa02fccf58b957f457c1bd0bb71f6ab169a0b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:23 +0100 Subject: powerpc: add inline functions for cache related instructions This patch adds inline functions to use dcbz, dcbi, dcbf and dcbst from C code. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cache.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 5f8229e24fe6..ffbafbf76b19 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -69,6 +69,25 @@ extern void _set_L3CR(unsigned long); #define _set_L3CR(val) do { } while(0) #endif +static inline void dcbz(void *addr) +{ + __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); +} + +static inline void dcbi(void *addr) +{ + __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory"); +} + +static inline void dcbf(void *addr) +{ + __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); +} + +static inline void dcbst(void *addr) +{ + __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); +} #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_CACHE_H */ -- cgit v1.2.3 From 5736f96d12dd4204d3aac43bf7b512ab434b904f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:25 +0100 Subject: powerpc32: Remove clear_pages() and define clear_page() inline clear_pages() is never used except by clear_page(), and PPC32 is the only architecture (still) having this function; neither PPC64 nor any other architecture has it. This patch removes clear_pages() and moves the clear_page() function inline (same as PPC64), as it is only a few instructions. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/page_32.h | 17 ++++++++++++++--- arch/powerpc/kernel/misc_32.S | 16 ---------------- arch/powerpc/kernel/ppc_ksyms_32.c | 1 - 3 files changed, 14 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index 68d73b2a7bfc..6a8e1797f223 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -1,6 +1,8 @@ #ifndef _ASM_POWERPC_PAGE_32_H #define _ASM_POWERPC_PAGE_32_H +#include + #if defined(CONFIG_PHYSICAL_ALIGN) && (CONFIG_PHYSICAL_START != 0) #if (CONFIG_PHYSICAL_START % CONFIG_PHYSICAL_ALIGN) != 0 #error "CONFIG_PHYSICAL_START must be a multiple of CONFIG_PHYSICAL_ALIGN" @@ -36,9 +38,18 @@ typedef unsigned long long pte_basic_t; typedef unsigned long pte_basic_t; #endif -struct page; -extern void clear_pages(void *page, int order); -static inline void clear_page(void *page) { clear_pages(page, 0); } +/* + * Clear page using the dcbz instruction, which doesn't cause any + * memory traffic (except to write out any cache lines which get + * displaced). This only works on cacheable memory. 
+ */ +static inline void clear_page(void *addr) +{ + unsigned int i; + + for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES) + dcbz(addr); +} extern void copy_page(void *to, void *from); #include diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 7d1284f4d89e..181afc1d05d8 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -516,22 +516,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) blr #endif /* CONFIG_BOOKE */ -/* - * Clear pages using the dcbz instruction, which doesn't cause any - * memory traffic (except to write out any cache lines which get - * displaced). This only works on cacheable memory. - * - * void clear_pages(void *page, int order) ; - */ -_GLOBAL(clear_pages) - li r0,PAGE_SIZE/L1_CACHE_BYTES - slw r0,r0,r4 - mtctr r0 -1: dcbz 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - blr - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c index 30ddd8a24eee..2bfaafe5be99 100644 --- a/arch/powerpc/kernel/ppc_ksyms_32.c +++ b/arch/powerpc/kernel/ppc_ksyms_32.c @@ -10,7 +10,6 @@ #include #include -EXPORT_SYMBOL(clear_pages); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); -- cgit v1.2.3 From affe587bacf48e328fb8d4c5ef9007b9c555b128 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 9 Feb 2016 17:08:27 +0100 Subject: powerpc32: move xxxxx_dcache_range() functions inline The flush/clean/invalidate _dcache_range() functions are all very similar and quite short. They are mainly used in __dma_sync(); perf_event locates them in the top 3 consuming functions during heavy ethernet activity. They are good candidates for inlining, as __dma_sync() does almost nothing but call them. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cacheflush.h | 52 ++++++++++++++++++++++++++-- arch/powerpc/kernel/misc_32.S | 65 ----------------------------------- arch/powerpc/kernel/ppc_ksyms.c | 2 ++ 3 files changed, 51 insertions(+), 68 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 47add2e2364a..69fb16d7a811 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -45,12 +45,58 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr) } #endif -extern void flush_dcache_range(unsigned long start, unsigned long stop); #ifdef CONFIG_PPC32 +/* + * Write any modified data cache blocks out to memory and invalidate them. + * Does not invalidate the corresponding instruction cache blocks. + */ +static inline void flush_dcache_range(unsigned long start, unsigned long stop) +{ + void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1)); + unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1); + unsigned long i; + + for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES) + dcbf(addr); + mb(); /* sync */ +} + +/* + * Write any modified data cache blocks out to memory. + * Does not invalidate the corresponding cache lines (especially for + * any corresponding instruction cache). 
+ */ +static inline void clean_dcache_range(unsigned long start, unsigned long stop) +{ + void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1)); + unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1); + unsigned long i; + + for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES) + dcbst(addr); + mb(); /* sync */ +} + +/* + * Like above, but invalidate the D-cache. This is used by the 8xx + * to invalidate the cache so the PPC core doesn't get stale data + * from the CPM (no cache snooping here :-). + */ +static inline void invalidate_dcache_range(unsigned long start, + unsigned long stop) +{ + void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1)); + unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1); + unsigned long i; + + for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES) + dcbi(addr); + mb(); /* sync */ +} + #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 +extern void flush_dcache_range(unsigned long start, unsigned long stop); extern void flush_inval_dcache_range(unsigned long start, unsigned long stop); extern void flush_dcache_phys_range(unsigned long start, unsigned long stop); #endif diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 181afc1d05d8..09e1e5df0f34 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -374,71 +374,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) sync /* additional sync needed on g4 */ isync blr -/* - * Write any modified data cache blocks out to memory. - * Does not invalidate the corresponding cache lines (especially for - * any corresponding instruction cache). - * - * clean_dcache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(clean_dcache_range) - li r5,L1_CACHE_BYTES-1 - andc r3,r3,r5 - subf r4,r3,r4 - add r4,r4,r5 - srwi. r4,r4,L1_CACHE_SHIFT - beqlr - mtctr r4 - -1: dcbst 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - sync /* wait for dcbst's to get to ram */ - blr - -/* - * Write any modified data cache blocks out to memory and invalidate them. - * Does not invalidate the corresponding instruction cache blocks. - * - * flush_dcache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(flush_dcache_range) - li r5,L1_CACHE_BYTES-1 - andc r3,r3,r5 - subf r4,r3,r4 - add r4,r4,r5 - srwi. r4,r4,L1_CACHE_SHIFT - beqlr - mtctr r4 - -1: dcbf 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - sync /* wait for dcbst's to get to ram */ - blr - -/* - * Like above, but invalidate the D-cache. This is used by the 8xx - * to invalidate the cache so the PPC core doesn't get stale data - * from the CPM (no cache snooping here :-). - * - * invalidate_dcache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(invalidate_dcache_range) - li r5,L1_CACHE_BYTES-1 - andc r3,r3,r5 - subf r4,r3,r4 - add r4,r4,r5 - srwi. r4,r4,L1_CACHE_SHIFT - beqlr - mtctr r4 - -1: dcbi 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - sync /* wait for dcbi's to get to ram */ - blr - /* * Flush a particular page from the data cache to RAM. 
* Note: this is necessary because the instruction cache does *not* diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ef7024dacff7..9f01e28ecef3 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -6,7 +6,9 @@ #include #include +#ifdef CONFIG_PPC64 EXPORT_SYMBOL(flush_dcache_range); +#endif EXPORT_SYMBOL(flush_icache_range); EXPORT_SYMBOL(empty_zero_page); -- cgit v1.2.3 From 2e098dcee6208a690f73e11feb29d181cae7d978 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Mar 2016 14:07:49 +0100 Subject: powerpc/8xx: Fix do_mtspr_cpu6() build on older compilers GCC < 4.9 is unable to build this, saying: arch/powerpc/mm/8xx_mmu.c:139:2: error: memory input 1 is not directly addressable Change the one-element array into a simple variable to avoid this. Fixes: 1458dd951f7c ("powerpc/8xx: Handle CPU6 ERRATA directly in mtspr() macro") Signed-off-by: Christophe Leroy Cc: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg_8xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index d41412c7748b..94d01f81e668 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -53,7 +53,7 @@ #ifdef CONFIG_8xx_CPU6 #define do_mtspr_cpu6(rn, rn_addr, v) \ do { \ - int _reg_cpu6 = rn_addr, _tmp_cpu6[1]; \ + int _reg_cpu6 = rn_addr, _tmp_cpu6; \ asm volatile("stw %0, %1;" \ "lwz %0, %1;" \ "mtspr " __stringify(rn) ",%2" : \ -- cgit v1.2.3
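For reference, what do_mtspr_cpu6(SPRN_DEC, 0x2c00, val) expands to after this last fix can be sketched as a stand-alone function. This is illustrative only, not kernel code: the name cpu6_mtspr_dec is made up, and only the store/load/mtspr sequence, the SPR number 22 and the 0x2c00 errata word come from the patches above. The point of the fix is that the "m" operand now refers to a plain, directly addressable variable, which is exactly what GCC < 4.9 could not prove for the one-element array:

static inline void cpu6_mtspr_dec(unsigned long val)
{
	int reg = 0x2c00;	/* CPU6 errata word for SPRN_DEC, per the mtspr() table above */
	int tmp;		/* plain int: a directly addressable "m" operand */

	/* dummy store/load required by the CPU6 errata, then the real mtspr */
	asm volatile("stw %0, %1;"
		     "lwz %0, %1;"
		     "mtspr 22, %2"	/* 22 == SPRN_DEC */
		     : : "r" (reg), "m" (tmp), "r" (val)
		     : "memory");
}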