diff options
Diffstat (limited to 'arch/powerpc/platforms')
48 files changed, 1423 insertions, 871 deletions
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 6da813b65b42..e3e5217c9822 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -7,14 +7,6 @@ config ACADIA help This option enables support for the AMCC 405EZ Acadia evaluation board. -config EP405 - bool "EP405/EP405PC" - depends on 40x - select 405GP - select FORCE_PCI - help - This option enables support for the EP405/EP405PC boards. - config HOTFOOT bool "Hotfoot" depends on 40x @@ -45,33 +37,6 @@ config MAKALU help This option enables support for the AMCC PPC405EX board. -config WALNUT - bool "Walnut" - depends on 40x - default y - select 405GP - select FORCE_PCI - select OF_RTC - help - This option enables support for the IBM PPC405GP evaluation board. - -config XILINX_VIRTEX_GENERIC_BOARD - bool "Generic Xilinx Virtex board" - depends on 40x - select XILINX_VIRTEX_II_PRO - select XILINX_VIRTEX_4_FX - select XILINX_INTC - help - This option enables generic support for Xilinx Virtex based boards. - - The generic virtex board support matches any device tree which - specifies 'xilinx,virtex' in its compatible field. This includes - the Xilinx ML3xx and ML4xx reference designs using the powerpc - core. - - Most Virtex designs should use this unless it needs to do some - special configuration at board probe time. - config OBS600 bool "OpenBlockS 600" depends on 40x @@ -86,18 +51,6 @@ config PPC40x_SIMPLE help This option enables the simple PowerPC 40x platform support. -# OAK doesn't exist but wanted to keep this around for any future 403GCX boards -config 403GCX - bool - #depends on OAK - select IBM405_ERR51 - -config 405GP - bool - select IBM405_ERR77 - select IBM405_ERR51 - select IBM_EMAC_ZMII if IBM_EMAC - config 405EX bool select IBM_EMAC_EMAC4 if IBM_EMAC @@ -109,25 +62,6 @@ config 405EZ select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC -config XILINX_VIRTEX - bool - select DEFAULT_UIMAGE - -config XILINX_VIRTEX_II_PRO - bool - select XILINX_VIRTEX - select IBM405_ERR77 - select IBM405_ERR51 - -config XILINX_VIRTEX_4_FX - bool - select XILINX_VIRTEX - -config STB03xxx - bool - select IBM405_ERR77 - select IBM405_ERR51 - config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 40x @@ -135,16 +69,6 @@ config PPC4xx_GPIO help Enable gpiolib support for ppc40x based boards -# 40x errata/workaround config symbols, selected by the CPU models above - -# All 405-based cores up until the 405GPR and 405EP have this errata. -config IBM405_ERR77 - bool - -# All 40x-based cores, up until the 405GPR and 405EP have this errata. -config IBM405_ERR51 - bool - config APM8018X bool "APM8018X" depends on 40x diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile index 828d78340dd9..122de98527c4 100644 --- a/arch/powerpc/platforms/40x/Makefile +++ b/arch/powerpc/platforms/40x/Makefile @@ -1,5 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_WALNUT) += walnut.o -obj-$(CONFIG_XILINX_VIRTEX_GENERIC_BOARD) += virtex.o -obj-$(CONFIG_EP405) += ep405.o obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c deleted file mode 100644 index 1c8aec6e9bb7..000000000000 --- a/arch/powerpc/platforms/40x/ep405.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Architecture- / platform-specific boot-time initialization code for - * IBM PowerPC 4xx based boards. Adapted from original - * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek - * <dan@net4x.com>. - * - * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> - * - * Rewritten and ported to the merged powerpc tree: - * Copyright 2007 IBM Corporation - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * Adapted to EP405 by Ben. Herrenschmidt <benh@kernel.crashing.org> - * - * TODO: Wire up the PCI IRQ mux and the southbridge interrupts - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/time.h> -#include <asm/uic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc4xx.h> - -static struct device_node *bcsr_node; -static void __iomem *bcsr_regs; - -/* BCSR registers */ -#define BCSR_ID 0 -#define BCSR_PCI_CTRL 1 -#define BCSR_FLASH_NV_POR_CTRL 2 -#define BCSR_FENET_UART_CTRL 3 -#define BCSR_PCI_IRQ 4 -#define BCSR_XIRQ_SELECT 5 -#define BCSR_XIRQ_ROUTING 6 -#define BCSR_XIRQ_STATUS 7 -#define BCSR_XIRQ_STATUS2 8 -#define BCSR_SW_STAT_LED_CTRL 9 -#define BCSR_GPIO_IRQ_PAR_CTRL 10 -/* there's more, can't be bothered typing them tho */ - - -static const struct of_device_id ep405_of_bus[] __initconst = { - { .compatible = "ibm,plb3", }, - { .compatible = "ibm,opb", }, - { .compatible = "ibm,ebc", }, - {}, -}; - -static int __init ep405_device_probe(void) -{ - of_platform_bus_probe(NULL, ep405_of_bus, NULL); - - return 0; -} -machine_device_initcall(ep405, ep405_device_probe); - -static void __init ep405_init_bcsr(void) -{ - const u8 *irq_routing; - int i; - - /* Find the bloody thing & map it */ - bcsr_node = of_find_compatible_node(NULL, NULL, "ep405-bcsr"); - if (bcsr_node == NULL) { - printk(KERN_ERR "EP405 BCSR not found !\n"); - return; - } - bcsr_regs = of_iomap(bcsr_node, 0); - if (bcsr_regs == NULL) { - printk(KERN_ERR "EP405 BCSR failed to map !\n"); - return; - } - - /* Get the irq-routing property and apply the routing to the CPLD */ - irq_routing = of_get_property(bcsr_node, "irq-routing", NULL); - if (irq_routing == NULL) - return; - for (i = 0; i < 16; i++) { - u8 irq = irq_routing[i]; - out_8(bcsr_regs + BCSR_XIRQ_SELECT, i); - out_8(bcsr_regs + BCSR_XIRQ_ROUTING, irq); - } - in_8(bcsr_regs + BCSR_XIRQ_SELECT); - mb(); - out_8(bcsr_regs + BCSR_GPIO_IRQ_PAR_CTRL, 0xfe); -} - -static void __init ep405_setup_arch(void) -{ - /* Find & init the BCSR CPLD */ - ep405_init_bcsr(); - - pci_set_flags(PCI_REASSIGN_ALL_RSRC); -} - -static int __init ep405_probe(void) -{ - if (!of_machine_is_compatible("ep405")) - return 0; - - return 1; -} - -define_machine(ep405) { - .name = "EP405", - .probe = ep405_probe, - .setup_arch = ep405_setup_arch, - .progress = udbg_progress, - .init_IRQ = uic_init_tree, - .get_irq = uic_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c deleted file mode 100644 index e3d5e095846b..000000000000 --- a/arch/powerpc/platforms/40x/virtex.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Xilinx Virtex (IIpro & 4FX) based board support - * - * Copyright 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/xilinx_intc.h> -#include <asm/xilinx_pci.h> -#include <asm/ppc4xx.h> - -static const struct of_device_id xilinx_of_bus_ids[] __initconst = { - { .compatible = "xlnx,plb-v46-1.00.a", }, - { .compatible = "xlnx,plb-v34-1.01.a", }, - { .compatible = "xlnx,plb-v34-1.02.a", }, - { .compatible = "xlnx,opb-v20-1.10.c", }, - { .compatible = "xlnx,dcr-v29-1.00.a", }, - { .compatible = "xlnx,compound", }, - {} -}; - -static int __init virtex_device_probe(void) -{ - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); - - return 0; -} -machine_device_initcall(virtex, virtex_device_probe); - -static int __init virtex_probe(void) -{ - if (!of_machine_is_compatible("xlnx,virtex")) - return 0; - - return 1; -} - -define_machine(virtex) { - .name = "Xilinx Virtex", - .probe = virtex_probe, - .setup_arch = xilinx_pci_init, - .init_IRQ = xilinx_intc_init_tree, - .get_irq = xintc_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c deleted file mode 100644 index e5797815e2f1..000000000000 --- a/arch/powerpc/platforms/40x/walnut.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Architecture- / platform-specific boot-time initialization code for - * IBM PowerPC 4xx based boards. Adapted from original - * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek - * <dan@net4x.com>. - * - * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> - * - * Rewritten and ported to the merged powerpc tree: - * Copyright 2007 IBM Corporation - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <linux/rtc.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/time.h> -#include <asm/uic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc4xx.h> - -static const struct of_device_id walnut_of_bus[] __initconst = { - { .compatible = "ibm,plb3", }, - { .compatible = "ibm,opb", }, - { .compatible = "ibm,ebc", }, - {}, -}; - -static int __init walnut_device_probe(void) -{ - of_platform_bus_probe(NULL, walnut_of_bus, NULL); - of_instantiate_rtc(); - - return 0; -} -machine_device_initcall(walnut, walnut_device_probe); - -static int __init walnut_probe(void) -{ - if (!of_machine_is_compatible("ibm,walnut")) - return 0; - - pci_set_flags(PCI_REASSIGN_ALL_RSRC); - - return 1; -} - -define_machine(walnut) { - .name = "Walnut", - .probe = walnut_probe, - .progress = udbg_progress, - .init_IRQ = uic_init_tree, - .get_irq = uic_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 25ebe634a661..78ac6d67a935 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -167,8 +167,7 @@ config YOSEMITE config ISS4xx bool "ISS 4xx Simulator" - depends on (44x || 40x) - select 405GP if 40x + depends on 44x select 440GP if 44x && !PPC_47x select PPC_FPU select OF_RTC @@ -232,33 +231,6 @@ config ICON help This option enables support for the AMCC PPC440SPe evaluation board. -config XILINX_VIRTEX440_GENERIC_BOARD - bool "Generic Xilinx Virtex 5 FXT board support" - depends on 44x - select XILINX_VIRTEX_5_FXT - select XILINX_INTC - help - This option enables generic support for Xilinx Virtex based boards - that use a 440 based processor in the Virtex 5 FXT FPGA architecture. - - The generic virtex board support matches any device tree which - specifies 'xlnx,virtex440' in its compatible field. This includes - the Xilinx ML5xx reference designs using the powerpc core. - - Most Virtex 5 designs should use this unless it needs to do some - special configuration at board probe time. - -config XILINX_ML510 - bool "Xilinx ML510 extra support" - depends on XILINX_VIRTEX440_GENERIC_BOARD - select HAVE_PCI - select XILINX_PCI if PCI - select PPC_INDIRECT_PCI if PCI - select PPC_I8259 if PCI - help - This option enables extra support for features on the Xilinx ML510 - board. The ML510 has a PCI bus with ALI south bridge. - config PPC44x_SIMPLE bool "Simple PowerPC 44x board support" depends on 44x @@ -354,13 +326,3 @@ config 476FPE_ERR46 config IBM440EP_ERR42 bool -# Xilinx specific config options. -config XILINX_VIRTEX - bool - select DEFAULT_UIMAGE - -# Xilinx Virtex 5 FXT FPGA architecture, selected by a Xilinx board above -config XILINX_VIRTEX_5_FXT - bool - select XILINX_VIRTEX - diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile index 1b78c6af821a..5ba031f57652 100644 --- a/arch/powerpc/platforms/44x/Makefile +++ b/arch/powerpc/platforms/44x/Makefile @@ -7,8 +7,6 @@ obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o obj-$(CONFIG_EBONY) += ebony.o obj-$(CONFIG_SAM440EP) += sam440ep.o obj-$(CONFIG_WARP) += warp.o -obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o -obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o obj-$(CONFIG_ISS4xx) += iss4xx.o obj-$(CONFIG_CANYONLANDS)+= canyonlands.o obj-$(CONFIG_CURRITUCK) += ppc476.o diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c deleted file mode 100644 index 3eb13ed926ee..000000000000 --- a/arch/powerpc/platforms/44x/virtex.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Xilinx Virtex 5FXT based board support, derived from - * the Xilinx Virtex (IIpro & 4FX) based board support - * - * Copyright 2007 Secret Lab Technologies Ltd. - * Copyright 2008 Xilinx, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/xilinx_intc.h> -#include <asm/xilinx_pci.h> -#include <asm/reg.h> -#include <asm/ppc4xx.h> -#include "44x.h" - -static const struct of_device_id xilinx_of_bus_ids[] __initconst = { - { .compatible = "simple-bus", }, - { .compatible = "xlnx,plb-v46-1.00.a", }, - { .compatible = "xlnx,plb-v46-1.02.a", }, - { .compatible = "xlnx,plb-v34-1.01.a", }, - { .compatible = "xlnx,plb-v34-1.02.a", }, - { .compatible = "xlnx,opb-v20-1.10.c", }, - { .compatible = "xlnx,dcr-v29-1.00.a", }, - { .compatible = "xlnx,compound", }, - {} -}; - -static int __init virtex_device_probe(void) -{ - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); - - return 0; -} -machine_device_initcall(virtex, virtex_device_probe); - -static int __init virtex_probe(void) -{ - if (!of_machine_is_compatible("xlnx,virtex440")) - return 0; - - return 1; -} - -define_machine(virtex) { - .name = "Xilinx Virtex440", - .probe = virtex_probe, - .setup_arch = xilinx_pci_init, - .init_IRQ = xilinx_intc_init_tree, - .get_irq = xintc_get_irq, - .calibrate_decr = generic_calibrate_decr, - .restart = ppc4xx_reset_system, -}; diff --git a/arch/powerpc/platforms/44x/virtex_ml510.c b/arch/powerpc/platforms/44x/virtex_ml510.c deleted file mode 100644 index 349f218b335c..000000000000 --- a/arch/powerpc/platforms/44x/virtex_ml510.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <asm/i8259.h> -#include <linux/pci.h> -#include "44x.h" - -/** - * ml510_ail_quirk - */ -static void ml510_ali_quirk(struct pci_dev *dev) -{ - /* Enable the IDE controller */ - pci_write_config_byte(dev, 0x58, 0x4c); - /* Assign irq 14 to the primary ide channel */ - pci_write_config_byte(dev, 0x44, 0x0d); - /* Assign irq 15 to the secondary ide channel */ - pci_write_config_byte(dev, 0x75, 0x0f); - /* Set the ide controller in native mode */ - pci_write_config_byte(dev, 0x09, 0xff); - - /* INTB = disabled, INTA = disabled */ - pci_write_config_byte(dev, 0x48, 0x00); - /* INTD = disabled, INTC = disabled */ - pci_write_config_byte(dev, 0x4a, 0x00); - /* Audio = INT7, Modem = disabled. */ - pci_write_config_byte(dev, 0x4b, 0x60); - /* USB = INT7 */ - pci_write_config_byte(dev, 0x74, 0x06); -} -DECLARE_PCI_FIXUP_EARLY(0x10b9, 0x1533, ml510_ali_quirk); - diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index e6e2adcc7b64..c13d64c3b019 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1242,7 +1242,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) if (mbase == NULL) { printk(KERN_ERR "%pOF: Can't map internal config space !", port->node); - goto done; + return; } while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA) @@ -1252,9 +1252,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) } if (attempt) port->link = 1; -done: iounmap(mbase); - } static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 3a9969c429b3..70083649c9ea 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -248,6 +248,7 @@ mmu_on: blr +_ASM_NOKPROBE_SYMBOL(lite5200_wakeup) /* ---------------------------------------------------------------------- */ @@ -391,6 +392,7 @@ restore_regs: LOAD_SPRN(TBWU, 0x5b); blr +_ASM_NOKPROBE_SYMBOL(restore_regs) diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c index 1cdd5ed9d896..3b5cb39a564c 100644 --- a/arch/powerpc/platforms/82xx/pq2.c +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -10,6 +10,8 @@ * Copyright (c) 2006 MontaVista Software, Inc. */ +#include <linux/kprobes.h> + #include <asm/cpm2.h> #include <asm/io.h> #include <asm/pci-bridge.h> @@ -29,6 +31,7 @@ void __noreturn pq2_restart(char *cmd) panic("Restart failed\n"); } +NOKPROBE_SYMBOL(pq2_restart) #ifdef CONFIG_PCI static int pq2_pci_exclude_device(struct pci_controller *hose, diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index 3acd7470dc5e..bc6bd4d0ae96 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -548,3 +548,4 @@ mpc83xx_deep_resume: mtdec r0 rfi +_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume) diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 5b91ea5694e3..dba3aa73c062 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -17,6 +17,7 @@ #include <asm/pci-bridge.h> #include <asm/mpic.h> #include <asm/cacheflush.h> +#include <asm/inst.h> #include <sysdev/fsl_soc.h> @@ -72,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. */ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch(vector, target, BRANCH_SET_LINK); + patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -82,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction(vector, save_vector); + patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index e0fe670f06f6..abb2b45b2789 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -98,15 +98,6 @@ menu "MPC8xx CPM Options" # 8xx specific questions. comment "Generic MPC8xx Options" -config 8xx_COPYBACK - bool "Copy-Back Data Cache (else Writethrough)" - help - Saying Y here will cause the cache on an MPC8xx processor to be used - in Copy-Back mode. If you say N here, it is used in Writethrough - mode. - - If in doubt, say Y here. - config 8xx_GPIO bool "GPIO API Support" select GPIOLIB @@ -171,4 +162,45 @@ config UCODE_PATCH default y depends on !NO_UCODE_PATCH +menu "8xx advanced setup" + depends on PPC_8xx + +config PIN_TLB + bool "Pinned Kernel TLBs" + depends on ADVANCED_OPTIONS + help + On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each + table 4 TLBs can be pinned. + + It reduces the amount of usable TLBs to 28 (ie by 12%). That's the + reason why we make it selectable. + + This option does nothing, it just activate the selection of what + to pin. + +config PIN_TLB_DATA + bool "Pinned TLB for DATA" + depends on PIN_TLB + default y + help + This pins the first 32 Mbytes of memory with 8M pages. + +config PIN_TLB_IMMR + bool "Pinned TLB for IMMR" + depends on PIN_TLB + default y + help + This pins the IMMR area with a 512kbytes page. In case + CONFIG_PIN_TLB_DATA is also selected, it will reduce + CONFIG_PIN_TLB_DATA to 24 Mbytes. + +config PIN_TLB_TEXT + bool "Pinned TLB for TEXT" + depends on PIN_TLB + default y + help + This pins kernel text with 8M pages. + +endmenu + endmenu diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 1f8025383caa..5e6479d409a0 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -317,8 +317,4 @@ config MCU_MPC8349EMITX also register MCU GPIOs with the generic GPIO API, so you'll able to use MCU pins as GPIOs. -config XILINX_PCI - bool "Xilinx PCI host bridge support" - depends on PCI && XILINX_VIRTEX - endmenu diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 27a81c291be8..d349603fb889 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -55,8 +55,8 @@ config PPC_8xx select SYS_SUPPORTS_HUGETLBFS select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select PPC_MM_SLICES if HUGETLB_PAGE select HAVE_ARCH_VMAP_STACK + select HUGETLBFS config 40x bool "AMCC 40x" @@ -377,7 +377,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y + default y if !PPC_BOOK3S_32 help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -389,7 +389,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y + default y if !PPC_BOOK3S_32 help Enable support for Kernel Userspace Access Protection (KUAP) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca9ffc1c8685..2124831cf57c 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -943,7 +943,7 @@ static int __init cell_iommu_fixed_mapping_init(void) fbase = max(fbase, dbase + dsize); } - fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); + fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT); fsize = memblock_phys_mem_size(); if ((fbase + fsize) <= 0x800000000ul) @@ -963,8 +963,8 @@ static int __init cell_iommu_fixed_mapping_init(void) hend = hbase + htab_size_bytes; /* The window must start and end on a segment boundary */ - if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || - (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) { pr_debug("iommu: hash window not segment aligned\n"); return -1; } diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 67e48b0a164e..a802ef957d63 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -172,19 +172,6 @@ static void wii_shutdown(void) flipper_quiesce(); } -define_machine(wii) { - .name = "wii", - .probe = wii_probe, - .setup_arch = wii_setup_arch, - .restart = wii_restart, - .halt = wii_halt, - .init_IRQ = wii_pic_probe, - .get_irq = flipper_pic_get_irq, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, - .machine_shutdown = wii_shutdown, -}; - static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, @@ -200,3 +187,15 @@ static int __init wii_device_probe(void) } device_initcall(wii_device_probe); +define_machine(wii) { + .name = "wii", + .probe = wii_probe, + .setup_arch = wii_setup_arch, + .restart = wii_restart, + .halt = wii_halt, + .init_IRQ = wii_pic_probe, + .get_irq = flipper_pic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, + .machine_shutdown = wii_shutdown, +}; diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index af309ee99114..9d4ecd292255 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -108,7 +108,7 @@ static void * __init bootx_early_getprop(unsigned long base, #define dt_push_token(token, mem) \ do { \ - *(mem) = _ALIGN_UP(*(mem),4); \ + *(mem) = ALIGN(*(mem),4); \ *((u32 *)*(mem)) = token; \ *(mem) += 4; \ } while(0) @@ -150,7 +150,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size, /* push property content */ if (size && data) { memcpy((void *)*mem_end, data, size); - *mem_end = _ALIGN_UP(*mem_end + size, 4); + *mem_end = ALIGN(*mem_end + size, 4); } } @@ -303,7 +303,7 @@ static void __init bootx_scan_dt_build_struct(unsigned long base, *lp++ = *p; } *lp = 0; - *mem_end = _ALIGN_UP((unsigned long)lp + 1, 4); + *mem_end = ALIGN((unsigned long)lp + 1, 4); /* get and store all properties */ while (*ppp) { @@ -356,11 +356,11 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) /* Start using memory after the big blob passed by BootX, get * some space for the header */ - mem_start = mem_end = _ALIGN_UP(((unsigned long)bi) + start, 4); + mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4); DBG("Boot params header at: %x\n", mem_start); hdr = (struct boot_param_header *)mem_start; mem_end += sizeof(struct boot_param_header); - rsvmap = (u64 *)(_ALIGN_UP(mem_end, 8)); + rsvmap = (u64 *)(ALIGN(mem_end, 8)); hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start; mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64); @@ -386,7 +386,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase; /* Build structure */ - mem_end = _ALIGN(mem_end, 16); + mem_end = ALIGN(mem_end, 16); DBG("Building device tree structure at: %x\n", mem_end); hdr->off_dt_struct = mem_end - mem_start; bootx_scan_dt_build_struct(base, 4, &mem_end); @@ -404,7 +404,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) * also bump mem_reserve_cnt to cause further reservations to * fail since it's too late. */ - mem_end = _ALIGN(mem_end, PAGE_SIZE); + mem_end = ALIGN(mem_end, PAGE_SIZE); DBG("End of boot params: %x\n", mem_end); rsvmap[0] = mem_start; rsvmap[1] = mem_end; diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index da69e0fcb4f1..ced225415486 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -184,6 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtlr r10 blr +_ASM_NOKPROBE_SYMBOL(flush_disable_75x) /* This code is for 745x processors */ flush_disable_745x: @@ -351,4 +352,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) mtmsr r11 /* restore DR and EE */ isync blr +_ASM_NOKPROBE_SYMBOL(flush_disable_745x) #endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index dc7a5bae8f1c..853ccc4480e2 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -55,7 +55,7 @@ struct chrp_header { u8 cksum; u16 len; char name[12]; - u8 data[0]; + u8 data[]; }; struct core99_header { diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index bd6085b470b7..f9a680fdd9c4 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) mtmsr r2 isync b 1b - +_ASM_NOKPROBE_SYMBOL(low_cpu_die) /* * Here is the resume code. */ @@ -282,6 +282,7 @@ _GLOBAL(core99_wake_up) lwz r1,0(r3) /* Pass thru to older resume code ... */ +_ASM_NOKPROBE_SYMBOL(core99_wake_up) /* * Here is the resume code for older machines. * r1 has the physical address of SL_PC(sp). @@ -429,6 +430,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lwz r0,4(r1) mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) turn_on_mmu: mflr r4 @@ -438,6 +440,7 @@ turn_on_mmu: sync isync rfi +_ASM_NOKPROBE_SYMBOL(turn_on_mmu) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index be2ab5b11e57..9969c07035b6 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -49,6 +49,7 @@ #include <asm/keylargo.h> #include <asm/pmac_low_i2c.h> #include <asm/pmac_pfunc.h> +#include <asm/inst.h> #include "pmac.h" @@ -813,7 +814,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch(vector, target, BRANCH_SET_LINK); + patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -826,7 +827,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction(vector, save_vector); + patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index c0f8120045c3..fe3f0fb5aeca 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 78599bca66c2..2dd467383a88 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1270,7 +1270,7 @@ static int pnv_parse_cpuidle_dt(void) /* Read residencies */ if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", temp_u32, nr_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); rc = -EINVAL; goto out; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b95b9e3c4c98..abeaa533b976 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -15,6 +15,7 @@ #include <asm/debugfs.h> #include <asm/powernv.h> +#include <asm/ppc-pci.h> #include <asm/opal.h> #include "pci.h" @@ -425,9 +426,10 @@ static void pnv_comp_attach_table_group(struct npu_comp *npucomp, ++npucomp->pe_num; } -struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) +static struct iommu_table_group * + pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) { - struct iommu_table_group *table_group; + struct iommu_table_group *compound_group; struct npu_comp *npucomp; struct pci_dev *gpdev = NULL; struct pci_controller *hose; @@ -446,39 +448,52 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) hose = pci_bus_to_host(npdev->bus); if (hose->npu) { - table_group = &hose->npu->npucomp.table_group; - - if (!table_group->group) { - table_group->ops = &pnv_npu_peers_ops; - iommu_register_group(table_group, - hose->global_number, - pe->pe_number); - } + /* P9 case: compound group is per-NPU (all gpus, all links) */ + npucomp = &hose->npu->npucomp; } else { - /* Create a group for 1 GPU and attached NPUs for POWER8 */ - pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL); - table_group = &pe->npucomp->table_group; - table_group->ops = &pnv_npu_peers_ops; - iommu_register_group(table_group, hose->global_number, - pe->pe_number); + /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */ + npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL); } - /* Steal capabilities from a GPU PE */ - table_group->max_dynamic_windows_supported = - pe->table_group.max_dynamic_windows_supported; - table_group->tce32_start = pe->table_group.tce32_start; - table_group->tce32_size = pe->table_group.tce32_size; - table_group->max_levels = pe->table_group.max_levels; - if (!table_group->pgsizes) - table_group->pgsizes = pe->table_group.pgsizes; + compound_group = &npucomp->table_group; + if (!compound_group->group) { + compound_group->ops = &pnv_npu_peers_ops; + iommu_register_group(compound_group, hose->global_number, + pe->pe_number); - npucomp = container_of(table_group, struct npu_comp, table_group); + /* Steal capabilities from a GPU PE */ + compound_group->max_dynamic_windows_supported = + pe->table_group.max_dynamic_windows_supported; + compound_group->tce32_start = pe->table_group.tce32_start; + compound_group->tce32_size = pe->table_group.tce32_size; + compound_group->max_levels = pe->table_group.max_levels; + if (!compound_group->pgsizes) + compound_group->pgsizes = pe->table_group.pgsizes; + } + + /* + * The gpu would have been added to the iommu group that's created + * for the PE. Pull it out now. + */ + iommu_del_device(&gpdev->dev); + + /* + * I'm not sure this is strictly required, but it's probably a good idea + * since the table_group for the PE is going to be attached to the + * compound table group. If we leave the PE's iommu group active then + * we might have the same table_group being modifiable via two sepeate + * iommu groups. + */ + iommu_group_put(pe->table_group.group); + + /* now put the GPU into the compound group */ pnv_comp_attach_table_group(npucomp, pe); + iommu_add_device(compound_group, &gpdev->dev); - return table_group; + return compound_group; } -struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) +static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) { struct iommu_table_group *table_group; struct npu_comp *npucomp; @@ -521,6 +536,54 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) return table_group; } + +void pnv_pci_npu_setup_iommu_groups(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + + /* + * For non-nvlink devices the IOMMU group is registered when the PE is + * configured and devices are added to the group when the per-device + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is + * only initialise for "normal" IODA PHBs. + * + * For NVLink devices we need to ensure the NVLinks and the GPU end up + * in the same IOMMU group, so that's handled here. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->type == PNV_PHB_IODA2) + list_for_each_entry(pe, &phb->ioda.pe_list, list) + pnv_try_setup_npu_table_group(pe); + } + + /* + * Now we have all PHBs discovered, time to add NPU devices to + * the corresponding IOMMU groups. + */ + list_for_each_entry(hose, &hose_list, list_node) { + unsigned long pgsizes; + + phb = hose->private_data; + + if (phb->type != PNV_PHB_NPU_NVLINK) + continue; + + pgsizes = pnv_ioda_parse_tce_sizes(phb); + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + /* + * IODA2 bridges get this set up from + * pci_controller_ops::setup_bridge but NPU bridges + * do not have this hook defined so we do it here. + */ + pe->table_group.pgsizes = pgsizes; + pnv_npu_compound_attach(pe); + } + } +} #endif /* CONFIG_IOMMU_API */ int pnv_npu2_init(struct pci_controller *hose) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index d361d37d975f..9a360ced663b 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -671,7 +671,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) * Firmware supports 32-bit field for size. Align it to PAGE_SIZE * and request firmware to copy multiple kernel boot memory regions. */ - fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE); + fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE); /* * Check if dump has been initiated on last reboot. diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2b3dfd0b6cdd..d95954ad4c0a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -811,6 +811,10 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name, goto out; attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) { + rc = -ENOMEM; + goto out; + } name = kstrdup(export_name, GFP_KERNEL); if (!name) { rc = -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 5dc6847d5f4c..f923359d8afc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -17,6 +17,34 @@ #include <asm/tce.h> #include "pci.h" +unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + unsigned long mask = 0; + int i, rc, count; + u32 val; + + count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); + if (count <= 0) { + mask = SZ_4K | SZ_64K; + /* Add 16M for POWER8 by default */ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_ARCH_300)) + mask |= SZ_16M | SZ_256M; + return mask; + } + + for (i = 0; i < count; i++) { + rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", + i, &val); + if (rc == 0) + mask |= 1ULL << val; + } + + return mask; +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned int page_shift) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 57d3a6af1d52..73a63efcf855 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -51,6 +51,7 @@ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); +static void pnv_pci_configure_bus(struct pci_bus *bus); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) @@ -264,8 +265,8 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; - start = _ALIGN_DOWN(r->start - base, sgsz); - end = _ALIGN_UP(r->end - base, sgsz); + start = ALIGN_DOWN(r->start - base, sgsz); + end = ALIGN(r->end - base, sgsz); for (segno = start / sgsz; segno < end / sgsz; segno++) { if (pe_bitmap) set_bit(segno, pe_bitmap); @@ -361,7 +362,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) return NULL; /* Allocate bitmap */ - size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); + size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); pe_alloc = kzalloc(size, GFP_KERNEL); if (!pe_alloc) { pr_warn("%s: Out of memory !\n", @@ -660,6 +661,16 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) return state; } +struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) +{ + int pe_number = phb->ioda.pe_rmap[bdfn]; + + if (pe_number == IODA_INVALID_PE) + return NULL; + + return &phb->ioda.pe_array[pe_number]; +} + struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -1110,34 +1121,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return pe; } -static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - struct pci_dn *pdn = pci_get_pdn(dev); - - if (pdn == NULL) { - pr_warn("%s: No device node associated with device !\n", - pci_name(dev)); - continue; - } - - /* - * In partial hotplug case, the PCI device might be still - * associated with the PE and needn't attach it to the PE - * again. - */ - if (pdn->pe_number != IODA_INVALID_PE) - continue; - - pe->device_count++; - pdn->pe_number = pe->pe_number; - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_same_PE(dev->subordinate, pe); - } -} - /* * There're 2 types of PCI bus sensitive PEs: One that is compromised of * single PCI bus. Another one that contains the primary PCI bus and its @@ -1156,15 +1139,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) * We should reuse it instead of allocating a new one. */ pe_num = phb->ioda.pe_rmap[bus->number << 8]; - if (pe_num != IODA_INVALID_PE) { + if (WARN_ON(pe_num != IODA_INVALID_PE)) { pe = &phb->ioda.pe_array[pe_num]; - pnv_ioda_setup_same_PE(bus, pe); return NULL; } /* PE number for root bus should have been reserved */ - if (pci_is_root_bus(bus) && - phb->ioda.root_pe_idx != IODA_INVALID_PE) + if (pci_is_root_bus(bus)) pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; /* Check if PE is determined by M64 */ @@ -1202,9 +1183,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) return NULL; } - /* Associate it with all child devices */ - pnv_ioda_setup_same_PE(bus, pe); - /* Put PE to the list */ list_add_tail(&pe->list, &phb->ioda.pe_list); @@ -1288,7 +1266,7 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) pnv_ioda_setup_npu_PE(pdev); } -static void pnv_pci_ioda_setup_PEs(void) +static void pnv_pci_ioda_setup_nvlink(void) { struct pci_controller *hose; struct pnv_phb *phb; @@ -1312,6 +1290,11 @@ static void pnv_pci_ioda_setup_PEs(void) list_for_each_entry(pe, &phb->ioda.pe_list, list) pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV); } + +#ifdef CONFIG_IOMMU_API + /* setup iommu groups so we can do nvlink pass-thru */ + pnv_pci_npu_setup_iommu_groups(); +#endif } #ifdef CONFIG_PCI_IOV @@ -1550,11 +1533,6 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); -#ifdef CONFIG_IOMMU_API -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, struct pci_bus *bus); - -#endif static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) { struct pci_bus *bus; @@ -1619,11 +1597,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) } pnv_pci_ioda2_setup_dma_pe(phb, pe); -#ifdef CONFIG_IOMMU_API - iommu_register_group(&pe->table_group, - pe->phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL); -#endif } } @@ -1767,24 +1740,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; - /* - * The function can be called while the PE# - * hasn't been assigned. Do nothing for the - * case. - */ - if (!pdn || pdn->pe_number == IODA_INVALID_PE) - return; + /* Check if the BDFN for this device is associated with a PE yet */ + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + if (!pe) { + /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ + if (WARN_ON(pdev->is_virtfn)) + return; + + pnv_pci_configure_bus(pdev->bus); + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff); + + + /* + * If we can't setup the IODA PE something has gone horribly + * wrong and we can't enable DMA for the device. + */ + if (WARN_ON(!pe)) + return; + } else { + pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); + } + + if (pdn) + pdn->pe_number = pe->pe_number; + pe->device_count++; - pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); pdev->dev.archdata.dma_offset = pe->tce_bypass_base; set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); - /* - * Note: iommu_add_device() will fail here as - * for physical PE: the device is already added by now; - * for virtual PE: sysfs entries are not ready yet and - * tce_iommu_bus_notifier will add the device to a group later. - */ + + /* PEs with a DMA weight of zero won't have a group */ + if (pe->table_group.group) + iommu_add_device(&pe->table_group, &pdev->dev); } /* @@ -2297,9 +2285,6 @@ found: pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node, 0, 0); - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); - return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ @@ -2537,7 +2522,7 @@ unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, direct_table_size = 1UL << table_shift; for ( ; levels; --levels) { - bytes += _ALIGN_UP(tce_table_size, direct_table_size); + bytes += ALIGN(tce_table_size, direct_table_size); tce_table_size /= direct_table_size; tce_table_size <<= 3; @@ -2596,137 +2581,8 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; - -static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, - struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - iommu_add_device(table_group, &dev->dev); - - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_iommu_group_add_devices(pe, - table_group, dev->subordinate); - } -} - -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, struct pci_bus *bus) -{ - - if (pe->flags & PNV_IODA_PE_DEV) - iommu_add_device(table_group, &pe->pdev->dev); - - if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus) - pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group, - bus); -} - -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); - -static void pnv_pci_ioda_setup_iommu_api(void) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - - /* - * There are 4 types of PEs: - * - PNV_IODA_PE_BUS: a downstream port with an adapter, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_VF: a SRIOV virtual function, - * created from pnv_pcibios_sriov_enable(); - * - PNV_IODA_PE_DEV: an NPU or OCAPI device, - * created from pnv_pci_ioda_fixup(). - * - * Normally a PE is represented by an IOMMU group, however for - * devices with side channels the groups need to be more strict. - */ - list_for_each_entry(hose, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->type == PNV_PHB_NPU_NVLINK || - phb->type == PNV_PHB_NPU_OCAPI) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - struct iommu_table_group *table_group; - - table_group = pnv_try_setup_npu_table_group(pe); - if (!table_group) { - if (!pnv_pci_ioda_pe_dma_weight(pe)) - continue; - - table_group = &pe->table_group; - iommu_register_group(&pe->table_group, - pe->phb->hose->global_number, - pe->pe_number); - } - pnv_ioda_setup_bus_iommu_group(pe, table_group, - pe->pbus); - } - } - - /* - * Now we have all PHBs discovered, time to add NPU devices to - * the corresponding IOMMU groups. - */ - list_for_each_entry(hose, &hose_list, list_node) { - unsigned long pgsizes; - - phb = hose->private_data; - - if (phb->type != PNV_PHB_NPU_NVLINK) - continue; - - pgsizes = pnv_ioda_parse_tce_sizes(phb); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - /* - * IODA2 bridges get this set up from - * pci_controller_ops::setup_bridge but NPU bridges - * do not have this hook defined so we do it here. - */ - pe->table_group.pgsizes = pgsizes; - pnv_npu_compound_attach(pe); - } - } -} -#else /* !CONFIG_IOMMU_API */ -static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) -{ - struct pci_controller *hose = phb->hose; - struct device_node *dn = hose->dn; - unsigned long mask = 0; - int i, rc, count; - u32 val; - - count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); - if (count <= 0) { - mask = SZ_4K | SZ_64K; - /* Add 16M for POWER8 by default */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - !cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= SZ_16M | SZ_256M; - return mask; - } - - for (i = 0; i < count; i++) { - rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", - i, &val); - if (rc == 0) - mask |= 1ULL << val; - } - - return mask; -} - static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { @@ -2749,16 +2605,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, IOMMU_TABLE_GROUP_MAX_TABLES; pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb); -#ifdef CONFIG_IOMMU_API - pe->table_group.ops = &pnv_pci_ioda2_ops; -#endif rc = pnv_pci_ioda2_setup_default_config(pe); if (rc) return; - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); +#endif } int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) @@ -3220,8 +3076,7 @@ static void pnv_pci_enable_bridges(void) static void pnv_pci_ioda_fixup(void) { - pnv_pci_ioda_setup_PEs(); - pnv_pci_ioda_setup_iommu_api(); + pnv_pci_ioda_setup_nvlink(); pnv_pci_ioda_create_dbgfs(); pnv_pci_enable_bridges(); @@ -3333,28 +3188,18 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, } } -static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) +static void pnv_pci_configure_bus(struct pci_bus *bus) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pci_dev *bridge = bus->self; struct pnv_ioda_pe *pe; - bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); - - /* Extend bridge's windows if necessary */ - pnv_pci_fixup_bridge_resources(bus, type); + bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); - /* The PE for root bus should be realized before any one else */ - if (!phb->ioda.root_pe_populated) { - pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); - if (pe) { - phb->ioda.root_pe_idx = pe->pe_number; - phb->ioda.root_pe_populated = true; - } - } + dev_info(&bus->dev, "Configuring PE for bus\n"); /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ - if (list_empty(&bus->devices)) + if (WARN_ON(list_empty(&bus->devices))) return; /* Reserve PEs according to used M64 resources */ @@ -3599,6 +3444,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; + pe_info(pe, "Releasing PE\n"); + mutex_lock(&phb->ioda.pe_list_mutex); list_del(&pe->list); mutex_unlock(&phb->ioda.pe_list_mutex); @@ -3633,11 +3480,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) * that it can be populated again in PCI hot add path. The PE * shouldn't be destroyed as it's the global reserved resource. */ - if (phb->ioda.root_pe_populated && - phb->ioda.root_pe_idx == pe->pe_number) - phb->ioda.root_pe_populated = false; - else - pnv_ioda_free_pe(pe); + if (phb->ioda.root_pe_idx == pe->pe_number) + return; + + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) @@ -3715,7 +3561,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .enable_device_hook = pnv_pci_enable_device_hook, .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, - .setup_bridge = pnv_pci_setup_bridge, + .setup_bridge = pnv_pci_fixup_bridge_resources, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, }; @@ -3745,6 +3591,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, struct pnv_phb *phb; unsigned long size, m64map_off, m32map_off, pemap_off; unsigned long iomap_off = 0, dma32map_off = 0; + struct pnv_ioda_pe *root_pe; struct resource r; const __be64 *prop64; const __be32 *prop32; @@ -3863,7 +3710,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, PNV_IODA1_DMA32_SEGSIZE; /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ - size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, + size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, sizeof(unsigned long)); m64map_off = size; size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); @@ -3912,7 +3759,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); } else { - phb->ioda.root_pe_idx = IODA_INVALID_PE; + /* otherwise just allocate one */ + root_pe = pnv_ioda_alloc_pe(phb); + phb->ioda.root_pe_idx = root_pe->pe_number; } INIT_LIST_HEAD(&phb->ioda.pe_list); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5bf818246339..091fe1cf386b 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -955,28 +955,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev; - struct pci_dn *pdn; - struct pnv_ioda_pe *pe; - struct pci_controller *hose; - struct pnv_phb *phb; switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - pdev = to_pci_dev(dev); - pdn = pci_get_pdn(pdev); - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - - WARN_ON_ONCE(!phb); - if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb) - return 0; - - pe = &phb->ioda.pe_array[pdn->pe_number]; - if (!pe->table_group.group) - return 0; - iommu_add_device(&pe->table_group, dev); - return 0; case BUS_NOTIFY_DEL_DEVICE: iommu_del_device(dev); return 0; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d3bbdeab3a32..51c254f2f3cb 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -33,6 +33,24 @@ enum pnv_phb_model { #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ +/* + * A brief note on PNV_IODA_PE_BUS_ALL + * + * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses + * the Requester ID field of the PCIe request header to determine the device + * (and PE) that initiated a DMA. In legacy PCI individual memory read/write + * requests aren't tagged with the RID. To work around this the PCIe-to-PCI + * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side. + * + * PCIe-to-X bridges have a similar issue even though PCI-X requests also have + * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take + * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe + * side of the bridge. + * + * To work around these problems we use the BUS_ALL flag since every subordinate + * bus of the bridge should go into the same PE. + */ + /* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */ #define PNV_IODA_STOPPED_STATE 0x8000000000000000 @@ -118,7 +136,6 @@ struct pnv_phb { unsigned int total_pe_num; unsigned int reserved_pe_idx; unsigned int root_pe_idx; - bool root_pe_populated; /* 32-bit MMIO window */ unsigned int m32_size; @@ -190,6 +207,7 @@ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); +extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, @@ -209,11 +227,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); -extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); -extern struct iommu_table_group *pnv_try_setup_npu_table_group( - struct pnv_ioda_pe *pe); -extern struct iommu_table_group *pnv_npu_compound_attach( - struct pnv_ioda_pe *pe); +extern void pnv_pci_npu_setup_iommu_groups(void); /* pci-ioda-tce.c */ #define POWERNV_IOMMU_DEFAULT_LEVELS 2 @@ -244,4 +258,6 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned int page_shift); +extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c new file mode 100644 index 000000000000..98ed5d8c5441 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-api.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <asm/vas.h> +#include <uapi/asm/vas-api.h> +#include "vas.h" + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. + */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_attr txattr = {}; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc, vasid; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid version\n"); + return -EINVAL; + } + + vasid = uattr.vas_id; + + vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); + if (IS_ERR(txwin)) { + pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + + if (cp_inst->txwin) { + vas_win_close(cp_inst->txwin); + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). + */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + vas_win_paste_addr(txwin, &paste_addr, NULL); + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_coproc_api); + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} +EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 44035a3d6414..41fa90d2f4ab 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private) seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", window->pid); + seq_printf(s, "Pid : %d\n", vas_window_pid(window)); unlock: mutex_unlock(&vas_mutex); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c new file mode 100644 index 000000000000..25db70be4c9c --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VAS Fault handling. + * Copyright 2019, IBM Corporation + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kthread.h> +#include <linux/sched/signal.h> +#include <linux/mmu_context.h> +#include <asm/icswx.h> + +#include "vas.h" + +/* + * The maximum FIFO size for fault window can be 8MB + * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS + * instance will be having fault window. + * 8MB FIFO can be used if expects more faults for each VAS + * instance. + */ +#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) + +static void dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +static void update_csb(struct vas_window *window, + struct coprocessor_request_block *crb) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!window->mm || !window->user_win)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_TRANSLATION; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + pid = window->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Process closes send window after all pending NX requests are + * completed. In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + if (!tsk) { + pid = window->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return; + } + + use_mm(window->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + unuse_mm(window->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +static void dump_fifo(struct vas_instance *vinst, void *entry) +{ + unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; + unsigned long *fifo = entry; + int i; + + pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size, + vinst->fault_fifo_size / CRB_SIZE); + + /* Dump 10 CRB entries or until end of FIFO */ + pr_err("Fault FIFO Dump:\n"); + for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) { + pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n", + i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3)); + } +} + +/* + * Process valid CRBs in fault FIFO. + * NX process user space requests, return credit and update the status + * in CRB. If it encounters transalation error when accessing CRB or + * request buffers, raises interrupt on the CPU to handle the fault. + * It takes credit on fault window, updates nx_fault_stamp in CRB with + * the following information and pastes CRB in fault FIFO. + * + * pswid - window ID of the window on which the request is sent. + * fault_storage_addr - fault address + * + * It can raise a single interrupt for multiple faults. Expects OS to + * process all valid faults and return credit for each fault on user + * space and fault windows. This fault FIFO control will be done with + * credit mechanism. NX can continuously paste CRBs until credits are not + * available on fault window. Otherwise, returns with RMA_reject. + * + * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) + * + */ +irqreturn_t vas_fault_thread_fn(int irq, void *data) +{ + struct vas_instance *vinst = data; + struct coprocessor_request_block *crb, *entry; + struct coprocessor_request_block buf; + struct vas_window *window; + unsigned long flags; + void *fifo; + + crb = &buf; + + /* + * VAS can interrupt with multiple page faults. So process all + * valid CRBs within fault FIFO until reaches invalid CRB. + * We use CCW[0] and pswid to validate validate CRBs: + * + * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 + * OS sets this bit to 1 after reading CRB. + * pswid NX assigns window ID. Set pswid to -1 after + * reading CRB from fault FIFO. + * + * We exit this function if no valid CRBs are available to process. + * So acquire fault_lock and reset fifo_in_progress to 0 before + * exit. + * In case kernel receives another interrupt with different page + * fault, interrupt handler returns with IRQ_HANDLED if + * fifo_in_progress is set. Means these new faults will be + * handled by the current thread. Otherwise set fifo_in_progress + * and return IRQ_WAKE_THREAD to wake up thread. + */ + while (true) { + spin_lock_irqsave(&vinst->fault_lock, flags); + /* + * Advance the fault fifo pointer to next CRB. + * Use CRB_SIZE rather than sizeof(*crb) since the latter is + * aligned to CRB_ALIGN (256) but the CRB written to by VAS is + * only CRB_SIZE in len. + */ + fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); + entry = fifo; + + if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) + || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { + vinst->fifo_in_progress = 0; + spin_unlock_irqrestore(&vinst->fault_lock, flags); + return IRQ_HANDLED; + } + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + vinst->fault_crbs++; + if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) + vinst->fault_crbs = 0; + + memcpy(crb, fifo, CRB_SIZE); + entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); + entry->ccw |= cpu_to_be32(CCW0_INVALID); + /* + * Return credit for the fault window. + */ + vas_return_credit(vinst->fault_win, false); + + pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", + vinst->vas_id, vinst->fault_fifo, fifo, + vinst->fault_crbs); + + dump_crb(crb); + window = vas_pswid_to_window(vinst, + be32_to_cpu(crb->stamp.nx.pswid)); + + if (IS_ERR(window)) { + /* + * We got an interrupt about a specific send + * window but we can't find that window and we can't + * even clean it up (return credit on user space + * window). + * But we should not get here. + * TODO: Disable IRQ. + */ + dump_fifo(vinst, (void *)entry); + pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", + vinst->vas_id, vinst->fault_fifo, fifo, + be32_to_cpu(crb->stamp.nx.pswid), + vinst->fault_crbs); + + WARN_ON_ONCE(1); + } else { + update_csb(window, crb); + /* + * Return credit for send window after processing + * fault CRB. + */ + vas_return_credit(window, true); + } + } +} + +irqreturn_t vas_fault_handler(int irq, void *dev_id) +{ + struct vas_instance *vinst = dev_id; + irqreturn_t ret = IRQ_WAKE_THREAD; + unsigned long flags; + + /* + * NX can generate an interrupt for multiple faults. So the + * fault handler thread process all CRBs until finds invalid + * entry. In case if NX sees continuous faults, it is possible + * that the thread function entered with the first interrupt + * can execute and process all valid CRBs. + * So wake up thread only if the fault thread is not in progress. + */ + spin_lock_irqsave(&vinst->fault_lock, flags); + + if (vinst->fifo_in_progress) + ret = IRQ_HANDLED; + else + vinst->fifo_in_progress = 1; + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + + return ret; +} + +/* + * Fault window is opened per VAS instance. NX pastes fault CRB in fault + * FIFO upon page faults. + */ +int vas_setup_fault_window(struct vas_instance *vinst) +{ + struct vas_rx_win_attr attr; + + vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; + vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); + if (!vinst->fault_fifo) { + pr_err("Unable to alloc %d bytes for fault_fifo\n", + vinst->fault_fifo_size); + return -ENOMEM; + } + + /* + * Invalidate all CRB entries. NX pastes valid entry for each fault. + */ + memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); + vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); + + attr.rx_fifo_size = vinst->fault_fifo_size; + attr.rx_fifo = vinst->fault_fifo; + + /* + * Max creds is based on number of CRBs can fit in the FIFO. + * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds + * will be 0xffff since the receive creds field is 16bits wide. + */ + attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; + attr.lnotify_lpid = 0; + attr.lnotify_pid = mfspr(SPRN_PID); + attr.lnotify_tid = mfspr(SPRN_PID); + + vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, + &attr); + + if (IS_ERR(vinst->fault_win)) { + pr_err("VAS: Error %ld opening FaultWin\n", + PTR_ERR(vinst->fault_win)); + kfree(vinst->fault_fifo); + return PTR_ERR(vinst->fault_win); + } + + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", + vinst->fault_win->winid, attr.lnotify_lpid, + attr.lnotify_pid, attr.lnotify_tid); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 0c0d27d17976..6434f9cb5aed 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -12,6 +12,8 @@ #include <linux/log2.h> #include <linux/rcupdate.h> #include <linux/cred.h> +#include <linux/sched/mm.h> +#include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> #include "vas.h" @@ -24,7 +26,7 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. */ -static void compute_paste_address(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; @@ -78,7 +80,7 @@ static void *map_paste_region(struct vas_window *txwin) goto free_name; txwin->paste_addr_name = name; - compute_paste_address(txwin, &start, &len); + vas_win_paste_addr(txwin, &start, &len); if (!request_mem_region(start, len, name)) { pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", @@ -136,7 +138,7 @@ static void unmap_paste_region(struct vas_window *window) u64 busaddr_start; if (window->paste_kaddr) { - compute_paste_address(window, &busaddr_start, &len); + vas_win_paste_addr(window, &busaddr_start, &len); unmap_region(window->paste_kaddr, busaddr_start, len); window->paste_kaddr = NULL; kfree(window->paste_addr_name); @@ -373,7 +375,7 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) init_xlate_regs(window, winctx->user_win); val = 0ULL; - val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0); + val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id); write_hvwc_reg(window, VREG(FAULT_TX_WIN), val); /* In PowerNV, interrupts go to HV. */ @@ -748,6 +750,8 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (rxwin->vinst->virq) + winctx->irq_port = rxwin->vinst->irq_port; } static bool rx_win_args_valid(enum vas_cop_type cop, @@ -768,7 +772,7 @@ static bool rx_win_args_valid(enum vas_cop_type cop, if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) return false; - if (attr->wcreds_max > VAS_RX_WCREDS_MAX) + if (!attr->wcreds_max) return false; if (attr->nx_win) { @@ -813,7 +817,8 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) { memset(rxattr, 0, sizeof(*rxattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { rxattr->pin_win = true; rxattr->nx_win = true; rxattr->fault_win = false; @@ -827,9 +832,9 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) rxattr->fault_win = true; rxattr->notify_disable = true; rxattr->rx_wcred_mode = true; - rxattr->tx_wcred_mode = true; rxattr->rx_win_ord_mode = true; - rxattr->tx_win_ord_mode = true; + rxattr->rej_no_credit = true; + rxattr->tc_mode = VAS_THRESH_DISABLED; } else if (cop == VAS_COP_TYPE_FTW) { rxattr->user_win = true; rxattr->intr_disable = true; @@ -873,9 +878,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; - if (rxattr->user_win) - rxwin->pid = task_pid_vnr(current); + rxwin->wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); @@ -890,7 +893,8 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) { memset(txattr, 0, sizeof(*txattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { txattr->rej_no_credit = false; txattr->rx_wcred_mode = true; txattr->tx_wcred_mode = true; @@ -944,13 +948,22 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; winctx->rx_win_id = txwin->rxwin->winid; + /* + * IRQ and fault window setup is successful. Set fault window + * for the send window so that ready to handle faults. + */ + if (txwin->vinst->virq) + winctx->fault_win_id = txwin->vinst->fault_win->winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (txwin->vinst->virq) + winctx->irq_port = txwin->vinst->irq_port; - winctx->pswid = 0; + winctx->pswid = txattr->pswid ? txattr->pswid : + encode_pswid(txwin->vinst->vas_id, txwin->winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -965,9 +978,14 @@ static bool tx_win_args_valid(enum vas_cop_type cop, if (attr->wcreds_max > VAS_TX_WCREDS_MAX) return false; - if (attr->user_win && - (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) - return false; + if (attr->user_win) { + if (attr->rsvd_txbuf_count) + return false; + + if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP && + cop != VAS_COP_TYPE_GZIP_HIPRI) + return false; + } return true; } @@ -1016,7 +1034,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; - txwin->pid = attr->pid; txwin->user_win = attr->user_win; txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; @@ -1040,12 +1057,59 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, } } else { /* - * A user mapping must ensure that context switch issues - * CP_ABORT for this thread. + * Interrupt hanlder or fault window setup failed. Means + * NX can not generate fault for page fault. So not + * opening for user space tx window. */ - rc = set_thread_uses_vas(); - if (rc) + if (!vinst->virq) { + rc = -ENODEV; goto free_window; + } + + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is free by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + txwin->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + txwin->mm = get_task_mm(current); + + if (!txwin->mm) { + put_pid(txwin->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + rc = -EPERM; + goto free_window; + } + + mmgrab(txwin->mm); + mmput(txwin->mm); + mm_context_add_vas_window(txwin->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. Expects parent + * thread to use and close the window. So do not need + * to take pid reference for parent thread. + */ + txwin->tgid = find_get_pid(task_tgid_vnr(current)); + /* + * Even a process that has no foreign real address mapping can + * use an unpaired COPY instruction (to no real effect). Issue + * CP_ABORT to clear any pending COPY and prevent a covert + * channel. + * + * __switch_to() will issue CP_ABORT on future context switches + * if process / thread has any open VAS window (Use + * current->mm->context.vas_windows). + */ + asm volatile(PPC_CP_ABORT); } set_vinst_win(vinst, txwin); @@ -1128,6 +1192,7 @@ static void poll_window_credits(struct vas_window *window) { u64 val; int creds, mode; + int count = 0; val = read_hvwc_reg(window, VREG(WINCTL)); if (window->tx_win) @@ -1146,10 +1211,27 @@ retry: creds = GET_FIELD(VAS_LRX_WCRED, val); } + /* + * Takes around few milliseconds to complete all pending requests + * and return credits. + * TODO: Scan fault FIFO and invalidate CRBs points to this window + * and issue CRB Kill to stop all pending requests. Need only + * if there is a bug in NX or fault handling in kernel. + */ if (creds < window->wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Process can not close send window until all credits are + * returned. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", + vas_window_pid(window), window->winid, + creds, count); + goto retry; } } @@ -1163,6 +1245,7 @@ static void poll_window_busy_state(struct vas_window *window) { int busy; u64 val; + int count = 0; retry: val = read_hvwc_reg(window, VREG(WIN_STATUS)); @@ -1170,7 +1253,16 @@ retry: if (busy) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(5)); + schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Takes around few milliseconds to process all pending + * requests. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", + vas_window_pid(window), window->winid, count); + goto retry; } } @@ -1235,22 +1327,118 @@ int vas_win_close(struct vas_window *window) unmap_paste_region(window); - clear_vinst_win(window); - poll_window_busy_state(window); unpin_close_window(window); poll_window_credits(window); + clear_vinst_win(window); + poll_window_castout(window); /* if send window, drop reference to matching receive window */ - if (window->tx_win) + if (window->tx_win) { + if (window->user_win) { + /* Drop references to pid and mm */ + put_pid(window->pid); + if (window->mm) { + mm_context_remove_vas_window(window->mm); + mmdrop(window->mm); + } + } put_rx_win(window->rxwin); + } vas_window_free(window); return 0; } EXPORT_SYMBOL_GPL(vas_win_close); + +/* + * Return credit for the given window. + * Send windows and fault window uses credit mechanism as follows: + * + * Send windows: + * - The default number of credits available for each send window is + * 1024. It means 1024 requests can be issued asynchronously at the + * same time. If the credit is not available, that request will be + * returned with RMA_Busy. + * - One credit is taken when NX request is issued. + * - This credit is returned after NX processed that request. + * - If NX encounters translation error, kernel will return the + * credit on the specific send window after processing the fault CRB. + * + * Fault window: + * - The total number credits available is FIFO_SIZE/CRB_SIZE. + * Means 4MB/128 in the current implementation. If credit is not + * available, RMA_Reject is returned. + * - A credit is taken when NX pastes CRB in fault FIFO. + * - The kernel with return credit on fault window after reading entry + * from fault FIFO. + */ +void vas_return_credit(struct vas_window *window, bool tx) +{ + uint64_t val; + + val = 0ULL; + if (tx) { /* send window */ + val = SET_FIELD(VAS_TX_WCRED, val, 1); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val); + } else { + val = SET_FIELD(VAS_LRX_WCRED, val, 1); + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val); + } +} + +struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid) +{ + struct vas_window *window; + int winid; + + if (!pswid) { + pr_devel("%s: called for pswid 0!\n", __func__); + return ERR_PTR(-ESRCH); + } + + decode_pswid(pswid, NULL, &winid); + + if (winid >= VAS_WINDOWS_PER_CHIP) + return ERR_PTR(-ESRCH); + + /* + * If application closes the window before the hardware + * returns the fault CRB, we should wait in vas_win_close() + * for the pending requests. so the window must be active + * and the process alive. + * + * If its a kernel process, we should not get any faults and + * should not get here. + */ + window = vinst->windows[winid]; + + if (!window) { + pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n", + winid, pswid, vinst); + return NULL; + } + + /* + * Do some sanity checks on the decoded window. Window should be + * NX GZIP user send window. FTW windows should not incur faults + * since their CRBs are ignored (not queued on FIFO or processed + * by NX). + */ + if (!window->tx_win || !window->user_win || !window->nx_win || + window->cop == VAS_COP_TYPE_FAULT || + window->cop == VAS_COP_TYPE_FTW) { + pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", + winid, window->tx_win, window->user_win, + window->nx_win, window->cop); + WARN_ON(1); + } + + return window; +} diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index ed9cc6df329a..598e4cd563fb 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -14,7 +14,10 @@ #include <linux/of_platform.h> #include <linux/of_address.h> #include <linux/of.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> #include <asm/prom.h> +#include <asm/xive.h> #include "vas.h" @@ -23,12 +26,37 @@ static LIST_HEAD(vas_instances); static DEFINE_PER_CPU(int, cpu_vas_id); +static int vas_irq_fault_window_setup(struct vas_instance *vinst) +{ + char devname[64]; + int rc = 0; + + snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id); + rc = request_threaded_irq(vinst->virq, vas_fault_handler, + vas_fault_thread_fn, 0, devname, vinst); + + if (rc) { + pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", + vinst->vas_id, vinst->virq, rc); + goto out; + } + + rc = vas_setup_fault_window(vinst); + if (rc) + free_irq(vinst->virq, vinst); + +out: + return rc; +} + static int init_vas_instance(struct platform_device *pdev) { - int rc, cpu, vasid; - struct resource *res; - struct vas_instance *vinst; struct device_node *dn = pdev->dev.of_node; + struct vas_instance *vinst; + struct xive_irq_data *xd; + uint32_t chipid, hwirq; + struct resource *res; + int rc, cpu, vasid; rc = of_property_read_u32(dn, "ibm,vas-id", &vasid); if (rc) { @@ -36,6 +64,12 @@ static int init_vas_instance(struct platform_device *pdev) return -ENODEV; } + rc = of_property_read_u32(dn, "ibm,chip-id", &chipid); + if (rc) { + pr_err("No ibm,chip-id property for %s?\n", pdev->name); + return -ENODEV; + } + if (pdev->num_resources != 4) { pr_err("Unexpected DT configuration for [%s, %d]\n", pdev->name, vasid); @@ -69,9 +103,32 @@ static int init_vas_instance(struct platform_device *pdev) vinst->paste_win_id_shift = 63 - res->end; - pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, " - "paste_win_id_shift 0x%llx\n", pdev->name, vasid, - vinst->paste_base_addr, vinst->paste_win_id_shift); + hwirq = xive_native_alloc_irq_on_chip(chipid); + if (!hwirq) { + pr_err("Inst%d: Unable to allocate global irq for chip %d\n", + vinst->vas_id, chipid); + return -ENOENT; + } + + vinst->virq = irq_create_mapping(NULL, hwirq); + if (!vinst->virq) { + pr_err("Inst%d: Unable to map global irq %d\n", + vinst->vas_id, hwirq); + return -EINVAL; + } + + xd = irq_get_handler_data(vinst->virq); + if (!xd) { + pr_err("Inst%d: Invalid virq %d\n", + vinst->vas_id, vinst->virq); + return -EINVAL; + } + + vinst->irq_port = xd->trig_page; + pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n", + pdev->name, vasid, vinst->paste_base_addr, + vinst->paste_win_id_shift, vinst->virq, + vinst->irq_port); for_each_possible_cpu(cpu) { if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn)) @@ -82,6 +139,22 @@ static int init_vas_instance(struct platform_device *pdev) list_add(&vinst->node, &vas_instances); mutex_unlock(&vas_mutex); + spin_lock_init(&vinst->fault_lock); + /* + * IRQ and fault handling setup is needed only for user space + * send windows. + */ + if (vinst->virq) { + rc = vas_irq_fault_window_setup(vinst); + /* + * Fault window is used only for user space send windows. + * So if vinst->virq is NULL, tx_win_open returns -ENODEV + * for user space. + */ + if (rc) + vinst->virq = 0; + } + vas_instance_init_dbgdir(vinst); dev_set_drvdata(&pdev->dev, vinst); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 5574aec9ee88..70f793e8f6cc 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -101,11 +101,9 @@ /* * Initial per-process credits. * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED) - * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED) * * TODO: Needs tuning for per-process credits */ -#define VAS_RX_WCREDS_MAX ((64 << 10) - 1) #define VAS_TX_WCREDS_MAX ((4 << 10) - 1) #define VAS_WCREDS_DEFAULT (1 << 10) @@ -296,6 +294,22 @@ enum vas_notify_after_count { }; /* + * NX can generate an interrupt for multiple faults and expects kernel + * to process all of them. So read all valid CRB entries until find the + * invalid one. So use pswid which is pasted by NX and ccw[0] (reserved + * bit in BE) to check valid CRB. CCW[0] will not be touched by user + * space. Application gets CRB formt error if it updates this bit. + * + * Invalidate FIFO during allocation and process all entries from last + * successful read until finds invalid pswid and ccw[0] values. + * After reading each CRB entry from fault FIFO, the kernel invalidate + * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with + * CCW0_INVALID. + */ +#define FIFO_INVALID_ENTRY 0xffffffff +#define CCW0_INVALID 1 + +/* * One per instance of VAS. Each instance will have a separate set of * receive windows, one per coprocessor type. * @@ -313,6 +327,15 @@ struct vas_instance { u64 paste_base_addr; u64 paste_win_id_shift; + u64 irq_port; + int virq; + int fault_crbs; + int fault_fifo_size; + int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ + spinlock_t fault_lock; /* Protects fifo_in_progress update */ + void *fault_fifo; + struct vas_window *fault_win; /* Fault window */ + struct mutex mutex; struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; @@ -333,7 +356,9 @@ struct vas_window { bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - pid_t pid; /* Linux process id of owner */ + struct pid *pid; /* Linux process id of owner */ + struct pid *tgid; /* Thread group ID of owner */ + struct mm_struct *mm; /* Linux process mm_struct */ int wcreds_max; /* Window credits */ char *dbgname; @@ -406,6 +431,19 @@ extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); extern void vas_window_init_dbgdir(struct vas_window *win); extern void vas_window_free_dbgdir(struct vas_window *win); +extern int vas_setup_fault_window(struct vas_instance *vinst); +extern irqreturn_t vas_fault_thread_fn(int irq, void *data); +extern irqreturn_t vas_fault_handler(int irq, void *dev_id); +extern void vas_return_credit(struct vas_window *window, bool tx); +extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid); +extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, + int *len); + +static inline int vas_window_pid(struct vas_window *window) +{ + return pid_vnr(window->pid); +} static inline void vas_log_write(struct vas_window *win, char *name, void *regptr, u64 val) @@ -444,6 +482,21 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } +/* + * Encode/decode the Partition Send Window ID (PSWID) for a window in + * a way that we can uniquely identify any window in the system. i.e. + * we should be able to locate the 'struct vas_window' given the PSWID. + * + * Bits Usage + * 0:7 VAS id (8 bits) + * 8:15 Unused, 0 (3 bits) + * 16:31 Window id (16 bits) + */ +static inline u32 encode_pswid(int vasid, int winid) +{ + return ((u32)winid | (vasid << (31 - 7))); +} + static inline void decode_pswid(u32 pswid, int *vasid, int *winid) { if (vasid) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 423be34f0f5f..d094321964fb 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -200,13 +200,14 @@ void ps3_mm_vas_destroy(void) { int result; - DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); - if (map.vas_id) { result = lv1_select_virtual_address_space(0); - BUG_ON(result); - result = lv1_destruct_virtual_address_space(map.vas_id); - BUG_ON(result); + result += lv1_destruct_virtual_address_space(map.vas_id); + + if (result) { + lv1_panic(0); + } + map.vas_id = 0; } } @@ -263,7 +264,7 @@ static int ps3_mm_region_create(struct mem_region *r, unsigned long size) int result; u64 muid; - r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); + r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); DBG("%s:%d requested %lxh\n", __func__, __LINE__, size); DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size); @@ -304,19 +305,20 @@ static void ps3_mm_region_destroy(struct mem_region *r) int result; if (!r->destroy) { - pr_info("%s:%d: Not destroying high region: %llxh %llxh\n", - __func__, __LINE__, r->base, r->size); return; } - DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); - if (r->base) { result = lv1_release_memory(r->base); - BUG_ON(result); + + if (result) { + lv1_panic(0); + } + r->size = r->base = r->offset = 0; map.total = map.rm.size; } + ps3_mm_set_repository_highmem(NULL); } @@ -394,8 +396,8 @@ static struct dma_chunk * dma_find_chunk(struct ps3_dma_region *r, unsigned long bus_addr, unsigned long len) { struct dma_chunk *c; - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len+bus_addr-aligned_bus, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len+bus_addr-aligned_bus, 1 << r->page_size); list_for_each_entry(c, &r->chunk_list.head, link) { @@ -423,8 +425,8 @@ static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r, unsigned long lpar_addr, unsigned long len) { struct dma_chunk *c; - unsigned long aligned_lpar = _ALIGN_DOWN(lpar_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + lpar_addr - aligned_lpar, + unsigned long aligned_lpar = ALIGN_DOWN(lpar_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + lpar_addr - aligned_lpar, 1 << r->page_size); list_for_each_entry(c, &r->chunk_list.head, link) { @@ -775,8 +777,8 @@ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, struct dma_chunk *c; unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) : virt_addr; - unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, 1 << r->page_size); *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr)); @@ -830,8 +832,8 @@ static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, struct dma_chunk *c; unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) : virt_addr; - unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, 1 << r->page_size); DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__, @@ -889,9 +891,9 @@ static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr, c = dma_find_chunk(r, bus_addr, len); if (!c) { - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + bus_addr + unsigned long aligned_len = ALIGN(len + bus_addr - aligned_bus, 1 << r->page_size); DBG("%s:%d: not found: bus_addr %llxh\n", __func__, __LINE__, bus_addr); @@ -926,9 +928,9 @@ static int dma_ioc0_unmap_area(struct ps3_dma_region *r, c = dma_find_chunk(r, bus_addr, len); if (!c) { - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + bus_addr + unsigned long aligned_len = ALIGN(len + bus_addr - aligned_bus, 1 << r->page_size); DBG("%s:%d: not found: bus_addr %llxh\n", @@ -974,7 +976,7 @@ static int dma_sb_region_create_linear(struct ps3_dma_region *r) pr_info("%s:%d: forcing 16M pages for linear map\n", __func__, __LINE__); r->page_size = PS3_DMA_16M; - r->len = _ALIGN_UP(r->len, 1 << r->page_size); + r->len = ALIGN(r->len, 1 << r->page_size); } } @@ -1125,7 +1127,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset = lpar_addr; if (r->offset >= map.rm.size) r->offset -= map.r1.offset; - r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size); + r->len = len ? len : ALIGN(map.total, 1 << r->page_size); switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index b29368931c56..e9ae5dd03593 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -138,7 +138,7 @@ static int __init early_parse_ps3fb(char *p) if (!p) return 1; - ps3fb_videomemory.size = _ALIGN_UP(memparse(p, &p), + ps3fb_videomemory.size = ALIGN(memparse(p, &p), ps3fb_videomemory.align); return 0; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 845342814edc..ace117f99d94 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -664,6 +664,8 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) if (!ret) return ret; + if (ret < 0) + break; /* * If RTAS returns a delay value that's above 100ms, cut it @@ -684,7 +686,11 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); - return ret; + /* PAPR defines -3 as "Parameter Error" for this function: */ + if (ret == -3) + return -EINVAL; + else + return -EIO; } /** diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index b91eb0929ed1..a6f101c958e8 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -47,6 +47,7 @@ #include <linux/stat.h> #include <linux/of_platform.h> #include <asm/ibmebus.h> +#include <asm/machdep.h> static struct device ibmebus_bus_device = { /* fake "parent" device */ .init_name = "ibmebus", @@ -464,4 +465,4 @@ static int __init ibmebus_bus_init(void) return 0; } -postcore_initcall(ibmebus_bus_init); +machine_postcore_initcall(pseries, ibmebus_bus_init); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index b571285f6c14..10d982997736 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -371,6 +371,9 @@ void post_mobility_fixup(void) /* Possibly switch to a new RFI flush type */ pseries_setup_rfi_flush(); + /* Reinitialise system information for hv-24x7 */ + read_24x7_sys_info(); + return; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 1d1da639b8b7..f3736fcd98fc 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -395,16 +395,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. */ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) static inline struct rtas_error_log *fwnmi_get_errlog(void) { return (struct rtas_error_log *)local_paca->mce_data_buf; } +static __be64 *fwnmi_get_savep(struct pt_regs *regs) +{ + unsigned long savep_ra; + + /* Mask top two bits */ + savep_ra = regs->gpr[3] & ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(savep_ra)) { + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + return NULL; + } + + return __va(savep_ra); +} + /* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect @@ -422,19 +437,14 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void) */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { - unsigned long *savep; struct rtas_error_log *h; + __be64 *savep; - /* Mask top two bits */ - regs->gpr[3] &= ~(0x3UL << 62); - - if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + savep = fwnmi_get_savep(regs); + if (!savep) return NULL; - } - savep = __va(regs->gpr[3]); - regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ @@ -458,7 +468,15 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) */ static void fwnmi_release_errinfo(void) { - int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + struct rtas_args rtas_args; + int ret; + + /* + * On pseries, the machine check stack is limited to under 4GB, so + * args can be on-stack. + */ + rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); + ret = be32_to_cpu(rtas_args.rets[0]); if (ret != 0) printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); } @@ -481,11 +499,21 @@ int pSeries_system_reset_exception(struct pt_regs *regs) #endif if (fwnmi_active) { - struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); - if (errhdr) { - /* XXX Should look at FWNMI information */ - } - fwnmi_release_errinfo(); + __be64 *savep; + + /* + * Firmware (PowerVM and KVM) saves r3 to a save area like + * machine check, which is not exactly what PAPR (2.9) + * suggests but there is no way to detect otherwise, so this + * is the interface now. + * + * System resets do not save any error log or require an + * "ibm,nmi-interlock" rtas call to release. + */ + + savep = fwnmi_get_savep(regs); + if (savep) + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ } if (smp_handle_nmi_ipi(regs)) diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 70c3013fdd07..81343908ed33 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -506,7 +506,7 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) fadump_conf->fadump_supported = 1; /* Firmware supports 64-bit value for size, align it to pagesize. */ - fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE); + fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE); /* * The 'ibm,kernel-dump' rtas node is present only if there is diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0c8421dd01ab..64d18f4bf093 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -68,6 +68,7 @@ #include <asm/isa-bridge.h> #include <asm/security_features.h> #include <asm/asm-const.h> +#include <asm/idle.h> #include <asm/swiotlb.h> #include <asm/svm.h> @@ -83,6 +84,7 @@ unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ +int ibm_nmi_interlock_token; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -113,9 +115,14 @@ static void __init fwnmi_init(void) struct slb_entry *slb_ptr; size_t size; #endif + int ibm_nmi_register_token; - int ibm_nmi_register = rtas_token("ibm,nmi-register"); - if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + ibm_nmi_register_token = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE) + return; + + ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock"); + if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE)) return; /* If the kernel's not linked at zero we point the firmware at low @@ -123,8 +130,8 @@ static void __init fwnmi_init(void) system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START; machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START; - if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr, - machine_check_addr)) + if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL, + system_reset_addr, machine_check_addr)) fwnmi_active = 1; /* @@ -317,6 +324,9 @@ static int alloc_dispatch_log_kmem_cache(void) } machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); +DEFINE_PER_CPU(u64, idle_spurr_cycles); +DEFINE_PER_CPU(u64, idle_entry_purr_snap); +DEFINE_PER_CPU(u64, idle_entry_spurr_snap); static void pseries_lpar_idle(void) { /* @@ -328,7 +338,7 @@ static void pseries_lpar_idle(void) return; /* Indicate to hypervisor that we are idle. */ - get_lppaca()->idle = 1; + pseries_idle_prolog(); /* * Yield the processor to the hypervisor. We return if @@ -339,7 +349,7 @@ static void pseries_lpar_idle(void) */ cede_processor(); - get_lppaca()->idle = 0; + pseries_idle_epilog(); } /* diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 37f1f25ba804..0487b26f6f1a 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -31,6 +31,7 @@ #include <asm/tce.h> #include <asm/page.h> #include <asm/hvcall.h> +#include <asm/machdep.h> static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = "vio", @@ -1513,7 +1514,7 @@ static int __init vio_bus_init(void) return 0; } -postcore_initcall(vio_bus_init); +machine_postcore_initcall(pseries, vio_bus_init); static int __init vio_device_init(void) { @@ -1522,7 +1523,7 @@ static int __init vio_device_init(void) return 0; } -device_initcall(vio_device_init); +machine_device_initcall(pseries, vio_device_init); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1703,4 +1704,4 @@ static int __init vio_init(void) dma_debug_add_bus(&vio_bus_type); return 0; } -fs_initcall(vio_init); +machine_fs_initcall(pseries, vio_init); |