diff options
author | Sam Ravnborg <sam@ravnborg.org> | 2008-12-03 03:11:52 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-12-04 09:17:21 -0800 |
commit | a88b5ba8bd8ac18aad65ee6c6a254e2e74876db3 (patch) | |
tree | eb3d0ffaf53c3f7ec6083752c2097cecd1cb892a /arch/sparc | |
parent | d670bd4f803c8b646acd20f3ba21e65458293faf (diff) | |
download | linux-a88b5ba8bd8ac18aad65ee6c6a254e2e74876db3.tar.bz2 |
sparc,sparc64: unify kernel/
o Move all files from sparc64/kernel/ to sparc/kernel
- rename as appropriate
o Update sparc/Makefile to the changes
o Update sparc/kernel/Makefile to include the sparc64 files
NOTE: This commit changes link order on sparc64!
Link order had to change for either of sparc32 and sparc64.
And assuming sparc64 see more testing than sparc32 change link
order on sparc64 where issues will be caught faster.
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc')
91 files changed, 43309 insertions, 6 deletions
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index f061d0ada158..4b39ac68c3b1 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -67,13 +67,10 @@ endif endif -head-$(CONFIG_SPARC32) := arch/sparc/kernel/head_$(BITS).o -head-$(CONFIG_SPARC32) += arch/sparc/kernel/init_task_$(BITS).o -head-$(CONFIG_SPARC64) := arch/sparc64/kernel/head.o -head-$(CONFIG_SPARC64) += arch/sparc64/kernel/init_task.o +head-y := arch/sparc/kernel/head_$(BITS).o +head-y += arch/sparc/kernel/init_task_$(BITS).o -core-$(CONFIG_SPARC32) += arch/sparc/kernel/ -core-$(CONFIG_SPARC64) += arch/sparc64/kernel/ +core-y += arch/sparc/kernel/ core-y += arch/sparc/mm/ arch/sparc/math-emu/ libs-y += arch/sparc/prom/ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 6558eea5f0bc..46439465c3b2 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -38,6 +38,19 @@ obj-$(CONFIG_SPARC32) += muldiv.o obj-y += prom_$(BITS).o obj-y += of_device_$(BITS).o +obj-$(CONFIG_SPARC64) += reboot.o +obj-$(CONFIG_SPARC64) += sysfs.o +obj-$(CONFIG_SPARC64) += iommu.o +obj-$(CONFIG_SPARC64) += central.o +obj-$(CONFIG_SPARC64) += starfire.o +obj-$(CONFIG_SPARC64) += power.o +obj-$(CONFIG_SPARC64) += sbus.o +obj-$(CONFIG_SPARC64) += ebus.o +obj-$(CONFIG_SPARC64) += visemul.o +obj-$(CONFIG_SPARC64) += hvapi.o +obj-$(CONFIG_SPARC64) += sstate.o +obj-$(CONFIG_SPARC64) += mdesc.o + # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation obj-$(CONFIG_SPARC32) += devres.o devres-y := ../../../kernel/irq/devres.o @@ -48,6 +61,7 @@ obj-$(CONFIG_SPARC32_PCI) += pcic.o obj-$(CONFIG_SMP) += trampoline_$(BITS).o smp_$(BITS).o obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o +obj-$(CONFIG_SPARC64_SMP) += hvtramp.o obj-y += auxio_$(BITS).o obj-$(CONFIG_SUN_PM) += apc.o pmc.o @@ -56,3 +70,28 @@ obj-$(CONFIG_MODULES) += module_$(BITS).o obj-$(CONFIG_MODULES) += sparc_ksyms_$(BITS).o obj-$(CONFIG_SPARC_LED) += led.o obj-$(CONFIG_KGDB) += kgdb_$(BITS).o + + +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +CFLAGS_REMOVE_ftrace.o := -pg + +obj-$(CONFIG_STACKTRACE) += stacktrace.o +# sparc64 PCI +obj-$(CONFIG_SPARC64_PCI) += pci.o pci_common.o psycho_common.o +obj-$(CONFIG_SPARC64_PCI) += pci_psycho.o pci_sabre.o pci_schizo.o +obj-$(CONFIG_SPARC64_PCI) += pci_sun4v.o pci_sun4v_asm.o pci_fire.o +obj-$(CONFIG_PCI_MSI) += pci_msi.o + +obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o + +# sparc64 cpufreq +obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o +obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o +obj-$(CONFIG_US3_MC) += chmc.o + +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o + +obj-$(CONFIG_AUDIT) += audit.o +audit--$(CONFIG_AUDIT) := compat_audit.o +obj-$(CONFIG_COMPAT) += $(audit--y) diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c new file mode 100644 index 000000000000..8fff0ac63d56 --- /dev/null +++ b/arch/sparc/kernel/audit.c @@ -0,0 +1,83 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> + +static unsigned dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_COMPAT + if (arch == AUDIT_ARCH_SPARC) + return 1; +#endif + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ +#ifdef CONFIG_COMPAT + extern int sparc32_classify_syscall(unsigned); + if (abi == AUDIT_ARCH_SPARC) + return sparc32_classify_syscall(syscall); +#endif + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 0; + } +} + +static int __init audit_classes_init(void) +{ +#ifdef CONFIG_COMPAT + extern __u32 sparc32_dir_class[]; + extern __u32 sparc32_write_class[]; + extern __u32 sparc32_read_class[]; + extern __u32 sparc32_chattr_class[]; + extern __u32 sparc32_signal_class[]; + audit_register_class(AUDIT_CLASS_WRITE_32, sparc32_write_class); + audit_register_class(AUDIT_CLASS_READ_32, sparc32_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, sparc32_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, sparc32_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, sparc32_signal_class); +#endif + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c new file mode 100644 index 000000000000..8b67347d4221 --- /dev/null +++ b/arch/sparc/kernel/auxio_64.c @@ -0,0 +1,149 @@ +/* auxio.c: Probing for the Sparc AUXIO register at boot time. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * + * Refactoring for unified NCR/PCIO support 2002 Eric Brower (ebrower@usa.net) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/of_device.h> + +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/auxio.h> + +void __iomem *auxio_register = NULL; +EXPORT_SYMBOL(auxio_register); + +enum auxio_type { + AUXIO_TYPE_NODEV, + AUXIO_TYPE_SBUS, + AUXIO_TYPE_EBUS +}; + +static enum auxio_type auxio_devtype = AUXIO_TYPE_NODEV; +static DEFINE_SPINLOCK(auxio_lock); + +static void __auxio_rmw(u8 bits_on, u8 bits_off, int ebus) +{ + if (auxio_register) { + unsigned long flags; + u8 regval, newval; + + spin_lock_irqsave(&auxio_lock, flags); + + regval = (ebus ? + (u8) readl(auxio_register) : + sbus_readb(auxio_register)); + newval = regval | bits_on; + newval &= ~bits_off; + if (!ebus) + newval &= ~AUXIO_AUX1_MASK; + if (ebus) + writel((u32) newval, auxio_register); + else + sbus_writeb(newval, auxio_register); + + spin_unlock_irqrestore(&auxio_lock, flags); + } +} + +static void __auxio_set_bit(u8 bit, int on, int ebus) +{ + u8 bits_on = (ebus ? AUXIO_PCIO_LED : AUXIO_AUX1_LED); + u8 bits_off = 0; + + if (!on) { + u8 tmp = bits_off; + bits_off = bits_on; + bits_on = tmp; + } + __auxio_rmw(bits_on, bits_off, ebus); +} + +void auxio_set_led(int on) +{ + int ebus = auxio_devtype == AUXIO_TYPE_EBUS; + u8 bit; + + bit = (ebus ? AUXIO_PCIO_LED : AUXIO_AUX1_LED); + __auxio_set_bit(bit, on, ebus); +} + +static void __auxio_sbus_set_lte(int on) +{ + __auxio_set_bit(AUXIO_AUX1_LTE, on, 0); +} + +void auxio_set_lte(int on) +{ + switch(auxio_devtype) { + case AUXIO_TYPE_SBUS: + __auxio_sbus_set_lte(on); + break; + case AUXIO_TYPE_EBUS: + /* FALL-THROUGH */ + default: + break; + } +} + +static struct of_device_id __initdata auxio_match[] = { + { + .name = "auxio", + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, auxio_match); + +static int __devinit auxio_probe(struct of_device *dev, const struct of_device_id *match) +{ + struct device_node *dp = dev->node; + unsigned long size; + + if (!strcmp(dp->parent->name, "ebus")) { + auxio_devtype = AUXIO_TYPE_EBUS; + size = sizeof(u32); + } else if (!strcmp(dp->parent->name, "sbus")) { + auxio_devtype = AUXIO_TYPE_SBUS; + size = 1; + } else { + printk("auxio: Unknown parent bus type [%s]\n", + dp->parent->name); + return -ENODEV; + } + auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio"); + if (!auxio_register) + return -ENODEV; + + printk(KERN_INFO "AUXIO: Found device at %s\n", + dp->full_name); + + if (auxio_devtype == AUXIO_TYPE_EBUS) + auxio_set_led(AUXIO_LED_ON); + + return 0; +} + +static struct of_platform_driver auxio_driver = { + .match_table = auxio_match, + .probe = auxio_probe, + .driver = { + .name = "auxio", + }, +}; + +static int __init auxio_init(void) +{ + return of_register_driver(&auxio_driver, &of_platform_bus_type); +} + +/* Must be after subsys_initcall() so that busses are probed. Must + * be before device_initcall() because things like the floppy driver + * need to use the AUXIO register. + */ +fs_initcall(auxio_init); diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c new file mode 100644 index 000000000000..05f1c916db06 --- /dev/null +++ b/arch/sparc/kernel/central.c @@ -0,0 +1,268 @@ +/* central.c: Central FHC driver for Sunfire/Starfire/Wildfire. + * + * Copyright (C) 1997, 1999, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <asm/fhc.h> +#include <asm/upa.h> + +struct clock_board { + void __iomem *clock_freq_regs; + void __iomem *clock_regs; + void __iomem *clock_ver_reg; + int num_slots; + struct resource leds_resource; + struct platform_device leds_pdev; +}; + +struct fhc { + void __iomem *pregs; + bool central; + bool jtag_master; + int board_num; + struct resource leds_resource; + struct platform_device leds_pdev; +}; + +static int __devinit clock_board_calc_nslots(struct clock_board *p) +{ + u8 reg = upa_readb(p->clock_regs + CLOCK_STAT1) & 0xc0; + + switch (reg) { + case 0x40: + return 16; + + case 0xc0: + return 8; + + case 0x80: + reg = 0; + if (p->clock_ver_reg) + reg = upa_readb(p->clock_ver_reg); + if (reg) { + if (reg & 0x80) + return 4; + else + return 5; + } + /* Fallthrough */ + default: + return 4; + } +} + +static int __devinit clock_board_probe(struct of_device *op, + const struct of_device_id *match) +{ + struct clock_board *p = kzalloc(sizeof(*p), GFP_KERNEL); + int err = -ENOMEM; + + if (!p) { + printk(KERN_ERR "clock_board: Cannot allocate struct clock_board\n"); + goto out; + } + + p->clock_freq_regs = of_ioremap(&op->resource[0], 0, + resource_size(&op->resource[0]), + "clock_board_freq"); + if (!p->clock_freq_regs) { + printk(KERN_ERR "clock_board: Cannot map clock_freq_regs\n"); + goto out_free; + } + + p->clock_regs = of_ioremap(&op->resource[1], 0, + resource_size(&op->resource[1]), + "clock_board_regs"); + if (!p->clock_regs) { + printk(KERN_ERR "clock_board: Cannot map clock_regs\n"); + goto out_unmap_clock_freq_regs; + } + + if (op->resource[2].flags) { + p->clock_ver_reg = of_ioremap(&op->resource[2], 0, + resource_size(&op->resource[2]), + "clock_ver_reg"); + if (!p->clock_ver_reg) { + printk(KERN_ERR "clock_board: Cannot map clock_ver_reg\n"); + goto out_unmap_clock_regs; + } + } + + p->num_slots = clock_board_calc_nslots(p); + + p->leds_resource.start = (unsigned long) + (p->clock_regs + CLOCK_CTRL); + p->leds_resource.end = p->leds_resource.end; + p->leds_resource.name = "leds"; + + p->leds_pdev.name = "sunfire-clockboard-leds"; + p->leds_pdev.resource = &p->leds_resource; + p->leds_pdev.num_resources = 1; + p->leds_pdev.dev.parent = &op->dev; + + err = platform_device_register(&p->leds_pdev); + if (err) { + printk(KERN_ERR "clock_board: Could not register LEDS " + "platform device\n"); + goto out_unmap_clock_ver_reg; + } + + printk(KERN_INFO "clock_board: Detected %d slot Enterprise system.\n", + p->num_slots); + + err = 0; +out: + return err; + +out_unmap_clock_ver_reg: + if (p->clock_ver_reg) + of_iounmap(&op->resource[2], p->clock_ver_reg, + resource_size(&op->resource[2])); + +out_unmap_clock_regs: + of_iounmap(&op->resource[1], p->clock_regs, + resource_size(&op->resource[1])); + +out_unmap_clock_freq_regs: + of_iounmap(&op->resource[0], p->clock_freq_regs, + resource_size(&op->resource[0])); + +out_free: + kfree(p); + goto out; +} + +static struct of_device_id __initdata clock_board_match[] = { + { + .name = "clock-board", + }, + {}, +}; + +static struct of_platform_driver clock_board_driver = { + .match_table = clock_board_match, + .probe = clock_board_probe, + .driver = { + .name = "clock_board", + }, +}; + +static int __devinit fhc_probe(struct of_device *op, + const struct of_device_id *match) +{ + struct fhc *p = kzalloc(sizeof(*p), GFP_KERNEL); + int err = -ENOMEM; + u32 reg; + + if (!p) { + printk(KERN_ERR "fhc: Cannot allocate struct fhc\n"); + goto out; + } + + if (!strcmp(op->node->parent->name, "central")) + p->central = true; + + p->pregs = of_ioremap(&op->resource[0], 0, + resource_size(&op->resource[0]), + "fhc_pregs"); + if (!p->pregs) { + printk(KERN_ERR "fhc: Cannot map pregs\n"); + goto out_free; + } + + if (p->central) { + reg = upa_readl(p->pregs + FHC_PREGS_BSR); + p->board_num = ((reg >> 16) & 1) | ((reg >> 12) & 0x0e); + } else { + p->board_num = of_getintprop_default(op->node, "board#", -1); + if (p->board_num == -1) { + printk(KERN_ERR "fhc: No board# property\n"); + goto out_unmap_pregs; + } + if (upa_readl(p->pregs + FHC_PREGS_JCTRL) & FHC_JTAG_CTRL_MENAB) + p->jtag_master = true; + } + + if (!p->central) { + p->leds_resource.start = (unsigned long) + (p->pregs + FHC_PREGS_CTRL); + p->leds_resource.end = p->leds_resource.end; + p->leds_resource.name = "leds"; + + p->leds_pdev.name = "sunfire-fhc-leds"; + p->leds_pdev.resource = &p->leds_resource; + p->leds_pdev.num_resources = 1; + p->leds_pdev.dev.parent = &op->dev; + + err = platform_device_register(&p->leds_pdev); + if (err) { + printk(KERN_ERR "fhc: Could not register LEDS " + "platform device\n"); + goto out_unmap_pregs; + } + } + reg = upa_readl(p->pregs + FHC_PREGS_CTRL); + + if (!p->central) + reg |= FHC_CONTROL_IXIST; + + reg &= ~(FHC_CONTROL_AOFF | + FHC_CONTROL_BOFF | + FHC_CONTROL_SLINE); + + upa_writel(reg, p->pregs + FHC_PREGS_CTRL); + upa_readl(p->pregs + FHC_PREGS_CTRL); + + reg = upa_readl(p->pregs + FHC_PREGS_ID); + printk(KERN_INFO "fhc: Board #%d, Version[%x] PartID[%x] Manuf[%x] %s\n", + p->board_num, + (reg & FHC_ID_VERS) >> 28, + (reg & FHC_ID_PARTID) >> 12, + (reg & FHC_ID_MANUF) >> 1, + (p->jtag_master ? + "(JTAG Master)" : + (p->central ? "(Central)" : ""))); + + err = 0; + +out: + return err; + +out_unmap_pregs: + of_iounmap(&op->resource[0], p->pregs, resource_size(&op->resource[0])); + +out_free: + kfree(p); + goto out; +} + +static struct of_device_id __initdata fhc_match[] = { + { + .name = "fhc", + }, + {}, +}; + +static struct of_platform_driver fhc_driver = { + .match_table = fhc_match, + .probe = fhc_probe, + .driver = { + .name = "fhc", + }, +}; + +static int __init sunfire_init(void) +{ + (void) of_register_driver(&fhc_driver, &of_platform_bus_type); + (void) of_register_driver(&clock_board_driver, &of_platform_bus_type); + return 0; +} + +subsys_initcall(sunfire_init); diff --git a/arch/sparc/kernel/cherrs.S b/arch/sparc/kernel/cherrs.S new file mode 100644 index 000000000000..4ee1ad420862 --- /dev/null +++ b/arch/sparc/kernel/cherrs.S @@ -0,0 +1,579 @@ + /* These get patched into the trap table at boot time + * once we know we have a cheetah processor. + */ + .globl cheetah_fecc_trap_vector + .type cheetah_fecc_trap_vector,#function +cheetah_fecc_trap_vector: + membar #Sync + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1 + andn %g1, DCU_DC | DCU_IC, %g1 + stxa %g1, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + sethi %hi(cheetah_fast_ecc), %g2 + jmpl %g2 + %lo(cheetah_fast_ecc), %g0 + mov 0, %g1 + .size cheetah_fecc_trap_vector,.-cheetah_fecc_trap_vector + + .globl cheetah_fecc_trap_vector_tl1 + .type cheetah_fecc_trap_vector_tl1,#function +cheetah_fecc_trap_vector_tl1: + membar #Sync + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1 + andn %g1, DCU_DC | DCU_IC, %g1 + stxa %g1, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + sethi %hi(cheetah_fast_ecc), %g2 + jmpl %g2 + %lo(cheetah_fast_ecc), %g0 + mov 1, %g1 + .size cheetah_fecc_trap_vector_tl1,.-cheetah_fecc_trap_vector_tl1 + + .globl cheetah_cee_trap_vector + .type cheetah_cee_trap_vector,#function +cheetah_cee_trap_vector: + membar #Sync + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1 + andn %g1, DCU_IC, %g1 + stxa %g1, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + sethi %hi(cheetah_cee), %g2 + jmpl %g2 + %lo(cheetah_cee), %g0 + mov 0, %g1 + .size cheetah_cee_trap_vector,.-cheetah_cee_trap_vector + + .globl cheetah_cee_trap_vector_tl1 + .type cheetah_cee_trap_vector_tl1,#function +cheetah_cee_trap_vector_tl1: + membar #Sync + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1 + andn %g1, DCU_IC, %g1 + stxa %g1, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + sethi %hi(cheetah_cee), %g2 + jmpl %g2 + %lo(cheetah_cee), %g0 + mov 1, %g1 + .size cheetah_cee_trap_vector_tl1,.-cheetah_cee_trap_vector_tl1 + + .globl cheetah_deferred_trap_vector + .type cheetah_deferred_trap_vector,#function +cheetah_deferred_trap_vector: + membar #Sync + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1; + andn %g1, DCU_DC | DCU_IC, %g1; + stxa %g1, [%g0] ASI_DCU_CONTROL_REG; + membar #Sync; + sethi %hi(cheetah_deferred_trap), %g2 + jmpl %g2 + %lo(cheetah_deferred_trap), %g0 + mov 0, %g1 + .size cheetah_deferred_trap_vector,.-cheetah_deferred_trap_vector + + .globl cheetah_deferred_trap_vector_tl1 + .type cheetah_deferred_trap_vector_tl1,#function +cheetah_deferred_trap_vector_tl1: + membar #Sync; + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1; + andn %g1, DCU_DC | DCU_IC, %g1; + stxa %g1, [%g0] ASI_DCU_CONTROL_REG; + membar #Sync; + sethi %hi(cheetah_deferred_trap), %g2 + jmpl %g2 + %lo(cheetah_deferred_trap), %g0 + mov 1, %g1 + .size cheetah_deferred_trap_vector_tl1,.-cheetah_deferred_trap_vector_tl1 + + /* Cheetah+ specific traps. These are for the new I/D cache parity + * error traps. The first argument to cheetah_plus_parity_handler + * is encoded as follows: + * + * Bit0: 0=dcache,1=icache + * Bit1: 0=recoverable,1=unrecoverable + */ + .globl cheetah_plus_dcpe_trap_vector + .type cheetah_plus_dcpe_trap_vector,#function +cheetah_plus_dcpe_trap_vector: + membar #Sync + sethi %hi(do_cheetah_plus_data_parity), %g7 + jmpl %g7 + %lo(do_cheetah_plus_data_parity), %g0 + nop + nop + nop + nop + nop + .size cheetah_plus_dcpe_trap_vector,.-cheetah_plus_dcpe_trap_vector + + .type do_cheetah_plus_data_parity,#function +do_cheetah_plus_data_parity: + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + mov 0x0, %o0 + call cheetah_plus_parity_error + add %sp, PTREGS_OFF, %o1 + ba,a,pt %xcc, rtrap_irq + .size do_cheetah_plus_data_parity,.-do_cheetah_plus_data_parity + + .globl cheetah_plus_dcpe_trap_vector_tl1 + .type cheetah_plus_dcpe_trap_vector_tl1,#function +cheetah_plus_dcpe_trap_vector_tl1: + membar #Sync + wrpr PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate + sethi %hi(do_dcpe_tl1), %g3 + jmpl %g3 + %lo(do_dcpe_tl1), %g0 + nop + nop + nop + nop + .size cheetah_plus_dcpe_trap_vector_tl1,.-cheetah_plus_dcpe_trap_vector_tl1 + + .globl cheetah_plus_icpe_trap_vector + .type cheetah_plus_icpe_trap_vector,#function +cheetah_plus_icpe_trap_vector: + membar #Sync + sethi %hi(do_cheetah_plus_insn_parity), %g7 + jmpl %g7 + %lo(do_cheetah_plus_insn_parity), %g0 + nop + nop + nop + nop + nop + .size cheetah_plus_icpe_trap_vector,.-cheetah_plus_icpe_trap_vector + + .type do_cheetah_plus_insn_parity,#function +do_cheetah_plus_insn_parity: + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + mov 0x1, %o0 + call cheetah_plus_parity_error + add %sp, PTREGS_OFF, %o1 + ba,a,pt %xcc, rtrap_irq + .size do_cheetah_plus_insn_parity,.-do_cheetah_plus_insn_parity + + .globl cheetah_plus_icpe_trap_vector_tl1 + .type cheetah_plus_icpe_trap_vector_tl1,#function +cheetah_plus_icpe_trap_vector_tl1: + membar #Sync + wrpr PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate + sethi %hi(do_icpe_tl1), %g3 + jmpl %g3 + %lo(do_icpe_tl1), %g0 + nop + nop + nop + nop + .size cheetah_plus_icpe_trap_vector_tl1,.-cheetah_plus_icpe_trap_vector_tl1 + + /* If we take one of these traps when tl >= 1, then we + * jump to interrupt globals. If some trap level above us + * was also using interrupt globals, we cannot recover. + * We may use all interrupt global registers except %g6. + */ + .globl do_dcpe_tl1 + .type do_dcpe_tl1,#function +do_dcpe_tl1: + rdpr %tl, %g1 ! Save original trap level + mov 1, %g2 ! Setup TSTATE checking loop + sethi %hi(TSTATE_IG), %g3 ! TSTATE mask bit +1: wrpr %g2, %tl ! Set trap level to check + rdpr %tstate, %g4 ! Read TSTATE for this level + andcc %g4, %g3, %g0 ! Interrupt globals in use? + bne,a,pn %xcc, do_dcpe_tl1_fatal ! Yep, irrecoverable + wrpr %g1, %tl ! Restore original trap level + add %g2, 1, %g2 ! Next trap level + cmp %g2, %g1 ! Hit them all yet? + ble,pt %icc, 1b ! Not yet + nop + wrpr %g1, %tl ! Restore original trap level +do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ + sethi %hi(dcache_parity_tl1_occurred), %g2 + lduw [%g2 + %lo(dcache_parity_tl1_occurred)], %g1 + add %g1, 1, %g1 + stw %g1, [%g2 + %lo(dcache_parity_tl1_occurred)] + /* Reset D-cache parity */ + sethi %hi(1 << 16), %g1 ! D-cache size + mov (1 << 5), %g2 ! D-cache line size + sub %g1, %g2, %g1 ! Move down 1 cacheline +1: srl %g1, 14, %g3 ! Compute UTAG + membar #Sync + stxa %g3, [%g1] ASI_DCACHE_UTAG + membar #Sync + sub %g2, 8, %g3 ! 64-bit data word within line +2: membar #Sync + stxa %g0, [%g1 + %g3] ASI_DCACHE_DATA + membar #Sync + subcc %g3, 8, %g3 ! Next 64-bit data word + bge,pt %icc, 2b + nop + subcc %g1, %g2, %g1 ! Next cacheline + bge,pt %icc, 1b + nop + ba,pt %xcc, dcpe_icpe_tl1_common + nop + +do_dcpe_tl1_fatal: + sethi %hi(1f), %g7 + ba,pt %xcc, etraptl1 +1: or %g7, %lo(1b), %g7 + mov 0x2, %o0 + call cheetah_plus_parity_error + add %sp, PTREGS_OFF, %o1 + ba,pt %xcc, rtrap + nop + .size do_dcpe_tl1,.-do_dcpe_tl1 + + .globl do_icpe_tl1 + .type do_icpe_tl1,#function +do_icpe_tl1: + rdpr %tl, %g1 ! Save original trap level + mov 1, %g2 ! Setup TSTATE checking loop + sethi %hi(TSTATE_IG), %g3 ! TSTATE mask bit +1: wrpr %g2, %tl ! Set trap level to check + rdpr %tstate, %g4 ! Read TSTATE for this level + andcc %g4, %g3, %g0 ! Interrupt globals in use? + bne,a,pn %xcc, do_icpe_tl1_fatal ! Yep, irrecoverable + wrpr %g1, %tl ! Restore original trap level + add %g2, 1, %g2 ! Next trap level + cmp %g2, %g1 ! Hit them all yet? + ble,pt %icc, 1b ! Not yet + nop + wrpr %g1, %tl ! Restore original trap level +do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ + sethi %hi(icache_parity_tl1_occurred), %g2 + lduw [%g2 + %lo(icache_parity_tl1_occurred)], %g1 + add %g1, 1, %g1 + stw %g1, [%g2 + %lo(icache_parity_tl1_occurred)] + /* Flush I-cache */ + sethi %hi(1 << 15), %g1 ! I-cache size + mov (1 << 5), %g2 ! I-cache line size + sub %g1, %g2, %g1 +1: or %g1, (2 << 3), %g3 + stxa %g0, [%g3] ASI_IC_TAG + membar #Sync + subcc %g1, %g2, %g1 + bge,pt %icc, 1b + nop + ba,pt %xcc, dcpe_icpe_tl1_common + nop + +do_icpe_tl1_fatal: + sethi %hi(1f), %g7 + ba,pt %xcc, etraptl1 +1: or %g7, %lo(1b), %g7 + mov 0x3, %o0 + call cheetah_plus_parity_error + add %sp, PTREGS_OFF, %o1 + ba,pt %xcc, rtrap + nop + .size do_icpe_tl1,.-do_icpe_tl1 + + .type dcpe_icpe_tl1_common,#function +dcpe_icpe_tl1_common: + /* Flush D-cache, re-enable D/I caches in DCU and finally + * retry the trapping instruction. + */ + sethi %hi(1 << 16), %g1 ! D-cache size + mov (1 << 5), %g2 ! D-cache line size + sub %g1, %g2, %g1 +1: stxa %g0, [%g1] ASI_DCACHE_TAG + membar #Sync + subcc %g1, %g2, %g1 + bge,pt %icc, 1b + nop + ldxa [%g0] ASI_DCU_CONTROL_REG, %g1 + or %g1, (DCU_DC | DCU_IC), %g1 + stxa %g1, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + retry + .size dcpe_icpe_tl1_common,.-dcpe_icpe_tl1_common + + /* Capture I/D/E-cache state into per-cpu error scoreboard. + * + * %g1: (TL>=0) ? 1 : 0 + * %g2: scratch + * %g3: scratch + * %g4: AFSR + * %g5: AFAR + * %g6: unused, will have current thread ptr after etrap + * %g7: scratch + */ + .type __cheetah_log_error,#function +__cheetah_log_error: + /* Put "TL1" software bit into AFSR. */ + and %g1, 0x1, %g1 + sllx %g1, 63, %g2 + or %g4, %g2, %g4 + + /* Get log entry pointer for this cpu at this trap level. */ + BRANCH_IF_JALAPENO(g2,g3,50f) + ldxa [%g0] ASI_SAFARI_CONFIG, %g2 + srlx %g2, 17, %g2 + ba,pt %xcc, 60f + and %g2, 0x3ff, %g2 + +50: ldxa [%g0] ASI_JBUS_CONFIG, %g2 + srlx %g2, 17, %g2 + and %g2, 0x1f, %g2 + +60: sllx %g2, 9, %g2 + sethi %hi(cheetah_error_log), %g3 + ldx [%g3 + %lo(cheetah_error_log)], %g3 + brz,pn %g3, 80f + nop + + add %g3, %g2, %g3 + sllx %g1, 8, %g1 + add %g3, %g1, %g1 + + /* %g1 holds pointer to the top of the logging scoreboard */ + ldx [%g1 + 0x0], %g7 + cmp %g7, -1 + bne,pn %xcc, 80f + nop + + stx %g4, [%g1 + 0x0] + stx %g5, [%g1 + 0x8] + add %g1, 0x10, %g1 + + /* %g1 now points to D-cache logging area */ + set 0x3ff8, %g2 /* DC_addr mask */ + and %g5, %g2, %g2 /* DC_addr bits of AFAR */ + srlx %g5, 12, %g3 + or %g3, 1, %g3 /* PHYS tag + valid */ + +10: ldxa [%g2] ASI_DCACHE_TAG, %g7 + cmp %g3, %g7 /* TAG match? */ + bne,pt %xcc, 13f + nop + + /* Yep, what we want, capture state. */ + stx %g2, [%g1 + 0x20] + stx %g7, [%g1 + 0x28] + + /* A membar Sync is required before and after utag access. */ + membar #Sync + ldxa [%g2] ASI_DCACHE_UTAG, %g7 + membar #Sync + stx %g7, [%g1 + 0x30] + ldxa [%g2] ASI_DCACHE_SNOOP_TAG, %g7 + stx %g7, [%g1 + 0x38] + clr %g3 + +12: ldxa [%g2 + %g3] ASI_DCACHE_DATA, %g7 + stx %g7, [%g1] + add %g3, (1 << 5), %g3 + cmp %g3, (4 << 5) + bl,pt %xcc, 12b + add %g1, 0x8, %g1 + + ba,pt %xcc, 20f + add %g1, 0x20, %g1 + +13: sethi %hi(1 << 14), %g7 + add %g2, %g7, %g2 + srlx %g2, 14, %g7 + cmp %g7, 4 + bl,pt %xcc, 10b + nop + + add %g1, 0x40, %g1 + + /* %g1 now points to I-cache logging area */ +20: set 0x1fe0, %g2 /* IC_addr mask */ + and %g5, %g2, %g2 /* IC_addr bits of AFAR */ + sllx %g2, 1, %g2 /* IC_addr[13:6]==VA[12:5] */ + srlx %g5, (13 - 8), %g3 /* Make PTAG */ + andn %g3, 0xff, %g3 /* Mask off undefined bits */ + +21: ldxa [%g2] ASI_IC_TAG, %g7 + andn %g7, 0xff, %g7 + cmp %g3, %g7 + bne,pt %xcc, 23f + nop + + /* Yep, what we want, capture state. */ + stx %g2, [%g1 + 0x40] + stx %g7, [%g1 + 0x48] + add %g2, (1 << 3), %g2 + ldxa [%g2] ASI_IC_TAG, %g7 + add %g2, (1 << 3), %g2 + stx %g7, [%g1 + 0x50] + ldxa [%g2] ASI_IC_TAG, %g7 + add %g2, (1 << 3), %g2 + stx %g7, [%g1 + 0x60] + ldxa [%g2] ASI_IC_TAG, %g7 + stx %g7, [%g1 + 0x68] + sub %g2, (3 << 3), %g2 + ldxa [%g2] ASI_IC_STAG, %g7 + stx %g7, [%g1 + 0x58] + clr %g3 + srlx %g2, 2, %g2 + +22: ldxa [%g2 + %g3] ASI_IC_INSTR, %g7 + stx %g7, [%g1] + add %g3, (1 << 3), %g3 + cmp %g3, (8 << 3) + bl,pt %xcc, 22b + add %g1, 0x8, %g1 + + ba,pt %xcc, 30f + add %g1, 0x30, %g1 + +23: sethi %hi(1 << 14), %g7 + add %g2, %g7, %g2 + srlx %g2, 14, %g7 + cmp %g7, 4 + bl,pt %xcc, 21b + nop + + add %g1, 0x70, %g1 + + /* %g1 now points to E-cache logging area */ +30: andn %g5, (32 - 1), %g2 + stx %g2, [%g1 + 0x20] + ldxa [%g2] ASI_EC_TAG_DATA, %g7 + stx %g7, [%g1 + 0x28] + ldxa [%g2] ASI_EC_R, %g0 + clr %g3 + +31: ldxa [%g3] ASI_EC_DATA, %g7 + stx %g7, [%g1 + %g3] + add %g3, 0x8, %g3 + cmp %g3, 0x20 + + bl,pt %xcc, 31b + nop +80: + rdpr %tt, %g2 + cmp %g2, 0x70 + be c_fast_ecc + cmp %g2, 0x63 + be c_cee + nop + ba,pt %xcc, c_deferred + .size __cheetah_log_error,.-__cheetah_log_error + + /* Cheetah FECC trap handling, we get here from tl{0,1}_fecc + * in the trap table. That code has done a memory barrier + * and has disabled both the I-cache and D-cache in the DCU + * control register. The I-cache is disabled so that we may + * capture the corrupted cache line, and the D-cache is disabled + * because corrupt data may have been placed there and we don't + * want to reference it. + * + * %g1 is one if this trap occurred at %tl >= 1. + * + * Next, we turn off error reporting so that we don't recurse. + */ + .globl cheetah_fast_ecc + .type cheetah_fast_ecc,#function +cheetah_fast_ecc: + ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2 + andn %g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2 + stxa %g2, [%g0] ASI_ESTATE_ERROR_EN + membar #Sync + + /* Fetch and clear AFSR/AFAR */ + ldxa [%g0] ASI_AFSR, %g4 + ldxa [%g0] ASI_AFAR, %g5 + stxa %g4, [%g0] ASI_AFSR + membar #Sync + + ba,pt %xcc, __cheetah_log_error + nop + .size cheetah_fast_ecc,.-cheetah_fast_ecc + + .type c_fast_ecc,#function +c_fast_ecc: + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + mov %l4, %o1 + mov %l5, %o2 + call cheetah_fecc_handler + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_irq + .size c_fast_ecc,.-c_fast_ecc + + /* Our caller has disabled I-cache and performed membar Sync. */ + .globl cheetah_cee + .type cheetah_cee,#function +cheetah_cee: + ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2 + andn %g2, ESTATE_ERROR_CEEN, %g2 + stxa %g2, [%g0] ASI_ESTATE_ERROR_EN + membar #Sync + + /* Fetch and clear AFSR/AFAR */ + ldxa [%g0] ASI_AFSR, %g4 + ldxa [%g0] ASI_AFAR, %g5 + stxa %g4, [%g0] ASI_AFSR + membar #Sync + + ba,pt %xcc, __cheetah_log_error + nop + .size cheetah_cee,.-cheetah_cee + + .type c_cee,#function +c_cee: + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + mov %l4, %o1 + mov %l5, %o2 + call cheetah_cee_handler + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_irq + .size c_cee,.-c_cee + + /* Our caller has disabled I-cache+D-cache and performed membar Sync. */ + .globl cheetah_deferred_trap + .type cheetah_deferred_trap,#function +cheetah_deferred_trap: + ldxa [%g0] ASI_ESTATE_ERROR_EN, %g2 + andn %g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2 + stxa %g2, [%g0] ASI_ESTATE_ERROR_EN + membar #Sync + + /* Fetch and clear AFSR/AFAR */ + ldxa [%g0] ASI_AFSR, %g4 + ldxa [%g0] ASI_AFAR, %g5 + stxa %g4, [%g0] ASI_AFSR + membar #Sync + + ba,pt %xcc, __cheetah_log_error + nop + .size cheetah_deferred_trap,.-cheetah_deferred_trap + + .type c_deferred,#function +c_deferred: + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + mov %l4, %o1 + mov %l5, %o2 + call cheetah_deferred_handler + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap_irq + .size c_deferred,.-c_deferred diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c new file mode 100644 index 000000000000..3b9f4d6e14a9 --- /dev/null +++ b/arch/sparc/kernel/chmc.c @@ -0,0 +1,863 @@ +/* chmc.c: Driver for UltraSPARC-III memory controller. + * + * Copyright (C) 2001, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <asm/spitfire.h> +#include <asm/chmctrl.h> +#include <asm/cpudata.h> +#include <asm/oplib.h> +#include <asm/prom.h> +#include <asm/head.h> +#include <asm/io.h> +#include <asm/memctrl.h> + +#define DRV_MODULE_NAME "chmc" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "0.2" + +MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("UltraSPARC-III memory controller driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +static int mc_type; +#define MC_TYPE_SAFARI 1 +#define MC_TYPE_JBUS 2 + +static dimm_printer_t us3mc_dimm_printer; + +#define CHMCTRL_NDGRPS 2 +#define CHMCTRL_NDIMMS 4 + +#define CHMC_DIMMS_PER_MC (CHMCTRL_NDGRPS * CHMCTRL_NDIMMS) + +/* OBP memory-layout property format. */ +struct chmc_obp_map { + unsigned char dimm_map[144]; + unsigned char pin_map[576]; +}; + +#define DIMM_LABEL_SZ 8 + +struct chmc_obp_mem_layout { + /* One max 8-byte string label per DIMM. Usually + * this matches the label on the motherboard where + * that DIMM resides. + */ + char dimm_labels[CHMC_DIMMS_PER_MC][DIMM_LABEL_SZ]; + + /* If symmetric use map[0], else it is + * asymmetric and map[1] should be used. + */ + char symmetric; + + struct chmc_obp_map map[2]; +}; + +#define CHMCTRL_NBANKS 4 + +struct chmc_bank_info { + struct chmc *p; + int bank_id; + + u64 raw_reg; + int valid; + int uk; + int um; + int lk; + int lm; + int interleave; + unsigned long base; + unsigned long size; +}; + +struct chmc { + struct list_head list; + int portid; + + struct chmc_obp_mem_layout layout_prop; + int layout_size; + + void __iomem *regs; + + u64 timing_control1; + u64 timing_control2; + u64 timing_control3; + u64 timing_control4; + u64 memaddr_control; + + struct chmc_bank_info logical_banks[CHMCTRL_NBANKS]; +}; + +#define JBUSMC_REGS_SIZE 8 + +#define JB_MC_REG1_DIMM2_BANK3 0x8000000000000000UL +#define JB_MC_REG1_DIMM1_BANK1 0x4000000000000000UL +#define JB_MC_REG1_DIMM2_BANK2 0x2000000000000000UL +#define JB_MC_REG1_DIMM1_BANK0 0x1000000000000000UL +#define JB_MC_REG1_XOR 0x0000010000000000UL +#define JB_MC_REG1_ADDR_GEN_2 0x000000e000000000UL +#define JB_MC_REG1_ADDR_GEN_2_SHIFT 37 +#define JB_MC_REG1_ADDR_GEN_1 0x0000001c00000000UL +#define JB_MC_REG1_ADDR_GEN_1_SHIFT 34 +#define JB_MC_REG1_INTERLEAVE 0x0000000001800000UL +#define JB_MC_REG1_INTERLEAVE_SHIFT 23 +#define JB_MC_REG1_DIMM2_PTYPE 0x0000000000200000UL +#define JB_MC_REG1_DIMM2_PTYPE_SHIFT 21 +#define JB_MC_REG1_DIMM1_PTYPE 0x0000000000100000UL +#define JB_MC_REG1_DIMM1_PTYPE_SHIFT 20 + +#define PART_TYPE_X8 0 +#define PART_TYPE_X4 1 + +#define INTERLEAVE_NONE 0 +#define INTERLEAVE_SAME 1 +#define INTERLEAVE_INTERNAL 2 +#define INTERLEAVE_BOTH 3 + +#define ADDR_GEN_128MB 0 +#define ADDR_GEN_256MB 1 +#define ADDR_GEN_512MB 2 +#define ADDR_GEN_1GB 3 + +#define JB_NUM_DIMM_GROUPS 2 +#define JB_NUM_DIMMS_PER_GROUP 2 +#define JB_NUM_DIMMS (JB_NUM_DIMM_GROUPS * JB_NUM_DIMMS_PER_GROUP) + +struct jbusmc_obp_map { + unsigned char dimm_map[18]; + unsigned char pin_map[144]; +}; + +struct jbusmc_obp_mem_layout { + /* One max 8-byte string label per DIMM. Usually + * this matches the label on the motherboard where + * that DIMM resides. + */ + char dimm_labels[JB_NUM_DIMMS][DIMM_LABEL_SZ]; + + /* If symmetric use map[0], else it is + * asymmetric and map[1] should be used. + */ + char symmetric; + + struct jbusmc_obp_map map; + + char _pad; +}; + +struct jbusmc_dimm_group { + struct jbusmc *controller; + int index; + u64 base_addr; + u64 size; +}; + +struct jbusmc { + void __iomem *regs; + u64 mc_reg_1; + u32 portid; + struct jbusmc_obp_mem_layout layout; + int layout_len; + int num_dimm_groups; + struct jbusmc_dimm_group dimm_groups[JB_NUM_DIMM_GROUPS]; + struct list_head list; +}; + +static DEFINE_SPINLOCK(mctrl_list_lock); +static LIST_HEAD(mctrl_list); + +static void mc_list_add(struct list_head *list) +{ + spin_lock(&mctrl_list_lock); + list_add(list, &mctrl_list); + spin_unlock(&mctrl_list_lock); +} + +static void mc_list_del(struct list_head *list) +{ + spin_lock(&mctrl_list_lock); + list_del_init(list); + spin_unlock(&mctrl_list_lock); +} + +#define SYNDROME_MIN -1 +#define SYNDROME_MAX 144 + +/* Covert syndrome code into the way the bits are positioned + * on the bus. + */ +static int syndrome_to_qword_code(int syndrome_code) +{ + if (syndrome_code < 128) + syndrome_code += 16; + else if (syndrome_code < 128 + 9) + syndrome_code -= (128 - 7); + else if (syndrome_code < (128 + 9 + 3)) + syndrome_code -= (128 + 9 - 4); + else + syndrome_code -= (128 + 9 + 3); + return syndrome_code; +} + +/* All this magic has to do with how a cache line comes over the wire + * on Safari and JBUS. A 64-bit line comes over in 1 or more quadword + * cycles, each of which transmit ECC/MTAG info as well as the actual + * data. + */ +#define L2_LINE_SIZE 64 +#define L2_LINE_ADDR_MSK (L2_LINE_SIZE - 1) +#define QW_PER_LINE 4 +#define QW_BYTES (L2_LINE_SIZE / QW_PER_LINE) +#define QW_BITS 144 +#define SAFARI_LAST_BIT (576 - 1) +#define JBUS_LAST_BIT (144 - 1) + +static void get_pin_and_dimm_str(int syndrome_code, unsigned long paddr, + int *pin_p, char **dimm_str_p, void *_prop, + int base_dimm_offset) +{ + int qword_code = syndrome_to_qword_code(syndrome_code); + int cache_line_offset; + int offset_inverse; + int dimm_map_index; + int map_val; + + if (mc_type == MC_TYPE_JBUS) { + struct jbusmc_obp_mem_layout *p = _prop; + + /* JBUS */ + cache_line_offset = qword_code; + offset_inverse = (JBUS_LAST_BIT - cache_line_offset); + dimm_map_index = offset_inverse / 8; + map_val = p->map.dimm_map[dimm_map_index]; + map_val = ((map_val >> ((7 - (offset_inverse & 7)))) & 1); + *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val]; + *pin_p = p->map.pin_map[cache_line_offset]; + } else { + struct chmc_obp_mem_layout *p = _prop; + struct chmc_obp_map *mp; + int qword; + + /* Safari */ + if (p->symmetric) + mp = &p->map[0]; + else + mp = &p->map[1]; + + qword = (paddr & L2_LINE_ADDR_MSK) / QW_BYTES; + cache_line_offset = ((3 - qword) * QW_BITS) + qword_code; + offset_inverse = (SAFARI_LAST_BIT - cache_line_offset); + dimm_map_index = offset_inverse >> 2; + map_val = mp->dimm_map[dimm_map_index]; + map_val = ((map_val >> ((3 - (offset_inverse & 3)) << 1)) & 0x3); + *dimm_str_p = p->dimm_labels[base_dimm_offset + map_val]; + *pin_p = mp->pin_map[cache_line_offset]; + } +} + +static struct jbusmc_dimm_group *jbusmc_find_dimm_group(unsigned long phys_addr) +{ + struct jbusmc *p; + + list_for_each_entry(p, &mctrl_list, list) { + int i; + + for (i = 0; i < p->num_dimm_groups; i++) { + struct jbusmc_dimm_group *dp = &p->dimm_groups[i]; + + if (phys_addr < dp->base_addr || + (dp->base_addr + dp->size) <= phys_addr) + continue; + + return dp; + } + } + return NULL; +} + +static int jbusmc_print_dimm(int syndrome_code, + unsigned long phys_addr, + char *buf, int buflen) +{ + struct jbusmc_obp_mem_layout *prop; + struct jbusmc_dimm_group *dp; + struct jbusmc *p; + int first_dimm; + + dp = jbusmc_find_dimm_group(phys_addr); + if (dp == NULL || + syndrome_code < SYNDROME_MIN || + syndrome_code > SYNDROME_MAX) { + buf[0] = '?'; + buf[1] = '?'; + buf[2] = '?'; + buf[3] = '\0'; + } + p = dp->controller; + prop = &p->layout; + + first_dimm = dp->index * JB_NUM_DIMMS_PER_GROUP; + + if (syndrome_code != SYNDROME_MIN) { + char *dimm_str; + int pin; + + get_pin_and_dimm_str(syndrome_code, phys_addr, &pin, + &dimm_str, prop, first_dimm); + sprintf(buf, "%s, pin %3d", dimm_str, pin); + } else { + int dimm; + + /* Multi-bit error, we just dump out all the + * dimm labels associated with this dimm group. + */ + for (dimm = 0; dimm < JB_NUM_DIMMS_PER_GROUP; dimm++) { + sprintf(buf, "%s ", + prop->dimm_labels[first_dimm + dimm]); + buf += strlen(buf); + } + } + + return 0; +} + +static u64 __devinit jbusmc_dimm_group_size(u64 base, + const struct linux_prom64_registers *mem_regs, + int num_mem_regs) +{ + u64 max = base + (8UL * 1024 * 1024 * 1024); + u64 max_seen = base; + int i; + + for (i = 0; i < num_mem_regs; i++) { + const struct linux_prom64_registers *ent; + u64 this_base; + u64 this_end; + + ent = &mem_regs[i]; + this_base = ent->phys_addr; + this_end = this_base + ent->reg_size; + if (base < this_base || base >= this_end) + continue; + if (this_end > max) + this_end = max; + if (this_end > max_seen) + max_seen = this_end; + } + + return max_seen - base; +} + +static void __devinit jbusmc_construct_one_dimm_group(struct jbusmc *p, + unsigned long index, + const struct linux_prom64_registers *mem_regs, + int num_mem_regs) +{ + struct jbusmc_dimm_group *dp = &p->dimm_groups[index]; + + dp->controller = p; + dp->index = index; + + dp->base_addr = (p->portid * (64UL * 1024 * 1024 * 1024)); + dp->base_addr += (index * (8UL * 1024 * 1024 * 1024)); + dp->size = jbusmc_dimm_group_size(dp->base_addr, mem_regs, num_mem_regs); +} + +static void __devinit jbusmc_construct_dimm_groups(struct jbusmc *p, + const struct linux_prom64_registers *mem_regs, + int num_mem_regs) +{ + if (p->mc_reg_1 & JB_MC_REG1_DIMM1_BANK0) { + jbusmc_construct_one_dimm_group(p, 0, mem_regs, num_mem_regs); + p->num_dimm_groups++; + } + if (p->mc_reg_1 & JB_MC_REG1_DIMM2_BANK2) { + jbusmc_construct_one_dimm_group(p, 1, mem_regs, num_mem_regs); + p->num_dimm_groups++; + } +} + +static int __devinit jbusmc_probe(struct of_device *op, + const struct of_device_id *match) +{ + const struct linux_prom64_registers *mem_regs; + struct device_node *mem_node; + int err, len, num_mem_regs; + struct jbusmc *p; + const u32 *prop; + const void *ml; + + err = -ENODEV; + mem_node = of_find_node_by_path("/memory"); + if (!mem_node) { + printk(KERN_ERR PFX "Cannot find /memory node.\n"); + goto out; + } + mem_regs = of_get_property(mem_node, "reg", &len); + if (!mem_regs) { + printk(KERN_ERR PFX "Cannot get reg property of /memory node.\n"); + goto out; + } + num_mem_regs = len / sizeof(*mem_regs); + + err = -ENOMEM; + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + printk(KERN_ERR PFX "Cannot allocate struct jbusmc.\n"); + goto out; + } + + INIT_LIST_HEAD(&p->list); + + err = -ENODEV; + prop = of_get_property(op->node, "portid", &len); + if (!prop || len != 4) { + printk(KERN_ERR PFX "Cannot find portid.\n"); + goto out_free; + } + + p->portid = *prop; + + prop = of_get_property(op->node, "memory-control-register-1", &len); + if (!prop || len != 8) { + printk(KERN_ERR PFX "Cannot get memory control register 1.\n"); + goto out_free; + } + + p->mc_reg_1 = ((u64)prop[0] << 32) | (u64) prop[1]; + + err = -ENOMEM; + p->regs = of_ioremap(&op->resource[0], 0, JBUSMC_REGS_SIZE, "jbusmc"); + if (!p->regs) { + printk(KERN_ERR PFX "Cannot map jbusmc regs.\n"); + goto out_free; + } + + err = -ENODEV; + ml = of_get_property(op->node, "memory-layout", &p->layout_len); + if (!ml) { + printk(KERN_ERR PFX "Cannot get memory layout property.\n"); + goto out_iounmap; + } + if (p->layout_len > sizeof(p->layout)) { + printk(KERN_ERR PFX "Unexpected memory-layout size %d\n", + p->layout_len); + goto out_iounmap; + } + memcpy(&p->layout, ml, p->layout_len); + + jbusmc_construct_dimm_groups(p, mem_regs, num_mem_regs); + + mc_list_add(&p->list); + + printk(KERN_INFO PFX "UltraSPARC-IIIi memory controller at %s\n", + op->node->full_name); + + dev_set_drvdata(&op->dev, p); + + err = 0; + +out: + return err; + +out_iounmap: + of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE); + +out_free: + kfree(p); + goto out; +} + +/* Does BANK decode PHYS_ADDR? */ +static int chmc_bank_match(struct chmc_bank_info *bp, unsigned long phys_addr) +{ + unsigned long upper_bits = (phys_addr & PA_UPPER_BITS) >> PA_UPPER_BITS_SHIFT; + unsigned long lower_bits = (phys_addr & PA_LOWER_BITS) >> PA_LOWER_BITS_SHIFT; + + /* Bank must be enabled to match. */ + if (bp->valid == 0) + return 0; + + /* Would BANK match upper bits? */ + upper_bits ^= bp->um; /* What bits are different? */ + upper_bits = ~upper_bits; /* Invert. */ + upper_bits |= bp->uk; /* What bits don't matter for matching? */ + upper_bits = ~upper_bits; /* Invert. */ + + if (upper_bits) + return 0; + + /* Would BANK match lower bits? */ + lower_bits ^= bp->lm; /* What bits are different? */ + lower_bits = ~lower_bits; /* Invert. */ + lower_bits |= bp->lk; /* What bits don't matter for matching? */ + lower_bits = ~lower_bits; /* Invert. */ + + if (lower_bits) + return 0; + + /* I always knew you'd be the one. */ + return 1; +} + +/* Given PHYS_ADDR, search memory controller banks for a match. */ +static struct chmc_bank_info *chmc_find_bank(unsigned long phys_addr) +{ + struct chmc *p; + + list_for_each_entry(p, &mctrl_list, list) { + int bank_no; + + for (bank_no = 0; bank_no < CHMCTRL_NBANKS; bank_no++) { + struct chmc_bank_info *bp; + + bp = &p->logical_banks[bank_no]; + if (chmc_bank_match(bp, phys_addr)) + return bp; + } + } + + return NULL; +} + +/* This is the main purpose of this driver. */ +static int chmc_print_dimm(int syndrome_code, + unsigned long phys_addr, + char *buf, int buflen) +{ + struct chmc_bank_info *bp; + struct chmc_obp_mem_layout *prop; + int bank_in_controller, first_dimm; + + bp = chmc_find_bank(phys_addr); + if (bp == NULL || + syndrome_code < SYNDROME_MIN || + syndrome_code > SYNDROME_MAX) { + buf[0] = '?'; + buf[1] = '?'; + buf[2] = '?'; + buf[3] = '\0'; + return 0; + } + + prop = &bp->p->layout_prop; + bank_in_controller = bp->bank_id & (CHMCTRL_NBANKS - 1); + first_dimm = (bank_in_controller & (CHMCTRL_NDGRPS - 1)); + first_dimm *= CHMCTRL_NDIMMS; + + if (syndrome_code != SYNDROME_MIN) { + char *dimm_str; + int pin; + + get_pin_and_dimm_str(syndrome_code, phys_addr, &pin, + &dimm_str, prop, first_dimm); + sprintf(buf, "%s, pin %3d", dimm_str, pin); + } else { + int dimm; + + /* Multi-bit error, we just dump out all the + * dimm labels associated with this bank. + */ + for (dimm = 0; dimm < CHMCTRL_NDIMMS; dimm++) { + sprintf(buf, "%s ", + prop->dimm_labels[first_dimm + dimm]); + buf += strlen(buf); + } + } + return 0; +} + +/* Accessing the registers is slightly complicated. If you want + * to get at the memory controller which is on the same processor + * the code is executing, you must use special ASI load/store else + * you go through the global mapping. + */ +static u64 chmc_read_mcreg(struct chmc *p, unsigned long offset) +{ + unsigned long ret, this_cpu; + + preempt_disable(); + + this_cpu = real_hard_smp_processor_id(); + + if (p->portid == this_cpu) { + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=r" (ret) + : "r" (offset), "i" (ASI_MCU_CTRL_REG)); + } else { + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=r" (ret) + : "r" (p->regs + offset), + "i" (ASI_PHYS_BYPASS_EC_E)); + } + + preempt_enable(); + + return ret; +} + +#if 0 /* currently unused */ +static void chmc_write_mcreg(struct chmc *p, unsigned long offset, u64 val) +{ + if (p->portid == smp_processor_id()) { + __asm__ __volatile__("stxa %0, [%1] %2" + : : "r" (val), + "r" (offset), "i" (ASI_MCU_CTRL_REG)); + } else { + __asm__ __volatile__("ldxa %0, [%1] %2" + : : "r" (val), + "r" (p->regs + offset), + "i" (ASI_PHYS_BYPASS_EC_E)); + } +} +#endif + +static void chmc_interpret_one_decode_reg(struct chmc *p, int which_bank, u64 val) +{ + struct chmc_bank_info *bp = &p->logical_banks[which_bank]; + + bp->p = p; + bp->bank_id = (CHMCTRL_NBANKS * p->portid) + which_bank; + bp->raw_reg = val; + bp->valid = (val & MEM_DECODE_VALID) >> MEM_DECODE_VALID_SHIFT; + bp->uk = (val & MEM_DECODE_UK) >> MEM_DECODE_UK_SHIFT; + bp->um = (val & MEM_DECODE_UM) >> MEM_DECODE_UM_SHIFT; + bp->lk = (val & MEM_DECODE_LK) >> MEM_DECODE_LK_SHIFT; + bp->lm = (val & MEM_DECODE_LM) >> MEM_DECODE_LM_SHIFT; + + bp->base = (bp->um); + bp->base &= ~(bp->uk); + bp->base <<= PA_UPPER_BITS_SHIFT; + + switch(bp->lk) { + case 0xf: + default: + bp->interleave = 1; + break; + + case 0xe: + bp->interleave = 2; + break; + + case 0xc: + bp->interleave = 4; + break; + + case 0x8: + bp->interleave = 8; + break; + + case 0x0: + bp->interleave = 16; + break; + }; + + /* UK[10] is reserved, and UK[11] is not set for the SDRAM + * bank size definition. + */ + bp->size = (((unsigned long)bp->uk & + ((1UL << 10UL) - 1UL)) + 1UL) << PA_UPPER_BITS_SHIFT; + bp->size /= bp->interleave; +} + +static void chmc_fetch_decode_regs(struct chmc *p) +{ + if (p->layout_size == 0) + return; + + chmc_interpret_one_decode_reg(p, 0, + chmc_read_mcreg(p, CHMCTRL_DECODE1)); + chmc_interpret_one_decode_reg(p, 1, + chmc_read_mcreg(p, CHMCTRL_DECODE2)); + chmc_interpret_one_decode_reg(p, 2, + chmc_read_mcreg(p, CHMCTRL_DECODE3)); + chmc_interpret_one_decode_reg(p, 3, + chmc_read_mcreg(p, CHMCTRL_DECODE4)); +} + +static int __devinit chmc_probe(struct of_device *op, + const struct of_device_id *match) +{ + struct device_node *dp = op->node; + unsigned long ver; + const void *pval; + int len, portid; + struct chmc *p; + int err; + + err = -ENODEV; + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) + goto out; + + portid = of_getintprop_default(dp, "portid", -1); + if (portid == -1) + goto out; + + pval = of_get_property(dp, "memory-layout", &len); + if (pval && len > sizeof(p->layout_prop)) { + printk(KERN_ERR PFX "Unexpected memory-layout property " + "size %d.\n", len); + goto out; + } + + err = -ENOMEM; + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + printk(KERN_ERR PFX "Could not allocate struct chmc.\n"); + goto out; + } + + p->portid = portid; + p->layout_size = len; + if (!pval) + p->layout_size = 0; + else + memcpy(&p->layout_prop, pval, len); + + p->regs = of_ioremap(&op->resource[0], 0, 0x48, "chmc"); + if (!p->regs) { + printk(KERN_ERR PFX "Could not map registers.\n"); + goto out_free; + } + + if (p->layout_size != 0UL) { + p->timing_control1 = chmc_read_mcreg(p, CHMCTRL_TCTRL1); + p->timing_control2 = chmc_read_mcreg(p, CHMCTRL_TCTRL2); + p->timing_control3 = chmc_read_mcreg(p, CHMCTRL_TCTRL3); + p->timing_control4 = chmc_read_mcreg(p, CHMCTRL_TCTRL4); + p->memaddr_control = chmc_read_mcreg(p, CHMCTRL_MACTRL); + } + + chmc_fetch_decode_regs(p); + + mc_list_add(&p->list); + + printk(KERN_INFO PFX "UltraSPARC-III memory controller at %s [%s]\n", + dp->full_name, + (p->layout_size ? "ACTIVE" : "INACTIVE")); + + dev_set_drvdata(&op->dev, p); + + err = 0; + +out: + return err; + +out_free: + kfree(p); + goto out; +} + +static int __devinit us3mc_probe(struct of_device *op, + const struct of_device_id *match) +{ + if (mc_type == MC_TYPE_SAFARI) + return chmc_probe(op, match); + else if (mc_type == MC_TYPE_JBUS) + return jbusmc_probe(op, match); + return -ENODEV; +} + +static void __devexit chmc_destroy(struct of_device *op, struct chmc *p) +{ + list_del(&p->list); + of_iounmap(&op->resource[0], p->regs, 0x48); + kfree(p); +} + +static void __devexit jbusmc_destroy(struct of_device *op, struct jbusmc *p) +{ + mc_list_del(&p->list); + of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE); + kfree(p); +} + +static int __devexit us3mc_remove(struct of_device *op) +{ + void *p = dev_get_drvdata(&op->dev); + + if (p) { + if (mc_type == MC_TYPE_SAFARI) + chmc_destroy(op, p); + else if (mc_type == MC_TYPE_JBUS) + jbusmc_destroy(op, p); + } + return 0; +} + +static const struct of_device_id us3mc_match[] = { + { + .name = "memory-controller", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, us3mc_match); + +static struct of_platform_driver us3mc_driver = { + .name = "us3mc", + .match_table = us3mc_match, + .probe = us3mc_probe, + .remove = __devexit_p(us3mc_remove), +}; + +static inline bool us3mc_platform(void) +{ + if (tlb_type == cheetah || tlb_type == cheetah_plus) + return true; + return false; +} + +static int __init us3mc_init(void) +{ + unsigned long ver; + int ret; + + if (!us3mc_platform()) + return -ENODEV; + + __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) { + mc_type = MC_TYPE_JBUS; + us3mc_dimm_printer = jbusmc_print_dimm; + } else { + mc_type = MC_TYPE_SAFARI; + us3mc_dimm_printer = chmc_print_dimm; + } + + ret = register_dimm_printer(us3mc_dimm_printer); + + if (!ret) { + ret = of_register_driver(&us3mc_driver, &of_bus_type); + if (ret) + unregister_dimm_printer(us3mc_dimm_printer); + } + return ret; +} + +static void __exit us3mc_cleanup(void) +{ + if (us3mc_platform()) { + unregister_dimm_printer(us3mc_dimm_printer); + of_unregister_driver(&us3mc_driver); + } +} + +module_init(us3mc_init); +module_exit(us3mc_cleanup); diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c new file mode 100644 index 000000000000..d865575b25bf --- /dev/null +++ b/arch/sparc/kernel/compat_audit.c @@ -0,0 +1,43 @@ +#define __32bit_syscall_numbers__ +#include <asm/unistd.h> + +unsigned sparc32_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned sparc32_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned sparc32_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned sparc32_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned sparc32_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int sparc32_classify_syscall(unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/arch/sparc/kernel/cpu_64.c b/arch/sparc/kernel/cpu_64.c new file mode 100644 index 000000000000..0c9ac83ed0a8 --- /dev/null +++ b/arch/sparc/kernel/cpu_64.c @@ -0,0 +1,166 @@ +/* cpu.c: Dinky routines to look for the kind of Sparc cpu + * we are on. + * + * Copyright (C) 1996, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <asm/asi.h> +#include <asm/system.h> +#include <asm/fpumacro.h> +#include <asm/cpudata.h> +#include <asm/spitfire.h> +#include <asm/oplib.h> + +#include "entry.h" + +DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; + +struct cpu_chip_info { + unsigned short manuf; + unsigned short impl; + const char *cpu_name; + const char *fp_name; +}; + +static const struct cpu_chip_info cpu_chips[] = { + { + .manuf = 0x17, + .impl = 0x10, + .cpu_name = "TI UltraSparc I (SpitFire)", + .fp_name = "UltraSparc I integrated FPU", + }, + { + .manuf = 0x22, + .impl = 0x10, + .cpu_name = "TI UltraSparc I (SpitFire)", + .fp_name = "UltraSparc I integrated FPU", + }, + { + .manuf = 0x17, + .impl = 0x11, + .cpu_name = "TI UltraSparc II (BlackBird)", + .fp_name = "UltraSparc II integrated FPU", + }, + { + .manuf = 0x17, + .impl = 0x12, + .cpu_name = "TI UltraSparc IIi (Sabre)", + .fp_name = "UltraSparc IIi integrated FPU", + }, + { + .manuf = 0x17, + .impl = 0x13, + .cpu_name = "TI UltraSparc IIe (Hummingbird)", + .fp_name = "UltraSparc IIe integrated FPU", + }, + { + .manuf = 0x3e, + .impl = 0x14, + .cpu_name = "TI UltraSparc III (Cheetah)", + .fp_name = "UltraSparc III integrated FPU", + }, + { + .manuf = 0x3e, + .impl = 0x15, + .cpu_name = "TI UltraSparc III+ (Cheetah+)", + .fp_name = "UltraSparc III+ integrated FPU", + }, + { + .manuf = 0x3e, + .impl = 0x16, + .cpu_name = "TI UltraSparc IIIi (Jalapeno)", + .fp_name = "UltraSparc IIIi integrated FPU", + }, + { + .manuf = 0x3e, + .impl = 0x18, + .cpu_name = "TI UltraSparc IV (Jaguar)", + .fp_name = "UltraSparc IV integrated FPU", + }, + { + .manuf = 0x3e, + .impl = 0x19, + .cpu_name = "TI UltraSparc IV+ (Panther)", + .fp_name = "UltraSparc IV+ integrated FPU", + }, + { + .manuf = 0x3e, + .impl = 0x22, + .cpu_name = "TI UltraSparc IIIi+ (Serrano)", + .fp_name = "UltraSparc IIIi+ integrated FPU", + }, +}; + +#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips) + +const char *sparc_cpu_type; +const char *sparc_fpu_type; + +static void __init sun4v_cpu_probe(void) +{ + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + sparc_cpu_type = "UltraSparc T1 (Niagara)"; + sparc_fpu_type = "UltraSparc T1 integrated FPU"; + break; + + case SUN4V_CHIP_NIAGARA2: + sparc_cpu_type = "UltraSparc T2 (Niagara2)"; + sparc_fpu_type = "UltraSparc T2 integrated FPU"; + break; + + default: + printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", + prom_cpu_compatible); + sparc_cpu_type = "Unknown SUN4V CPU"; + sparc_fpu_type = "Unknown SUN4V FPU"; + break; + } +} + +static const struct cpu_chip_info * __init find_cpu_chip(unsigned short manuf, + unsigned short impl) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cpu_chips); i++) { + const struct cpu_chip_info *p = &cpu_chips[i]; + + if (p->manuf == manuf && p->impl == impl) + return p; + } + return NULL; +} + +static int __init cpu_type_probe(void) +{ + if (tlb_type == hypervisor) { + sun4v_cpu_probe(); + } else { + unsigned long ver, manuf, impl; + const struct cpu_chip_info *p; + + __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); + + manuf = ((ver >> 48) & 0xffff); + impl = ((ver >> 32) & 0xffff); + + p = find_cpu_chip(manuf, impl); + if (p) { + sparc_cpu_type = p->cpu_name; + sparc_fpu_type = p->fp_name; + } else { + printk(KERN_ERR "CPU: Unknown chip, manuf[%lx] impl[%lx]\n", + manuf, impl); + sparc_cpu_type = "Unknown CPU"; + sparc_fpu_type = "Unknown FPU"; + } + } + return 0; +} + +arch_initcall(cpu_type_probe); diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c new file mode 100644 index 000000000000..f52e0534d91d --- /dev/null +++ b/arch/sparc/kernel/ds.c @@ -0,0 +1,1244 @@ +/* ds.c: Domain Services driver for Logical Domains + * + * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/kthread.h> +#include <linux/reboot.h> +#include <linux/cpu.h> + +#include <asm/ldc.h> +#include <asm/vio.h> +#include <asm/mdesc.h> +#include <asm/head.h> +#include <asm/irq.h> + +#define DRV_MODULE_NAME "ds" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.0" +#define DRV_MODULE_RELDATE "Jul 11, 2007" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Sun LDOM domain services driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +struct ds_msg_tag { + __u32 type; +#define DS_INIT_REQ 0x00 +#define DS_INIT_ACK 0x01 +#define DS_INIT_NACK 0x02 +#define DS_REG_REQ 0x03 +#define DS_REG_ACK 0x04 +#define DS_REG_NACK 0x05 +#define DS_UNREG_REQ 0x06 +#define DS_UNREG_ACK 0x07 +#define DS_UNREG_NACK 0x08 +#define DS_DATA 0x09 +#define DS_NACK 0x0a + + __u32 len; +}; + +/* Result codes */ +#define DS_OK 0x00 +#define DS_REG_VER_NACK 0x01 +#define DS_REG_DUP 0x02 +#define DS_INV_HDL 0x03 +#define DS_TYPE_UNKNOWN 0x04 + +struct ds_version { + __u16 major; + __u16 minor; +}; + +struct ds_ver_req { + struct ds_msg_tag tag; + struct ds_version ver; +}; + +struct ds_ver_ack { + struct ds_msg_tag tag; + __u16 minor; +}; + +struct ds_ver_nack { + struct ds_msg_tag tag; + __u16 major; +}; + +struct ds_reg_req { + struct ds_msg_tag tag; + __u64 handle; + __u16 major; + __u16 minor; + char svc_id[0]; +}; + +struct ds_reg_ack { + struct ds_msg_tag tag; + __u64 handle; + __u16 minor; +}; + +struct ds_reg_nack { + struct ds_msg_tag tag; + __u64 handle; + __u16 major; +}; + +struct ds_unreg_req { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_unreg_ack { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_unreg_nack { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_data { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_data_nack { + struct ds_msg_tag tag; + __u64 handle; + __u64 result; +}; + +struct ds_info; +struct ds_cap_state { + __u64 handle; + + void (*data)(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); + + const char *service_id; + + u8 state; +#define CAP_STATE_UNKNOWN 0x00 +#define CAP_STATE_REG_SENT 0x01 +#define CAP_STATE_REGISTERED 0x02 +}; + +static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp, + void *buf, int len); +static void domain_shutdown_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +static void domain_panic_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +#ifdef CONFIG_HOTPLUG_CPU +static void dr_cpu_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +#endif +static void ds_pri_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +static void ds_var_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); + +static struct ds_cap_state ds_states_template[] = { + { + .service_id = "md-update", + .data = md_update_data, + }, + { + .service_id = "domain-shutdown", + .data = domain_shutdown_data, + }, + { + .service_id = "domain-panic", + .data = domain_panic_data, + }, +#ifdef CONFIG_HOTPLUG_CPU + { + .service_id = "dr-cpu", + .data = dr_cpu_data, + }, +#endif + { + .service_id = "pri", + .data = ds_pri_data, + }, + { + .service_id = "var-config", + .data = ds_var_data, + }, + { + .service_id = "var-config-backup", + .data = ds_var_data, + }, +}; + +static DEFINE_SPINLOCK(ds_lock); + +struct ds_info { + struct ldc_channel *lp; + u8 hs_state; +#define DS_HS_START 0x01 +#define DS_HS_DONE 0x02 + + u64 id; + + void *rcv_buf; + int rcv_buf_len; + + struct ds_cap_state *ds_states; + int num_ds_states; + + struct ds_info *next; +}; + +static struct ds_info *ds_info_list; + +static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle) +{ + unsigned int index = handle >> 32; + + if (index >= dp->num_ds_states) + return NULL; + return &dp->ds_states[index]; +} + +static struct ds_cap_state *find_cap_by_string(struct ds_info *dp, + const char *name) +{ + int i; + + for (i = 0; i < dp->num_ds_states; i++) { + if (strcmp(dp->ds_states[i].service_id, name)) + continue; + + return &dp->ds_states[i]; + } + return NULL; +} + +static int __ds_send(struct ldc_channel *lp, void *data, int len) +{ + int err, limit = 1000; + + err = -EINVAL; + while (limit-- > 0) { + err = ldc_write(lp, data, len); + if (!err || (err != -EAGAIN)) + break; + udelay(1); + } + + return err; +} + +static int ds_send(struct ldc_channel *lp, void *data, int len) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&ds_lock, flags); + err = __ds_send(lp, data, len); + spin_unlock_irqrestore(&ds_lock, flags); + + return err; +} + +struct ds_md_update_req { + __u64 req_num; +}; + +struct ds_md_update_res { + __u64 req_num; + __u32 result; +}; + +static void md_update_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ldc_channel *lp = dp->lp; + struct ds_data *dpkt = buf; + struct ds_md_update_req *rp; + struct { + struct ds_data data; + struct ds_md_update_res res; + } pkt; + + rp = (struct ds_md_update_req *) (dpkt + 1); + + printk(KERN_INFO "ds-%lu: Machine description update.\n", dp->id); + + mdesc_update(); + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); + pkt.data.handle = cp->handle; + pkt.res.req_num = rp->req_num; + pkt.res.result = DS_OK; + + ds_send(lp, &pkt, sizeof(pkt)); +} + +struct ds_shutdown_req { + __u64 req_num; + __u32 ms_delay; +}; + +struct ds_shutdown_res { + __u64 req_num; + __u32 result; + char reason[1]; +}; + +static void domain_shutdown_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ldc_channel *lp = dp->lp; + struct ds_data *dpkt = buf; + struct ds_shutdown_req *rp; + struct { + struct ds_data data; + struct ds_shutdown_res res; + } pkt; + + rp = (struct ds_shutdown_req *) (dpkt + 1); + + printk(KERN_ALERT "ds-%lu: Shutdown request from " + "LDOM manager received.\n", dp->id); + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); + pkt.data.handle = cp->handle; + pkt.res.req_num = rp->req_num; + pkt.res.result = DS_OK; + pkt.res.reason[0] = 0; + + ds_send(lp, &pkt, sizeof(pkt)); + + orderly_poweroff(true); +} + +struct ds_panic_req { + __u64 req_num; +}; + +struct ds_panic_res { + __u64 req_num; + __u32 result; + char reason[1]; +}; + +static void domain_panic_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ldc_channel *lp = dp->lp; + struct ds_data *dpkt = buf; + struct ds_panic_req *rp; + struct { + struct ds_data data; + struct ds_panic_res res; + } pkt; + + rp = (struct ds_panic_req *) (dpkt + 1); + + printk(KERN_ALERT "ds-%lu: Panic request from " + "LDOM manager received.\n", dp->id); + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); + pkt.data.handle = cp->handle; + pkt.res.req_num = rp->req_num; + pkt.res.result = DS_OK; + pkt.res.reason[0] = 0; + + ds_send(lp, &pkt, sizeof(pkt)); + + panic("PANIC requested by LDOM manager."); +} + +#ifdef CONFIG_HOTPLUG_CPU +struct dr_cpu_tag { + __u64 req_num; + __u32 type; +#define DR_CPU_CONFIGURE 0x43 +#define DR_CPU_UNCONFIGURE 0x55 +#define DR_CPU_FORCE_UNCONFIGURE 0x46 +#define DR_CPU_STATUS 0x53 + +/* Responses */ +#define DR_CPU_OK 0x6f +#define DR_CPU_ERROR 0x65 + + __u32 num_records; +}; + +struct dr_cpu_resp_entry { + __u32 cpu; + __u32 result; +#define DR_CPU_RES_OK 0x00 +#define DR_CPU_RES_FAILURE 0x01 +#define DR_CPU_RES_BLOCKED 0x02 +#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03 +#define DR_CPU_RES_NOT_IN_MD 0x04 + + __u32 stat; +#define DR_CPU_STAT_NOT_PRESENT 0x00 +#define DR_CPU_STAT_UNCONFIGURED 0x01 +#define DR_CPU_STAT_CONFIGURED 0x02 + + __u32 str_off; +}; + +static void __dr_cpu_send_error(struct ds_info *dp, + struct ds_cap_state *cp, + struct ds_data *data) +{ + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + struct { + struct ds_data data; + struct dr_cpu_tag tag; + } pkt; + int msg_len; + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.handle = cp->handle; + pkt.tag.req_num = tag->req_num; + pkt.tag.type = DR_CPU_ERROR; + pkt.tag.num_records = 0; + + msg_len = (sizeof(struct ds_data) + + sizeof(struct dr_cpu_tag)); + + pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + + __ds_send(dp->lp, &pkt, msg_len); +} + +static void dr_cpu_send_error(struct ds_info *dp, + struct ds_cap_state *cp, + struct ds_data *data) +{ + unsigned long flags; + + spin_lock_irqsave(&ds_lock, flags); + __dr_cpu_send_error(dp, cp, data); + spin_unlock_irqrestore(&ds_lock, flags); +} + +#define CPU_SENTINEL 0xffffffff + +static void purge_dups(u32 *list, u32 num_ents) +{ + unsigned int i; + + for (i = 0; i < num_ents; i++) { + u32 cpu = list[i]; + unsigned int j; + + if (cpu == CPU_SENTINEL) + continue; + + for (j = i + 1; j < num_ents; j++) { + if (list[j] == cpu) + list[j] = CPU_SENTINEL; + } + } +} + +static int dr_cpu_size_response(int ncpus) +{ + return (sizeof(struct ds_data) + + sizeof(struct dr_cpu_tag) + + (sizeof(struct dr_cpu_resp_entry) * ncpus)); +} + +static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, + u64 handle, int resp_len, int ncpus, + cpumask_t *mask, u32 default_stat) +{ + struct dr_cpu_resp_entry *ent; + struct dr_cpu_tag *tag; + int i, cpu; + + tag = (struct dr_cpu_tag *) (resp + 1); + ent = (struct dr_cpu_resp_entry *) (tag + 1); + + resp->tag.type = DS_DATA; + resp->tag.len = resp_len - sizeof(struct ds_msg_tag); + resp->handle = handle; + tag->req_num = req_num; + tag->type = DR_CPU_OK; + tag->num_records = ncpus; + + i = 0; + for_each_cpu_mask(cpu, *mask) { + ent[i].cpu = cpu; + ent[i].result = DR_CPU_RES_OK; + ent[i].stat = default_stat; + i++; + } + BUG_ON(i != ncpus); +} + +static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus, + u32 res, u32 stat) +{ + struct dr_cpu_resp_entry *ent; + struct dr_cpu_tag *tag; + int i; + + tag = (struct dr_cpu_tag *) (resp + 1); + ent = (struct dr_cpu_resp_entry *) (tag + 1); + + for (i = 0; i < ncpus; i++) { + if (ent[i].cpu != cpu) + continue; + ent[i].result = res; + ent[i].stat = stat; + break; + } +} + +static int __cpuinit dr_cpu_configure(struct ds_info *dp, + struct ds_cap_state *cp, + u64 req_num, + cpumask_t *mask) +{ + struct ds_data *resp; + int resp_len, ncpus, cpu; + unsigned long flags; + + ncpus = cpus_weight(*mask); + resp_len = dr_cpu_size_response(ncpus); + resp = kzalloc(resp_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + dr_cpu_init_response(resp, req_num, cp->handle, + resp_len, ncpus, mask, + DR_CPU_STAT_CONFIGURED); + + mdesc_fill_in_cpu_data(*mask); + + for_each_cpu_mask(cpu, *mask) { + int err; + + printk(KERN_INFO "ds-%lu: Starting cpu %d...\n", + dp->id, cpu); + err = cpu_up(cpu); + if (err) { + __u32 res = DR_CPU_RES_FAILURE; + __u32 stat = DR_CPU_STAT_UNCONFIGURED; + + if (!cpu_present(cpu)) { + /* CPU not present in MD */ + res = DR_CPU_RES_NOT_IN_MD; + stat = DR_CPU_STAT_NOT_PRESENT; + } else if (err == -ENODEV) { + /* CPU did not call in successfully */ + res = DR_CPU_RES_CPU_NOT_RESPONDING; + } + + printk(KERN_INFO "ds-%lu: CPU startup failed err=%d\n", + dp->id, err); + dr_cpu_mark(resp, cpu, ncpus, res, stat); + } + } + + spin_lock_irqsave(&ds_lock, flags); + __ds_send(dp->lp, resp, resp_len); + spin_unlock_irqrestore(&ds_lock, flags); + + kfree(resp); + + /* Redistribute IRQs, taking into account the new cpus. */ + fixup_irqs(); + + return 0; +} + +static int dr_cpu_unconfigure(struct ds_info *dp, + struct ds_cap_state *cp, + u64 req_num, + cpumask_t *mask) +{ + struct ds_data *resp; + int resp_len, ncpus, cpu; + unsigned long flags; + + ncpus = cpus_weight(*mask); + resp_len = dr_cpu_size_response(ncpus); + resp = kzalloc(resp_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + dr_cpu_init_response(resp, req_num, cp->handle, + resp_len, ncpus, mask, + DR_CPU_STAT_UNCONFIGURED); + + for_each_cpu_mask(cpu, *mask) { + int err; + + printk(KERN_INFO "ds-%lu: Shutting down cpu %d...\n", + dp->id, cpu); + err = cpu_down(cpu); + if (err) + dr_cpu_mark(resp, cpu, ncpus, + DR_CPU_RES_FAILURE, + DR_CPU_STAT_CONFIGURED); + } + + spin_lock_irqsave(&ds_lock, flags); + __ds_send(dp->lp, resp, resp_len); + spin_unlock_irqrestore(&ds_lock, flags); + + kfree(resp); + + return 0; +} + +static void __cpuinit dr_cpu_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ds_data *data = buf; + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + u32 *cpu_list = (u32 *) (tag + 1); + u64 req_num = tag->req_num; + cpumask_t mask; + unsigned int i; + int err; + + switch (tag->type) { + case DR_CPU_CONFIGURE: + case DR_CPU_UNCONFIGURE: + case DR_CPU_FORCE_UNCONFIGURE: + break; + + default: + dr_cpu_send_error(dp, cp, data); + return; + } + + purge_dups(cpu_list, tag->num_records); + + cpus_clear(mask); + for (i = 0; i < tag->num_records; i++) { + if (cpu_list[i] == CPU_SENTINEL) + continue; + + if (cpu_list[i] < NR_CPUS) + cpu_set(cpu_list[i], mask); + } + + if (tag->type == DR_CPU_CONFIGURE) + err = dr_cpu_configure(dp, cp, req_num, &mask); + else + err = dr_cpu_unconfigure(dp, cp, req_num, &mask); + + if (err) + dr_cpu_send_error(dp, cp, data); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +struct ds_pri_msg { + __u64 req_num; + __u64 type; +#define DS_PRI_REQUEST 0x00 +#define DS_PRI_DATA 0x01 +#define DS_PRI_UPDATE 0x02 +}; + +static void ds_pri_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ds_data *dpkt = buf; + struct ds_pri_msg *rp; + + rp = (struct ds_pri_msg *) (dpkt + 1); + + printk(KERN_INFO "ds-%lu: PRI REQ [%lx:%lx], len=%d\n", + dp->id, rp->req_num, rp->type, len); +} + +struct ds_var_hdr { + __u32 type; +#define DS_VAR_SET_REQ 0x00 +#define DS_VAR_DELETE_REQ 0x01 +#define DS_VAR_SET_RESP 0x02 +#define DS_VAR_DELETE_RESP 0x03 +}; + +struct ds_var_set_msg { + struct ds_var_hdr hdr; + char name_and_value[0]; +}; + +struct ds_var_delete_msg { + struct ds_var_hdr hdr; + char name[0]; +}; + +struct ds_var_resp { + struct ds_var_hdr hdr; + __u32 result; +#define DS_VAR_SUCCESS 0x00 +#define DS_VAR_NO_SPACE 0x01 +#define DS_VAR_INVALID_VAR 0x02 +#define DS_VAR_INVALID_VAL 0x03 +#define DS_VAR_NOT_PRESENT 0x04 +}; + +static DEFINE_MUTEX(ds_var_mutex); +static int ds_var_doorbell; +static int ds_var_response; + +static void ds_var_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ds_data *dpkt = buf; + struct ds_var_resp *rp; + + rp = (struct ds_var_resp *) (dpkt + 1); + + if (rp->hdr.type != DS_VAR_SET_RESP && + rp->hdr.type != DS_VAR_DELETE_RESP) + return; + + ds_var_response = rp->result; + wmb(); + ds_var_doorbell = 1; +} + +void ldom_set_var(const char *var, const char *value) +{ + struct ds_cap_state *cp; + struct ds_info *dp; + unsigned long flags; + + spin_lock_irqsave(&ds_lock, flags); + cp = NULL; + for (dp = ds_info_list; dp; dp = dp->next) { + struct ds_cap_state *tmp; + + tmp = find_cap_by_string(dp, "var-config"); + if (tmp && tmp->state == CAP_STATE_REGISTERED) { + cp = tmp; + break; + } + } + if (!cp) { + for (dp = ds_info_list; dp; dp = dp->next) { + struct ds_cap_state *tmp; + + tmp = find_cap_by_string(dp, "var-config-backup"); + if (tmp && tmp->state == CAP_STATE_REGISTERED) { + cp = tmp; + break; + } + } + } + spin_unlock_irqrestore(&ds_lock, flags); + + if (cp) { + union { + struct { + struct ds_data data; + struct ds_var_set_msg msg; + } header; + char all[512]; + } pkt; + char *base, *p; + int msg_len, loops; + + memset(&pkt, 0, sizeof(pkt)); + pkt.header.data.tag.type = DS_DATA; + pkt.header.data.handle = cp->handle; + pkt.header.msg.hdr.type = DS_VAR_SET_REQ; + base = p = &pkt.header.msg.name_and_value[0]; + strcpy(p, var); + p += strlen(var) + 1; + strcpy(p, value); + p += strlen(value) + 1; + + msg_len = (sizeof(struct ds_data) + + sizeof(struct ds_var_set_msg) + + (p - base)); + msg_len = (msg_len + 3) & ~3; + pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + + mutex_lock(&ds_var_mutex); + + spin_lock_irqsave(&ds_lock, flags); + ds_var_doorbell = 0; + ds_var_response = -1; + + __ds_send(dp->lp, &pkt, msg_len); + spin_unlock_irqrestore(&ds_lock, flags); + + loops = 1000; + while (ds_var_doorbell == 0) { + if (loops-- < 0) + break; + barrier(); + udelay(100); + } + + mutex_unlock(&ds_var_mutex); + + if (ds_var_doorbell == 0 || + ds_var_response != DS_VAR_SUCCESS) + printk(KERN_ERR "ds-%lu: var-config [%s:%s] " + "failed, response(%d).\n", + dp->id, var, value, + ds_var_response); + } else { + printk(KERN_ERR PFX "var-config not registered so " + "could not set (%s) variable to (%s).\n", + var, value); + } +} + +void ldom_reboot(const char *boot_command) +{ + /* Don't bother with any of this if the boot_command + * is empty. + */ + if (boot_command && strlen(boot_command)) { + char full_boot_str[256]; + + strcpy(full_boot_str, "boot "); + strcpy(full_boot_str + strlen("boot "), boot_command); + + ldom_set_var("reboot-command", full_boot_str); + } + sun4v_mach_sir(); +} + +void ldom_power_off(void) +{ + sun4v_mach_exit(0); +} + +static void ds_conn_reset(struct ds_info *dp) +{ + printk(KERN_ERR "ds-%lu: ds_conn_reset() from %p\n", + dp->id, __builtin_return_address(0)); +} + +static int register_services(struct ds_info *dp) +{ + struct ldc_channel *lp = dp->lp; + int i; + + for (i = 0; i < dp->num_ds_states; i++) { + struct { + struct ds_reg_req req; + u8 id_buf[256]; + } pbuf; + struct ds_cap_state *cp = &dp->ds_states[i]; + int err, msg_len; + u64 new_count; + + if (cp->state == CAP_STATE_REGISTERED) + continue; + + new_count = sched_clock() & 0xffffffff; + cp->handle = ((u64) i << 32) | new_count; + + msg_len = (sizeof(struct ds_reg_req) + + strlen(cp->service_id)); + + memset(&pbuf, 0, sizeof(pbuf)); + pbuf.req.tag.type = DS_REG_REQ; + pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag)); + pbuf.req.handle = cp->handle; + pbuf.req.major = 1; + pbuf.req.minor = 0; + strcpy(pbuf.req.svc_id, cp->service_id); + + err = __ds_send(lp, &pbuf, msg_len); + if (err > 0) + cp->state = CAP_STATE_REG_SENT; + } + return 0; +} + +static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt) +{ + + if (dp->hs_state == DS_HS_START) { + if (pkt->type != DS_INIT_ACK) + goto conn_reset; + + dp->hs_state = DS_HS_DONE; + + return register_services(dp); + } + + if (dp->hs_state != DS_HS_DONE) + goto conn_reset; + + if (pkt->type == DS_REG_ACK) { + struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt; + struct ds_cap_state *cp = find_cap(dp, ap->handle); + + if (!cp) { + printk(KERN_ERR "ds-%lu: REG ACK for unknown " + "handle %lx\n", dp->id, ap->handle); + return 0; + } + printk(KERN_INFO "ds-%lu: Registered %s service.\n", + dp->id, cp->service_id); + cp->state = CAP_STATE_REGISTERED; + } else if (pkt->type == DS_REG_NACK) { + struct ds_reg_nack *np = (struct ds_reg_nack *) pkt; + struct ds_cap_state *cp = find_cap(dp, np->handle); + + if (!cp) { + printk(KERN_ERR "ds-%lu: REG NACK for " + "unknown handle %lx\n", + dp->id, np->handle); + return 0; + } + cp->state = CAP_STATE_UNKNOWN; + } + + return 0; + +conn_reset: + ds_conn_reset(dp); + return -ECONNRESET; +} + +static void __send_ds_nack(struct ds_info *dp, u64 handle) +{ + struct ds_data_nack nack = { + .tag = { + .type = DS_NACK, + .len = (sizeof(struct ds_data_nack) - + sizeof(struct ds_msg_tag)), + }, + .handle = handle, + .result = DS_INV_HDL, + }; + + __ds_send(dp->lp, &nack, sizeof(nack)); +} + +static LIST_HEAD(ds_work_list); +static DECLARE_WAIT_QUEUE_HEAD(ds_wait); + +struct ds_queue_entry { + struct list_head list; + struct ds_info *dp; + int req_len; + int __pad; + u64 req[0]; +}; + +static void process_ds_work(void) +{ + struct ds_queue_entry *qp, *tmp; + unsigned long flags; + LIST_HEAD(todo); + + spin_lock_irqsave(&ds_lock, flags); + list_splice_init(&ds_work_list, &todo); + spin_unlock_irqrestore(&ds_lock, flags); + + list_for_each_entry_safe(qp, tmp, &todo, list) { + struct ds_data *dpkt = (struct ds_data *) qp->req; + struct ds_info *dp = qp->dp; + struct ds_cap_state *cp = find_cap(dp, dpkt->handle); + int req_len = qp->req_len; + + if (!cp) { + printk(KERN_ERR "ds-%lu: Data for unknown " + "handle %lu\n", + dp->id, dpkt->handle); + + spin_lock_irqsave(&ds_lock, flags); + __send_ds_nack(dp, dpkt->handle); + spin_unlock_irqrestore(&ds_lock, flags); + } else { + cp->data(dp, cp, dpkt, req_len); + } + + list_del(&qp->list); + kfree(qp); + } +} + +static int ds_thread(void *__unused) +{ + DEFINE_WAIT(wait); + + while (1) { + prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE); + if (list_empty(&ds_work_list)) + schedule(); + finish_wait(&ds_wait, &wait); + + if (kthread_should_stop()) + break; + + process_ds_work(); + } + + return 0; +} + +static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len) +{ + struct ds_data *dpkt = (struct ds_data *) pkt; + struct ds_queue_entry *qp; + + qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC); + if (!qp) { + __send_ds_nack(dp, dpkt->handle); + } else { + qp->dp = dp; + memcpy(&qp->req, pkt, len); + list_add_tail(&qp->list, &ds_work_list); + wake_up(&ds_wait); + } + return 0; +} + +static void ds_up(struct ds_info *dp) +{ + struct ldc_channel *lp = dp->lp; + struct ds_ver_req req; + int err; + + req.tag.type = DS_INIT_REQ; + req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag); + req.ver.major = 1; + req.ver.minor = 0; + + err = __ds_send(lp, &req, sizeof(req)); + if (err > 0) + dp->hs_state = DS_HS_START; +} + +static void ds_reset(struct ds_info *dp) +{ + int i; + + dp->hs_state = 0; + + for (i = 0; i < dp->num_ds_states; i++) { + struct ds_cap_state *cp = &dp->ds_states[i]; + + cp->state = CAP_STATE_UNKNOWN; + } +} + +static void ds_event(void *arg, int event) +{ + struct ds_info *dp = arg; + struct ldc_channel *lp = dp->lp; + unsigned long flags; + int err; + + spin_lock_irqsave(&ds_lock, flags); + + if (event == LDC_EVENT_UP) { + ds_up(dp); + spin_unlock_irqrestore(&ds_lock, flags); + return; + } + + if (event == LDC_EVENT_RESET) { + ds_reset(dp); + spin_unlock_irqrestore(&ds_lock, flags); + return; + } + + if (event != LDC_EVENT_DATA_READY) { + printk(KERN_WARNING "ds-%lu: Unexpected LDC event %d\n", + dp->id, event); + spin_unlock_irqrestore(&ds_lock, flags); + return; + } + + err = 0; + while (1) { + struct ds_msg_tag *tag; + + err = ldc_read(lp, dp->rcv_buf, sizeof(*tag)); + + if (unlikely(err < 0)) { + if (err == -ECONNRESET) + ds_conn_reset(dp); + break; + } + if (err == 0) + break; + + tag = dp->rcv_buf; + err = ldc_read(lp, tag + 1, tag->len); + + if (unlikely(err < 0)) { + if (err == -ECONNRESET) + ds_conn_reset(dp); + break; + } + if (err < tag->len) + break; + + if (tag->type < DS_DATA) + err = ds_handshake(dp, dp->rcv_buf); + else + err = ds_data(dp, dp->rcv_buf, + sizeof(*tag) + err); + if (err == -ECONNRESET) + break; + } + + spin_unlock_irqrestore(&ds_lock, flags); +} + +static int __devinit ds_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + static int ds_version_printed; + struct ldc_channel_config ds_cfg = { + .event = ds_event, + .mtu = 4096, + .mode = LDC_MODE_STREAM, + }; + struct mdesc_handle *hp; + struct ldc_channel *lp; + struct ds_info *dp; + const u64 *val; + int err, i; + + if (ds_version_printed++ == 0) + printk(KERN_INFO "%s", version); + + dp = kzalloc(sizeof(*dp), GFP_KERNEL); + err = -ENOMEM; + if (!dp) + goto out_err; + + hp = mdesc_grab(); + val = mdesc_get_property(hp, vdev->mp, "id", NULL); + if (val) + dp->id = *val; + mdesc_release(hp); + + dp->rcv_buf = kzalloc(4096, GFP_KERNEL); + if (!dp->rcv_buf) + goto out_free_dp; + + dp->rcv_buf_len = 4096; + + dp->ds_states = kzalloc(sizeof(ds_states_template), + GFP_KERNEL); + if (!dp->ds_states) + goto out_free_rcv_buf; + + memcpy(dp->ds_states, ds_states_template, + sizeof(ds_states_template)); + dp->num_ds_states = ARRAY_SIZE(ds_states_template); + + for (i = 0; i < dp->num_ds_states; i++) + dp->ds_states[i].handle = ((u64)i << 32); + + ds_cfg.tx_irq = vdev->tx_irq; + ds_cfg.rx_irq = vdev->rx_irq; + + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out_free_ds_states; + } + dp->lp = lp; + + err = ldc_bind(lp, "DS"); + if (err) + goto out_free_ldc; + + spin_lock_irq(&ds_lock); + dp->next = ds_info_list; + ds_info_list = dp; + spin_unlock_irq(&ds_lock); + + return err; + +out_free_ldc: + ldc_free(dp->lp); + +out_free_ds_states: + kfree(dp->ds_states); + +out_free_rcv_buf: + kfree(dp->rcv_buf); + +out_free_dp: + kfree(dp); + +out_err: + return err; +} + +static int ds_remove(struct vio_dev *vdev) +{ + return 0; +} + +static struct vio_device_id __initdata ds_match[] = { + { + .type = "domain-services-port", + }, + {}, +}; + +static struct vio_driver ds_driver = { + .id_table = ds_match, + .probe = ds_probe, + .remove = ds_remove, + .driver = { + .name = "ds", + .owner = THIS_MODULE, + } +}; + +static int __init ds_init(void) +{ + kthread_run(ds_thread, NULL, "kldomd"); + + return vio_register_driver(&ds_driver); +} + +subsys_initcall(ds_init); diff --git a/arch/sparc/kernel/dtlb_miss.S b/arch/sparc/kernel/dtlb_miss.S new file mode 100644 index 000000000000..09a6a15a7105 --- /dev/null +++ b/arch/sparc/kernel/dtlb_miss.S @@ -0,0 +1,39 @@ +/* DTLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context + brz,pn %g5, kvmap_dtlb ! Context 0 processing + srlx %g6, 22, %g6 ! Delay slot + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + +/* DTLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_dtlb ! Miss + mov FAULT_CODE_DTLB, %g3 + stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB + retry ! Trap done + nop + nop + nop + nop + +/* DTLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* DTLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S new file mode 100644 index 000000000000..b2c2c5be281c --- /dev/null +++ b/arch/sparc/kernel/dtlb_prot.S @@ -0,0 +1,54 @@ +/* + * dtlb_prot.S: DTLB protection trap strategy. + * This is included directly into the trap table. + * + * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com) + * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) + */ + +/* Ways we can get here: + * + * [TL == 0] 1) User stores to readonly pages. + * [TL == 0] 2) Nucleus stores to user readonly pages. + * [TL > 0] 3) Nucleus stores to user readonly stack frame. + */ + +/* PROT ** ICACHE line 1: User DTLB protection trap */ + mov TLB_SFSR, %g1 + stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit + membar #Sync ! Synchronize stores + rdpr %pstate, %g5 ! Move into alt-globals + wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate + rdpr %tl, %g1 ! Need a winfixup? + cmp %g1, 1 ! Trap level >1? + mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr + +/* PROT ** ICACHE line 2: More real fault processing */ + bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup + ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 + ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + nop + nop + nop + nop + +/* PROT ** ICACHE line 3: Unused... */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* PROT ** ICACHE line 4: Unused... */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c new file mode 100644 index 000000000000..77dbf6d45faf --- /dev/null +++ b/arch/sparc/kernel/ebus.c @@ -0,0 +1,257 @@ +/* ebus.c: EBUS DMA library code. + * + * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 David S. Miller (davem@redhat.com) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/delay.h> + +#include <asm/ebus_dma.h> +#include <asm/io.h> + +#define EBDMA_CSR 0x00UL /* Control/Status */ +#define EBDMA_ADDR 0x04UL /* DMA Address */ +#define EBDMA_COUNT 0x08UL /* DMA Count */ + +#define EBDMA_CSR_INT_PEND 0x00000001 +#define EBDMA_CSR_ERR_PEND 0x00000002 +#define EBDMA_CSR_DRAIN 0x00000004 +#define EBDMA_CSR_INT_EN 0x00000010 +#define EBDMA_CSR_RESET 0x00000080 +#define EBDMA_CSR_WRITE 0x00000100 +#define EBDMA_CSR_EN_DMA 0x00000200 +#define EBDMA_CSR_CYC_PEND 0x00000400 +#define EBDMA_CSR_DIAG_RD_DONE 0x00000800 +#define EBDMA_CSR_DIAG_WR_DONE 0x00001000 +#define EBDMA_CSR_EN_CNT 0x00002000 +#define EBDMA_CSR_TC 0x00004000 +#define EBDMA_CSR_DIS_CSR_DRN 0x00010000 +#define EBDMA_CSR_BURST_SZ_MASK 0x000c0000 +#define EBDMA_CSR_BURST_SZ_1 0x00080000 +#define EBDMA_CSR_BURST_SZ_4 0x00000000 +#define EBDMA_CSR_BURST_SZ_8 0x00040000 +#define EBDMA_CSR_BURST_SZ_16 0x000c0000 +#define EBDMA_CSR_DIAG_EN 0x00100000 +#define EBDMA_CSR_DIS_ERR_PEND 0x00400000 +#define EBDMA_CSR_TCI_DIS 0x00800000 +#define EBDMA_CSR_EN_NEXT 0x01000000 +#define EBDMA_CSR_DMA_ON 0x02000000 +#define EBDMA_CSR_A_LOADED 0x04000000 +#define EBDMA_CSR_NA_LOADED 0x08000000 +#define EBDMA_CSR_DEV_ID_MASK 0xf0000000 + +#define EBUS_DMA_RESET_TIMEOUT 10000 + +static void __ebus_dma_reset(struct ebus_dma_info *p, int no_drain) +{ + int i; + u32 val = 0; + + writel(EBDMA_CSR_RESET, p->regs + EBDMA_CSR); + udelay(1); + + if (no_drain) + return; + + for (i = EBUS_DMA_RESET_TIMEOUT; i > 0; i--) { + val = readl(p->regs + EBDMA_CSR); + + if (!(val & (EBDMA_CSR_DRAIN | EBDMA_CSR_CYC_PEND))) + break; + udelay(10); + } +} + +static irqreturn_t ebus_dma_irq(int irq, void *dev_id) +{ + struct ebus_dma_info *p = dev_id; + unsigned long flags; + u32 csr = 0; + + spin_lock_irqsave(&p->lock, flags); + csr = readl(p->regs + EBDMA_CSR); + writel(csr, p->regs + EBDMA_CSR); + spin_unlock_irqrestore(&p->lock, flags); + + if (csr & EBDMA_CSR_ERR_PEND) { + printk(KERN_CRIT "ebus_dma(%s): DMA error!\n", p->name); + p->callback(p, EBUS_DMA_EVENT_ERROR, p->client_cookie); + return IRQ_HANDLED; + } else if (csr & EBDMA_CSR_INT_PEND) { + p->callback(p, + (csr & EBDMA_CSR_TC) ? + EBUS_DMA_EVENT_DMA : EBUS_DMA_EVENT_DEVICE, + p->client_cookie); + return IRQ_HANDLED; + } + + return IRQ_NONE; + +} + +int ebus_dma_register(struct ebus_dma_info *p) +{ + u32 csr; + + if (!p->regs) + return -EINVAL; + if (p->flags & ~(EBUS_DMA_FLAG_USE_EBDMA_HANDLER | + EBUS_DMA_FLAG_TCI_DISABLE)) + return -EINVAL; + if ((p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) && !p->callback) + return -EINVAL; + if (!strlen(p->name)) + return -EINVAL; + + __ebus_dma_reset(p, 1); + + csr = EBDMA_CSR_BURST_SZ_16 | EBDMA_CSR_EN_CNT; + + if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE) + csr |= EBDMA_CSR_TCI_DIS; + + writel(csr, p->regs + EBDMA_CSR); + + return 0; +} +EXPORT_SYMBOL(ebus_dma_register); + +int ebus_dma_irq_enable(struct ebus_dma_info *p, int on) +{ + unsigned long flags; + u32 csr; + + if (on) { + if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) { + if (request_irq(p->irq, ebus_dma_irq, IRQF_SHARED, p->name, p)) + return -EBUSY; + } + + spin_lock_irqsave(&p->lock, flags); + csr = readl(p->regs + EBDMA_CSR); + csr |= EBDMA_CSR_INT_EN; + writel(csr, p->regs + EBDMA_CSR); + spin_unlock_irqrestore(&p->lock, flags); + } else { + spin_lock_irqsave(&p->lock, flags); + csr = readl(p->regs + EBDMA_CSR); + csr &= ~EBDMA_CSR_INT_EN; + writel(csr, p->regs + EBDMA_CSR); + spin_unlock_irqrestore(&p->lock, flags); + + if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) { + free_irq(p->irq, p); + } + } + + return 0; +} +EXPORT_SYMBOL(ebus_dma_irq_enable); + +void ebus_dma_unregister(struct ebus_dma_info *p) +{ + unsigned long flags; + u32 csr; + int irq_on = 0; + + spin_lock_irqsave(&p->lock, flags); + csr = readl(p->regs + EBDMA_CSR); + if (csr & EBDMA_CSR_INT_EN) { + csr &= ~EBDMA_CSR_INT_EN; + writel(csr, p->regs + EBDMA_CSR); + irq_on = 1; + } + spin_unlock_irqrestore(&p->lock, flags); + + if (irq_on) + free_irq(p->irq, p); +} +EXPORT_SYMBOL(ebus_dma_unregister); + +int ebus_dma_request(struct ebus_dma_info *p, dma_addr_t bus_addr, size_t len) +{ + unsigned long flags; + u32 csr; + int err; + + if (len >= (1 << 24)) + return -EINVAL; + + spin_lock_irqsave(&p->lock, flags); + csr = readl(p->regs + EBDMA_CSR); + err = -EINVAL; + if (!(csr & EBDMA_CSR_EN_DMA)) + goto out; + err = -EBUSY; + if (csr & EBDMA_CSR_NA_LOADED) + goto out; + + writel(len, p->regs + EBDMA_COUNT); + writel(bus_addr, p->regs + EBDMA_ADDR); + err = 0; + +out: + spin_unlock_irqrestore(&p->lock, flags); + + return err; +} +EXPORT_SYMBOL(ebus_dma_request); + +void ebus_dma_prepare(struct ebus_dma_info *p, int write) +{ + unsigned long flags; + u32 csr; + + spin_lock_irqsave(&p->lock, flags); + __ebus_dma_reset(p, 0); + + csr = (EBDMA_CSR_INT_EN | + EBDMA_CSR_EN_CNT | + EBDMA_CSR_BURST_SZ_16 | + EBDMA_CSR_EN_NEXT); + + if (write) + csr |= EBDMA_CSR_WRITE; + if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE) + csr |= EBDMA_CSR_TCI_DIS; + + writel(csr, p->regs + EBDMA_CSR); + + spin_unlock_irqrestore(&p->lock, flags); +} +EXPORT_SYMBOL(ebus_dma_prepare); + +unsigned int ebus_dma_residue(struct ebus_dma_info *p) +{ + return readl(p->regs + EBDMA_COUNT); +} +EXPORT_SYMBOL(ebus_dma_residue); + +unsigned int ebus_dma_addr(struct ebus_dma_info *p) +{ + return readl(p->regs + EBDMA_ADDR); +} +EXPORT_SYMBOL(ebus_dma_addr); + +void ebus_dma_enable(struct ebus_dma_info *p, int on) +{ + unsigned long flags; + u32 orig_csr, csr; + + spin_lock_irqsave(&p->lock, flags); + orig_csr = csr = readl(p->regs + EBDMA_CSR); + if (on) + csr |= EBDMA_CSR_EN_DMA; + else + csr &= ~EBDMA_CSR_EN_DMA; + if ((orig_csr & EBDMA_CSR_EN_DMA) != + (csr & EBDMA_CSR_EN_DMA)) + writel(csr, p->regs + EBDMA_CSR); + spin_unlock_irqrestore(&p->lock, flags); +} +EXPORT_SYMBOL(ebus_dma_enable); diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h new file mode 100644 index 000000000000..34d7ab5e10d2 --- /dev/null +++ b/arch/sparc/kernel/entry.h @@ -0,0 +1,195 @@ +#ifndef _ENTRY_H +#define _ENTRY_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> + +extern const char *sparc_cpu_type; +extern const char *sparc_fpu_type; + +extern void __init per_cpu_patch(void); +extern void __init sun4v_patch(void); +extern void __init boot_cpu_id_too_large(int cpu); +extern unsigned int dcache_parity_tl1_occurred; +extern unsigned int icache_parity_tl1_occurred; + +extern asmlinkage void update_perfctrs(void); +extern asmlinkage void sparc_breakpoint(struct pt_regs *regs); +extern void timer_interrupt(int irq, struct pt_regs *regs); + +extern void do_notify_resume(struct pt_regs *regs, + unsigned long orig_i0, + unsigned long thread_info_flags); + +extern asmlinkage int syscall_trace_enter(struct pt_regs *regs); +extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); + +extern void bad_trap_tl1(struct pt_regs *regs, long lvl); + +extern void do_fpe_common(struct pt_regs *regs); +extern void do_fpieee(struct pt_regs *regs); +extern void do_fpother(struct pt_regs *regs); +extern void do_tof(struct pt_regs *regs); +extern void do_div0(struct pt_regs *regs); +extern void do_illegal_instruction(struct pt_regs *regs); +extern void mem_address_unaligned(struct pt_regs *regs, + unsigned long sfar, + unsigned long sfsr); +extern void sun4v_do_mna(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void do_privop(struct pt_regs *regs); +extern void do_privact(struct pt_regs *regs); +extern void do_cee(struct pt_regs *regs); +extern void do_cee_tl1(struct pt_regs *regs); +extern void do_dae_tl1(struct pt_regs *regs); +extern void do_iae_tl1(struct pt_regs *regs); +extern void do_div0_tl1(struct pt_regs *regs); +extern void do_fpdis_tl1(struct pt_regs *regs); +extern void do_fpieee_tl1(struct pt_regs *regs); +extern void do_fpother_tl1(struct pt_regs *regs); +extern void do_ill_tl1(struct pt_regs *regs); +extern void do_irq_tl1(struct pt_regs *regs); +extern void do_lddfmna_tl1(struct pt_regs *regs); +extern void do_stdfmna_tl1(struct pt_regs *regs); +extern void do_paw(struct pt_regs *regs); +extern void do_paw_tl1(struct pt_regs *regs); +extern void do_vaw(struct pt_regs *regs); +extern void do_vaw_tl1(struct pt_regs *regs); +extern void do_tof_tl1(struct pt_regs *regs); +extern void do_getpsr(struct pt_regs *regs); + +extern void spitfire_insn_access_exception(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_insn_access_exception_tl1(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_data_access_exception(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_data_access_exception_tl1(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void spitfire_access_error(struct pt_regs *regs, + unsigned long status_encoded, + unsigned long afar); + +extern void cheetah_fecc_handler(struct pt_regs *regs, + unsigned long afsr, + unsigned long afar); +extern void cheetah_cee_handler(struct pt_regs *regs, + unsigned long afsr, + unsigned long afar); +extern void cheetah_deferred_handler(struct pt_regs *regs, + unsigned long afsr, + unsigned long afar); +extern void cheetah_plus_parity_error(int type, struct pt_regs *regs); + +extern void sun4v_insn_access_exception(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_insn_access_exception_tl1(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_data_access_exception(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_data_access_exception_tl1(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); +extern void sun4v_resum_error(struct pt_regs *regs, + unsigned long offset); +extern void sun4v_resum_overflow(struct pt_regs *regs); +extern void sun4v_nonresum_error(struct pt_regs *regs, + unsigned long offset); +extern void sun4v_nonresum_overflow(struct pt_regs *regs); + +extern unsigned long sun4v_err_itlb_vaddr; +extern unsigned long sun4v_err_itlb_ctx; +extern unsigned long sun4v_err_itlb_pte; +extern unsigned long sun4v_err_itlb_error; + +extern void sun4v_itlb_error_report(struct pt_regs *regs, int tl); + +extern unsigned long sun4v_err_dtlb_vaddr; +extern unsigned long sun4v_err_dtlb_ctx; +extern unsigned long sun4v_err_dtlb_pte; +extern unsigned long sun4v_err_dtlb_error; + +extern void sun4v_dtlb_error_report(struct pt_regs *regs, int tl); +extern void hypervisor_tlbop_error(unsigned long err, + unsigned long op); +extern void hypervisor_tlbop_error_xcall(unsigned long err, + unsigned long op); + +/* WARNING: The error trap handlers in assembly know the precise + * layout of the following structure. + * + * C-level handlers in traps.c use this information to log the + * error and then determine how to recover (if possible). + */ +struct cheetah_err_info { +/*0x00*/u64 afsr; +/*0x08*/u64 afar; + + /* D-cache state */ +/*0x10*/u64 dcache_data[4]; /* The actual data */ +/*0x30*/u64 dcache_index; /* D-cache index */ +/*0x38*/u64 dcache_tag; /* D-cache tag/valid */ +/*0x40*/u64 dcache_utag; /* D-cache microtag */ +/*0x48*/u64 dcache_stag; /* D-cache snooptag */ + + /* I-cache state */ +/*0x50*/u64 icache_data[8]; /* The actual insns + predecode */ +/*0x90*/u64 icache_index; /* I-cache index */ +/*0x98*/u64 icache_tag; /* I-cache phys tag */ +/*0xa0*/u64 icache_utag; /* I-cache microtag */ +/*0xa8*/u64 icache_stag; /* I-cache snooptag */ +/*0xb0*/u64 icache_upper; /* I-cache upper-tag */ +/*0xb8*/u64 icache_lower; /* I-cache lower-tag */ + + /* E-cache state */ +/*0xc0*/u64 ecache_data[4]; /* 32 bytes from staging registers */ +/*0xe0*/u64 ecache_index; /* E-cache index */ +/*0xe8*/u64 ecache_tag; /* E-cache tag/state */ + +/*0xf0*/u64 __pad[32 - 30]; +}; +#define CHAFSR_INVALID ((u64)-1L) + +/* This is allocated at boot time based upon the largest hardware + * cpu ID in the system. We allocate two entries per cpu, one for + * TL==0 logging and one for TL >= 1 logging. + */ +extern struct cheetah_err_info *cheetah_error_log; + +/* UPA nodes send interrupt packet to UltraSparc with first data reg + * value low 5 (7 on Starfire) bits holding the IRQ identifier being + * delivered. We must translate this into a non-vector IRQ so we can + * set the softint on this cpu. + * + * To make processing these packets efficient and race free we use + * an array of irq buckets below. The interrupt vector handler in + * entry.S feeds incoming packets into per-cpu pil-indexed lists. + * + * If you make changes to ino_bucket, please update hand coded assembler + * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S + */ +struct ino_bucket { +/*0x00*/unsigned long __irq_chain_pa; + + /* Virtual interrupt number assigned to this INO. */ +/*0x08*/unsigned int __virt_irq; +/*0x0c*/unsigned int __pad; +}; + +extern struct ino_bucket *ivector_table; +extern unsigned long ivector_table_pa; + +extern void handler_irq(int irq, struct pt_regs *regs); +extern void init_irqwork_curcpu(void); +extern void __cpuinit sun4v_register_mondo_queues(int this_cpu); + +#endif /* _ENTRY_H */ diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S new file mode 100644 index 000000000000..786b185e6e3f --- /dev/null +++ b/arch/sparc/kernel/etrap_64.S @@ -0,0 +1,236 @@ +/* + * etrap.S: Preparing for entry into the kernel on Sparc V9. + * + * Copyright (C) 1996, 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + + +#include <asm/asi.h> +#include <asm/pstate.h> +#include <asm/ptrace.h> +#include <asm/page.h> +#include <asm/spitfire.h> +#include <asm/head.h> +#include <asm/processor.h> +#include <asm/mmu.h> + +#define TASK_REGOFF (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ) +#define ETRAP_PSTATE1 (PSTATE_TSO | PSTATE_PRIV) +#define ETRAP_PSTATE2 \ + (PSTATE_TSO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE) + +/* + * On entry, %g7 is return address - 0x4. + * %g4 and %g5 will be preserved %l4 and %l5 respectively. + */ + + .text + .align 64 + .globl etrap_syscall, etrap, etrap_irq, etraptl1 +etrap: rdpr %pil, %g2 +etrap_irq: clr %g3 +etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1) + rdpr %tstate, %g1 + or %g1, %g3, %g1 + sllx %g2, 20, %g3 + andcc %g1, TSTATE_PRIV, %g0 + or %g1, %g3, %g1 + bne,pn %xcc, 1f + sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2 + wrpr %g0, 7, %cleanwin + + sethi %hi(TASK_REGOFF), %g2 + sethi %hi(TSTATE_PEF), %g3 + or %g2, %lo(TASK_REGOFF), %g2 + and %g1, %g3, %g3 + brnz,pn %g3, 1f + add %g6, %g2, %g2 + wr %g0, 0, %fprs +1: rdpr %tpc, %g3 + + stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE] + rdpr %tnpc, %g1 + stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC] + rd %y, %g3 + stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] + rdpr %tt, %g1 + st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y] + sethi %hi(PT_REGS_MAGIC), %g3 + or %g3, %g1, %g1 + st %g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC] + + rdpr %cansave, %g1 + brnz,pt %g1, etrap_save + nop + + rdpr %cwp, %g1 + add %g1, 2, %g1 + wrpr %g1, %cwp + be,pt %xcc, etrap_user_spill + mov ASI_AIUP, %g3 + + rdpr %otherwin, %g3 + brz %g3, etrap_kernel_spill + mov ASI_AIUS, %g3 + +etrap_user_spill: + + wr %g3, 0x0, %asi + ldx [%g6 + TI_FLAGS], %g3 + and %g3, _TIF_32BIT, %g3 + brnz,pt %g3, etrap_user_spill_32bit + nop + ba,a,pt %xcc, etrap_user_spill_64bit + +etrap_save: save %g2, -STACK_BIAS, %sp + mov %g6, %l6 + + bne,pn %xcc, 3f + mov PRIMARY_CONTEXT, %l4 + rdpr %canrestore, %g3 + rdpr %wstate, %g2 + wrpr %g0, 0, %canrestore + sll %g2, 3, %g2 + mov 1, %l5 + stb %l5, [%l6 + TI_FPDEPTH] + + wrpr %g3, 0, %otherwin + wrpr %g2, 0, %wstate + sethi %hi(sparc64_kern_pri_context), %g2 + ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 + +661: stxa %g3, [%l4] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g3, [%l4] ASI_MMU + .previous + + sethi %hi(KERNBASE), %l4 + flush %l4 + mov ASI_AIUS, %l7 +2: mov %g4, %l4 + mov %g5, %l5 + add %g7, 4, %l2 + + /* Go to trap time globals so we can save them. */ +661: wrpr %g0, ETRAP_PSTATE1, %pstate + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + + stx %g1, [%sp + PTREGS_OFF + PT_V9_G1] + stx %g2, [%sp + PTREGS_OFF + PT_V9_G2] + sllx %l7, 24, %l7 + stx %g3, [%sp + PTREGS_OFF + PT_V9_G3] + rdpr %cwp, %l0 + stx %g4, [%sp + PTREGS_OFF + PT_V9_G4] + stx %g5, [%sp + PTREGS_OFF + PT_V9_G5] + stx %g6, [%sp + PTREGS_OFF + PT_V9_G6] + stx %g7, [%sp + PTREGS_OFF + PT_V9_G7] + or %l7, %l0, %l7 + sethi %hi(TSTATE_TSO | TSTATE_PEF), %l0 + or %l7, %l0, %l7 + wrpr %l2, %tnpc + wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate + stx %i0, [%sp + PTREGS_OFF + PT_V9_I0] + stx %i1, [%sp + PTREGS_OFF + PT_V9_I1] + stx %i2, [%sp + PTREGS_OFF + PT_V9_I2] + stx %i3, [%sp + PTREGS_OFF + PT_V9_I3] + stx %i4, [%sp + PTREGS_OFF + PT_V9_I4] + stx %i5, [%sp + PTREGS_OFF + PT_V9_I5] + stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] + mov %l6, %g6 + stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) + ldx [%g6 + TI_TASK], %g4 + done + +3: mov ASI_P, %l7 + ldub [%l6 + TI_FPDEPTH], %l5 + add %l6, TI_FPSAVED + 1, %l4 + srl %l5, 1, %l3 + add %l5, 2, %l5 + stb %l5, [%l6 + TI_FPDEPTH] + ba,pt %xcc, 2b + stb %g0, [%l4 + %l3] + nop + +etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. + * We place this right after pt_regs on the trap stack. + * The layout is: + * 0x00 TL1's TSTATE + * 0x08 TL1's TPC + * 0x10 TL1's TNPC + * 0x18 TL1's TT + * ... + * 0x58 TL4's TT + * 0x60 TL + */ + TRAP_LOAD_THREAD_REG(%g6, %g1) + sub %sp, ((4 * 8) * 4) + 8, %g2 + rdpr %tl, %g1 + + wrpr %g0, 1, %tl + rdpr %tstate, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x00] + rdpr %tpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x08] + rdpr %tnpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x10] + rdpr %tt, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x18] + + wrpr %g0, 2, %tl + rdpr %tstate, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x20] + rdpr %tpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x28] + rdpr %tnpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x30] + rdpr %tt, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x38] + + sethi %hi(is_sun4v), %g3 + lduw [%g3 + %lo(is_sun4v)], %g3 + brnz,pn %g3, finish_tl1_capture + nop + + wrpr %g0, 3, %tl + rdpr %tstate, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x40] + rdpr %tpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x48] + rdpr %tnpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x50] + rdpr %tt, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x58] + + wrpr %g0, 4, %tl + rdpr %tstate, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x60] + rdpr %tpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x68] + rdpr %tnpc, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x70] + rdpr %tt, %g3 + stx %g3, [%g2 + STACK_BIAS + 0x78] + + stx %g1, [%g2 + STACK_BIAS + 0x80] + +finish_tl1_capture: + wrpr %g0, 1, %tl +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(1) + .previous + + rdpr %tstate, %g1 + sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2 + ba,pt %xcc, 1b + andcc %g1, TSTATE_PRIV, %g0 + +#undef TASK_REGOFF +#undef ETRAP_PSTATE1 diff --git a/arch/sparc/kernel/fpu_traps.S b/arch/sparc/kernel/fpu_traps.S new file mode 100644 index 000000000000..a6864826a4bd --- /dev/null +++ b/arch/sparc/kernel/fpu_traps.S @@ -0,0 +1,384 @@ + /* This is trivial with the new code... */ + .globl do_fpdis + .type do_fpdis,#function +do_fpdis: + sethi %hi(TSTATE_PEF), %g4 + rdpr %tstate, %g5 + andcc %g5, %g4, %g0 + be,pt %xcc, 1f + nop + rd %fprs, %g5 + andcc %g5, FPRS_FEF, %g0 + be,pt %xcc, 1f + nop + + /* Legal state when DCR_IFPOE is set in Cheetah %dcr. */ + sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + add %g0, %g0, %g0 + ba,a,pt %xcc, rtrap + +1: TRAP_LOAD_THREAD_REG(%g6, %g1) + ldub [%g6 + TI_FPSAVED], %g5 + wr %g0, FPRS_FEF, %fprs + andcc %g5, FPRS_FEF, %g0 + be,a,pt %icc, 1f + clr %g7 + ldx [%g6 + TI_GSR], %g7 +1: andcc %g5, FPRS_DL, %g0 + bne,pn %icc, 2f + fzero %f0 + andcc %g5, FPRS_DU, %g0 + bne,pn %icc, 1f + fzero %f2 + faddd %f0, %f2, %f4 + fmuld %f0, %f2, %f6 + faddd %f0, %f2, %f8 + fmuld %f0, %f2, %f10 + faddd %f0, %f2, %f12 + fmuld %f0, %f2, %f14 + faddd %f0, %f2, %f16 + fmuld %f0, %f2, %f18 + faddd %f0, %f2, %f20 + fmuld %f0, %f2, %f22 + faddd %f0, %f2, %f24 + fmuld %f0, %f2, %f26 + faddd %f0, %f2, %f28 + fmuld %f0, %f2, %f30 + faddd %f0, %f2, %f32 + fmuld %f0, %f2, %f34 + faddd %f0, %f2, %f36 + fmuld %f0, %f2, %f38 + faddd %f0, %f2, %f40 + fmuld %f0, %f2, %f42 + faddd %f0, %f2, %f44 + fmuld %f0, %f2, %f46 + faddd %f0, %f2, %f48 + fmuld %f0, %f2, %f50 + faddd %f0, %f2, %f52 + fmuld %f0, %f2, %f54 + faddd %f0, %f2, %f56 + fmuld %f0, %f2, %f58 + b,pt %xcc, fpdis_exit2 + faddd %f0, %f2, %f60 +1: mov SECONDARY_CONTEXT, %g3 + add %g6, TI_FPREGS + 0x80, %g1 + faddd %f0, %f2, %f4 + fmuld %f0, %f2, %f6 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + + sethi %hi(sparc64_kern_sec_context), %g2 + ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + + membar #Sync + add %g6, TI_FPREGS + 0xc0, %g2 + faddd %f0, %f2, %f8 + fmuld %f0, %f2, %f10 + membar #Sync + ldda [%g1] ASI_BLK_S, %f32 + ldda [%g2] ASI_BLK_S, %f48 + membar #Sync + faddd %f0, %f2, %f12 + fmuld %f0, %f2, %f14 + faddd %f0, %f2, %f16 + fmuld %f0, %f2, %f18 + faddd %f0, %f2, %f20 + fmuld %f0, %f2, %f22 + faddd %f0, %f2, %f24 + fmuld %f0, %f2, %f26 + faddd %f0, %f2, %f28 + fmuld %f0, %f2, %f30 + b,pt %xcc, fpdis_exit + nop +2: andcc %g5, FPRS_DU, %g0 + bne,pt %icc, 3f + fzero %f32 + mov SECONDARY_CONTEXT, %g3 + fzero %f34 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + + add %g6, TI_FPREGS, %g1 + sethi %hi(sparc64_kern_sec_context), %g2 + ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + + membar #Sync + add %g6, TI_FPREGS + 0x40, %g2 + faddd %f32, %f34, %f36 + fmuld %f32, %f34, %f38 + membar #Sync + ldda [%g1] ASI_BLK_S, %f0 + ldda [%g2] ASI_BLK_S, %f16 + membar #Sync + faddd %f32, %f34, %f40 + fmuld %f32, %f34, %f42 + faddd %f32, %f34, %f44 + fmuld %f32, %f34, %f46 + faddd %f32, %f34, %f48 + fmuld %f32, %f34, %f50 + faddd %f32, %f34, %f52 + fmuld %f32, %f34, %f54 + faddd %f32, %f34, %f56 + fmuld %f32, %f34, %f58 + faddd %f32, %f34, %f60 + fmuld %f32, %f34, %f62 + ba,pt %xcc, fpdis_exit + nop +3: mov SECONDARY_CONTEXT, %g3 + add %g6, TI_FPREGS, %g1 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + + sethi %hi(sparc64_kern_sec_context), %g2 + ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + + membar #Sync + mov 0x40, %g2 + membar #Sync + ldda [%g1] ASI_BLK_S, %f0 + ldda [%g1 + %g2] ASI_BLK_S, %f16 + add %g1, 0x80, %g1 + ldda [%g1] ASI_BLK_S, %f32 + ldda [%g1 + %g2] ASI_BLK_S, %f48 + membar #Sync +fpdis_exit: + +661: stxa %g5, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g5, [%g3] ASI_MMU + .previous + + membar #Sync +fpdis_exit2: + wr %g7, 0, %gsr + ldx [%g6 + TI_XFSR], %fsr + rdpr %tstate, %g3 + or %g3, %g4, %g3 ! anal... + wrpr %g3, %tstate + wr %g0, FPRS_FEF, %fprs ! clean DU/DL bits + retry + .size do_fpdis,.-do_fpdis + + .align 32 + .type fp_other_bounce,#function +fp_other_bounce: + call do_fpother + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size fp_other_bounce,.-fp_other_bounce + + .align 32 + .globl do_fpother_check_fitos + .type do_fpother_check_fitos,#function +do_fpother_check_fitos: + TRAP_LOAD_THREAD_REG(%g6, %g1) + sethi %hi(fp_other_bounce - 4), %g7 + or %g7, %lo(fp_other_bounce - 4), %g7 + + /* NOTE: Need to preserve %g7 until we fully commit + * to the fitos fixup. + */ + stx %fsr, [%g6 + TI_XFSR] + rdpr %tstate, %g3 + andcc %g3, TSTATE_PRIV, %g0 + bne,pn %xcc, do_fptrap_after_fsr + nop + ldx [%g6 + TI_XFSR], %g3 + srlx %g3, 14, %g1 + and %g1, 7, %g1 + cmp %g1, 2 ! Unfinished FP-OP + bne,pn %xcc, do_fptrap_after_fsr + sethi %hi(1 << 23), %g1 ! Inexact + andcc %g3, %g1, %g0 + bne,pn %xcc, do_fptrap_after_fsr + rdpr %tpc, %g1 + lduwa [%g1] ASI_AIUP, %g3 ! This cannot ever fail +#define FITOS_MASK 0xc1f83fe0 +#define FITOS_COMPARE 0x81a01880 + sethi %hi(FITOS_MASK), %g1 + or %g1, %lo(FITOS_MASK), %g1 + and %g3, %g1, %g1 + sethi %hi(FITOS_COMPARE), %g2 + or %g2, %lo(FITOS_COMPARE), %g2 + cmp %g1, %g2 + bne,pn %xcc, do_fptrap_after_fsr + nop + std %f62, [%g6 + TI_FPREGS + (62 * 4)] + sethi %hi(fitos_table_1), %g1 + and %g3, 0x1f, %g2 + or %g1, %lo(fitos_table_1), %g1 + sllx %g2, 2, %g2 + jmpl %g1 + %g2, %g0 + ba,pt %xcc, fitos_emul_continue + +fitos_table_1: + fitod %f0, %f62 + fitod %f1, %f62 + fitod %f2, %f62 + fitod %f3, %f62 + fitod %f4, %f62 + fitod %f5, %f62 + fitod %f6, %f62 + fitod %f7, %f62 + fitod %f8, %f62 + fitod %f9, %f62 + fitod %f10, %f62 + fitod %f11, %f62 + fitod %f12, %f62 + fitod %f13, %f62 + fitod %f14, %f62 + fitod %f15, %f62 + fitod %f16, %f62 + fitod %f17, %f62 + fitod %f18, %f62 + fitod %f19, %f62 + fitod %f20, %f62 + fitod %f21, %f62 + fitod %f22, %f62 + fitod %f23, %f62 + fitod %f24, %f62 + fitod %f25, %f62 + fitod %f26, %f62 + fitod %f27, %f62 + fitod %f28, %f62 + fitod %f29, %f62 + fitod %f30, %f62 + fitod %f31, %f62 + +fitos_emul_continue: + sethi %hi(fitos_table_2), %g1 + srl %g3, 25, %g2 + or %g1, %lo(fitos_table_2), %g1 + and %g2, 0x1f, %g2 + sllx %g2, 2, %g2 + jmpl %g1 + %g2, %g0 + ba,pt %xcc, fitos_emul_fini + +fitos_table_2: + fdtos %f62, %f0 + fdtos %f62, %f1 + fdtos %f62, %f2 + fdtos %f62, %f3 + fdtos %f62, %f4 + fdtos %f62, %f5 + fdtos %f62, %f6 + fdtos %f62, %f7 + fdtos %f62, %f8 + fdtos %f62, %f9 + fdtos %f62, %f10 + fdtos %f62, %f11 + fdtos %f62, %f12 + fdtos %f62, %f13 + fdtos %f62, %f14 + fdtos %f62, %f15 + fdtos %f62, %f16 + fdtos %f62, %f17 + fdtos %f62, %f18 + fdtos %f62, %f19 + fdtos %f62, %f20 + fdtos %f62, %f21 + fdtos %f62, %f22 + fdtos %f62, %f23 + fdtos %f62, %f24 + fdtos %f62, %f25 + fdtos %f62, %f26 + fdtos %f62, %f27 + fdtos %f62, %f28 + fdtos %f62, %f29 + fdtos %f62, %f30 + fdtos %f62, %f31 + +fitos_emul_fini: + ldd [%g6 + TI_FPREGS + (62 * 4)], %f62 + done + .size do_fpother_check_fitos,.-do_fpother_check_fitos + + .align 32 + .globl do_fptrap + .type do_fptrap,#function +do_fptrap: + TRAP_LOAD_THREAD_REG(%g6, %g1) + stx %fsr, [%g6 + TI_XFSR] +do_fptrap_after_fsr: + ldub [%g6 + TI_FPSAVED], %g3 + rd %fprs, %g1 + or %g3, %g1, %g3 + stb %g3, [%g6 + TI_FPSAVED] + rd %gsr, %g3 + stx %g3, [%g6 + TI_GSR] + mov SECONDARY_CONTEXT, %g3 + +661: ldxa [%g3] ASI_DMMU, %g5 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%g3] ASI_MMU, %g5 + .previous + + sethi %hi(sparc64_kern_sec_context), %g2 + ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2 + +661: stxa %g2, [%g3] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g3] ASI_MMU + .previous + + membar #Sync + add %g6, TI_FPREGS, %g2 + andcc %g1, FPRS_DL, %g0 + be,pn %icc, 4f + mov 0x40, %g3 + stda %f0, [%g2] ASI_BLK_S + stda %f16, [%g2 + %g3] ASI_BLK_S + andcc %g1, FPRS_DU, %g0 + be,pn %icc, 5f +4: add %g2, 128, %g2 + stda %f32, [%g2] ASI_BLK_S + stda %f48, [%g2 + %g3] ASI_BLK_S +5: mov SECONDARY_CONTEXT, %g1 + membar #Sync + +661: stxa %g5, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g5, [%g1] ASI_MMU + .previous + + membar #Sync + ba,pt %xcc, etrap + wr %g0, 0, %fprs + .size do_fptrap,.-do_fptrap diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c new file mode 100644 index 000000000000..d0218e73f982 --- /dev/null +++ b/arch/sparc/kernel/ftrace.c @@ -0,0 +1,76 @@ +#include <linux/spinlock.h> +#include <linux/hardirq.h> +#include <linux/ftrace.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/list.h> + +#include <asm/ftrace.h> + +static const u32 ftrace_nop = 0x01000000; + +unsigned char *ftrace_nop_replace(void) +{ + return (char *)&ftrace_nop; +} + +unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static u32 call; + s32 off; + + off = ((s32)addr - (s32)ip); + call = 0x40000000 | ((u32)off >> 2); + + return (unsigned char *) &call; +} + +int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + u32 old = *(u32 *)old_code; + u32 new = *(u32 *)new_code; + u32 replaced; + int faulted; + + __asm__ __volatile__( + "1: cas [%[ip]], %[old], %[new]\n" + " flush %[ip]\n" + " mov 0, %[faulted]\n" + "2:\n" + " .section .fixup,#alloc,#execinstr\n" + " .align 4\n" + "3: sethi %%hi(2b), %[faulted]\n" + " jmpl %[faulted] + %%lo(2b), %%g0\n" + " mov 1, %[faulted]\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .align 4\n" + " .word 1b, 3b\n" + " .previous\n" + : "=r" (replaced), [faulted] "=r" (faulted) + : [new] "0" (new), [old] "r" (old), [ip] "r" (ip) + : "memory"); + + if (replaced != old && replaced != new) + faulted = 2; + + return faulted; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[MCOUNT_INSN_SIZE], *new; + + memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + return ftrace_modify_code(ip, old, new); +} + +int __init ftrace_dyn_arch_init(void *data) +{ + ftrace_mcount_set(data); + return 0; +} diff --git a/arch/sparc/kernel/getsetcc.S b/arch/sparc/kernel/getsetcc.S new file mode 100644 index 000000000000..a14d272d2061 --- /dev/null +++ b/arch/sparc/kernel/getsetcc.S @@ -0,0 +1,24 @@ + .globl getcc + .type getcc,#function +getcc: + ldx [%o0 + PT_V9_TSTATE], %o1 + srlx %o1, 32, %o1 + and %o1, 0xf, %o1 + retl + stx %o1, [%o0 + PT_V9_G1] + .size getcc,.-getcc + + .globl setcc + .type setcc,#function +setcc: + ldx [%o0 + PT_V9_TSTATE], %o1 + ldx [%o0 + PT_V9_G1], %o2 + or %g0, %ulo(TSTATE_ICC), %o3 + sllx %o3, 32, %o3 + andn %o1, %o3, %o1 + sllx %o2, 32, %o2 + and %o2, %o3, %o2 + or %o1, %o2, %o1 + retl + stx %o1, [%o0 + PT_V9_TSTATE] + .size setcc,.-setcc diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S new file mode 100644 index 000000000000..8ffee714f932 --- /dev/null +++ b/arch/sparc/kernel/head_64.S @@ -0,0 +1,900 @@ +/* head.S: Initial boot code for the Sparc64 port of Linux. + * + * Copyright (C) 1996, 1997, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996 David Sitsky (David.Sitsky@anu.edu.au) + * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 Miguel de Icaza (miguel@nuclecu.unam.mx) + */ + +#include <linux/version.h> +#include <linux/errno.h> +#include <linux/threads.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/thread_info.h> +#include <asm/asi.h> +#include <asm/pstate.h> +#include <asm/ptrace.h> +#include <asm/spitfire.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/errno.h> +#include <asm/signal.h> +#include <asm/processor.h> +#include <asm/lsu.h> +#include <asm/dcr.h> +#include <asm/dcu.h> +#include <asm/head.h> +#include <asm/ttable.h> +#include <asm/mmu.h> +#include <asm/cpudata.h> +#include <asm/pil.h> +#include <asm/estate.h> +#include <asm/sfafsr.h> +#include <asm/unistd.h> + +/* This section from from _start to sparc64_boot_end should fit into + * 0x0000000000404000 to 0x0000000000408000. + */ + .text + .globl start, _start, stext, _stext +_start: +start: +_stext: +stext: +! 0x0000000000404000 + b sparc64_boot + flushw /* Flush register file. */ + +/* This stuff has to be in sync with SILO and other potential boot loaders + * Fields should be kept upward compatible and whenever any change is made, + * HdrS version should be incremented. + */ + .global root_flags, ram_flags, root_dev + .global sparc_ramdisk_image, sparc_ramdisk_size + .global sparc_ramdisk_image64 + + .ascii "HdrS" + .word LINUX_VERSION_CODE + + /* History: + * + * 0x0300 : Supports being located at other than 0x4000 + * 0x0202 : Supports kernel params string + * 0x0201 : Supports reboot_command + */ + .half 0x0301 /* HdrS version */ + +root_flags: + .half 1 +root_dev: + .half 0 +ram_flags: + .half 0 +sparc_ramdisk_image: + .word 0 +sparc_ramdisk_size: + .word 0 + .xword reboot_command + .xword bootstr_info +sparc_ramdisk_image64: + .xword 0 + .word _end + + /* PROM cif handler code address is in %o4. */ +sparc64_boot: + mov %o4, %l7 + + /* We need to remap the kernel. Use position independant + * code to remap us to KERNBASE. + * + * SILO can invoke us with 32-bit address masking enabled, + * so make sure that's clear. + */ + rdpr %pstate, %g1 + andn %g1, PSTATE_AM, %g1 + wrpr %g1, 0x0, %pstate + ba,a,pt %xcc, 1f + + .globl prom_finddev_name, prom_chosen_path, prom_root_node + .globl prom_getprop_name, prom_mmu_name, prom_peer_name + .globl prom_callmethod_name, prom_translate_name, prom_root_compatible + .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache + .globl prom_boot_mapped_pc, prom_boot_mapping_mode + .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low + .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible + .globl is_sun4v, sun4v_chip_type, prom_set_trap_table_name +prom_peer_name: + .asciz "peer" +prom_compatible_name: + .asciz "compatible" +prom_finddev_name: + .asciz "finddevice" +prom_chosen_path: + .asciz "/chosen" +prom_cpu_path: + .asciz "/cpu" +prom_getprop_name: + .asciz "getprop" +prom_mmu_name: + .asciz "mmu" +prom_callmethod_name: + .asciz "call-method" +prom_translate_name: + .asciz "translate" +prom_map_name: + .asciz "map" +prom_unmap_name: + .asciz "unmap" +prom_set_trap_table_name: + .asciz "SUNW,set-trap-table" +prom_sun4v_name: + .asciz "sun4v" +prom_niagara_prefix: + .asciz "SUNW,UltraSPARC-T" + .align 4 +prom_root_compatible: + .skip 64 +prom_cpu_compatible: + .skip 64 +prom_root_node: + .word 0 +prom_mmu_ihandle_cache: + .word 0 +prom_boot_mapped_pc: + .word 0 +prom_boot_mapping_mode: + .word 0 + .align 8 +prom_boot_mapping_phys_high: + .xword 0 +prom_boot_mapping_phys_low: + .xword 0 +is_sun4v: + .word 0 +sun4v_chip_type: + .word SUN4V_CHIP_INVALID +1: + rd %pc, %l0 + + mov (1b - prom_peer_name), %l1 + sub %l0, %l1, %l1 + mov 0, %l2 + + /* prom_root_node = prom_peer(0) */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "peer" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, 0 + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! prom root node + mov (1b - prom_root_node), %l1 + sub %l0, %l1, %l1 + stw %l4, [%l1] + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_compatible_name), %l2 + mov (1b - prom_root_compatible), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_getproperty(prom_root_node, "compatible", + * &prom_root_compatible, 64) + */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, prom_root_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_root_compatible + mov 64, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + mov (1b - prom_finddev_name), %l1 + mov (1b - prom_chosen_path), %l2 + mov (1b - prom_boot_mapped_pc), %l3 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l3, %l3 + stw %l0, [%l3] + sub %sp, (192 + 128), %sp + + /* chosen_node = prom_finddevice("/chosen") */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/chosen" + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! chosen device node + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_mmu_name), %l2 + mov (1b - prom_mmu_ihandle_cache), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu") */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, chosen_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "mmu" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_mmu_ihandle_cache + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, sizeof(arg3) + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + mov (1b - prom_callmethod_name), %l1 + mov (1b - prom_translate_name), %l2 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + lduw [%l5], %l5 ! prom_mmu_ihandle_cache + + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "call-method" + mov 3, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 3 + mov 5, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate" + stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache + /* PAGE align */ + srlx %l0, 13, %l3 + sllx %l3, 13, %l3 + stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC + stx %g0, [%sp + 2047 + 128 + 0x30] ! res1 + stx %g0, [%sp + 2047 + 128 + 0x38] ! res2 + stx %g0, [%sp + 2047 + 128 + 0x40] ! res3 + stx %g0, [%sp + 2047 + 128 + 0x48] ! res4 + stx %g0, [%sp + 2047 + 128 + 0x50] ! res5 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x40], %l1 ! translation mode + mov (1b - prom_boot_mapping_mode), %l4 + sub %l0, %l4, %l4 + stw %l1, [%l4] + mov (1b - prom_boot_mapping_phys_high), %l4 + sub %l0, %l4, %l4 + ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high + stx %l2, [%l4 + 0x0] + ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low + /* 4MB align */ + srlx %l3, 22, %l3 + sllx %l3, 22, %l3 + stx %l3, [%l4 + 0x8] + + /* Leave service as-is, "call-method" */ + mov 7, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 7 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + mov (1b - prom_map_name), %l3 + sub %l0, %l3, %l3 + stx %l3, [%sp + 2047 + 128 + 0x18] ! arg1: "map" + /* Leave arg2 as-is, prom_mmu_ihandle_cache */ + mov -1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: mode (-1 default) + /* 4MB align the kernel image size. */ + set (_end - KERNBASE), %l3 + set ((4 * 1024 * 1024) - 1), %l4 + add %l3, %l4, %l3 + andn %l3, %l4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4: roundup(ksize, 4MB) + sethi %hi(KERNBASE), %l3 + stx %l3, [%sp + 2047 + 128 + 0x38] ! arg5: vaddr (KERNBASE) + stx %g0, [%sp + 2047 + 128 + 0x40] ! arg6: empty + mov (1b - prom_boot_mapping_phys_low), %l3 + sub %l0, %l3, %l3 + ldx [%l3], %l3 + stx %l3, [%sp + 2047 + 128 + 0x48] ! arg7: phys addr + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + add %sp, (192 + 128), %sp + + sethi %hi(prom_root_compatible), %g1 + or %g1, %lo(prom_root_compatible), %g1 + sethi %hi(prom_sun4v_name), %g7 + or %g7, %lo(prom_sun4v_name), %g7 + mov 5, %g3 +90: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 + bne,pn %icc, 80f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 90b + add %g1, 1, %g1 + + sethi %hi(is_sun4v), %g1 + or %g1, %lo(is_sun4v), %g1 + mov 1, %g7 + stw %g7, [%g1] + + /* cpu_node = prom_finddevice("/cpu") */ + mov (1b - prom_finddev_name), %l1 + mov (1b - prom_cpu_path), %l2 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %sp, (192 + 128), %sp + + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice" + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu" + stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node + + mov (1b - prom_getprop_name), %l1 + mov (1b - prom_compatible_name), %l2 + mov (1b - prom_cpu_compatible), %l5 + sub %l0, %l1, %l1 + sub %l0, %l2, %l2 + sub %l0, %l5, %l5 + + /* prom_getproperty(cpu_node, "compatible", + * &prom_cpu_compatible, 64) + */ + stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" + mov 4, %l3 + stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 + mov 1, %l3 + stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 + stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node + stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible" + stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible + mov 64, %l3 + stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size + stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 + call %l7 + add %sp, (2047 + 128), %o0 ! argument array + + add %sp, (192 + 128), %sp + + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + sethi %hi(prom_niagara_prefix), %g7 + or %g7, %lo(prom_niagara_prefix), %g7 + mov 17, %g3 +90: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 + bne,pn %icc, 4f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 90b + add %g1, 1, %g1 + + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + ldub [%g1 + 17], %g2 + cmp %g2, '1' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA1, %g4 + cmp %g2, '2' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA2, %g4 +4: + mov SUN4V_CHIP_UNKNOWN, %g4 +5: sethi %hi(sun4v_chip_type), %g2 + or %g2, %lo(sun4v_chip_type), %g2 + stw %g4, [%g2] + +80: + BRANCH_IF_SUN4V(g1, jump_to_sun4u_init) + BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) + ba,pt %xcc, spitfire_boot + nop + +cheetah_plus_boot: + /* Preserve OBP chosen DCU and DCR register settings. */ + ba,pt %xcc, cheetah_generic_boot + nop + +cheetah_boot: + mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1 + wr %g1, %asr18 + + sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7 + or %g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7 + sllx %g7, 32, %g7 + or %g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7 + stxa %g7, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + +cheetah_generic_boot: + mov TSB_EXTENSION_P, %g3 + stxa %g0, [%g3] ASI_DMMU + stxa %g0, [%g3] ASI_IMMU + membar #Sync + + mov TSB_EXTENSION_S, %g3 + stxa %g0, [%g3] ASI_DMMU + membar #Sync + + mov TSB_EXTENSION_N, %g3 + stxa %g0, [%g3] ASI_DMMU + stxa %g0, [%g3] ASI_IMMU + membar #Sync + + ba,a,pt %xcc, jump_to_sun4u_init + +spitfire_boot: + /* Typically PROM has already enabled both MMU's and both on-chip + * caches, but we do it here anyway just to be paranoid. + */ + mov (LSU_CONTROL_IC|LSU_CONTROL_DC|LSU_CONTROL_IM|LSU_CONTROL_DM), %g1 + stxa %g1, [%g0] ASI_LSU_CONTROL + membar #Sync + +jump_to_sun4u_init: + /* + * Make sure we are in privileged mode, have address masking, + * using the ordinary globals and have enabled floating + * point. + * + * Again, typically PROM has left %pil at 13 or similar, and + * (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE) in %pstate. + */ + wrpr %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate + wr %g0, 0, %fprs + + set sun4u_init, %g2 + jmpl %g2 + %g0, %g0 + nop + + .section .text.init.refok +sun4u_init: + BRANCH_IF_SUN4V(g1, sun4v_init) + + /* Set ctx 0 */ + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_DMMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_DMMU + membar #Sync + + ba,pt %xcc, sun4u_continue + nop + +sun4v_init: + /* Set ctx 0 */ + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + ba,pt %xcc, niagara_tlb_fixup + nop + +sun4u_continue: + BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup) + + ba,pt %xcc, spitfire_tlb_fixup + nop + +niagara_tlb_fixup: + mov 3, %g2 /* Set TLB type to hypervisor. */ + sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] + + /* Patch copy/clear ops. */ + sethi %hi(sun4v_chip_type), %g1 + lduw [%g1 + %lo(sun4v_chip_type)], %g1 + cmp %g1, SUN4V_CHIP_NIAGARA1 + be,pt %xcc, niagara_patch + cmp %g1, SUN4V_CHIP_NIAGARA2 + be,pt %xcc, niagara2_patch + nop + + call generic_patch_copyops + nop + call generic_patch_bzero + nop + call generic_patch_pageops + nop + + ba,a,pt %xcc, 80f +niagara2_patch: + call niagara2_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara2_patch_pageops + nop + + ba,a,pt %xcc, 80f + +niagara_patch: + call niagara_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara_patch_pageops + nop + +80: + /* Patch TLB/cache ops. */ + call hypervisor_patch_cachetlbops + nop + + ba,pt %xcc, tlb_fixup_done + nop + +cheetah_tlb_fixup: + mov 2, %g2 /* Set TLB type to cheetah+. */ + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f) + + mov 1, %g2 /* Set TLB type to cheetah. */ + +1: sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] + + /* Patch copy/page operations to cheetah optimized versions. */ + call cheetah_patch_copyops + nop + call cheetah_patch_copy_page + nop + call cheetah_patch_cachetlbops + nop + + ba,pt %xcc, tlb_fixup_done + nop + +spitfire_tlb_fixup: + /* Set TLB type to spitfire. */ + mov 0, %g2 + sethi %hi(tlb_type), %g1 + stw %g2, [%g1 + %lo(tlb_type)] + +tlb_fixup_done: + sethi %hi(init_thread_union), %g6 + or %g6, %lo(init_thread_union), %g6 + ldx [%g6 + TI_TASK], %g4 + mov %sp, %l6 + + wr %g0, ASI_P, %asi + mov 1, %g1 + sllx %g1, THREAD_SHIFT, %g1 + sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 + add %g6, %g1, %sp + mov 0, %fp + + /* Set per-cpu pointer initially to zero, this makes + * the boot-cpu use the in-kernel-image per-cpu areas + * before setup_per_cpu_area() is invoked. + */ + clr %g5 + + wrpr %g0, 0, %wstate + wrpr %g0, 0x0, %tl + + /* Clear the bss */ + sethi %hi(__bss_start), %o0 + or %o0, %lo(__bss_start), %o0 + sethi %hi(_end), %o1 + or %o1, %lo(_end), %o1 + call __bzero + sub %o1, %o0, %o1 + +#ifdef CONFIG_LOCKDEP + /* We have this call this super early, as even prom_init can grab + * spinlocks and thus call into the lockdep code. + */ + call lockdep_init + nop +#endif + + mov %l6, %o1 ! OpenPROM stack + call prom_init + mov %l7, %o0 ! OpenPROM cif handler + + /* Initialize current_thread_info()->cpu as early as possible. + * In order to do that accurately we have to patch up the get_cpuid() + * assembler sequences. And that, in turn, requires that we know + * if we are on a Starfire box or not. While we're here, patch up + * the sun4v sequences as well. + */ + call check_if_starfire + nop + call per_cpu_patch + nop + call sun4v_patch + nop + +#ifdef CONFIG_SMP + call hard_smp_processor_id + nop + cmp %o0, NR_CPUS + blu,pt %xcc, 1f + nop + call boot_cpu_id_too_large + nop + /* Not reached... */ + +1: + /* If we boot on a non-zero cpu, all of the per-cpu + * variable references we make before setting up the + * per-cpu areas will use a bogus offset. Put a + * compensating factor into __per_cpu_base to handle + * this cleanly. + * + * What the per-cpu code calculates is: + * + * __per_cpu_base + (cpu << __per_cpu_shift) + * + * These two variables are zero initially, so to + * make it all cancel out to zero we need to put + * "0 - (cpu << 0)" into __per_cpu_base so that the + * above formula evaluates to zero. + * + * We cannot even perform a printk() until this stuff + * is setup as that calls cpu_clock() which uses + * per-cpu variables. + */ + sub %g0, %o0, %o1 + sethi %hi(__per_cpu_base), %o2 + stx %o1, [%o2 + %lo(__per_cpu_base)] +#else + mov 0, %o0 +#endif + sth %o0, [%g6 + TI_CPU] + + call prom_init_report + nop + + /* Off we go.... */ + call start_kernel + nop + /* Not reached... */ + + .previous + + /* This is meant to allow the sharing of this code between + * boot processor invocation (via setup_tba() below) and + * secondary processor startup (via trampoline.S). The + * former does use this code, the latter does not yet due + * to some complexities. That should be fixed up at some + * point. + * + * There used to be enormous complexity wrt. transferring + * over from the firwmare's trap table to the Linux kernel's. + * For example, there was a chicken & egg problem wrt. building + * the OBP page tables, yet needing to be on the Linux kernel + * trap table (to translate PAGE_OFFSET addresses) in order to + * do that. + * + * We now handle OBP tlb misses differently, via linear lookups + * into the prom_trans[] array. So that specific problem no + * longer exists. Yet, unfortunately there are still some issues + * preventing trampoline.S from using this code... ho hum. + */ + .globl setup_trap_table +setup_trap_table: + save %sp, -192, %sp + + /* Force interrupts to be disabled. */ + rdpr %pstate, %l0 + andn %l0, PSTATE_IE, %o1 + wrpr %o1, 0x0, %pstate + rdpr %pil, %l1 + wrpr %g0, PIL_NORMAL_MAX, %pil + + /* Make the firmware call to jump over to the Linux trap table. */ + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f + nop + + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + /* Compute physical address: + * + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) + */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 + sethi %hi(sparc64_ttable_tl0), %o0 + + set prom_set_trap_table_name, %g2 + stx %g2, [%sp + 2047 + 128 + 0x00] + mov 2, %g2 + stx %g2, [%sp + 2047 + 128 + 0x08] + mov 0, %g2 + stx %g2, [%sp + 2047 + 128 + 0x10] + stx %o0, [%sp + 2047 + 128 + 0x18] + stx %o1, [%sp + 2047 + 128 + 0x20] + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x08], %o1 + call %o1 + add %sp, (2047 + 128), %o0 + + ba,pt %xcc, 2f + nop + +1: sethi %hi(sparc64_ttable_tl0), %o0 + set prom_set_trap_table_name, %g2 + stx %g2, [%sp + 2047 + 128 + 0x00] + mov 1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x08] + mov 0, %g2 + stx %g2, [%sp + 2047 + 128 + 0x10] + stx %o0, [%sp + 2047 + 128 + 0x18] + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x08], %o1 + call %o1 + add %sp, (2047 + 128), %o0 + + /* Start using proper page size encodings in ctx register. */ +2: sethi %hi(sparc64_kern_pri_context), %g3 + ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 + + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + membar #Sync + + BRANCH_IF_SUN4V(o2, 1f) + + /* Kill PROM timer */ + sethi %hi(0x80000000), %o2 + sllx %o2, 32, %o2 + wr %o2, 0, %tick_cmpr + + BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) + + ba,pt %xcc, 2f + nop + + /* Disable STICK_INT interrupts. */ +1: + sethi %hi(0x80000000), %o2 + sllx %o2, 32, %o2 + wr %o2, %asr25 + +2: + wrpr %g0, %g0, %wstate + + call init_irqwork_curcpu + nop + + /* Now we can restore interrupt state. */ + wrpr %l0, 0, %pstate + wrpr %l1, 0x0, %pil + + ret + restore + + .globl setup_tba +setup_tba: + save %sp, -192, %sp + + /* The boot processor is the only cpu which invokes this + * routine, the other cpus set things up via trampoline.S. + * So save the OBP trap table address here. + */ + rdpr %tba, %g7 + sethi %hi(prom_tba), %o1 + or %o1, %lo(prom_tba), %o1 + stx %g7, [%o1] + + call setup_trap_table + nop + + ret + restore +sparc64_boot_end: + +#include "etrap_64.S" +#include "rtrap_64.S" +#include "winfixup.S" +#include "fpu_traps.S" +#include "ivec.S" +#include "getsetcc.S" +#include "utrap.S" +#include "spiterrs.S" +#include "cherrs.S" +#include "misctrap.S" +#include "syscalls.S" +#include "helpers.S" +#include "hvcalls.S" +#include "sun4v_tlb_miss.S" +#include "sun4v_ivec.S" +#include "ktlb.S" +#include "tsb.S" + +/* + * The following skip makes sure the trap table in ttable.S is aligned + * on a 32K boundary as required by the v9 specs for TBA register. + * + * We align to a 32K boundary, then we have the 32K kernel TSB, + * the 64K kernel 4MB TSB, and then the 32K aligned trap table. + */ +1: + .skip 0x4000 + _start - 1b + +! 0x0000000000408000 + + .globl swapper_tsb +swapper_tsb: + .skip (32 * 1024) + + .globl swapper_4m_tsb +swapper_4m_tsb: + .skip (64 * 1024) + +! 0x0000000000420000 + + /* Some care needs to be exercised if you try to move the + * location of the trap table relative to other things. For + * one thing there are br* instructions in some of the + * trap table entires which branch back to code in ktlb.S + * Those instructions can only handle a signed 16-bit + * displacement. + * + * There is a binutils bug (bugzilla #4558) which causes + * the relocation overflow checks for such instructions to + * not be done correctly. So bintuils will not notice the + * error and will instead write junk into the relocation and + * you'll have an unbootable kernel. + */ +#include "ttable.S" + +! 0x0000000000428000 + +#include "systbls_64.S" + + .data + .align 8 + .globl prom_tba, tlb_type +prom_tba: .xword 0 +tlb_type: .word 0 /* Must NOT end up in BSS */ + .section ".fixup",#alloc,#execinstr + + .globl __ret_efault, __retl_efault +__ret_efault: + ret + restore %g0, -EFAULT, %o0 +__retl_efault: + retl + mov -EFAULT, %o0 diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S new file mode 100644 index 000000000000..314dd0c9fc5b --- /dev/null +++ b/arch/sparc/kernel/helpers.S @@ -0,0 +1,63 @@ + .align 32 + .globl __flushw_user + .type __flushw_user,#function +__flushw_user: + rdpr %otherwin, %g1 + brz,pn %g1, 2f + clr %g2 +1: save %sp, -128, %sp + rdpr %otherwin, %g1 + brnz,pt %g1, 1b + add %g2, 1, %g2 +1: sub %g2, 1, %g2 + brnz,pt %g2, 1b + restore %g0, %g0, %g0 +2: retl + nop + .size __flushw_user,.-__flushw_user + + /* Flush %fp and %i7 to the stack for all register + * windows active inside of the cpu. This allows + * show_stack_trace() to avoid using an expensive + * 'flushw'. + */ + .globl stack_trace_flush + .type stack_trace_flush,#function +stack_trace_flush: + rdpr %pstate, %o0 + wrpr %o0, PSTATE_IE, %pstate + + rdpr %cwp, %g1 + rdpr %canrestore, %g2 + sub %g1, 1, %g3 + +1: brz,pn %g2, 2f + sub %g2, 1, %g2 + wrpr %g3, %cwp + stx %fp, [%sp + STACK_BIAS + RW_V9_I6] + stx %i7, [%sp + STACK_BIAS + RW_V9_I7] + ba,pt %xcc, 1b + sub %g3, 1, %g3 + +2: wrpr %g1, %cwp + wrpr %o0, %pstate + + retl + nop + .size stack_trace_flush,.-stack_trace_flush + +#ifdef CONFIG_SMP + .globl hard_smp_processor_id + .type hard_smp_processor_id,#function +hard_smp_processor_id: +#endif + .globl real_hard_smp_processor_id + .type real_hard_smp_processor_id,#function +real_hard_smp_processor_id: + __GET_CPUID(%o0) + retl + nop +#ifdef CONFIG_SMP + .size hard_smp_processor_id,.-hard_smp_processor_id +#endif + .size real_hard_smp_processor_id,.-real_hard_smp_processor_id diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c new file mode 100644 index 000000000000..1d272c3b5740 --- /dev/null +++ b/arch/sparc/kernel/hvapi.c @@ -0,0 +1,193 @@ +/* hvapi.c: Hypervisor API management. + * + * Copyright (C) 2007 David S. Miller <davem@davemloft.net> + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> + +#include <asm/hypervisor.h> +#include <asm/oplib.h> + +/* If the hypervisor indicates that the API setting + * calls are unsupported, by returning HV_EBADTRAP or + * HV_ENOTSUPPORTED, we assume that API groups with the + * PRE_API flag set are major 1 minor 0. + */ +struct api_info { + unsigned long group; + unsigned long major; + unsigned long minor; + unsigned int refcnt; + unsigned int flags; +#define FLAG_PRE_API 0x00000001 +}; + +static struct api_info api_table[] = { + { .group = HV_GRP_SUN4V, .flags = FLAG_PRE_API }, + { .group = HV_GRP_CORE, .flags = FLAG_PRE_API }, + { .group = HV_GRP_INTR, }, + { .group = HV_GRP_SOFT_STATE, }, + { .group = HV_GRP_PCI, .flags = FLAG_PRE_API }, + { .group = HV_GRP_LDOM, }, + { .group = HV_GRP_SVC_CHAN, .flags = FLAG_PRE_API }, + { .group = HV_GRP_NCS, .flags = FLAG_PRE_API }, + { .group = HV_GRP_RNG, }, + { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, + { .group = HV_GRP_FIRE_PERF, }, + { .group = HV_GRP_N2_CPU, }, + { .group = HV_GRP_NIU, }, + { .group = HV_GRP_VF_CPU, }, + { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, +}; + +static DEFINE_SPINLOCK(hvapi_lock); + +static struct api_info *__get_info(unsigned long group) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(api_table); i++) { + if (api_table[i].group == group) + return &api_table[i]; + } + return NULL; +} + +static void __get_ref(struct api_info *p) +{ + p->refcnt++; +} + +static void __put_ref(struct api_info *p) +{ + if (--p->refcnt == 0) { + unsigned long ignore; + + sun4v_set_version(p->group, 0, 0, &ignore); + p->major = p->minor = 0; + } +} + +/* Register a hypervisor API specification. It indicates the + * API group and desired major+minor. + * + * If an existing API registration exists '0' (success) will + * be returned if it is compatible with the one being registered. + * Otherwise a negative error code will be returned. + * + * Otherwise an attempt will be made to negotiate the requested + * API group/major/minor with the hypervisor, and errors returned + * if that does not succeed. + */ +int sun4v_hvapi_register(unsigned long group, unsigned long major, + unsigned long *minor) +{ + struct api_info *p; + unsigned long flags; + int ret; + + spin_lock_irqsave(&hvapi_lock, flags); + p = __get_info(group); + ret = -EINVAL; + if (p) { + if (p->refcnt) { + ret = -EINVAL; + if (p->major == major) { + *minor = p->minor; + ret = 0; + } + } else { + unsigned long actual_minor; + unsigned long hv_ret; + + hv_ret = sun4v_set_version(group, major, *minor, + &actual_minor); + ret = -EINVAL; + if (hv_ret == HV_EOK) { + *minor = actual_minor; + p->major = major; + p->minor = actual_minor; + ret = 0; + } else if (hv_ret == HV_EBADTRAP || + hv_ret == HV_ENOTSUPPORTED) { + if (p->flags & FLAG_PRE_API) { + if (major == 1) { + p->major = 1; + p->minor = 0; + *minor = 0; + ret = 0; + } + } + } + } + + if (ret == 0) + __get_ref(p); + } + spin_unlock_irqrestore(&hvapi_lock, flags); + + return ret; +} +EXPORT_SYMBOL(sun4v_hvapi_register); + +void sun4v_hvapi_unregister(unsigned long group) +{ + struct api_info *p; + unsigned long flags; + + spin_lock_irqsave(&hvapi_lock, flags); + p = __get_info(group); + if (p) + __put_ref(p); + spin_unlock_irqrestore(&hvapi_lock, flags); +} +EXPORT_SYMBOL(sun4v_hvapi_unregister); + +int sun4v_hvapi_get(unsigned long group, + unsigned long *major, + unsigned long *minor) +{ + struct api_info *p; + unsigned long flags; + int ret; + + spin_lock_irqsave(&hvapi_lock, flags); + ret = -EINVAL; + p = __get_info(group); + if (p && p->refcnt) { + *major = p->major; + *minor = p->minor; + ret = 0; + } + spin_unlock_irqrestore(&hvapi_lock, flags); + + return ret; +} +EXPORT_SYMBOL(sun4v_hvapi_get); + +void __init sun4v_hvapi_init(void) +{ + unsigned long group, major, minor; + + group = HV_GRP_SUN4V; + major = 1; + minor = 0; + if (sun4v_hvapi_register(group, major, &minor)) + goto bad; + + group = HV_GRP_CORE; + major = 1; + minor = 1; + if (sun4v_hvapi_register(group, major, &minor)) + goto bad; + + return; + +bad: + prom_printf("HVAPI: Cannot register API group " + "%lx with major(%u) minor(%u)\n", + group, major, minor); + prom_halt(); +} diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S new file mode 100644 index 000000000000..8a5f35ffb15e --- /dev/null +++ b/arch/sparc/kernel/hvcalls.S @@ -0,0 +1,800 @@ + /* %o0: devhandle + * %o1: devino + * + * returns %o0: sysino + */ +ENTRY(sun4v_devino_to_sysino) + mov HV_FAST_INTR_DEVINO2SYSINO, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 +ENDPROC(sun4v_devino_to_sysino) + + /* %o0: sysino + * + * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + */ +ENTRY(sun4v_intr_getenabled) + mov HV_FAST_INTR_GETENABLED, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 +ENDPROC(sun4v_intr_getenabled) + + /* %o0: sysino + * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED}) + */ +ENTRY(sun4v_intr_setenabled) + mov HV_FAST_INTR_SETENABLED, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_intr_setenabled) + + /* %o0: sysino + * + * returns %o0: intr_state (HV_INTR_STATE_*) + */ +ENTRY(sun4v_intr_getstate) + mov HV_FAST_INTR_GETSTATE, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 +ENDPROC(sun4v_intr_getstate) + + /* %o0: sysino + * %o1: intr_state (HV_INTR_STATE_*) + */ +ENTRY(sun4v_intr_setstate) + mov HV_FAST_INTR_SETSTATE, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_intr_setstate) + + /* %o0: sysino + * + * returns %o0: cpuid + */ +ENTRY(sun4v_intr_gettarget) + mov HV_FAST_INTR_GETTARGET, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 +ENDPROC(sun4v_intr_gettarget) + + /* %o0: sysino + * %o1: cpuid + */ +ENTRY(sun4v_intr_settarget) + mov HV_FAST_INTR_SETTARGET, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_intr_settarget) + + /* %o0: cpuid + * %o1: pc + * %o2: rtba + * %o3: arg0 + * + * returns %o0: status + */ +ENTRY(sun4v_cpu_start) + mov HV_FAST_CPU_START, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_cpu_start) + + /* %o0: cpuid + * + * returns %o0: status + */ +ENTRY(sun4v_cpu_stop) + mov HV_FAST_CPU_STOP, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_cpu_stop) + + /* returns %o0: status */ +ENTRY(sun4v_cpu_yield) + mov HV_FAST_CPU_YIELD, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_cpu_yield) + + /* %o0: type + * %o1: queue paddr + * %o2: num queue entries + * + * returns %o0: status + */ +ENTRY(sun4v_cpu_qconf) + mov HV_FAST_CPU_QCONF, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_cpu_qconf) + + /* %o0: num cpus in cpu list + * %o1: cpu list paddr + * %o2: mondo block paddr + * + * returns %o0: status + */ +ENTRY(sun4v_cpu_mondo_send) + mov HV_FAST_CPU_MONDO_SEND, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_cpu_mondo_send) + + /* %o0: CPU ID + * + * returns %o0: -status if status non-zero, else + * %o0: cpu state as HV_CPU_STATE_* + */ +ENTRY(sun4v_cpu_state) + mov HV_FAST_CPU_STATE, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 1f + sub %g0, %o0, %o0 + mov %o1, %o0 +1: retl + nop +ENDPROC(sun4v_cpu_state) + + /* %o0: virtual address + * %o1: must be zero + * %o2: TTE + * %o3: HV_MMU_* flags + * + * returns %o0: status + */ +ENTRY(sun4v_mmu_map_perm_addr) + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_mmu_map_perm_addr) + + /* %o0: number of TSB descriptions + * %o1: TSB descriptions real address + * + * returns %o0: status + */ +ENTRY(sun4v_mmu_tsb_ctx0) + mov HV_FAST_MMU_TSB_CTX0, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_mmu_tsb_ctx0) + + /* %o0: API group number + * %o1: pointer to unsigned long major number storage + * %o2: pointer to unsigned long minor number storage + * + * returns %o0: status + */ +ENTRY(sun4v_get_version) + mov HV_CORE_GET_VER, %o5 + mov %o1, %o3 + mov %o2, %o4 + ta HV_CORE_TRAP + stx %o1, [%o3] + retl + stx %o2, [%o4] +ENDPROC(sun4v_get_version) + + /* %o0: API group number + * %o1: desired major number + * %o2: desired minor number + * %o3: pointer to unsigned long actual minor number storage + * + * returns %o0: status + */ +ENTRY(sun4v_set_version) + mov HV_CORE_SET_VER, %o5 + mov %o3, %o4 + ta HV_CORE_TRAP + retl + stx %o1, [%o4] +ENDPROC(sun4v_set_version) + + /* %o0: pointer to unsigned long time + * + * returns %o0: status + */ +ENTRY(sun4v_tod_get) + mov %o0, %o4 + mov HV_FAST_TOD_GET, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_tod_get) + + /* %o0: time + * + * returns %o0: status + */ +ENTRY(sun4v_tod_set) + mov HV_FAST_TOD_SET, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_tod_set) + + /* %o0: pointer to unsigned long status + * + * returns %o0: signed character + */ +ENTRY(sun4v_con_getchar) + mov %o0, %o4 + mov HV_FAST_CONS_GETCHAR, %o5 + clr %o0 + clr %o1 + ta HV_FAST_TRAP + stx %o0, [%o4] + retl + sra %o1, 0, %o0 +ENDPROC(sun4v_con_getchar) + + /* %o0: signed long character + * + * returns %o0: status + */ +ENTRY(sun4v_con_putchar) + mov HV_FAST_CONS_PUTCHAR, %o5 + ta HV_FAST_TRAP + retl + sra %o0, 0, %o0 +ENDPROC(sun4v_con_putchar) + + /* %o0: buffer real address + * %o1: buffer size + * %o2: pointer to unsigned long bytes_read + * + * returns %o0: status + */ +ENTRY(sun4v_con_read) + mov %o2, %o4 + mov HV_FAST_CONS_READ, %o5 + ta HV_FAST_TRAP + brnz %o0, 1f + cmp %o1, -1 /* break */ + be,a,pn %icc, 1f + mov %o1, %o0 + cmp %o1, -2 /* hup */ + be,a,pn %icc, 1f + mov %o1, %o0 + stx %o1, [%o4] +1: retl + nop +ENDPROC(sun4v_con_read) + + /* %o0: buffer real address + * %o1: buffer size + * %o2: pointer to unsigned long bytes_written + * + * returns %o0: status + */ +ENTRY(sun4v_con_write) + mov %o2, %o4 + mov HV_FAST_CONS_WRITE, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_con_write) + + /* %o0: soft state + * %o1: address of description string + * + * returns %o0: status + */ +ENTRY(sun4v_mach_set_soft_state) + mov HV_FAST_MACH_SET_SOFT_STATE, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_mach_set_soft_state) + + /* %o0: exit code + * + * Does not return. + */ +ENTRY(sun4v_mach_exit) + mov HV_FAST_MACH_EXIT, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_mach_exit) + + /* %o0: buffer real address + * %o1: buffer length + * %o2: pointer to unsigned long real_buf_len + * + * returns %o0: status + */ +ENTRY(sun4v_mach_desc) + mov %o2, %o4 + mov HV_FAST_MACH_DESC, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_mach_desc) + + /* %o0: new timeout in milliseconds + * %o1: pointer to unsigned long orig_timeout + * + * returns %o0: status + */ +ENTRY(sun4v_mach_set_watchdog) + mov %o1, %o4 + mov HV_FAST_MACH_SET_WATCHDOG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_mach_set_watchdog) + + /* No inputs and does not return. */ +ENTRY(sun4v_mach_sir) + mov %o1, %o4 + mov HV_FAST_MACH_SIR, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_mach_sir) + + /* %o0: channel + * %o1: ra + * %o2: num_entries + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_tx_qconf) + mov HV_FAST_LDC_TX_QCONF, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_tx_qconf) + + /* %o0: channel + * %o1: pointer to unsigned long ra + * %o2: pointer to unsigned long num_entries + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_tx_qinfo) + mov %o1, %g1 + mov %o2, %g2 + mov HV_FAST_LDC_TX_QINFO, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + retl + nop +ENDPROC(sun4v_ldc_tx_qinfo) + + /* %o0: channel + * %o1: pointer to unsigned long head_off + * %o2: pointer to unsigned long tail_off + * %o2: pointer to unsigned long chan_state + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_tx_get_state) + mov %o1, %g1 + mov %o2, %g2 + mov %o3, %g3 + mov HV_FAST_LDC_TX_GET_STATE, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + stx %o3, [%g3] + retl + nop +ENDPROC(sun4v_ldc_tx_get_state) + + /* %o0: channel + * %o1: tail_off + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_tx_set_qtail) + mov HV_FAST_LDC_TX_SET_QTAIL, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_tx_set_qtail) + + /* %o0: channel + * %o1: ra + * %o2: num_entries + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_rx_qconf) + mov HV_FAST_LDC_RX_QCONF, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_rx_qconf) + + /* %o0: channel + * %o1: pointer to unsigned long ra + * %o2: pointer to unsigned long num_entries + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_rx_qinfo) + mov %o1, %g1 + mov %o2, %g2 + mov HV_FAST_LDC_RX_QINFO, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + retl + nop +ENDPROC(sun4v_ldc_rx_qinfo) + + /* %o0: channel + * %o1: pointer to unsigned long head_off + * %o2: pointer to unsigned long tail_off + * %o2: pointer to unsigned long chan_state + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_rx_get_state) + mov %o1, %g1 + mov %o2, %g2 + mov %o3, %g3 + mov HV_FAST_LDC_RX_GET_STATE, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + stx %o3, [%g3] + retl + nop +ENDPROC(sun4v_ldc_rx_get_state) + + /* %o0: channel + * %o1: head_off + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_rx_set_qhead) + mov HV_FAST_LDC_RX_SET_QHEAD, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_rx_set_qhead) + + /* %o0: channel + * %o1: ra + * %o2: num_entries + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_set_map_table) + mov HV_FAST_LDC_SET_MAP_TABLE, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_set_map_table) + + /* %o0: channel + * %o1: pointer to unsigned long ra + * %o2: pointer to unsigned long num_entries + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_get_map_table) + mov %o1, %g1 + mov %o2, %g2 + mov HV_FAST_LDC_GET_MAP_TABLE, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + retl + nop +ENDPROC(sun4v_ldc_get_map_table) + + /* %o0: channel + * %o1: dir_code + * %o2: tgt_raddr + * %o3: lcl_raddr + * %o4: len + * %o5: pointer to unsigned long actual_len + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_copy) + mov %o5, %g1 + mov HV_FAST_LDC_COPY, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + retl + nop +ENDPROC(sun4v_ldc_copy) + + /* %o0: channel + * %o1: cookie + * %o2: pointer to unsigned long ra + * %o3: pointer to unsigned long perm + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_mapin) + mov %o2, %g1 + mov %o3, %g2 + mov HV_FAST_LDC_MAPIN, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + retl + nop +ENDPROC(sun4v_ldc_mapin) + + /* %o0: ra + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_unmap) + mov HV_FAST_LDC_UNMAP, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_unmap) + + /* %o0: channel + * %o1: cookie + * %o2: mte_cookie + * + * returns %o0: status + */ +ENTRY(sun4v_ldc_revoke) + mov HV_FAST_LDC_REVOKE, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ldc_revoke) + + /* %o0: device handle + * %o1: device INO + * %o2: pointer to unsigned long cookie + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_get_cookie) + mov %o2, %g1 + mov HV_FAST_VINTR_GET_COOKIE, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + retl + nop +ENDPROC(sun4v_vintr_get_cookie) + + /* %o0: device handle + * %o1: device INO + * %o2: cookie + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_set_cookie) + mov HV_FAST_VINTR_SET_COOKIE, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_vintr_set_cookie) + + /* %o0: device handle + * %o1: device INO + * %o2: pointer to unsigned long valid_state + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_get_valid) + mov %o2, %g1 + mov HV_FAST_VINTR_GET_VALID, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + retl + nop +ENDPROC(sun4v_vintr_get_valid) + + /* %o0: device handle + * %o1: device INO + * %o2: valid_state + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_set_valid) + mov HV_FAST_VINTR_SET_VALID, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_vintr_set_valid) + + /* %o0: device handle + * %o1: device INO + * %o2: pointer to unsigned long state + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_get_state) + mov %o2, %g1 + mov HV_FAST_VINTR_GET_STATE, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + retl + nop +ENDPROC(sun4v_vintr_get_state) + + /* %o0: device handle + * %o1: device INO + * %o2: state + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_set_state) + mov HV_FAST_VINTR_SET_STATE, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_vintr_set_state) + + /* %o0: device handle + * %o1: device INO + * %o2: pointer to unsigned long cpuid + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_get_target) + mov %o2, %g1 + mov HV_FAST_VINTR_GET_TARGET, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + retl + nop +ENDPROC(sun4v_vintr_get_target) + + /* %o0: device handle + * %o1: device INO + * %o2: cpuid + * + * returns %o0: status + */ +ENTRY(sun4v_vintr_set_target) + mov HV_FAST_VINTR_SET_TARGET, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_vintr_set_target) + + /* %o0: NCS sub-function + * %o1: sub-function arg real-address + * %o2: sub-function arg size + * + * returns %o0: status + */ +ENTRY(sun4v_ncs_request) + mov HV_FAST_NCS_REQUEST, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ncs_request) + +ENTRY(sun4v_svc_send) + save %sp, -192, %sp + mov %i0, %o0 + mov %i1, %o1 + mov %i2, %o2 + mov HV_FAST_SVC_SEND, %o5 + ta HV_FAST_TRAP + stx %o1, [%i3] + ret + restore +ENDPROC(sun4v_svc_send) + +ENTRY(sun4v_svc_recv) + save %sp, -192, %sp + mov %i0, %o0 + mov %i1, %o1 + mov %i2, %o2 + mov HV_FAST_SVC_RECV, %o5 + ta HV_FAST_TRAP + stx %o1, [%i3] + ret + restore +ENDPROC(sun4v_svc_recv) + +ENTRY(sun4v_svc_getstatus) + mov HV_FAST_SVC_GETSTATUS, %o5 + mov %o1, %o4 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_svc_getstatus) + +ENTRY(sun4v_svc_setstatus) + mov HV_FAST_SVC_SETSTATUS, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_svc_setstatus) + +ENTRY(sun4v_svc_clrstatus) + mov HV_FAST_SVC_CLRSTATUS, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_svc_clrstatus) + +ENTRY(sun4v_mmustat_conf) + mov %o1, %o4 + mov HV_FAST_MMUSTAT_CONF, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_mmustat_conf) + +ENTRY(sun4v_mmustat_info) + mov %o0, %o4 + mov HV_FAST_MMUSTAT_INFO, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_mmustat_info) + +ENTRY(sun4v_mmu_demap_all) + clr %o0 + clr %o1 + mov HV_MMU_ALL, %o2 + mov HV_FAST_MMU_DEMAP_ALL, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_mmu_demap_all) + +ENTRY(sun4v_niagara_getperf) + mov %o0, %o4 + mov HV_FAST_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_niagara_getperf) + +ENTRY(sun4v_niagara_setperf) + mov HV_FAST_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_niagara_setperf) + +ENTRY(sun4v_niagara2_getperf) + mov %o0, %o4 + mov HV_FAST_N2_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_niagara2_getperf) + +ENTRY(sun4v_niagara2_setperf) + mov HV_FAST_N2_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_niagara2_setperf) diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S new file mode 100644 index 000000000000..9365432904d6 --- /dev/null +++ b/arch/sparc/kernel/hvtramp.S @@ -0,0 +1,140 @@ +/* hvtramp.S: Hypervisor start-cpu trampoline code. + * + * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> + */ + +#include <linux/init.h> + +#include <asm/thread_info.h> +#include <asm/hypervisor.h> +#include <asm/scratchpad.h> +#include <asm/spitfire.h> +#include <asm/hvtramp.h> +#include <asm/pstate.h> +#include <asm/ptrace.h> +#include <asm/head.h> +#include <asm/asi.h> +#include <asm/pil.h> + + __CPUINIT + .align 8 + .globl hv_cpu_startup, hv_cpu_startup_end + + /* This code executes directly out of the hypervisor + * with physical addressing (va==pa). %o0 contains + * our client argument which for Linux points to + * a descriptor data structure which defines the + * MMU entries we need to load up. + * + * After we set things up we enable the MMU and call + * into the kernel. + * + * First setup basic privileged cpu state. + */ +hv_cpu_startup: + SET_GL(0) + wrpr %g0, PIL_NORMAL_MAX, %pil + wrpr %g0, 0, %canrestore + wrpr %g0, 0, %otherwin + wrpr %g0, 6, %cansave + wrpr %g0, 6, %cleanwin + wrpr %g0, 0, %cwp + wrpr %g0, 0, %wstate + wrpr %g0, 0, %tl + + sethi %hi(sparc64_ttable_tl0), %g1 + wrpr %g1, %tba + + mov %o0, %l0 + + lduw [%l0 + HVTRAMP_DESCR_CPU], %g1 + mov SCRATCHPAD_CPUID, %g2 + stxa %g1, [%g2] ASI_SCRATCHPAD + + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + mov 0, %l1 + lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2 + add %l0, HVTRAMP_DESCR_MAPS, %l3 + +1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0 + clr %o1 + ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2 + mov HV_MMU_IMMU | HV_MMU_DMMU, %o3 + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + ta HV_FAST_TRAP + + brnz,pn %o0, 80f + nop + + add %l1, 1, %l1 + cmp %l1, %l2 + blt,a,pt %xcc, 1b + add %l3, HVTRAMP_MAPPING_SIZE, %l3 + + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0 + mov HV_FAST_MMU_FAULT_AREA_CONF, %o5 + ta HV_FAST_TRAP + + brnz,pn %o0, 80f + nop + + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate + + ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6 + + mov 1, %o0 + set 1f, %o1 + mov HV_FAST_MMU_ENABLE, %o5 + ta HV_FAST_TRAP + + ba,pt %xcc, 80f + nop + +1: + wr %g0, 0, %fprs + wr %g0, ASI_P, %asi + + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov %l6, %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp + + call init_irqwork_curcpu + nop + call hard_smp_processor_id + nop + + call sun4v_register_mondo_queues + nop + + call init_cur_cpu_trap + mov %g6, %o0 + + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate + + call smp_callin + nop + call cpu_idle + mov 0, %o0 + call cpu_panic + nop + +80: ba,pt %xcc, 80b + nop + + .align 8 +hv_cpu_startup_end: diff --git a/arch/sparc/kernel/idprom_64.c b/arch/sparc/kernel/idprom_64.c new file mode 100644 index 000000000000..5b45a808c621 --- /dev/null +++ b/arch/sparc/kernel/idprom_64.c @@ -0,0 +1,49 @@ +/* + * idprom.c: Routines to load the idprom into kernel addresses and + * interpret the data contained within. + * + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/init.h> + +#include <asm/oplib.h> +#include <asm/idprom.h> + +struct idprom *idprom; +static struct idprom idprom_buffer; + +/* Calculate the IDPROM checksum (xor of the data bytes). */ +static unsigned char __init calc_idprom_cksum(struct idprom *idprom) +{ + unsigned char cksum, i, *ptr = (unsigned char *)idprom; + + for (i = cksum = 0; i <= 0x0E; i++) + cksum ^= *ptr++; + + return cksum; +} + +/* Create a local IDPROM copy and verify integrity. */ +void __init idprom_init(void) +{ + prom_get_idprom((char *) &idprom_buffer, sizeof(idprom_buffer)); + + idprom = &idprom_buffer; + + if (idprom->id_format != 0x01) { + prom_printf("IDPROM: Warning, unknown format type!\n"); + } + + if (idprom->id_cksum != calc_idprom_cksum(idprom)) { + prom_printf("IDPROM: Warning, checksum failure (nvram=%x, calc=%x)!\n", + idprom->id_cksum, calc_idprom_cksum(idprom)); + } + + printk("Ethernet address: %02x:%02x:%02x:%02x:%02x:%02x\n", + idprom->id_ethaddr[0], idprom->id_ethaddr[1], + idprom->id_ethaddr[2], idprom->id_ethaddr[3], + idprom->id_ethaddr[4], idprom->id_ethaddr[5]); +} diff --git a/arch/sparc/kernel/init_task_64.c b/arch/sparc/kernel/init_task_64.c new file mode 100644 index 000000000000..d2b312381c19 --- /dev/null +++ b/arch/sparc/kernel/init_task_64.c @@ -0,0 +1,35 @@ +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/processor.h> + +static struct fs_struct init_fs = INIT_FS; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* .text section in head.S is aligned at 2 page boundary and this gets linked + * right after that so that the init_thread_union is aligned properly as well. + * We really don't need this special alignment like the Intel does, but + * I do it anyways for completeness. + */ +__asm__ (".text"); +union thread_union init_thread_union = { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +EXPORT_SYMBOL(init_task); + +__asm__(".data"); +struct task_struct init_task = INIT_TASK(init_task); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c new file mode 100644 index 000000000000..1cc1995531e2 --- /dev/null +++ b/arch/sparc/kernel/iommu.c @@ -0,0 +1,866 @@ +/* iommu.c: Generic sparc64 IOMMU support. + * + * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/errno.h> +#include <linux/iommu-helper.h> + +#ifdef CONFIG_PCI +#include <linux/pci.h> +#endif + +#include <asm/iommu.h> + +#include "iommu_common.h" + +#define STC_CTXMATCH_ADDR(STC, CTX) \ + ((STC)->strbuf_ctxmatch_base + ((CTX) << 3)) +#define STC_FLUSHFLAG_INIT(STC) \ + (*((STC)->strbuf_flushflag) = 0UL) +#define STC_FLUSHFLAG_SET(STC) \ + (*((STC)->strbuf_flushflag) != 0UL) + +#define iommu_read(__reg) \ +({ u64 __ret; \ + __asm__ __volatile__("ldxa [%1] %2, %0" \ + : "=r" (__ret) \ + : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \ + : "memory"); \ + __ret; \ +}) +#define iommu_write(__reg, __val) \ + __asm__ __volatile__("stxa %0, [%1] %2" \ + : /* no outputs */ \ + : "r" (__val), "r" (__reg), \ + "i" (ASI_PHYS_BYPASS_EC_E)) + +/* Must be invoked under the IOMMU lock. */ +static void iommu_flushall(struct iommu *iommu) +{ + if (iommu->iommu_flushinv) { + iommu_write(iommu->iommu_flushinv, ~(u64)0); + } else { + unsigned long tag; + int entry; + + tag = iommu->iommu_tags; + for (entry = 0; entry < 16; entry++) { + iommu_write(tag, 0); + tag += 8; + } + + /* Ensure completion of previous PIO writes. */ + (void) iommu_read(iommu->write_complete_reg); + } +} + +#define IOPTE_CONSISTENT(CTX) \ + (IOPTE_VALID | IOPTE_CACHE | \ + (((CTX) << 47) & IOPTE_CONTEXT)) + +#define IOPTE_STREAMING(CTX) \ + (IOPTE_CONSISTENT(CTX) | IOPTE_STBUF) + +/* Existing mappings are never marked invalid, instead they + * are pointed to a dummy page. + */ +#define IOPTE_IS_DUMMY(iommu, iopte) \ + ((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa) + +static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte) +{ + unsigned long val = iopte_val(*iopte); + + val &= ~IOPTE_PAGE; + val |= iommu->dummy_page_pa; + + iopte_val(*iopte) = val; +} + +/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle' + * facility it must all be done in one pass while under the iommu lock. + * + * On sun4u platforms, we only flush the IOMMU once every time we've passed + * over the entire page table doing allocations. Therefore we only ever advance + * the hint and cannot backtrack it. + */ +unsigned long iommu_range_alloc(struct device *dev, + struct iommu *iommu, + unsigned long npages, + unsigned long *handle) +{ + unsigned long n, end, start, limit, boundary_size; + struct iommu_arena *arena = &iommu->arena; + int pass = 0; + + /* This allocator was derived from x86_64's bit string search */ + + /* Sanity check */ + if (unlikely(npages == 0)) { + if (printk_ratelimit()) + WARN_ON(1); + return DMA_ERROR_CODE; + } + + if (handle && *handle) + start = *handle; + else + start = arena->hint; + + limit = arena->limit; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning and flush. + */ + if (start >= limit) { + start = 0; + if (iommu->flush_all) + iommu->flush_all(iommu); + } + + again: + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << IO_PAGE_SHIFT); + else + boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT); + + n = iommu_area_alloc(arena->map, limit, start, npages, + iommu->page_table_map_base >> IO_PAGE_SHIFT, + boundary_size >> IO_PAGE_SHIFT, 0); + if (n == -1) { + if (likely(pass < 1)) { + /* First failure, rescan from the beginning. */ + start = 0; + if (iommu->flush_all) + iommu->flush_all(iommu); + pass++; + goto again; + } else { + /* Second failure, give up */ + return DMA_ERROR_CODE; + } + } + + end = n + npages; + + arena->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; + + return n; +} + +void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages) +{ + struct iommu_arena *arena = &iommu->arena; + unsigned long entry; + + entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; + + iommu_area_free(arena->map, entry, npages); +} + +int iommu_table_init(struct iommu *iommu, int tsbsize, + u32 dma_offset, u32 dma_addr_mask, + int numa_node) +{ + unsigned long i, order, sz, num_tsb_entries; + struct page *page; + + num_tsb_entries = tsbsize / sizeof(iopte_t); + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_addr_mask; + + /* Allocate and initialize the free area map. */ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); + if (!iommu->arena.map) { + printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); + return -ENOMEM; + } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; + + if (tlb_type != hypervisor) + iommu->flush_all = iommu_flushall; + + /* Allocate and initialize the dummy page which we + * set inactive IO PTEs to point to. + */ + page = alloc_pages_node(numa_node, GFP_KERNEL, 0); + if (!page) { + printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n"); + goto out_free_map; + } + iommu->dummy_page = (unsigned long) page_address(page); + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + + /* Now allocate and setup the IOMMU page table itself. */ + order = get_order(tsbsize); + page = alloc_pages_node(numa_node, GFP_KERNEL, order); + if (!page) { + printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n"); + goto out_free_dummy_page; + } + iommu->page_table = (iopte_t *)page_address(page); + + for (i = 0; i < num_tsb_entries; i++) + iopte_make_dummy(iommu, &iommu->page_table[i]); + + return 0; + +out_free_dummy_page: + free_page(iommu->dummy_page); + iommu->dummy_page = 0UL; + +out_free_map: + kfree(iommu->arena.map); + iommu->arena.map = NULL; + + return -ENOMEM; +} + +static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu, + unsigned long npages) +{ + unsigned long entry; + + entry = iommu_range_alloc(dev, iommu, npages, NULL); + if (unlikely(entry == DMA_ERROR_CODE)) + return NULL; + + return iommu->page_table + entry; +} + +static int iommu_alloc_ctx(struct iommu *iommu) +{ + int lowest = iommu->ctx_lowest_free; + int sz = IOMMU_NUM_CTXS - lowest; + int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest); + + if (unlikely(n == sz)) { + n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1); + if (unlikely(n == lowest)) { + printk(KERN_WARNING "IOMMU: Ran out of contexts.\n"); + n = 0; + } + } + if (n) + __set_bit(n, iommu->ctx_bitmap); + + return n; +} + +static inline void iommu_free_ctx(struct iommu *iommu, int ctx) +{ + if (likely(ctx)) { + __clear_bit(ctx, iommu->ctx_bitmap); + if (ctx < iommu->ctx_lowest_free) + iommu->ctx_lowest_free = ctx; + } +} + +static void *dma_4u_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp) +{ + unsigned long flags, order, first_page; + struct iommu *iommu; + struct page *page; + int npages, nid; + iopte_t *iopte; + void *ret; + + size = IO_PAGE_ALIGN(size); + order = get_order(size); + if (order >= 10) + return NULL; + + nid = dev->archdata.numa_node; + page = alloc_pages_node(nid, gfp, order); + if (unlikely(!page)) + return NULL; + + first_page = (unsigned long) page_address(page); + memset((char *)first_page, 0, PAGE_SIZE << order); + + iommu = dev->archdata.iommu; + + spin_lock_irqsave(&iommu->lock, flags); + iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(iopte == NULL)) { + free_pages(first_page, order); + return NULL; + } + + *dma_addrp = (iommu->page_table_map_base + + ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); + ret = (void *) first_page; + npages = size >> IO_PAGE_SHIFT; + first_page = __pa(first_page); + while (npages--) { + iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) | + IOPTE_WRITE | + (first_page & IOPTE_PAGE)); + iopte++; + first_page += IO_PAGE_SIZE; + } + + return ret; +} + +static void dma_4u_free_coherent(struct device *dev, size_t size, + void *cpu, dma_addr_t dvma) +{ + struct iommu *iommu; + iopte_t *iopte; + unsigned long flags, order, npages; + + npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; + iommu = dev->archdata.iommu; + iopte = iommu->page_table + + ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + spin_lock_irqsave(&iommu->lock, flags); + + iommu_range_free(iommu, dvma, npages); + + spin_unlock_irqrestore(&iommu->lock, flags); + + order = get_order(size); + if (order < 10) + free_pages((unsigned long)cpu, order); +} + +static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz, + enum dma_data_direction direction) +{ + struct iommu *iommu; + struct strbuf *strbuf; + iopte_t *base; + unsigned long flags, npages, oaddr; + unsigned long i, base_paddr, ctx; + u32 bus_addr, ret; + unsigned long iopte_protection; + + iommu = dev->archdata.iommu; + strbuf = dev->archdata.stc; + + if (unlikely(direction == DMA_NONE)) + goto bad_no_ctx; + + oaddr = (unsigned long)ptr; + npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + + spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(dev, iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(!base)) + goto bad; + + bus_addr = (iommu->page_table_map_base + + ((base - iommu->page_table) << IO_PAGE_SHIFT)); + ret = bus_addr | (oaddr & ~IO_PAGE_MASK); + base_paddr = __pa(oaddr & IO_PAGE_MASK); + if (strbuf->strbuf_enabled) + iopte_protection = IOPTE_STREAMING(ctx); + else + iopte_protection = IOPTE_CONSISTENT(ctx); + if (direction != DMA_TO_DEVICE) + iopte_protection |= IOPTE_WRITE; + + for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) + iopte_val(*base) = iopte_protection | base_paddr; + + return ret; + +bad: + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); + return DMA_ERROR_CODE; +} + +static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu, + u32 vaddr, unsigned long ctx, unsigned long npages, + enum dma_data_direction direction) +{ + int limit; + + if (strbuf->strbuf_ctxflush && + iommu->iommu_ctxflush) { + unsigned long matchreg, flushreg; + u64 val; + + flushreg = strbuf->strbuf_ctxflush; + matchreg = STC_CTXMATCH_ADDR(strbuf, ctx); + + iommu_write(flushreg, ctx); + val = iommu_read(matchreg); + val &= 0xffff; + if (!val) + goto do_flush_sync; + + while (val) { + if (val & 0x1) + iommu_write(flushreg, ctx); + val >>= 1; + } + val = iommu_read(matchreg); + if (unlikely(val)) { + printk(KERN_WARNING "strbuf_flush: ctx flush " + "timeout matchreg[%lx] ctx[%lx]\n", + val, ctx); + goto do_page_flush; + } + } else { + unsigned long i; + + do_page_flush: + for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE) + iommu_write(strbuf->strbuf_pflush, vaddr); + } + +do_flush_sync: + /* If the device could not have possibly put dirty data into + * the streaming cache, no flush-flag synchronization needs + * to be performed. + */ + if (direction == DMA_TO_DEVICE) + return; + + STC_FLUSHFLAG_INIT(strbuf); + iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa); + (void) iommu_read(iommu->write_complete_reg); + + limit = 100000; + while (!STC_FLUSHFLAG_SET(strbuf)) { + limit--; + if (!limit) + break; + udelay(1); + rmb(); + } + if (!limit) + printk(KERN_WARNING "strbuf_flush: flushflag timeout " + "vaddr[%08x] ctx[%lx] npages[%ld]\n", + vaddr, ctx, npages); +} + +static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr, + size_t sz, enum dma_data_direction direction) +{ + struct iommu *iommu; + struct strbuf *strbuf; + iopte_t *base; + unsigned long flags, npages, ctx, i; + + if (unlikely(direction == DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } + + iommu = dev->archdata.iommu; + strbuf = dev->archdata.stc; + + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + base = iommu->page_table + + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + bus_addr &= IO_PAGE_MASK; + + spin_lock_irqsave(&iommu->lock, flags); + + /* Record the context, if any. */ + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; + + /* Step 1: Kick data out of streaming buffers if necessary. */ + if (strbuf->strbuf_enabled) + strbuf_flush(strbuf, iommu, bus_addr, ctx, + npages, direction); + + /* Step 2: Clear out TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); + + iommu_range_free(iommu, bus_addr, npages); + + iommu_free_ctx(iommu, ctx); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + struct scatterlist *s, *outs, *segstart; + unsigned long flags, handle, prot, ctx; + dma_addr_t dma_next = 0, dma_addr; + unsigned int max_seg_size; + unsigned long seg_boundary_size; + int outcount, incount, i; + struct strbuf *strbuf; + struct iommu *iommu; + unsigned long base_shift; + + BUG_ON(direction == DMA_NONE); + + iommu = dev->archdata.iommu; + strbuf = dev->archdata.stc; + if (nelems == 0 || !iommu) + return 0; + + spin_lock_irqsave(&iommu->lock, flags); + + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + + if (strbuf->strbuf_enabled) + prot = IOPTE_STREAMING(ctx); + else + prot = IOPTE_CONSISTENT(ctx); + if (direction != DMA_TO_DEVICE) + prot |= IOPTE_WRITE; + + outs = s = segstart = &sglist[0]; + outcount = 1; + incount = nelems; + handle = 0; + + /* Init first segment length for backout at failure */ + outs->dma_length = 0; + + max_seg_size = dma_get_max_seg_size(dev); + seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; + for_each_sg(sglist, s, nelems, i) { + unsigned long paddr, npages, entry, out_entry = 0, slen; + iopte_t *base; + + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); + npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); + entry = iommu_range_alloc(dev, iommu, npages, &handle); + + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" + " npages %lx\n", iommu, paddr, npages); + goto iommu_map_failed; + } + + base = iommu->page_table + entry; + + /* Convert entry to a dma_addr_t */ + dma_addr = iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT); + dma_addr |= (s->offset & ~IO_PAGE_MASK); + + /* Insert into HW table */ + paddr &= IO_PAGE_MASK; + while (npages--) { + iopte_val(*base) = prot | paddr; + base++; + paddr += IO_PAGE_SIZE; + } + + /* If we are in an open segment, try merging */ + if (segstart != s) { + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if ((dma_addr != dma_next) || + (outs->dma_length + s->length > max_seg_size) || + (is_span_boundary(out_entry, base_shift, + seg_boundary_size, outs, s))) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; + outs = sg_next(outs); + } else { + outs->dma_length += s->length; + } + } + + if (segstart == s) { + /* This is a new segment, fill entries */ + outs->dma_address = dma_addr; + outs->dma_length = slen; + out_entry = entry; + } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; + } + + spin_unlock_irqrestore(&iommu->lock, flags); + + if (outcount < incount) { + outs = sg_next(outs); + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + + return outcount; + +iommu_map_failed: + for_each_sg(sglist, s, nelems, i) { + if (s->dma_length != 0) { + unsigned long vaddr, npages, entry, j; + iopte_t *base; + + vaddr = s->dma_address & IO_PAGE_MASK; + npages = iommu_num_pages(s->dma_address, s->dma_length, + IO_PAGE_SIZE); + iommu_range_free(iommu, vaddr, npages); + + entry = (vaddr - iommu->page_table_map_base) + >> IO_PAGE_SHIFT; + base = iommu->page_table + entry; + + for (j = 0; j < npages; j++) + iopte_make_dummy(iommu, base + j); + + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + } + if (s == outs) + break; + } + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +/* If contexts are being used, they are the same in all of the mappings + * we make for a particular SG. + */ +static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) +{ + unsigned long ctx = 0; + + if (iommu->iommu_ctxflush) { + iopte_t *base; + u32 bus_addr; + + bus_addr = sg->dma_address & IO_PAGE_MASK; + base = iommu->page_table + + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; + } + return ctx; +} + +static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + unsigned long flags, ctx; + struct scatterlist *sg; + struct strbuf *strbuf; + struct iommu *iommu; + + BUG_ON(direction == DMA_NONE); + + iommu = dev->archdata.iommu; + strbuf = dev->archdata.stc; + + ctx = fetch_sg_ctx(iommu, sglist); + + spin_lock_irqsave(&iommu->lock, flags); + + sg = sglist; + while (nelems--) { + dma_addr_t dma_handle = sg->dma_address; + unsigned int len = sg->dma_length; + unsigned long npages, entry; + iopte_t *base; + int i; + + if (!len) + break; + npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); + iommu_range_free(iommu, dma_handle, npages); + + entry = ((dma_handle - iommu->page_table_map_base) + >> IO_PAGE_SHIFT); + base = iommu->page_table + entry; + + dma_handle &= IO_PAGE_MASK; + if (strbuf->strbuf_enabled) + strbuf_flush(strbuf, iommu, dma_handle, ctx, + npages, direction); + + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); + + sg = sg_next(sg); + } + + iommu_free_ctx(iommu, ctx); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void dma_4u_sync_single_for_cpu(struct device *dev, + dma_addr_t bus_addr, size_t sz, + enum dma_data_direction direction) +{ + struct iommu *iommu; + struct strbuf *strbuf; + unsigned long flags, ctx, npages; + + iommu = dev->archdata.iommu; + strbuf = dev->archdata.stc; + + if (!strbuf->strbuf_enabled) + return; + + spin_lock_irqsave(&iommu->lock, flags); + + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + bus_addr &= IO_PAGE_MASK; + + /* Step 1: Record the context, if any. */ + ctx = 0; + if (iommu->iommu_ctxflush && + strbuf->strbuf_ctxflush) { + iopte_t *iopte; + + iopte = iommu->page_table + + ((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT); + ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; + } + + /* Step 2: Kick data out of streaming buffers. */ + strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void dma_4u_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) +{ + struct iommu *iommu; + struct strbuf *strbuf; + unsigned long flags, ctx, npages, i; + struct scatterlist *sg, *sgprv; + u32 bus_addr; + + iommu = dev->archdata.iommu; + strbuf = dev->archdata.stc; + + if (!strbuf->strbuf_enabled) + return; + + spin_lock_irqsave(&iommu->lock, flags); + + /* Step 1: Record the context, if any. */ + ctx = 0; + if (iommu->iommu_ctxflush && + strbuf->strbuf_ctxflush) { + iopte_t *iopte; + + iopte = iommu->page_table + + ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; + } + + /* Step 2: Kick data out of streaming buffers. */ + bus_addr = sglist[0].dma_address & IO_PAGE_MASK; + sgprv = NULL; + for_each_sg(sglist, sg, nelems, i) { + if (sg->dma_length == 0) + break; + sgprv = sg; + } + + npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) + - bus_addr) >> IO_PAGE_SHIFT; + strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static const struct dma_ops sun4u_dma_ops = { + .alloc_coherent = dma_4u_alloc_coherent, + .free_coherent = dma_4u_free_coherent, + .map_single = dma_4u_map_single, + .unmap_single = dma_4u_unmap_single, + .map_sg = dma_4u_map_sg, + .unmap_sg = dma_4u_unmap_sg, + .sync_single_for_cpu = dma_4u_sync_single_for_cpu, + .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, +}; + +const struct dma_ops *dma_ops = &sun4u_dma_ops; +EXPORT_SYMBOL(dma_ops); + +int dma_supported(struct device *dev, u64 device_mask) +{ + struct iommu *iommu = dev->archdata.iommu; + u64 dma_addr_mask = iommu->dma_addr_mask; + + if (device_mask >= (1UL << 32UL)) + return 0; + + if ((device_mask & dma_addr_mask) == dma_addr_mask) + return 1; + +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return pci_dma_supported(to_pci_dev(dev), device_mask); +#endif + + return 0; +} +EXPORT_SYMBOL(dma_supported); + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return pci_set_dma_mask(to_pci_dev(dev), dma_mask); +#endif + return -EINVAL; +} +EXPORT_SYMBOL(dma_set_mask); diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h new file mode 100644 index 000000000000..591f5879039c --- /dev/null +++ b/arch/sparc/kernel/iommu_common.h @@ -0,0 +1,59 @@ +/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations. + * + * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net) + */ + +#ifndef _IOMMU_COMMON_H +#define _IOMMU_COMMON_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/device.h> +#include <linux/iommu-helper.h> + +#include <asm/iommu.h> +#include <asm/scatterlist.h> + +/* + * These give mapping size of each iommu pte/tlb. + */ +#define IO_PAGE_SHIFT 13 +#define IO_PAGE_SIZE (1UL << IO_PAGE_SHIFT) +#define IO_PAGE_MASK (~(IO_PAGE_SIZE-1)) +#define IO_PAGE_ALIGN(addr) ALIGN(addr, IO_PAGE_SIZE) + +#define IO_TSB_ENTRIES (128*1024) +#define IO_TSB_SIZE (IO_TSB_ENTRIES * 8) + +/* + * This is the hardwired shift in the iotlb tag/data parts. + */ +#define IOMMU_PAGE_SHIFT 13 + +#define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG)))) + +static inline int is_span_boundary(unsigned long entry, + unsigned long shift, + unsigned long boundary_size, + struct scatterlist *outs, + struct scatterlist *sg) +{ + unsigned long paddr = SG_ENT_PHYS_ADDRESS(outs); + int nr = iommu_num_pages(paddr, outs->dma_length + sg->length, + IO_PAGE_SIZE); + + return iommu_is_span_boundary(entry, nr, shift, boundary_size); +} + +extern unsigned long iommu_range_alloc(struct device *dev, + struct iommu *iommu, + unsigned long npages, + unsigned long *handle); +extern void iommu_range_free(struct iommu *iommu, + dma_addr_t dma_addr, + unsigned long npages); + +#endif /* _IOMMU_COMMON_H */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c new file mode 100644 index 000000000000..a3ea2bcb95de --- /dev/null +++ b/arch/sparc/kernel/irq_64.c @@ -0,0 +1,1101 @@ +/* irq.c: UltraSparc IRQ handling/init/registry. + * + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/linkage.h> +#include <linux/ptrace.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/bootmem.h> +#include <linux/irq.h> + +#include <asm/ptrace.h> +#include <asm/processor.h> +#include <asm/atomic.h> +#include <asm/system.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/upa.h> +#include <asm/oplib.h> +#include <asm/prom.h> +#include <asm/timer.h> +#include <asm/smp.h> +#include <asm/starfire.h> +#include <asm/uaccess.h> +#include <asm/cache.h> +#include <asm/cpudata.h> +#include <asm/auxio.h> +#include <asm/head.h> +#include <asm/hypervisor.h> +#include <asm/cacheflush.h> + +#include "entry.h" + +#define NUM_IVECS (IMAP_INR + 1) + +struct ino_bucket *ivector_table; +unsigned long ivector_table_pa; + +/* On several sun4u processors, it is illegal to mix bypass and + * non-bypass accesses. Therefore we access all INO buckets + * using bypass accesses only. + */ +static unsigned long bucket_get_chain_pa(unsigned long bucket_pa) +{ + unsigned long ret; + + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=&r" (ret) + : "r" (bucket_pa + + offsetof(struct ino_bucket, + __irq_chain_pa)), + "i" (ASI_PHYS_USE_EC)); + + return ret; +} + +static void bucket_clear_chain_pa(unsigned long bucket_pa) +{ + __asm__ __volatile__("stxa %%g0, [%0] %1" + : /* no outputs */ + : "r" (bucket_pa + + offsetof(struct ino_bucket, + __irq_chain_pa)), + "i" (ASI_PHYS_USE_EC)); +} + +static unsigned int bucket_get_virt_irq(unsigned long bucket_pa) +{ + unsigned int ret; + + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=&r" (ret) + : "r" (bucket_pa + + offsetof(struct ino_bucket, + __virt_irq)), + "i" (ASI_PHYS_USE_EC)); + + return ret; +} + +static void bucket_set_virt_irq(unsigned long bucket_pa, + unsigned int virt_irq) +{ + __asm__ __volatile__("stwa %0, [%1] %2" + : /* no outputs */ + : "r" (virt_irq), + "r" (bucket_pa + + offsetof(struct ino_bucket, + __virt_irq)), + "i" (ASI_PHYS_USE_EC)); +} + +#define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa) + +static struct { + unsigned int dev_handle; + unsigned int dev_ino; + unsigned int in_use; +} virt_irq_table[NR_IRQS]; +static DEFINE_SPINLOCK(virt_irq_alloc_lock); + +unsigned char virt_irq_alloc(unsigned int dev_handle, + unsigned int dev_ino) +{ + unsigned long flags; + unsigned char ent; + + BUILD_BUG_ON(NR_IRQS >= 256); + + spin_lock_irqsave(&virt_irq_alloc_lock, flags); + + for (ent = 1; ent < NR_IRQS; ent++) { + if (!virt_irq_table[ent].in_use) + break; + } + if (ent >= NR_IRQS) { + printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); + ent = 0; + } else { + virt_irq_table[ent].dev_handle = dev_handle; + virt_irq_table[ent].dev_ino = dev_ino; + virt_irq_table[ent].in_use = 1; + } + + spin_unlock_irqrestore(&virt_irq_alloc_lock, flags); + + return ent; +} + +#ifdef CONFIG_PCI_MSI +void virt_irq_free(unsigned int virt_irq) +{ + unsigned long flags; + + if (virt_irq >= NR_IRQS) + return; + + spin_lock_irqsave(&virt_irq_alloc_lock, flags); + + virt_irq_table[virt_irq].in_use = 0; + + spin_unlock_irqrestore(&virt_irq_alloc_lock, flags); +} +#endif + +/* + * /proc/interrupts printing: + */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; + struct irqaction * action; + unsigned long flags; + + if (i == 0) { + seq_printf(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ",j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto skip; + seq_printf(p, "%3d: ",i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + seq_printf(p, " %9s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } + return 0; +} + +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) +{ + unsigned int tid; + + if (this_is_starfire) { + tid = starfire_translate(imap, cpuid); + tid <<= IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } else { + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + unsigned long ver; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_JBUS; + } else { + unsigned int a = cpuid & 0x1f; + unsigned int n = (cpuid >> 5) & 0x1f; + + tid = ((a << IMAP_AID_SHIFT) | + (n << IMAP_NID_SHIFT)); + tid &= (IMAP_AID_SAFARI | + IMAP_NID_SAFARI);; + } + } else { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } + } + + return tid; +} + +struct irq_handler_data { + unsigned long iclr; + unsigned long imap; + + void (*pre_handler)(unsigned int, void *, void *); + void *arg1; + void *arg2; +}; + +#ifdef CONFIG_SMP +static int irq_choose_cpu(unsigned int virt_irq) +{ + cpumask_t mask = irq_desc[virt_irq].affinity; + int cpuid; + + if (cpus_equal(mask, CPU_MASK_ALL)) { + static int irq_rover; + static DEFINE_SPINLOCK(irq_rover_lock); + unsigned long flags; + + /* Round-robin distribution... */ + do_round_robin: + spin_lock_irqsave(&irq_rover_lock, flags); + + while (!cpu_online(irq_rover)) { + if (++irq_rover >= NR_CPUS) + irq_rover = 0; + } + cpuid = irq_rover; + do { + if (++irq_rover >= NR_CPUS) + irq_rover = 0; + } while (!cpu_online(irq_rover)); + + spin_unlock_irqrestore(&irq_rover_lock, flags); + } else { + cpumask_t tmp; + + cpus_and(tmp, cpu_online_map, mask); + + if (cpus_empty(tmp)) + goto do_round_robin; + + cpuid = first_cpu(tmp); + } + + return cpuid; +} +#else +static int irq_choose_cpu(unsigned int virt_irq) +{ + return real_hard_smp_processor_id(); +} +#endif + +static void sun4u_irq_enable(unsigned int virt_irq) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + + if (likely(data)) { + unsigned long cpuid, imap, val; + unsigned int tid; + + cpuid = irq_choose_cpu(virt_irq); + imap = data->imap; + + tid = sun4u_compute_tid(imap, cpuid); + + val = upa_readq(imap); + val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS | + IMAP_AID_SAFARI | IMAP_NID_SAFARI); + val |= tid | IMAP_VALID; + upa_writeq(val, imap); + upa_writeq(ICLR_IDLE, data->iclr); + } +} + +static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) +{ + sun4u_irq_enable(virt_irq); +} + +static void sun4u_irq_disable(unsigned int virt_irq) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + + if (likely(data)) { + unsigned long imap = data->imap; + unsigned long tmp = upa_readq(imap); + + tmp &= ~IMAP_VALID; + upa_writeq(tmp, imap); + } +} + +static void sun4u_irq_eoi(unsigned int virt_irq) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + struct irq_desc *desc = irq_desc + virt_irq; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; + + if (likely(data)) + upa_writeq(ICLR_IDLE, data->iclr); +} + +static void sun4v_irq_enable(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + unsigned long cpuid = irq_choose_cpu(virt_irq); + int err; + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " + "err(%d)\n", ino, cpuid, err); + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setstate(%x): " + "err(%d)\n", ino, err); + err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n", + ino, err); +} + +static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + unsigned long cpuid = irq_choose_cpu(virt_irq); + int err; + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " + "err(%d)\n", ino, cpuid, err); +} + +static void sun4v_irq_disable(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + int err; + + err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setenabled(%x): " + "err(%d)\n", ino, err); +} + +static void sun4v_irq_eoi(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + struct irq_desc *desc = irq_desc + virt_irq; + int err; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; + + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setstate(%x): " + "err(%d)\n", ino, err); +} + +static void sun4v_virq_enable(unsigned int virt_irq) +{ + unsigned long cpuid, dev_handle, dev_ino; + int err; + + cpuid = irq_choose_cpu(virt_irq); + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " + "err(%d)\n", + dev_handle, dev_ino, cpuid, err); + err = sun4v_vintr_set_state(dev_handle, dev_ino, + HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_STATE_IDLE): err(%d)\n", + dev_handle, dev_ino, err); + err = sun4v_vintr_set_valid(dev_handle, dev_ino, + HV_INTR_ENABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_ENABLED): err(%d)\n", + dev_handle, dev_ino, err); +} + +static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) +{ + unsigned long cpuid, dev_handle, dev_ino; + int err; + + cpuid = irq_choose_cpu(virt_irq); + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " + "err(%d)\n", + dev_handle, dev_ino, cpuid, err); +} + +static void sun4v_virq_disable(unsigned int virt_irq) +{ + unsigned long dev_handle, dev_ino; + int err; + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_valid(dev_handle, dev_ino, + HV_INTR_DISABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_DISABLED): err(%d)\n", + dev_handle, dev_ino, err); +} + +static void sun4v_virq_eoi(unsigned int virt_irq) +{ + struct irq_desc *desc = irq_desc + virt_irq; + unsigned long dev_handle, dev_ino; + int err; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_state(dev_handle, dev_ino, + HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_STATE_IDLE): err(%d)\n", + dev_handle, dev_ino, err); +} + +static struct irq_chip sun4u_irq = { + .typename = "sun4u", + .enable = sun4u_irq_enable, + .disable = sun4u_irq_disable, + .eoi = sun4u_irq_eoi, + .set_affinity = sun4u_set_affinity, +}; + +static struct irq_chip sun4v_irq = { + .typename = "sun4v", + .enable = sun4v_irq_enable, + .disable = sun4v_irq_disable, + .eoi = sun4v_irq_eoi, + .set_affinity = sun4v_set_affinity, +}; + +static struct irq_chip sun4v_virq = { + .typename = "vsun4v", + .enable = sun4v_virq_enable, + .disable = sun4v_virq_disable, + .eoi = sun4v_virq_eoi, + .set_affinity = sun4v_virt_set_affinity, +}; + +static void pre_flow_handler(unsigned int virt_irq, + struct irq_desc *desc) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + + data->pre_handler(ino, data->arg1, data->arg2); + + handle_fasteoi_irq(virt_irq, desc); +} + +void irq_install_pre_handler(int virt_irq, + void (*func)(unsigned int, void *, void *), + void *arg1, void *arg2) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + struct irq_desc *desc = irq_desc + virt_irq; + + data->pre_handler = func; + data->arg1 = arg1; + data->arg2 = arg2; + + desc->handle_irq = pre_flow_handler; +} + +unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) +{ + struct ino_bucket *bucket; + struct irq_handler_data *data; + unsigned int virt_irq; + int ino; + + BUG_ON(tlb_type == hypervisor); + + ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup; + bucket = &ivector_table[ino]; + virt_irq = bucket_get_virt_irq(__pa(bucket)); + if (!virt_irq) { + virt_irq = virt_irq_alloc(0, ino); + bucket_set_virt_irq(__pa(bucket), virt_irq); + set_irq_chip_and_handler_name(virt_irq, + &sun4u_irq, + handle_fasteoi_irq, + "IVEC"); + } + + data = get_irq_chip_data(virt_irq); + if (unlikely(data)) + goto out; + + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); + prom_halt(); + } + set_irq_chip_data(virt_irq, data); + + data->imap = imap; + data->iclr = iclr; + +out: + return virt_irq; +} + +static unsigned int sun4v_build_common(unsigned long sysino, + struct irq_chip *chip) +{ + struct ino_bucket *bucket; + struct irq_handler_data *data; + unsigned int virt_irq; + + BUG_ON(tlb_type != hypervisor); + + bucket = &ivector_table[sysino]; + virt_irq = bucket_get_virt_irq(__pa(bucket)); + if (!virt_irq) { + virt_irq = virt_irq_alloc(0, sysino); + bucket_set_virt_irq(__pa(bucket), virt_irq); + set_irq_chip_and_handler_name(virt_irq, chip, + handle_fasteoi_irq, + "IVEC"); + } + + data = get_irq_chip_data(virt_irq); + if (unlikely(data)) + goto out; + + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); + prom_halt(); + } + set_irq_chip_data(virt_irq, data); + + /* Catch accidental accesses to these things. IMAP/ICLR handling + * is done by hypervisor calls on sun4v platforms, not by direct + * register accesses. + */ + data->imap = ~0UL; + data->iclr = ~0UL; + +out: + return virt_irq; +} + +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) +{ + unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + + return sun4v_build_common(sysino, &sun4v_irq); +} + +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +{ + struct irq_handler_data *data; + unsigned long hv_err, cookie; + struct ino_bucket *bucket; + struct irq_desc *desc; + unsigned int virt_irq; + + bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); + if (unlikely(!bucket)) + return 0; + __flush_dcache_range((unsigned long) bucket, + ((unsigned long) bucket + + sizeof(struct ino_bucket))); + + virt_irq = virt_irq_alloc(devhandle, devino); + bucket_set_virt_irq(__pa(bucket), virt_irq); + + set_irq_chip_and_handler_name(virt_irq, &sun4v_virq, + handle_fasteoi_irq, + "IVEC"); + + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) + return 0; + + /* In order to make the LDC channel startup sequence easier, + * especially wrt. locking, we do not let request_irq() enable + * the interrupt. + */ + desc = irq_desc + virt_irq; + desc->status |= IRQ_NOAUTOEN; + + set_irq_chip_data(virt_irq, data); + + /* Catch accidental accesses to these things. IMAP/ICLR handling + * is done by hypervisor calls on sun4v platforms, not by direct + * register accesses. + */ + data->imap = ~0UL; + data->iclr = ~0UL; + + cookie = ~__pa(bucket); + hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); + if (hv_err) { + prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " + "err=%lu\n", devhandle, devino, hv_err); + prom_halt(); + } + + return virt_irq; +} + +void ack_bad_irq(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + + if (!ino) + ino = 0xdeadbeef; + + printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n", + ino, virt_irq); +} + +void *hardirq_stack[NR_CPUS]; +void *softirq_stack[NR_CPUS]; + +static __attribute__((always_inline)) void *set_hardirq_stack(void) +{ + void *orig_sp, *sp = hardirq_stack[smp_processor_id()]; + + __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp)); + if (orig_sp < sp || + orig_sp > (sp + THREAD_SIZE)) { + sp += THREAD_SIZE - 192 - STACK_BIAS; + __asm__ __volatile__("mov %0, %%sp" : : "r" (sp)); + } + + return orig_sp; +} +static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp) +{ + __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); +} + +void handler_irq(int irq, struct pt_regs *regs) +{ + unsigned long pstate, bucket_pa; + struct pt_regs *old_regs; + void *orig_sp; + + clear_softint(1 << irq); + + old_regs = set_irq_regs(regs); + irq_enter(); + + /* Grab an atomic snapshot of the pending IVECs. */ + __asm__ __volatile__("rdpr %%pstate, %0\n\t" + "wrpr %0, %3, %%pstate\n\t" + "ldx [%2], %1\n\t" + "stx %%g0, [%2]\n\t" + "wrpr %0, 0x0, %%pstate\n\t" + : "=&r" (pstate), "=&r" (bucket_pa) + : "r" (irq_work_pa(smp_processor_id())), + "i" (PSTATE_IE) + : "memory"); + + orig_sp = set_hardirq_stack(); + + while (bucket_pa) { + struct irq_desc *desc; + unsigned long next_pa; + unsigned int virt_irq; + + next_pa = bucket_get_chain_pa(bucket_pa); + virt_irq = bucket_get_virt_irq(bucket_pa); + bucket_clear_chain_pa(bucket_pa); + + desc = irq_desc + virt_irq; + + desc->handle_irq(virt_irq, desc); + + bucket_pa = next_pa; + } + + restore_hardirq_stack(orig_sp); + + irq_exit(); + set_irq_regs(old_regs); +} + +void do_softirq(void) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + void *orig_sp, *sp = softirq_stack[smp_processor_id()]; + + sp += THREAD_SIZE - 192 - STACK_BIAS; + + __asm__ __volatile__("mov %%sp, %0\n\t" + "mov %1, %%sp" + : "=&r" (orig_sp) + : "r" (sp)); + __do_softirq(); + __asm__ __volatile__("mov %0, %%sp" + : : "r" (orig_sp)); + } + + local_irq_restore(flags); +} + +static void unhandled_perf_irq(struct pt_regs *regs) +{ + unsigned long pcr, pic; + + read_pcr(pcr); + read_pic(pic); + + write_pcr(0); + + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", + smp_processor_id()); + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", + smp_processor_id(), pcr, pic); +} + +/* Almost a direct copy of the powerpc PMC code. */ +static DEFINE_SPINLOCK(perf_irq_lock); +static void *perf_irq_owner_caller; /* mostly for debugging */ +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; + +/* Invoked from level 15 PIL handler in trap table. */ +void perfctr_irq(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + perf_irq(regs); +} + +int register_perfctr_intr(void (*handler)(struct pt_regs *)) +{ + int ret; + + if (!handler) + return -EINVAL; + + spin_lock(&perf_irq_lock); + if (perf_irq != unhandled_perf_irq) { + printk(KERN_WARNING "register_perfctr_intr: " + "perf IRQ busy (reserved by caller %p)\n", + perf_irq_owner_caller); + ret = -EBUSY; + goto out; + } + + perf_irq_owner_caller = __builtin_return_address(0); + perf_irq = handler; + + ret = 0; +out: + spin_unlock(&perf_irq_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(register_perfctr_intr); + +void release_perfctr_intr(void (*handler)(struct pt_regs *)) +{ + spin_lock(&perf_irq_lock); + perf_irq_owner_caller = NULL; + perf_irq = unhandled_perf_irq; + spin_unlock(&perf_irq_lock); +} +EXPORT_SYMBOL_GPL(release_perfctr_intr); + +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(void) +{ + unsigned int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + unsigned long flags; + + spin_lock_irqsave(&irq_desc[irq].lock, flags); + if (irq_desc[irq].action && + !(irq_desc[irq].status & IRQ_PER_CPU)) { + if (irq_desc[irq].chip->set_affinity) + irq_desc[irq].chip->set_affinity(irq, + irq_desc[irq].affinity); + } + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } + + tick_ops->disable_irq(); +} +#endif + +struct sun5_timer { + u64 count0; + u64 limit0; + u64 count1; + u64 limit1; +}; + +static struct sun5_timer *prom_timers; +static u64 prom_limit0, prom_limit1; + +static void map_prom_timers(void) +{ + struct device_node *dp; + const unsigned int *addr; + + /* PROM timer node hangs out in the top level of device siblings... */ + dp = of_find_node_by_path("/"); + dp = dp->child; + while (dp) { + if (!strcmp(dp->name, "counter-timer")) + break; + dp = dp->sibling; + } + + /* Assume if node is not present, PROM uses different tick mechanism + * which we should not care about. + */ + if (!dp) { + prom_timers = (struct sun5_timer *) 0; + return; + } + + /* If PROM is really using this, it must be mapped by him. */ + addr = of_get_property(dp, "address", NULL); + if (!addr) { + prom_printf("PROM does not have timer mapped, trying to continue.\n"); + prom_timers = (struct sun5_timer *) 0; + return; + } + prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]); +} + +static void kill_prom_timer(void) +{ + if (!prom_timers) + return; + + /* Save them away for later. */ + prom_limit0 = prom_timers->limit0; + prom_limit1 = prom_timers->limit1; + + /* Just as in sun4c/sun4m PROM uses timer which ticks at IRQ 14. + * We turn both off here just to be paranoid. + */ + prom_timers->limit0 = 0; + prom_timers->limit1 = 0; + + /* Wheee, eat the interrupt packet too... */ + __asm__ __volatile__( +" mov 0x40, %%g2\n" +" ldxa [%%g0] %0, %%g1\n" +" ldxa [%%g2] %1, %%g1\n" +" stxa %%g0, [%%g0] %0\n" +" membar #Sync\n" + : /* no outputs */ + : "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R) + : "g1", "g2"); +} + +void notrace init_irqwork_curcpu(void) +{ + int cpu = hard_smp_processor_id(); + + trap_block[cpu].irq_worklist_pa = 0UL; +} + +/* Please be very careful with register_one_mondo() and + * sun4v_register_mondo_queues(). + * + * On SMP this gets invoked from the CPU trampoline before + * the cpu has fully taken over the trap table from OBP, + * and it's kernel stack + %g6 thread register state is + * not fully cooked yet. + * + * Therefore you cannot make any OBP calls, not even prom_printf, + * from these two routines. + */ +static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask) +{ + unsigned long num_entries = (qmask + 1) / 64; + unsigned long status; + + status = sun4v_cpu_qconf(type, paddr, num_entries); + if (status != HV_EOK) { + prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, " + "err %lu\n", type, paddr, num_entries, status); + prom_halt(); + } +} + +void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu) +{ + struct trap_per_cpu *tb = &trap_block[this_cpu]; + + register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO, + tb->cpu_mondo_qmask); + register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO, + tb->dev_mondo_qmask); + register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR, + tb->resum_qmask); + register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR, + tb->nonresum_qmask); +} + +static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask) +{ + unsigned long size = PAGE_ALIGN(qmask + 1); + void *p = __alloc_bootmem(size, size, 0); + if (!p) { + prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); + prom_halt(); + } + + *pa_ptr = __pa(p); +} + +static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask) +{ + unsigned long size = PAGE_ALIGN(qmask + 1); + void *p = __alloc_bootmem(size, size, 0); + + if (!p) { + prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); + prom_halt(); + } + + *pa_ptr = __pa(p); +} + +static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) +{ +#ifdef CONFIG_SMP + void *page; + + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + + page = alloc_bootmem_pages(PAGE_SIZE); + if (!page) { + prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_halt(); + } + + tb->cpu_mondo_block_pa = __pa(page); + tb->cpu_list_pa = __pa(page + 64); +#endif +} + +/* Allocate mondo and error queues for all possible cpus. */ +static void __init sun4v_init_mondo_queues(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask); + alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask); + alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask); + alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask); + alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask); + alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, + tb->nonresum_qmask); + } +} + +static void __init init_send_mondo_info(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + init_cpu_send_mondo_info(tb); + } +} + +static struct irqaction timer_irq_action = { + .name = "timer", +}; + +/* Only invoked on boot processor. */ +void __init init_IRQ(void) +{ + unsigned long size; + + map_prom_timers(); + kill_prom_timer(); + + size = sizeof(struct ino_bucket) * NUM_IVECS; + ivector_table = alloc_bootmem(size); + if (!ivector_table) { + prom_printf("Fatal error, cannot allocate ivector_table\n"); + prom_halt(); + } + __flush_dcache_range((unsigned long) ivector_table, + ((unsigned long) ivector_table) + size); + + ivector_table_pa = __pa(ivector_table); + + if (tlb_type == hypervisor) + sun4v_init_mondo_queues(); + + init_send_mondo_info(); + + if (tlb_type == hypervisor) { + /* Load up the boot cpu's entries. */ + sun4v_register_mondo_queues(hard_smp_processor_id()); + } + + /* We need to clear any IRQ's pending in the soft interrupt + * registers, a spurious one could be left around from the + * PROM timer which we just disabled. + */ + clear_softint(get_softint()); + + /* Now that ivector table is initialized, it is safe + * to receive IRQ vector traps. We will normally take + * one or two right now, in case some device PROM used + * to boot us wants to speak to us. We just ignore them. + */ + __asm__ __volatile__("rdpr %%pstate, %%g1\n\t" + "or %%g1, %0, %%g1\n\t" + "wrpr %%g1, 0x0, %%pstate" + : /* No outputs */ + : "i" (PSTATE_IE) + : "g1"); + + irq_desc[0].action = &timer_irq_action; +} diff --git a/arch/sparc/kernel/itlb_miss.S b/arch/sparc/kernel/itlb_miss.S new file mode 100644 index 000000000000..5a8377b54955 --- /dev/null +++ b/arch/sparc/kernel/itlb_miss.S @@ -0,0 +1,39 @@ +/* ITLB ** ICACHE line 1: Context 0 check and TSB load */ + ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer + ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET + srlx %g6, 48, %g5 ! Get context + sllx %g6, 22, %g6 ! Zero out context + brz,pn %g5, kvmap_itlb ! Context 0 processing + srlx %g6, 22, %g6 ! Delay slot + TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry + cmp %g4, %g6 ! Compare TAG + +/* ITLB ** ICACHE line 2: TSB compare and TLB load */ + bne,pn %xcc, tsb_miss_itlb ! Miss + mov FAULT_CODE_ITLB, %g3 + sethi %hi(_PAGE_EXEC_4U), %g4 + andcc %g5, %g4, %g0 ! Executable? + be,pn %xcc, tsb_do_fault + nop ! Delay slot, fill me + stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB + retry ! Trap done + +/* ITLB ** ICACHE line 3: */ + nop + nop + nop + nop + nop + nop + nop + nop + +/* ITLB ** ICACHE line 4: */ + nop + nop + nop + nop + nop + nop + nop + nop diff --git a/arch/sparc/kernel/ivec.S b/arch/sparc/kernel/ivec.S new file mode 100644 index 000000000000..d29f92ebca5e --- /dev/null +++ b/arch/sparc/kernel/ivec.S @@ -0,0 +1,51 @@ + /* The registers for cross calls will be: + * + * DATA 0: [low 32-bits] Address of function to call, jmp to this + * [high 32-bits] MMU Context Argument 0, place in %g5 + * DATA 1: Address Argument 1, place in %g1 + * DATA 2: Address Argument 2, place in %g7 + * + * With this method we can do most of the cross-call tlb/cache + * flushing very quickly. + */ + .align 32 + .globl do_ivec + .type do_ivec,#function +do_ivec: + mov 0x40, %g3 + ldxa [%g3 + %g0] ASI_INTR_R, %g3 + sethi %hi(KERNBASE), %g4 + cmp %g3, %g4 + bgeu,pn %xcc, do_ivec_xcall + srlx %g3, 32, %g5 + stxa %g0, [%g0] ASI_INTR_RECEIVE + membar #Sync + + sethi %hi(ivector_table_pa), %g2 + ldx [%g2 + %lo(ivector_table_pa)], %g2 + sllx %g3, 4, %g3 + add %g2, %g3, %g3 + + TRAP_LOAD_IRQ_WORK_PA(%g6, %g1) + + ldx [%g6], %g5 + stxa %g5, [%g3] ASI_PHYS_USE_EC + stx %g3, [%g6] + wr %g0, 1 << PIL_DEVICE_IRQ, %set_softint + retry +do_ivec_xcall: + mov 0x50, %g1 + ldxa [%g1 + %g0] ASI_INTR_R, %g1 + srl %g3, 0, %g3 + + mov 0x60, %g7 + ldxa [%g7 + %g0] ASI_INTR_R, %g7 + stxa %g0, [%g0] ASI_INTR_RECEIVE + membar #Sync + ba,pt %xcc, 1f + nop + + .align 32 +1: jmpl %g3, %g0 + nop + .size do_ivec,.-do_ivec diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c new file mode 100644 index 000000000000..fefbe6dc51be --- /dev/null +++ b/arch/sparc/kernel/kgdb_64.c @@ -0,0 +1,186 @@ +/* kgdb.c: KGDB support for 64-bit sparc. + * + * Copyright (C) 2008 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kgdb.h> +#include <linux/kdebug.h> + +#include <asm/kdebug.h> +#include <asm/ptrace.h> +#include <asm/irq.h> + +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + struct reg_window *win; + int i; + + gdb_regs[GDB_G0] = 0; + for (i = 0; i < 15; i++) + gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i]; + + win = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS); + for (i = 0; i < 8; i++) + gdb_regs[GDB_L0 + i] = win->locals[i]; + for (i = 0; i < 8; i++) + gdb_regs[GDB_I0 + i] = win->ins[i]; + + for (i = GDB_F0; i <= GDB_F62; i++) + gdb_regs[i] = 0; + + gdb_regs[GDB_PC] = regs->tpc; + gdb_regs[GDB_NPC] = regs->tnpc; + gdb_regs[GDB_STATE] = regs->tstate; + gdb_regs[GDB_FSR] = 0; + gdb_regs[GDB_FPRS] = 0; + gdb_regs[GDB_Y] = regs->y; +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + struct thread_info *t = task_thread_info(p); + extern unsigned int switch_to_pc; + extern unsigned int ret_from_syscall; + struct reg_window *win; + unsigned long pc, cwp; + int i; + + for (i = GDB_G0; i < GDB_G6; i++) + gdb_regs[i] = 0; + gdb_regs[GDB_G6] = (unsigned long) t; + gdb_regs[GDB_G7] = (unsigned long) p; + for (i = GDB_O0; i < GDB_SP; i++) + gdb_regs[i] = 0; + gdb_regs[GDB_SP] = t->ksp; + gdb_regs[GDB_O7] = 0; + + win = (struct reg_window *) (t->ksp + STACK_BIAS); + for (i = 0; i < 8; i++) + gdb_regs[GDB_L0 + i] = win->locals[i]; + for (i = 0; i < 8; i++) + gdb_regs[GDB_I0 + i] = win->ins[i]; + + for (i = GDB_F0; i <= GDB_F62; i++) + gdb_regs[i] = 0; + + if (t->new_child) + pc = (unsigned long) &ret_from_syscall; + else + pc = (unsigned long) &switch_to_pc; + + gdb_regs[GDB_PC] = pc; + gdb_regs[GDB_NPC] = pc + 4; + + cwp = __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP]; + + gdb_regs[GDB_STATE] = (TSTATE_PRIV | TSTATE_IE | cwp); + gdb_regs[GDB_FSR] = 0; + gdb_regs[GDB_FPRS] = 0; + gdb_regs[GDB_Y] = 0; +} + +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + struct reg_window *win; + int i; + + for (i = 0; i < 15; i++) + regs->u_regs[UREG_G1 + i] = gdb_regs[GDB_G1 + i]; + + /* If the TSTATE register is changing, we have to preserve + * the CWP field, otherwise window save/restore explodes. + */ + if (regs->tstate != gdb_regs[GDB_STATE]) { + unsigned long cwp = regs->tstate & TSTATE_CWP; + + regs->tstate = (gdb_regs[GDB_STATE] & ~TSTATE_CWP) | cwp; + } + + regs->tpc = gdb_regs[GDB_PC]; + regs->tnpc = gdb_regs[GDB_NPC]; + regs->y = gdb_regs[GDB_Y]; + + win = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS); + for (i = 0; i < 8; i++) + win->locals[i] = gdb_regs[GDB_L0 + i]; + for (i = 0; i < 8; i++) + win->ins[i] = gdb_regs[GDB_I0 + i]; +} + +#ifdef CONFIG_SMP +void smp_kgdb_capture_client(struct pt_regs *regs) +{ + unsigned long flags; + + __asm__ __volatile__("rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (flags) + : "i" (PSTATE_IE)); + + flushw_all(); + + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(raw_smp_processor_id(), regs); + + __asm__ __volatile__("wrpr %0, 0, %%pstate" + : : "r" (flags)); +} +#endif + +int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, + char *remcomInBuffer, char *remcomOutBuffer, + struct pt_regs *linux_regs) +{ + unsigned long addr; + char *ptr; + + switch (remcomInBuffer[0]) { + case 'c': + /* try to read optional parameter, pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (kgdb_hex2long(&ptr, &addr)) { + linux_regs->tpc = addr; + linux_regs->tnpc = addr + 4; + } + /* fallthru */ + + case 'D': + case 'k': + if (linux_regs->tpc == (unsigned long) arch_kgdb_breakpoint) { + linux_regs->tpc = linux_regs->tnpc; + linux_regs->tnpc += 4; + } + return 0; + } + return -1; +} + +asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs) +{ + unsigned long flags; + + if (user_mode(regs)) { + bad_trap(regs, trap_level); + return; + } + + flushw_all(); + + local_irq_save(flags); + kgdb_handle_exception(0x172, SIGTRAP, 0, regs); + local_irq_restore(flags); +} + +int kgdb_arch_init(void) +{ + return 0; +} + +void kgdb_arch_exit(void) +{ +} + +struct kgdb_arch arch_kgdb_ops = { + /* Breakpoint instruction: ta 0x72 */ + .gdb_bpt_instr = { 0x91, 0xd0, 0x20, 0x72 }, +}; diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c new file mode 100644 index 000000000000..201a6e547e4a --- /dev/null +++ b/arch/sparc/kernel/kprobes.c @@ -0,0 +1,593 @@ +/* arch/sparc64/kernel/kprobes.c + * + * Copyright (C) 2004 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/kdebug.h> +#include <asm/signal.h> +#include <asm/cacheflush.h> +#include <asm/uaccess.h> + +/* We do not have hardware single-stepping on sparc64. + * So we implement software single-stepping with breakpoint + * traps. The top-level scheme is similar to that used + * in the x86 kprobes implementation. + * + * In the kprobe->ainsn.insn[] array we store the original + * instruction at index zero and a break instruction at + * index one. + * + * When we hit a kprobe we: + * - Run the pre-handler + * - Remember "regs->tnpc" and interrupt level stored in + * "regs->tstate" so we can restore them later + * - Disable PIL interrupts + * - Set regs->tpc to point to kprobe->ainsn.insn[0] + * - Set regs->tnpc to point to kprobe->ainsn.insn[1] + * - Mark that we are actively in a kprobe + * + * At this point we wait for the second breakpoint at + * kprobe->ainsn.insn[1] to hit. When it does we: + * - Run the post-handler + * - Set regs->tpc to "remembered" regs->tnpc stored above, + * restore the PIL interrupt level in "regs->tstate" as well + * - Make any adjustments necessary to regs->tnpc in order + * to handle relative branches correctly. See below. + * - Mark that we are no longer actively in a kprobe. + */ + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + p->ainsn.insn[0] = *p->addr; + flushi(&p->ainsn.insn[0]); + + p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; + flushi(&p->ainsn.insn[1]); + + p->opcode = *p->addr; + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flushi(p->addr); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flushi(p->addr); +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.orig_tnpc = kcb->kprobe_orig_tnpc; + kcb->prev_kprobe.orig_tstate_pil = kcb->kprobe_orig_tstate_pil; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc; + kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil; +} + +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_orig_tnpc = regs->tnpc; + kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + regs->tstate |= TSTATE_PIL; + + /*single step inline, if it a breakpoint instruction*/ + if (p->opcode == BREAKPOINT_INSTRUCTION) { + regs->tpc = (unsigned long) p->addr; + regs->tnpc = kcb->kprobe_orig_tnpc; + } else { + regs->tpc = (unsigned long) &p->ainsn.insn[0]; + regs->tnpc = (unsigned long) &p->ainsn.insn[1]; + } +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + void *addr = (void *) regs->tpc; + int ret = 0; + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kcb->kprobe_status == KPROBE_HIT_SS) { + regs->tstate = ((regs->tstate & ~TSTATE_PIL) | + kcb->kprobe_orig_tstate_pil); + goto no_kprobe; + } + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + kcb->kprobe_status = KPROBE_REENTER; + prepare_singlestep(p, regs, kcb); + return 1; + } else { + if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) { + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + ret = 1; + goto no_kprobe; + } + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) + goto ss_probe; + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) + return 1; + +ss_probe: + prepare_singlestep(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +/* If INSN is a relative control transfer instruction, + * return the corrected branch destination value. + * + * regs->tpc and regs->tnpc still hold the values of the + * program counters at the time of trap due to the execution + * of the BREAKPOINT_INSTRUCTION_2 at p->ainsn.insn[1] + * + */ +static unsigned long __kprobes relbranch_fixup(u32 insn, struct kprobe *p, + struct pt_regs *regs) +{ + unsigned long real_pc = (unsigned long) p->addr; + + /* Branch not taken, no mods necessary. */ + if (regs->tnpc == regs->tpc + 0x4UL) + return real_pc + 0x8UL; + + /* The three cases are call, branch w/prediction, + * and traditional branch. + */ + if ((insn & 0xc0000000) == 0x40000000 || + (insn & 0xc1c00000) == 0x00400000 || + (insn & 0xc1c00000) == 0x00800000) { + unsigned long ainsn_addr; + + ainsn_addr = (unsigned long) &p->ainsn.insn[0]; + + /* The instruction did all the work for us + * already, just apply the offset to the correct + * instruction location. + */ + return (real_pc + (regs->tnpc - ainsn_addr)); + } + + /* It is jmpl or some other absolute PC modification instruction, + * leave NPC as-is. + */ + return regs->tnpc; +} + +/* If INSN is an instruction which writes it's PC location + * into a destination register, fix that up. + */ +static void __kprobes retpc_fixup(struct pt_regs *regs, u32 insn, + unsigned long real_pc) +{ + unsigned long *slot = NULL; + + /* Simplest case is 'call', which always uses %o7 */ + if ((insn & 0xc0000000) == 0x40000000) { + slot = ®s->u_regs[UREG_I7]; + } + + /* 'jmpl' encodes the register inside of the opcode */ + if ((insn & 0xc1f80000) == 0x81c00000) { + unsigned long rd = ((insn >> 25) & 0x1f); + + if (rd <= 15) { + slot = ®s->u_regs[rd]; + } else { + /* Hard case, it goes onto the stack. */ + flushw_all(); + + rd -= 16; + slot = (unsigned long *) + (regs->u_regs[UREG_FP] + STACK_BIAS); + slot += rd; + } + } + if (slot != NULL) + *slot = real_pc; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction which has been replaced by the breakpoint + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is &p->ainsn.insn[0]. + * + * This function prepares to return from the post-single-step + * breakpoint trap. + */ +static void __kprobes resume_execution(struct kprobe *p, + struct pt_regs *regs, struct kprobe_ctlblk *kcb) +{ + u32 insn = p->ainsn.insn[0]; + + regs->tnpc = relbranch_fixup(insn, p, regs); + + /* This assignment must occur after relbranch_fixup() */ + regs->tpc = kcb->kprobe_orig_tnpc; + + retpc_fixup(regs, insn, (unsigned long) p->addr); + + regs->tstate = ((regs->tstate & ~TSTATE_PIL) | + kcb->kprobe_orig_tstate_pil); +} + +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs, kcb); + + /*Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the tpc points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->tpc = (unsigned long)cur->addr; + regs->tnpc = kcb->kprobe_orig_tnpc; + regs->tstate = ((regs->tstate & ~TSTATE_PIL) | + kcb->kprobe_orig_tstate_pil); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + + entry = search_exception_tables(regs->tpc); + if (entry) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + + return 0; +} + +/* + * Wrapper routine to for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + if (args->regs && user_mode(args->regs)) + return ret; + + switch (val) { + case DIE_DEBUG: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_DEBUG_2: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, + struct pt_regs *regs) +{ + BUG_ON(trap_level != 0x170 && trap_level != 0x171); + + if (user_mode(regs)) { + local_irq_enable(); + bad_trap(regs, trap_level); + return; + } + + /* trap_level == 0x170 --> ta 0x70 + * trap_level == 0x171 --> ta 0x71 + */ + if (notify_die((trap_level == 0x170) ? DIE_DEBUG : DIE_DEBUG_2, + (trap_level == 0x170) ? "debug" : "debug_2", + regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP) + bad_trap(regs, trap_level); +} + +/* Jprobes support. */ +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + memcpy(&(kcb->jprobe_saved_regs), regs, sizeof(*regs)); + + regs->tpc = (unsigned long) jp->entry; + regs->tnpc = ((unsigned long) jp->entry) + 0x4UL; + regs->tstate |= TSTATE_PIL; + + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + register unsigned long orig_fp asm("g1"); + + orig_fp = kcb->jprobe_saved_regs.u_regs[UREG_FP]; + __asm__ __volatile__("\n" +"1: cmp %%sp, %0\n\t" + "blu,a,pt %%xcc, 1b\n\t" + " restore\n\t" + ".globl jprobe_return_trap_instruction\n" +"jprobe_return_trap_instruction:\n\t" + "ta 0x70" + : /* no outputs */ + : "r" (orig_fp)); +} + +extern void jprobe_return_trap_instruction(void); + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + u32 *addr = (u32 *) regs->tpc; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (addr == (u32 *) jprobe_return_trap_instruction) { + memcpy(regs, &(kcb->jprobe_saved_regs), sizeof(*regs)); + preempt_enable_no_resched(); + return 1; + } + return 0; +} + +/* The value stored in the return address register is actually 2 + * instructions before where the callee will return to. + * Sequences usually look something like this + * + * call some_function <--- return register points here + * nop <--- call delay slot + * whatever <--- where callee returns to + * + * To keep trampoline_probe_handler logic simpler, we normalize the + * value kept in ri->ret_addr so we don't need to keep adjusting it + * back and forth. + */ +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)(regs->u_regs[UREG_RETPC] + 8); + + /* Replace the return addr with trampoline addr */ + regs->u_regs[UREG_RETPC] = + ((unsigned long)kretprobe_trampoline) - 8; +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *node, *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->tpc = orig_ret_address; + regs->tnpc = orig_ret_address + 4; + + reset_current_kprobe(); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +void kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n" + "\tnop\n" + "\tnop\n"); +} +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + return 1; + + return 0; +} diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h new file mode 100644 index 000000000000..4248d969272f --- /dev/null +++ b/arch/sparc/kernel/kstack.h @@ -0,0 +1,60 @@ +#ifndef _KSTACK_H +#define _KSTACK_H + +#include <linux/thread_info.h> +#include <linux/sched.h> +#include <asm/ptrace.h> +#include <asm/irq.h> + +/* SP must be STACK_BIAS adjusted already. */ +static inline bool kstack_valid(struct thread_info *tp, unsigned long sp) +{ + unsigned long base = (unsigned long) tp; + + if (sp >= (base + sizeof(struct thread_info)) && + sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf))) + return true; + + if (hardirq_stack[tp->cpu]) { + base = (unsigned long) hardirq_stack[tp->cpu]; + if (sp >= base && + sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf))) + return true; + base = (unsigned long) softirq_stack[tp->cpu]; + if (sp >= base && + sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf))) + return true; + } + return false; +} + +/* Does "regs" point to a valid pt_regs trap frame? */ +static inline bool kstack_is_trap_frame(struct thread_info *tp, struct pt_regs *regs) +{ + unsigned long base = (unsigned long) tp; + unsigned long addr = (unsigned long) regs; + + if (addr >= base && + addr <= (base + THREAD_SIZE - sizeof(*regs))) + goto check_magic; + + if (hardirq_stack[tp->cpu]) { + base = (unsigned long) hardirq_stack[tp->cpu]; + if (addr >= base && + addr <= (base + THREAD_SIZE - sizeof(*regs))) + goto check_magic; + base = (unsigned long) softirq_stack[tp->cpu]; + if (addr >= base && + addr <= (base + THREAD_SIZE - sizeof(*regs))) + goto check_magic; + } + return false; + +check_magic: + if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) + return true; + return false; + +} + +#endif /* _KSTACK_H */ diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S new file mode 100644 index 000000000000..cef8defcd7a9 --- /dev/null +++ b/arch/sparc/kernel/ktlb.S @@ -0,0 +1,304 @@ +/* arch/sparc64/kernel/ktlb.S: Kernel mapping TLB miss handling. + * + * Copyright (C) 1995, 1997, 2005, 2008 David S. Miller <davem@davemloft.net> + * Copyright (C) 1996 Eddie C. Dost (ecd@brainaid.de) + * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) + * Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <asm/head.h> +#include <asm/asi.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/tsb.h> + + .text + .align 32 + +kvmap_itlb: + /* g6: TAG TARGET */ + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + + /* sun4v_itlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +kvmap_itlb_4v: + +kvmap_itlb_nonlinear: + /* Catch kernel NULL pointer calls. */ + sethi %hi(PAGE_SIZE), %g5 + cmp %g4, %g5 + bleu,pn %xcc, kvmap_dtlb_longpath + nop + + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load) + +kvmap_itlb_tsb_miss: + sethi %hi(LOW_OBP_ADDRESS), %g5 + cmp %g4, %g5 + blu,pn %xcc, kvmap_itlb_vmalloc_addr + mov 0x1, %g5 + sllx %g5, 32, %g5 + cmp %g4, %g5 + blu,pn %xcc, kvmap_itlb_obp + nop + +kvmap_itlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_itlb_longpath + KTSB_STORE(%g1, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ + +kvmap_itlb_load: + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 + +kvmap_itlb_longpath: + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + nop + .previous + + rdpr %tpc, %g5 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_ITLB, %g4 + +kvmap_itlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_itlb_load + nop + +kvmap_dtlb_obp: + OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + ba,pt %xcc, kvmap_dtlb_load + nop + + .align 32 +kvmap_dtlb_tsb4m_load: + KTSB_LOCK_TAG(%g1, %g2, %g7) + KTSB_WRITE(%g1, %g5, %g6) + ba,pt %xcc, kvmap_dtlb_load + nop + +kvmap_dtlb: + /* %g6: TAG TARGET */ + mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + + /* sun4v_dtlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +kvmap_dtlb_4v: + brgez,pn %g4, kvmap_dtlb_nonlinear + nop + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* Index through the base page size TSB even for linear + * mappings when using page allocation debugging. + */ + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) +#else + /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ + KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) +#endif + /* TSB entry address left in %g1, lookup linear PTE. + * Must preserve %g1 and %g6 (TAG). + */ +kvmap_dtlb_tsb4m_miss: + sethi %hi(kpte_linear_bitmap), %g2 + or %g2, %lo(kpte_linear_bitmap), %g2 + + /* Clear the PAGE_OFFSET top virtual bits, then shift + * down to get a 256MB physical address index. + */ + sllx %g4, 21, %g5 + mov 1, %g7 + srlx %g5, 21 + 28, %g5 + + /* Don't try this at home kids... this depends upon srlx + * only taking the low 6 bits of the shift count in %g5. + */ + sllx %g7, %g5, %g7 + + /* Divide by 64 to get the offset into the bitmask. */ + srlx %g5, 6, %g5 + sllx %g5, 3, %g5 + + /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ + ldx [%g2 + %g5], %g2 + andcc %g2, %g7, %g0 + sethi %hi(kern_linear_pte_xor), %g5 + or %g5, %lo(kern_linear_pte_xor), %g5 + bne,a,pt %xcc, 1f + add %g5, 8, %g5 + +1: ldx [%g5], %g2 + + .globl kvmap_linear_patch +kvmap_linear_patch: + ba,pt %xcc, kvmap_dtlb_tsb4m_load + xor %g2, %g4, %g5 + +kvmap_dtlb_vmalloc_addr: + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + + KTSB_LOCK_TAG(%g1, %g2, %g7) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + mov 1, %g7 + sllx %g7, TSB_TAG_INVALID_BIT, %g7 + brgez,a,pn %g5, kvmap_dtlb_longpath + KTSB_STORE(%g1, %g7) + + KTSB_WRITE(%g1, %g5, %g6) + + /* fallthrough to TLB load */ + +kvmap_dtlb_load: + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +kvmap_vmemmap: + sub %g4, %g5, %g5 + srlx %g5, 22, %g5 + sethi %hi(vmemmap_table), %g1 + sllx %g5, 3, %g5 + or %g1, %lo(vmemmap_table), %g1 + ba,pt %xcc, kvmap_dtlb_load + ldx [%g1 + %g5], %g5 +#endif + +kvmap_dtlb_nonlinear: + /* Catch kernel NULL pointer derefs. */ + sethi %hi(PAGE_SIZE), %g5 + cmp %g4, %g5 + bleu,pn %xcc, kvmap_dtlb_longpath + nop + +#ifdef CONFIG_SPARSEMEM_VMEMMAP + /* Do not use the TSB for vmemmap. */ + mov (VMEMMAP_BASE >> 24), %g5 + sllx %g5, 24, %g5 + cmp %g4,%g5 + bgeu,pn %xcc, kvmap_vmemmap + nop +#endif + + KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) + +kvmap_dtlb_tsbmiss: + sethi %hi(MODULES_VADDR), %g5 + cmp %g4, %g5 + blu,pn %xcc, kvmap_dtlb_longpath + mov (VMALLOC_END >> 24), %g5 + sllx %g5, 24, %g5 + cmp %g4, %g5 + bgeu,pn %xcc, kvmap_dtlb_longpath + nop + +kvmap_check_obp: + sethi %hi(LOW_OBP_ADDRESS), %g5 + cmp %g4, %g5 + blu,pn %xcc, kvmap_dtlb_vmalloc_addr + mov 0x1, %g5 + sllx %g5, 32, %g5 + cmp %g4, %g5 + blu,pn %xcc, kvmap_dtlb_obp + nop + ba,pt %xcc, kvmap_dtlb_vmalloc_addr + nop + +kvmap_dtlb_longpath: + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g5 + .previous + + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g5 + .section .sun4v_2insn_patch, "ax" + .word 661b + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 + nop + .previous + + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline + nop diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c new file mode 100644 index 000000000000..d68982330f66 --- /dev/null +++ b/arch/sparc/kernel/ldc.c @@ -0,0 +1,2378 @@ +/* ldc.c: Logical Domain Channel link-layer protocol driver. + * + * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/scatterlist.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/init.h> + +#include <asm/hypervisor.h> +#include <asm/iommu.h> +#include <asm/page.h> +#include <asm/ldc.h> +#include <asm/mdesc.h> + +#define DRV_MODULE_NAME "ldc" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.1" +#define DRV_MODULE_RELDATE "July 22, 2008" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +#define LDC_PACKET_SIZE 64 + +/* Packet header layout for unreliable and reliable mode frames. + * When in RAW mode, packets are simply straight 64-byte payloads + * with no headers. + */ +struct ldc_packet { + u8 type; +#define LDC_CTRL 0x01 +#define LDC_DATA 0x02 +#define LDC_ERR 0x10 + + u8 stype; +#define LDC_INFO 0x01 +#define LDC_ACK 0x02 +#define LDC_NACK 0x04 + + u8 ctrl; +#define LDC_VERS 0x01 /* Link Version */ +#define LDC_RTS 0x02 /* Request To Send */ +#define LDC_RTR 0x03 /* Ready To Receive */ +#define LDC_RDX 0x04 /* Ready for Data eXchange */ +#define LDC_CTRL_MSK 0x0f + + u8 env; +#define LDC_LEN 0x3f +#define LDC_FRAG_MASK 0xc0 +#define LDC_START 0x40 +#define LDC_STOP 0x80 + + u32 seqid; + + union { + u8 u_data[LDC_PACKET_SIZE - 8]; + struct { + u32 pad; + u32 ackid; + u8 r_data[LDC_PACKET_SIZE - 8 - 8]; + } r; + } u; +}; + +struct ldc_version { + u16 major; + u16 minor; +}; + +/* Ordered from largest major to lowest. */ +static struct ldc_version ver_arr[] = { + { .major = 1, .minor = 0 }, +}; + +#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE) +#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE) + +struct ldc_channel; + +struct ldc_mode_ops { + int (*write)(struct ldc_channel *, const void *, unsigned int); + int (*read)(struct ldc_channel *, void *, unsigned int); +}; + +static const struct ldc_mode_ops raw_ops; +static const struct ldc_mode_ops nonraw_ops; +static const struct ldc_mode_ops stream_ops; + +int ldom_domaining_enabled; + +struct ldc_iommu { + /* Protects arena alloc/free. */ + spinlock_t lock; + struct iommu_arena arena; + struct ldc_mtable_entry *page_table; +}; + +struct ldc_channel { + /* Protects all operations that depend upon channel state. */ + spinlock_t lock; + + unsigned long id; + + u8 *mssbuf; + u32 mssbuf_len; + u32 mssbuf_off; + + struct ldc_packet *tx_base; + unsigned long tx_head; + unsigned long tx_tail; + unsigned long tx_num_entries; + unsigned long tx_ra; + + unsigned long tx_acked; + + struct ldc_packet *rx_base; + unsigned long rx_head; + unsigned long rx_tail; + unsigned long rx_num_entries; + unsigned long rx_ra; + + u32 rcv_nxt; + u32 snd_nxt; + + unsigned long chan_state; + + struct ldc_channel_config cfg; + void *event_arg; + + const struct ldc_mode_ops *mops; + + struct ldc_iommu iommu; + + struct ldc_version ver; + + u8 hs_state; +#define LDC_HS_CLOSED 0x00 +#define LDC_HS_OPEN 0x01 +#define LDC_HS_GOTVERS 0x02 +#define LDC_HS_SENTRTR 0x03 +#define LDC_HS_GOTRTR 0x04 +#define LDC_HS_COMPLETE 0x10 + + u8 flags; +#define LDC_FLAG_ALLOCED_QUEUES 0x01 +#define LDC_FLAG_REGISTERED_QUEUES 0x02 +#define LDC_FLAG_REGISTERED_IRQS 0x04 +#define LDC_FLAG_RESET 0x10 + + u8 mss; + u8 state; + +#define LDC_IRQ_NAME_MAX 32 + char rx_irq_name[LDC_IRQ_NAME_MAX]; + char tx_irq_name[LDC_IRQ_NAME_MAX]; + + struct hlist_head mh_list; + + struct hlist_node list; +}; + +#define ldcdbg(TYPE, f, a...) \ +do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \ + printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \ +} while (0) + +static const char *state_to_str(u8 state) +{ + switch (state) { + case LDC_STATE_INVALID: + return "INVALID"; + case LDC_STATE_INIT: + return "INIT"; + case LDC_STATE_BOUND: + return "BOUND"; + case LDC_STATE_READY: + return "READY"; + case LDC_STATE_CONNECTED: + return "CONNECTED"; + default: + return "<UNKNOWN>"; + } +} + +static void ldc_set_state(struct ldc_channel *lp, u8 state) +{ + ldcdbg(STATE, "STATE (%s) --> (%s)\n", + state_to_str(lp->state), + state_to_str(state)); + + lp->state = state; +} + +static unsigned long __advance(unsigned long off, unsigned long num_entries) +{ + off += LDC_PACKET_SIZE; + if (off == (num_entries * LDC_PACKET_SIZE)) + off = 0; + + return off; +} + +static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off) +{ + return __advance(off, lp->rx_num_entries); +} + +static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off) +{ + return __advance(off, lp->tx_num_entries); +} + +static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp, + unsigned long *new_tail) +{ + struct ldc_packet *p; + unsigned long t; + + t = tx_advance(lp, lp->tx_tail); + if (t == lp->tx_head) + return NULL; + + *new_tail = t; + + p = lp->tx_base; + return p + (lp->tx_tail / LDC_PACKET_SIZE); +} + +/* When we are in reliable or stream mode, have to track the next packet + * we haven't gotten an ACK for in the TX queue using tx_acked. We have + * to be careful not to stomp over the queue past that point. During + * the handshake, we don't have TX data packets pending in the queue + * and that's why handshake_get_tx_packet() need not be mindful of + * lp->tx_acked. + */ +static unsigned long head_for_data(struct ldc_channel *lp) +{ + if (lp->cfg.mode == LDC_MODE_STREAM) + return lp->tx_acked; + return lp->tx_head; +} + +static int tx_has_space_for(struct ldc_channel *lp, unsigned int size) +{ + unsigned long limit, tail, new_tail, diff; + unsigned int mss; + + limit = head_for_data(lp); + tail = lp->tx_tail; + new_tail = tx_advance(lp, tail); + if (new_tail == limit) + return 0; + + if (limit > new_tail) + diff = limit - new_tail; + else + diff = (limit + + ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail)); + diff /= LDC_PACKET_SIZE; + mss = lp->mss; + + if (diff * mss < size) + return 0; + + return 1; +} + +static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp, + unsigned long *new_tail) +{ + struct ldc_packet *p; + unsigned long h, t; + + h = head_for_data(lp); + t = tx_advance(lp, lp->tx_tail); + if (t == h) + return NULL; + + *new_tail = t; + + p = lp->tx_base; + return p + (lp->tx_tail / LDC_PACKET_SIZE); +} + +static int set_tx_tail(struct ldc_channel *lp, unsigned long tail) +{ + unsigned long orig_tail = lp->tx_tail; + int limit = 1000; + + lp->tx_tail = tail; + while (limit-- > 0) { + unsigned long err; + + err = sun4v_ldc_tx_set_qtail(lp->id, tail); + if (!err) + return 0; + + if (err != HV_EWOULDBLOCK) { + lp->tx_tail = orig_tail; + return -EINVAL; + } + udelay(1); + } + + lp->tx_tail = orig_tail; + return -EBUSY; +} + +/* This just updates the head value in the hypervisor using + * a polling loop with a timeout. The caller takes care of + * upating software state representing the head change, if any. + */ +static int __set_rx_head(struct ldc_channel *lp, unsigned long head) +{ + int limit = 1000; + + while (limit-- > 0) { + unsigned long err; + + err = sun4v_ldc_rx_set_qhead(lp->id, head); + if (!err) + return 0; + + if (err != HV_EWOULDBLOCK) + return -EINVAL; + + udelay(1); + } + + return -EBUSY; +} + +static int send_tx_packet(struct ldc_channel *lp, + struct ldc_packet *p, + unsigned long new_tail) +{ + BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE))); + + return set_tx_tail(lp, new_tail); +} + +static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp, + u8 stype, u8 ctrl, + void *data, int dlen, + unsigned long *new_tail) +{ + struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail); + + if (p) { + memset(p, 0, sizeof(*p)); + p->type = LDC_CTRL; + p->stype = stype; + p->ctrl = ctrl; + if (data) + memcpy(p->u.u_data, data, dlen); + } + return p; +} + +static int start_handshake(struct ldc_channel *lp) +{ + struct ldc_packet *p; + struct ldc_version *ver; + unsigned long new_tail; + + ver = &ver_arr[0]; + + ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n", + ver->major, ver->minor); + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS, + ver, sizeof(*ver), &new_tail); + if (p) { + int err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->flags &= ~LDC_FLAG_RESET; + return err; + } + return -EBUSY; +} + +static int send_version_nack(struct ldc_channel *lp, + u16 major, u16 minor) +{ + struct ldc_packet *p; + struct ldc_version ver; + unsigned long new_tail; + + ver.major = major; + ver.minor = minor; + + p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS, + &ver, sizeof(ver), &new_tail); + if (p) { + ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n", + ver.major, ver.minor); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_version_ack(struct ldc_channel *lp, + struct ldc_version *vp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS, + vp, sizeof(*vp), &new_tail); + if (p) { + ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n", + vp->major, vp->minor); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_rts(struct ldc_channel *lp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0, + &new_tail); + if (p) { + p->env = lp->cfg.mode; + p->seqid = 0; + lp->rcv_nxt = 0; + + ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n", + p->env, p->seqid); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_rtr(struct ldc_channel *lp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0, + &new_tail); + if (p) { + p->env = lp->cfg.mode; + p->seqid = 0; + + ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n", + p->env, p->seqid); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_rdx(struct ldc_channel *lp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0, + &new_tail); + if (p) { + p->env = 0; + p->seqid = ++lp->snd_nxt; + p->u.r.ackid = lp->rcv_nxt; + + ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n", + p->env, p->seqid, p->u.r.ackid); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt) +{ + struct ldc_packet *p; + unsigned long new_tail; + int err; + + p = data_get_tx_packet(lp, &new_tail); + if (!p) + return -EBUSY; + memset(p, 0, sizeof(*p)); + p->type = data_pkt->type; + p->stype = LDC_NACK; + p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK; + p->seqid = lp->snd_nxt + 1; + p->u.r.ackid = lp->rcv_nxt; + + ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n", + p->type, p->ctrl, p->seqid, p->u.r.ackid); + + err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->snd_nxt++; + + return err; +} + +static int ldc_abort(struct ldc_channel *lp) +{ + unsigned long hv_err; + + ldcdbg(STATE, "ABORT\n"); + + /* We report but do not act upon the hypervisor errors because + * there really isn't much we can do if they fail at this point. + */ + hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n", + lp->id, lp->tx_ra, lp->tx_num_entries, hv_err); + + hv_err = sun4v_ldc_tx_get_state(lp->id, + &lp->tx_head, + &lp->tx_tail, + &lp->chan_state); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n", + lp->id, hv_err); + + hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n", + lp->id, lp->rx_ra, lp->rx_num_entries, hv_err); + + /* Refetch the RX queue state as well, because we could be invoked + * here in the queue processing context. + */ + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n", + lp->id, hv_err); + + return -ECONNRESET; +} + +static struct ldc_version *find_by_major(u16 major) +{ + struct ldc_version *ret = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(ver_arr); i++) { + struct ldc_version *v = &ver_arr[i]; + if (v->major <= major) { + ret = v; + break; + } + } + return ret; +} + +static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp) +{ + struct ldc_version *vap; + int err; + + ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n", + vp->major, vp->minor); + + if (lp->hs_state == LDC_HS_GOTVERS) { + lp->hs_state = LDC_HS_OPEN; + memset(&lp->ver, 0, sizeof(lp->ver)); + } + + vap = find_by_major(vp->major); + if (!vap) { + err = send_version_nack(lp, 0, 0); + } else if (vap->major != vp->major) { + err = send_version_nack(lp, vap->major, vap->minor); + } else { + struct ldc_version ver = *vp; + if (ver.minor > vap->minor) + ver.minor = vap->minor; + err = send_version_ack(lp, &ver); + if (!err) { + lp->ver = ver; + lp->hs_state = LDC_HS_GOTVERS; + } + } + if (err) + return ldc_abort(lp); + + return 0; +} + +static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp) +{ + ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n", + vp->major, vp->minor); + + if (lp->hs_state == LDC_HS_GOTVERS) { + if (lp->ver.major != vp->major || + lp->ver.minor != vp->minor) + return ldc_abort(lp); + } else { + lp->ver = *vp; + lp->hs_state = LDC_HS_GOTVERS; + } + if (send_rts(lp)) + return ldc_abort(lp); + return 0; +} + +static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp) +{ + struct ldc_version *vap; + + if ((vp->major == 0 && vp->minor == 0) || + !(vap = find_by_major(vp->major))) { + return ldc_abort(lp); + } else { + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS, + vap, sizeof(*vap), + &new_tail); + if (p) + return send_tx_packet(lp, p, new_tail); + else + return ldc_abort(lp); + } +} + +static int process_version(struct ldc_channel *lp, + struct ldc_packet *p) +{ + struct ldc_version *vp; + + vp = (struct ldc_version *) p->u.u_data; + + switch (p->stype) { + case LDC_INFO: + return process_ver_info(lp, vp); + + case LDC_ACK: + return process_ver_ack(lp, vp); + + case LDC_NACK: + return process_ver_nack(lp, vp); + + default: + return ldc_abort(lp); + } +} + +static int process_rts(struct ldc_channel *lp, + struct ldc_packet *p) +{ + ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n", + p->stype, p->seqid, p->env); + + if (p->stype != LDC_INFO || + lp->hs_state != LDC_HS_GOTVERS || + p->env != lp->cfg.mode) + return ldc_abort(lp); + + lp->snd_nxt = p->seqid; + lp->rcv_nxt = p->seqid; + lp->hs_state = LDC_HS_SENTRTR; + if (send_rtr(lp)) + return ldc_abort(lp); + + return 0; +} + +static int process_rtr(struct ldc_channel *lp, + struct ldc_packet *p) +{ + ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n", + p->stype, p->seqid, p->env); + + if (p->stype != LDC_INFO || + p->env != lp->cfg.mode) + return ldc_abort(lp); + + lp->snd_nxt = p->seqid; + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + send_rdx(lp); + + return LDC_EVENT_UP; +} + +static int rx_seq_ok(struct ldc_channel *lp, u32 seqid) +{ + return lp->rcv_nxt + 1 == seqid; +} + +static int process_rdx(struct ldc_channel *lp, + struct ldc_packet *p) +{ + ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n", + p->stype, p->seqid, p->env, p->u.r.ackid); + + if (p->stype != LDC_INFO || + !(rx_seq_ok(lp, p->seqid))) + return ldc_abort(lp); + + lp->rcv_nxt = p->seqid; + + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + + return LDC_EVENT_UP; +} + +static int process_control_frame(struct ldc_channel *lp, + struct ldc_packet *p) +{ + switch (p->ctrl) { + case LDC_VERS: + return process_version(lp, p); + + case LDC_RTS: + return process_rts(lp, p); + + case LDC_RTR: + return process_rtr(lp, p); + + case LDC_RDX: + return process_rdx(lp, p); + + default: + return ldc_abort(lp); + } +} + +static int process_error_frame(struct ldc_channel *lp, + struct ldc_packet *p) +{ + return ldc_abort(lp); +} + +static int process_data_ack(struct ldc_channel *lp, + struct ldc_packet *ack) +{ + unsigned long head = lp->tx_acked; + u32 ackid = ack->u.r.ackid; + + while (1) { + struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE); + + head = tx_advance(lp, head); + + if (p->seqid == ackid) { + lp->tx_acked = head; + return 0; + } + if (head == lp->tx_tail) + return ldc_abort(lp); + } + + return 0; +} + +static void send_events(struct ldc_channel *lp, unsigned int event_mask) +{ + if (event_mask & LDC_EVENT_RESET) + lp->cfg.event(lp->event_arg, LDC_EVENT_RESET); + if (event_mask & LDC_EVENT_UP) + lp->cfg.event(lp->event_arg, LDC_EVENT_UP); + if (event_mask & LDC_EVENT_DATA_READY) + lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY); +} + +static irqreturn_t ldc_rx(int irq, void *dev_id) +{ + struct ldc_channel *lp = dev_id; + unsigned long orig_state, hv_err, flags; + unsigned int event_mask; + + spin_lock_irqsave(&lp->lock, flags); + + orig_state = lp->chan_state; + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + + ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n", + orig_state, lp->chan_state, lp->rx_head, lp->rx_tail); + + event_mask = 0; + + if (lp->cfg.mode == LDC_MODE_RAW && + lp->chan_state == LDC_CHANNEL_UP) { + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + + event_mask |= LDC_EVENT_UP; + + orig_state = lp->chan_state; + } + + /* If we are in reset state, flush the RX queue and ignore + * everything. + */ + if (lp->flags & LDC_FLAG_RESET) { + (void) __set_rx_head(lp, lp->rx_tail); + goto out; + } + + /* Once we finish the handshake, we let the ldc_read() + * paths do all of the control frame and state management. + * Just trigger the callback. + */ + if (lp->hs_state == LDC_HS_COMPLETE) { +handshake_complete: + if (lp->chan_state != orig_state) { + unsigned int event = LDC_EVENT_RESET; + + if (lp->chan_state == LDC_CHANNEL_UP) + event = LDC_EVENT_UP; + + event_mask |= event; + } + if (lp->rx_head != lp->rx_tail) + event_mask |= LDC_EVENT_DATA_READY; + + goto out; + } + + if (lp->chan_state != orig_state) + goto out; + + while (lp->rx_head != lp->rx_tail) { + struct ldc_packet *p; + unsigned long new; + int err; + + p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE); + + switch (p->type) { + case LDC_CTRL: + err = process_control_frame(lp, p); + if (err > 0) + event_mask |= err; + break; + + case LDC_DATA: + event_mask |= LDC_EVENT_DATA_READY; + err = 0; + break; + + case LDC_ERR: + err = process_error_frame(lp, p); + break; + + default: + err = ldc_abort(lp); + break; + } + + if (err < 0) + break; + + new = lp->rx_head; + new += LDC_PACKET_SIZE; + if (new == (lp->rx_num_entries * LDC_PACKET_SIZE)) + new = 0; + lp->rx_head = new; + + err = __set_rx_head(lp, new); + if (err < 0) { + (void) ldc_abort(lp); + break; + } + if (lp->hs_state == LDC_HS_COMPLETE) + goto handshake_complete; + } + +out: + spin_unlock_irqrestore(&lp->lock, flags); + + send_events(lp, event_mask); + + return IRQ_HANDLED; +} + +static irqreturn_t ldc_tx(int irq, void *dev_id) +{ + struct ldc_channel *lp = dev_id; + unsigned long flags, hv_err, orig_state; + unsigned int event_mask = 0; + + spin_lock_irqsave(&lp->lock, flags); + + orig_state = lp->chan_state; + hv_err = sun4v_ldc_tx_get_state(lp->id, + &lp->tx_head, + &lp->tx_tail, + &lp->chan_state); + + ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n", + orig_state, lp->chan_state, lp->tx_head, lp->tx_tail); + + if (lp->cfg.mode == LDC_MODE_RAW && + lp->chan_state == LDC_CHANNEL_UP) { + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + + event_mask |= LDC_EVENT_UP; + } + + spin_unlock_irqrestore(&lp->lock, flags); + + send_events(lp, event_mask); + + return IRQ_HANDLED; +} + +/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so + * XXX that addition and removal from the ldc_channel_list has + * XXX atomicity, otherwise the __ldc_channel_exists() check is + * XXX totally pointless as another thread can slip into ldc_alloc() + * XXX and add a channel with the same ID. There also needs to be + * XXX a spinlock for ldc_channel_list. + */ +static HLIST_HEAD(ldc_channel_list); + +static int __ldc_channel_exists(unsigned long id) +{ + struct ldc_channel *lp; + struct hlist_node *n; + + hlist_for_each_entry(lp, n, &ldc_channel_list, list) { + if (lp->id == id) + return 1; + } + return 0; +} + +static int alloc_queue(const char *name, unsigned long num_entries, + struct ldc_packet **base, unsigned long *ra) +{ + unsigned long size, order; + void *q; + + size = num_entries * LDC_PACKET_SIZE; + order = get_order(size); + + q = (void *) __get_free_pages(GFP_KERNEL, order); + if (!q) { + printk(KERN_ERR PFX "Alloc of %s queue failed with " + "size=%lu order=%lu\n", name, size, order); + return -ENOMEM; + } + + memset(q, 0, PAGE_SIZE << order); + + *base = q; + *ra = __pa(q); + + return 0; +} + +static void free_queue(unsigned long num_entries, struct ldc_packet *q) +{ + unsigned long size, order; + + if (!q) + return; + + size = num_entries * LDC_PACKET_SIZE; + order = get_order(size); + + free_pages((unsigned long)q, order); +} + +/* XXX Make this configurable... XXX */ +#define LDC_IOTABLE_SIZE (8 * 1024) + +static int ldc_iommu_init(struct ldc_channel *lp) +{ + unsigned long sz, num_tsb_entries, tsbsize, order; + struct ldc_iommu *iommu = &lp->iommu; + struct ldc_mtable_entry *table; + unsigned long hv_err; + int err; + + num_tsb_entries = LDC_IOTABLE_SIZE; + tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); + + spin_lock_init(&iommu->lock); + + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kzalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz); + return -ENOMEM; + } + + iommu->arena.limit = num_tsb_entries; + + order = get_order(tsbsize); + + table = (struct ldc_mtable_entry *) + __get_free_pages(GFP_KERNEL, order); + err = -ENOMEM; + if (!table) { + printk(KERN_ERR PFX "Alloc of MTE table failed, " + "size=%lu order=%lu\n", tsbsize, order); + goto out_free_map; + } + + memset(table, 0, PAGE_SIZE << order); + + iommu->page_table = table; + + hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table), + num_tsb_entries); + err = -EINVAL; + if (hv_err) + goto out_free_table; + + return 0; + +out_free_table: + free_pages((unsigned long) table, order); + iommu->page_table = NULL; + +out_free_map: + kfree(iommu->arena.map); + iommu->arena.map = NULL; + + return err; +} + +static void ldc_iommu_release(struct ldc_channel *lp) +{ + struct ldc_iommu *iommu = &lp->iommu; + unsigned long num_tsb_entries, tsbsize, order; + + (void) sun4v_ldc_set_map_table(lp->id, 0, 0); + + num_tsb_entries = iommu->arena.limit; + tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); + order = get_order(tsbsize); + + free_pages((unsigned long) iommu->page_table, order); + iommu->page_table = NULL; + + kfree(iommu->arena.map); + iommu->arena.map = NULL; +} + +struct ldc_channel *ldc_alloc(unsigned long id, + const struct ldc_channel_config *cfgp, + void *event_arg) +{ + struct ldc_channel *lp; + const struct ldc_mode_ops *mops; + unsigned long dummy1, dummy2, hv_err; + u8 mss, *mssbuf; + int err; + + err = -ENODEV; + if (!ldom_domaining_enabled) + goto out_err; + + err = -EINVAL; + if (!cfgp) + goto out_err; + + switch (cfgp->mode) { + case LDC_MODE_RAW: + mops = &raw_ops; + mss = LDC_PACKET_SIZE; + break; + + case LDC_MODE_UNRELIABLE: + mops = &nonraw_ops; + mss = LDC_PACKET_SIZE - 8; + break; + + case LDC_MODE_STREAM: + mops = &stream_ops; + mss = LDC_PACKET_SIZE - 8 - 8; + break; + + default: + goto out_err; + } + + if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq) + goto out_err; + + hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2); + err = -ENODEV; + if (hv_err == HV_ECHANNEL) + goto out_err; + + err = -EEXIST; + if (__ldc_channel_exists(id)) + goto out_err; + + mssbuf = NULL; + + lp = kzalloc(sizeof(*lp), GFP_KERNEL); + err = -ENOMEM; + if (!lp) + goto out_err; + + spin_lock_init(&lp->lock); + + lp->id = id; + + err = ldc_iommu_init(lp); + if (err) + goto out_free_ldc; + + lp->mops = mops; + lp->mss = mss; + + lp->cfg = *cfgp; + if (!lp->cfg.mtu) + lp->cfg.mtu = LDC_DEFAULT_MTU; + + if (lp->cfg.mode == LDC_MODE_STREAM) { + mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL); + if (!mssbuf) { + err = -ENOMEM; + goto out_free_iommu; + } + lp->mssbuf = mssbuf; + } + + lp->event_arg = event_arg; + + /* XXX allow setting via ldc_channel_config to override defaults + * XXX or use some formula based upon mtu + */ + lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES; + lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES; + + err = alloc_queue("TX", lp->tx_num_entries, + &lp->tx_base, &lp->tx_ra); + if (err) + goto out_free_mssbuf; + + err = alloc_queue("RX", lp->rx_num_entries, + &lp->rx_base, &lp->rx_ra); + if (err) + goto out_free_txq; + + lp->flags |= LDC_FLAG_ALLOCED_QUEUES; + + lp->hs_state = LDC_HS_CLOSED; + ldc_set_state(lp, LDC_STATE_INIT); + + INIT_HLIST_NODE(&lp->list); + hlist_add_head(&lp->list, &ldc_channel_list); + + INIT_HLIST_HEAD(&lp->mh_list); + + return lp; + +out_free_txq: + free_queue(lp->tx_num_entries, lp->tx_base); + +out_free_mssbuf: + if (mssbuf) + kfree(mssbuf); + +out_free_iommu: + ldc_iommu_release(lp); + +out_free_ldc: + kfree(lp); + +out_err: + return ERR_PTR(err); +} +EXPORT_SYMBOL(ldc_alloc); + +void ldc_free(struct ldc_channel *lp) +{ + if (lp->flags & LDC_FLAG_REGISTERED_IRQS) { + free_irq(lp->cfg.rx_irq, lp); + free_irq(lp->cfg.tx_irq, lp); + } + + if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) { + sun4v_ldc_tx_qconf(lp->id, 0, 0); + sun4v_ldc_rx_qconf(lp->id, 0, 0); + lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES; + } + if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) { + free_queue(lp->tx_num_entries, lp->tx_base); + free_queue(lp->rx_num_entries, lp->rx_base); + lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES; + } + + hlist_del(&lp->list); + + if (lp->mssbuf) + kfree(lp->mssbuf); + + ldc_iommu_release(lp); + + kfree(lp); +} +EXPORT_SYMBOL(ldc_free); + +/* Bind the channel. This registers the LDC queues with + * the hypervisor and puts the channel into a pseudo-listening + * state. This does not initiate a handshake, ldc_connect() does + * that. + */ +int ldc_bind(struct ldc_channel *lp, const char *name) +{ + unsigned long hv_err, flags; + int err = -EINVAL; + + if (!name || + (lp->state != LDC_STATE_INIT)) + return -EINVAL; + + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + + err = request_irq(lp->cfg.rx_irq, ldc_rx, + IRQF_SAMPLE_RANDOM | IRQF_SHARED, + lp->rx_irq_name, lp); + if (err) + return err; + + err = request_irq(lp->cfg.tx_irq, ldc_tx, + IRQF_SAMPLE_RANDOM | IRQF_SHARED, + lp->tx_irq_name, lp); + if (err) { + free_irq(lp->cfg.rx_irq, lp); + return err; + } + + + spin_lock_irqsave(&lp->lock, flags); + + enable_irq(lp->cfg.rx_irq); + enable_irq(lp->cfg.tx_irq); + + lp->flags |= LDC_FLAG_REGISTERED_IRQS; + + err = -ENODEV; + hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_free_irqs; + + hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); + if (hv_err) + goto out_free_irqs; + + hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_unmap_tx; + + hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); + if (hv_err) + goto out_unmap_tx; + + lp->flags |= LDC_FLAG_REGISTERED_QUEUES; + + hv_err = sun4v_ldc_tx_get_state(lp->id, + &lp->tx_head, + &lp->tx_tail, + &lp->chan_state); + err = -EBUSY; + if (hv_err) + goto out_unmap_rx; + + lp->tx_acked = lp->tx_head; + + lp->hs_state = LDC_HS_OPEN; + ldc_set_state(lp, LDC_STATE_BOUND); + + spin_unlock_irqrestore(&lp->lock, flags); + + return 0; + +out_unmap_rx: + lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES; + sun4v_ldc_rx_qconf(lp->id, 0, 0); + +out_unmap_tx: + sun4v_ldc_tx_qconf(lp->id, 0, 0); + +out_free_irqs: + lp->flags &= ~LDC_FLAG_REGISTERED_IRQS; + free_irq(lp->cfg.tx_irq, lp); + free_irq(lp->cfg.rx_irq, lp); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_bind); + +int ldc_connect(struct ldc_channel *lp) +{ + unsigned long flags; + int err; + + if (lp->cfg.mode == LDC_MODE_RAW) + return -EINVAL; + + spin_lock_irqsave(&lp->lock, flags); + + if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) || + !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) || + lp->hs_state != LDC_HS_OPEN) + err = -EINVAL; + else + err = start_handshake(lp); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_connect); + +int ldc_disconnect(struct ldc_channel *lp) +{ + unsigned long hv_err, flags; + int err; + + if (lp->cfg.mode == LDC_MODE_RAW) + return -EINVAL; + + if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) || + !(lp->flags & LDC_FLAG_REGISTERED_QUEUES)) + return -EINVAL; + + spin_lock_irqsave(&lp->lock, flags); + + err = -ENODEV; + hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_err; + + hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); + if (hv_err) + goto out_err; + + hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_err; + + hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); + if (hv_err) + goto out_err; + + ldc_set_state(lp, LDC_STATE_BOUND); + lp->hs_state = LDC_HS_OPEN; + lp->flags |= LDC_FLAG_RESET; + + spin_unlock_irqrestore(&lp->lock, flags); + + return 0; + +out_err: + sun4v_ldc_tx_qconf(lp->id, 0, 0); + sun4v_ldc_rx_qconf(lp->id, 0, 0); + free_irq(lp->cfg.tx_irq, lp); + free_irq(lp->cfg.rx_irq, lp); + lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS | + LDC_FLAG_REGISTERED_QUEUES); + ldc_set_state(lp, LDC_STATE_INIT); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_disconnect); + +int ldc_state(struct ldc_channel *lp) +{ + return lp->state; +} +EXPORT_SYMBOL(ldc_state); + +static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size) +{ + struct ldc_packet *p; + unsigned long new_tail; + int err; + + if (size > LDC_PACKET_SIZE) + return -EMSGSIZE; + + p = data_get_tx_packet(lp, &new_tail); + if (!p) + return -EAGAIN; + + memcpy(p, buf, size); + + err = send_tx_packet(lp, p, new_tail); + if (!err) + err = size; + + return err; +} + +static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size) +{ + struct ldc_packet *p; + unsigned long hv_err, new; + int err; + + if (size < LDC_PACKET_SIZE) + return -EINVAL; + + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + return ldc_abort(lp); + + if (lp->chan_state == LDC_CHANNEL_DOWN || + lp->chan_state == LDC_CHANNEL_RESETTING) + return -ECONNRESET; + + if (lp->rx_head == lp->rx_tail) + return 0; + + p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE); + memcpy(buf, p, LDC_PACKET_SIZE); + + new = rx_advance(lp, lp->rx_head); + lp->rx_head = new; + + err = __set_rx_head(lp, new); + if (err < 0) + err = -ECONNRESET; + else + err = LDC_PACKET_SIZE; + + return err; +} + +static const struct ldc_mode_ops raw_ops = { + .write = write_raw, + .read = read_raw, +}; + +static int write_nonraw(struct ldc_channel *lp, const void *buf, + unsigned int size) +{ + unsigned long hv_err, tail; + unsigned int copied; + u32 seq; + int err; + + hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail, + &lp->chan_state); + if (unlikely(hv_err)) + return -EBUSY; + + if (unlikely(lp->chan_state != LDC_CHANNEL_UP)) + return ldc_abort(lp); + + if (!tx_has_space_for(lp, size)) + return -EAGAIN; + + seq = lp->snd_nxt; + copied = 0; + tail = lp->tx_tail; + while (copied < size) { + struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE); + u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ? + p->u.u_data : + p->u.r.r_data); + int data_len; + + p->type = LDC_DATA; + p->stype = LDC_INFO; + p->ctrl = 0; + + data_len = size - copied; + if (data_len > lp->mss) + data_len = lp->mss; + + BUG_ON(data_len > LDC_LEN); + + p->env = (data_len | + (copied == 0 ? LDC_START : 0) | + (data_len == size - copied ? LDC_STOP : 0)); + + p->seqid = ++seq; + + ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n", + p->type, + p->stype, + p->ctrl, + p->env, + p->seqid); + + memcpy(data, buf, data_len); + buf += data_len; + copied += data_len; + + tail = tx_advance(lp, tail); + } + + err = set_tx_tail(lp, tail); + if (!err) { + lp->snd_nxt = seq; + err = size; + } + + return err; +} + +static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p, + struct ldc_packet *first_frag) +{ + int err; + + if (first_frag) + lp->rcv_nxt = first_frag->seqid - 1; + + err = send_data_nack(lp, p); + if (err) + return err; + + err = __set_rx_head(lp, lp->rx_tail); + if (err < 0) + return ldc_abort(lp); + + return 0; +} + +static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p) +{ + if (p->stype & LDC_ACK) { + int err = process_data_ack(lp, p); + if (err) + return err; + } + if (p->stype & LDC_NACK) + return ldc_abort(lp); + + return 0; +} + +static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head) +{ + unsigned long dummy; + int limit = 1000; + + ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n", + cur_head, lp->rx_head, lp->rx_tail); + while (limit-- > 0) { + unsigned long hv_err; + + hv_err = sun4v_ldc_rx_get_state(lp->id, + &dummy, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + return ldc_abort(lp); + + if (lp->chan_state == LDC_CHANNEL_DOWN || + lp->chan_state == LDC_CHANNEL_RESETTING) + return -ECONNRESET; + + if (cur_head != lp->rx_tail) { + ldcdbg(DATA, "DATA WAIT DONE " + "head[%lx] tail[%lx] chan_state[%lx]\n", + dummy, lp->rx_tail, lp->chan_state); + return 0; + } + + udelay(1); + } + return -EAGAIN; +} + +static int rx_set_head(struct ldc_channel *lp, unsigned long head) +{ + int err = __set_rx_head(lp, head); + + if (err < 0) + return ldc_abort(lp); + + lp->rx_head = head; + return 0; +} + +static void send_data_ack(struct ldc_channel *lp) +{ + unsigned long new_tail; + struct ldc_packet *p; + + p = data_get_tx_packet(lp, &new_tail); + if (likely(p)) { + int err; + + memset(p, 0, sizeof(*p)); + p->type = LDC_DATA; + p->stype = LDC_ACK; + p->ctrl = 0; + p->seqid = lp->snd_nxt + 1; + p->u.r.ackid = lp->rcv_nxt; + + err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->snd_nxt++; + } +} + +static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) +{ + struct ldc_packet *first_frag; + unsigned long hv_err, new; + int err, copied; + + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + return ldc_abort(lp); + + if (lp->chan_state == LDC_CHANNEL_DOWN || + lp->chan_state == LDC_CHANNEL_RESETTING) + return -ECONNRESET; + + if (lp->rx_head == lp->rx_tail) + return 0; + + first_frag = NULL; + copied = err = 0; + new = lp->rx_head; + while (1) { + struct ldc_packet *p; + int pkt_len; + + BUG_ON(new == lp->rx_tail); + p = lp->rx_base + (new / LDC_PACKET_SIZE); + + ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] " + "rcv_nxt[%08x]\n", + p->type, + p->stype, + p->ctrl, + p->env, + p->seqid, + p->u.r.ackid, + lp->rcv_nxt); + + if (unlikely(!rx_seq_ok(lp, p->seqid))) { + err = rx_bad_seq(lp, p, first_frag); + copied = 0; + break; + } + + if (p->type & LDC_CTRL) { + err = process_control_frame(lp, p); + if (err < 0) + break; + err = 0; + } + + lp->rcv_nxt = p->seqid; + + if (!(p->type & LDC_DATA)) { + new = rx_advance(lp, new); + goto no_data; + } + if (p->stype & (LDC_ACK | LDC_NACK)) { + err = data_ack_nack(lp, p); + if (err) + break; + } + if (!(p->stype & LDC_INFO)) { + new = rx_advance(lp, new); + err = rx_set_head(lp, new); + if (err) + break; + goto no_data; + } + + pkt_len = p->env & LDC_LEN; + + /* Every initial packet starts with the START bit set. + * + * Singleton packets will have both START+STOP set. + * + * Fragments will have START set in the first frame, STOP + * set in the last frame, and neither bit set in middle + * frames of the packet. + * + * Therefore if we are at the beginning of a packet and + * we don't see START, or we are in the middle of a fragmented + * packet and do see START, we are unsynchronized and should + * flush the RX queue. + */ + if ((first_frag == NULL && !(p->env & LDC_START)) || + (first_frag != NULL && (p->env & LDC_START))) { + if (!first_frag) + new = rx_advance(lp, new); + + err = rx_set_head(lp, new); + if (err) + break; + + if (!first_frag) + goto no_data; + } + if (!first_frag) + first_frag = p; + + if (pkt_len > size - copied) { + /* User didn't give us a big enough buffer, + * what to do? This is a pretty serious error. + * + * Since we haven't updated the RX ring head to + * consume any of the packets, signal the error + * to the user and just leave the RX ring alone. + * + * This seems the best behavior because this allows + * a user of the LDC layer to start with a small + * RX buffer for ldc_read() calls and use -EMSGSIZE + * as a cue to enlarge it's read buffer. + */ + err = -EMSGSIZE; + break; + } + + /* Ok, we are gonna eat this one. */ + new = rx_advance(lp, new); + + memcpy(buf, + (lp->cfg.mode == LDC_MODE_UNRELIABLE ? + p->u.u_data : p->u.r.r_data), pkt_len); + buf += pkt_len; + copied += pkt_len; + + if (p->env & LDC_STOP) + break; + +no_data: + if (new == lp->rx_tail) { + err = rx_data_wait(lp, new); + if (err) + break; + } + } + + if (!err) + err = rx_set_head(lp, new); + + if (err && first_frag) + lp->rcv_nxt = first_frag->seqid - 1; + + if (!err) { + err = copied; + if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE) + send_data_ack(lp); + } + + return err; +} + +static const struct ldc_mode_ops nonraw_ops = { + .write = write_nonraw, + .read = read_nonraw, +}; + +static int write_stream(struct ldc_channel *lp, const void *buf, + unsigned int size) +{ + if (size > lp->cfg.mtu) + size = lp->cfg.mtu; + return write_nonraw(lp, buf, size); +} + +static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size) +{ + if (!lp->mssbuf_len) { + int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu); + if (err < 0) + return err; + + lp->mssbuf_len = err; + lp->mssbuf_off = 0; + } + + if (size > lp->mssbuf_len) + size = lp->mssbuf_len; + memcpy(buf, lp->mssbuf + lp->mssbuf_off, size); + + lp->mssbuf_off += size; + lp->mssbuf_len -= size; + + return size; +} + +static const struct ldc_mode_ops stream_ops = { + .write = write_stream, + .read = read_stream, +}; + +int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size) +{ + unsigned long flags; + int err; + + if (!buf) + return -EINVAL; + + if (!size) + return 0; + + spin_lock_irqsave(&lp->lock, flags); + + if (lp->hs_state != LDC_HS_COMPLETE) + err = -ENOTCONN; + else + err = lp->mops->write(lp, buf, size); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_write); + +int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size) +{ + unsigned long flags; + int err; + + if (!buf) + return -EINVAL; + + if (!size) + return 0; + + spin_lock_irqsave(&lp->lock, flags); + + if (lp->hs_state != LDC_HS_COMPLETE) + err = -ENOTCONN; + else + err = lp->mops->read(lp, buf, size); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_read); + +static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages) +{ + struct iommu_arena *arena = &iommu->arena; + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +#define COOKIE_PGSZ_CODE 0xf000000000000000ULL +#define COOKIE_PGSZ_CODE_SHIFT 60ULL + +static u64 pagesize_code(void) +{ + switch (PAGE_SIZE) { + default: + case (8ULL * 1024ULL): + return 0; + case (64ULL * 1024ULL): + return 1; + case (512ULL * 1024ULL): + return 2; + case (4ULL * 1024ULL * 1024ULL): + return 3; + case (32ULL * 1024ULL * 1024ULL): + return 4; + case (256ULL * 1024ULL * 1024ULL): + return 5; + } +} + +static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset) +{ + return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) | + (index << PAGE_SHIFT) | + page_offset); +} + +static u64 cookie_to_index(u64 cookie, unsigned long *shift) +{ + u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT; + + cookie &= ~COOKIE_PGSZ_CODE; + + *shift = szcode * 3; + + return (cookie >> (13ULL + (szcode * 3ULL))); +} + +static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, + unsigned long npages) +{ + long entry; + + entry = arena_alloc(iommu, npages); + if (unlikely(entry < 0)) + return NULL; + + return iommu->page_table + entry; +} + +static u64 perm_to_mte(unsigned int map_perm) +{ + u64 mte_base; + + mte_base = pagesize_code(); + + if (map_perm & LDC_MAP_SHADOW) { + if (map_perm & LDC_MAP_R) + mte_base |= LDC_MTE_COPY_R; + if (map_perm & LDC_MAP_W) + mte_base |= LDC_MTE_COPY_W; + } + if (map_perm & LDC_MAP_DIRECT) { + if (map_perm & LDC_MAP_R) + mte_base |= LDC_MTE_READ; + if (map_perm & LDC_MAP_W) + mte_base |= LDC_MTE_WRITE; + if (map_perm & LDC_MAP_X) + mte_base |= LDC_MTE_EXEC; + } + if (map_perm & LDC_MAP_IO) { + if (map_perm & LDC_MAP_R) + mte_base |= LDC_MTE_IOMMU_R; + if (map_perm & LDC_MAP_W) + mte_base |= LDC_MTE_IOMMU_W; + } + + return mte_base; +} + +static int pages_in_region(unsigned long base, long len) +{ + int count = 0; + + do { + unsigned long new = (base + PAGE_SIZE) & PAGE_MASK; + + len -= (new - base); + base = new; + count++; + } while (len > 0); + + return count; +} + +struct cookie_state { + struct ldc_mtable_entry *page_table; + struct ldc_trans_cookie *cookies; + u64 mte_base; + u64 prev_cookie; + u32 pte_idx; + u32 nc; +}; + +static void fill_cookies(struct cookie_state *sp, unsigned long pa, + unsigned long off, unsigned long len) +{ + do { + unsigned long tlen, new = pa + PAGE_SIZE; + u64 this_cookie; + + sp->page_table[sp->pte_idx].mte = sp->mte_base | pa; + + tlen = PAGE_SIZE; + if (off) + tlen = PAGE_SIZE - off; + if (tlen > len) + tlen = len; + + this_cookie = make_cookie(sp->pte_idx, + pagesize_code(), off); + + off = 0; + + if (this_cookie == sp->prev_cookie) { + sp->cookies[sp->nc - 1].cookie_size += tlen; + } else { + sp->cookies[sp->nc].cookie_addr = this_cookie; + sp->cookies[sp->nc].cookie_size = tlen; + sp->nc++; + } + sp->prev_cookie = this_cookie + tlen; + + sp->pte_idx++; + + len -= tlen; + pa = new; + } while (len > 0); +} + +static int sg_count_one(struct scatterlist *sg) +{ + unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT; + long len = sg->length; + + if ((sg->offset | len) & (8UL - 1)) + return -EFAULT; + + return pages_in_region(base + sg->offset, len); +} + +static int sg_count_pages(struct scatterlist *sg, int num_sg) +{ + int count; + int i; + + count = 0; + for (i = 0; i < num_sg; i++) { + int err = sg_count_one(sg + i); + if (err < 0) + return err; + count += err; + } + + return count; +} + +int ldc_map_sg(struct ldc_channel *lp, + struct scatterlist *sg, int num_sg, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm) +{ + unsigned long i, npages, flags; + struct ldc_mtable_entry *base; + struct cookie_state state; + struct ldc_iommu *iommu; + int err; + + if (map_perm & ~LDC_MAP_ALL) + return -EINVAL; + + err = sg_count_pages(sg, num_sg); + if (err < 0) + return err; + + npages = err; + if (err > ncookies) + return -EMSGSIZE; + + iommu = &lp->iommu; + + spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (!base) + return -ENOMEM; + + state.page_table = iommu->page_table; + state.cookies = cookies; + state.mte_base = perm_to_mte(map_perm); + state.prev_cookie = ~(u64)0; + state.pte_idx = (base - iommu->page_table); + state.nc = 0; + + for (i = 0; i < num_sg; i++) + fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT, + sg[i].offset, sg[i].length); + + return state.nc; +} +EXPORT_SYMBOL(ldc_map_sg); + +int ldc_map_single(struct ldc_channel *lp, + void *buf, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm) +{ + unsigned long npages, pa, flags; + struct ldc_mtable_entry *base; + struct cookie_state state; + struct ldc_iommu *iommu; + + if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1)) + return -EINVAL; + + pa = __pa(buf); + if ((pa | len) & (8UL - 1)) + return -EFAULT; + + npages = pages_in_region(pa, len); + + iommu = &lp->iommu; + + spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (!base) + return -ENOMEM; + + state.page_table = iommu->page_table; + state.cookies = cookies; + state.mte_base = perm_to_mte(map_perm); + state.prev_cookie = ~(u64)0; + state.pte_idx = (base - iommu->page_table); + state.nc = 0; + fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len); + BUG_ON(state.nc != 1); + + return state.nc; +} +EXPORT_SYMBOL(ldc_map_single); + +static void free_npages(unsigned long id, struct ldc_iommu *iommu, + u64 cookie, u64 size) +{ + struct iommu_arena *arena = &iommu->arena; + unsigned long i, shift, index, npages; + struct ldc_mtable_entry *base; + + npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT; + index = cookie_to_index(cookie, &shift); + base = iommu->page_table + index; + + BUG_ON(index > arena->limit || + (index + npages) > arena->limit); + + for (i = 0; i < npages; i++) { + if (base->cookie) + sun4v_ldc_revoke(id, cookie + (i << shift), + base->cookie); + base->mte = 0; + __clear_bit(index + i, arena->map); + } +} + +void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, + int ncookies) +{ + struct ldc_iommu *iommu = &lp->iommu; + unsigned long flags; + int i; + + spin_lock_irqsave(&iommu->lock, flags); + for (i = 0; i < ncookies; i++) { + u64 addr = cookies[i].cookie_addr; + u64 size = cookies[i].cookie_size; + + free_npages(lp->id, iommu, addr, size); + } + spin_unlock_irqrestore(&iommu->lock, flags); +} +EXPORT_SYMBOL(ldc_unmap); + +int ldc_copy(struct ldc_channel *lp, int copy_dir, + void *buf, unsigned int len, unsigned long offset, + struct ldc_trans_cookie *cookies, int ncookies) +{ + unsigned int orig_len; + unsigned long ra; + int i; + + if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n", + lp->id, copy_dir); + return -EINVAL; + } + + ra = __pa(buf); + if ((ra | len | offset) & (8UL - 1)) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer " + "ra[%lx] len[%x] offset[%lx]\n", + lp->id, ra, len, offset); + return -EFAULT; + } + + if (lp->hs_state != LDC_HS_COMPLETE || + (lp->flags & LDC_FLAG_RESET)) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] " + "flags[%x]\n", lp->id, lp->hs_state, lp->flags); + return -ECONNRESET; + } + + orig_len = len; + for (i = 0; i < ncookies; i++) { + unsigned long cookie_raddr = cookies[i].cookie_addr; + unsigned long this_len = cookies[i].cookie_size; + unsigned long actual_len; + + if (unlikely(offset)) { + unsigned long this_off = offset; + + if (this_off > this_len) + this_off = this_len; + + offset -= this_off; + this_len -= this_off; + if (!this_len) + continue; + cookie_raddr += this_off; + } + + if (this_len > len) + this_len = len; + + while (1) { + unsigned long hv_err; + + hv_err = sun4v_ldc_copy(lp->id, copy_dir, + cookie_raddr, ra, + this_len, &actual_len); + if (unlikely(hv_err)) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] " + "HV error %lu\n", + lp->id, hv_err); + if (lp->hs_state != LDC_HS_COMPLETE || + (lp->flags & LDC_FLAG_RESET)) + return -ECONNRESET; + else + return -EFAULT; + } + + cookie_raddr += actual_len; + ra += actual_len; + len -= actual_len; + if (actual_len == this_len) + break; + + this_len -= actual_len; + } + + if (!len) + break; + } + + /* It is caller policy what to do about short copies. + * For example, a networking driver can declare the + * packet a runt and drop it. + */ + + return orig_len - len; +} +EXPORT_SYMBOL(ldc_copy); + +void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len, + struct ldc_trans_cookie *cookies, int *ncookies, + unsigned int map_perm) +{ + void *buf; + int err; + + if (len & (8UL - 1)) + return ERR_PTR(-EINVAL); + + buf = kzalloc(len, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm); + if (err < 0) { + kfree(buf); + return ERR_PTR(err); + } + *ncookies = err; + + return buf; +} +EXPORT_SYMBOL(ldc_alloc_exp_dring); + +void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies) +{ + ldc_unmap(lp, cookies, ncookies); + kfree(buf); +} +EXPORT_SYMBOL(ldc_free_exp_dring); + +static int __init ldc_init(void) +{ + unsigned long major, minor; + struct mdesc_handle *hp; + const u64 *v; + int err; + u64 mp; + + hp = mdesc_grab(); + if (!hp) + return -ENODEV; + + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); + err = -ENODEV; + if (mp == MDESC_NODE_NULL) + goto out; + + v = mdesc_get_property(hp, mp, "domaining-enabled", NULL); + if (!v) + goto out; + + major = 1; + minor = 0; + if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) { + printk(KERN_INFO PFX "Could not register LDOM hvapi.\n"); + goto out; + } + + printk(KERN_INFO "%s", version); + + if (!*v) { + printk(KERN_INFO PFX "Domaining disabled.\n"); + goto out; + } + ldom_domaining_enabled = 1; + err = 0; + +out: + mdesc_release(hp); + return err; +} + +core_initcall(ldc_init); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c new file mode 100644 index 000000000000..dde52bcf5c64 --- /dev/null +++ b/arch/sparc/kernel/mdesc.c @@ -0,0 +1,916 @@ +/* mdesc.c: Sun4V machine description handling. + * + * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/lmb.h> +#include <linux/log2.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> + +#include <asm/hypervisor.h> +#include <asm/mdesc.h> +#include <asm/prom.h> +#include <asm/oplib.h> +#include <asm/smp.h> + +/* Unlike the OBP device tree, the machine description is a full-on + * DAG. An arbitrary number of ARCs are possible from one + * node to other nodes and thus we can't use the OBP device_node + * data structure to represent these nodes inside of the kernel. + * + * Actually, it isn't even a DAG, because there are back pointers + * which create cycles in the graph. + * + * mdesc_hdr and mdesc_elem describe the layout of the data structure + * we get from the Hypervisor. + */ +struct mdesc_hdr { + u32 version; /* Transport version */ + u32 node_sz; /* node block size */ + u32 name_sz; /* name block size */ + u32 data_sz; /* data block size */ +} __attribute__((aligned(16))); + +struct mdesc_elem { + u8 tag; +#define MD_LIST_END 0x00 +#define MD_NODE 0x4e +#define MD_NODE_END 0x45 +#define MD_NOOP 0x20 +#define MD_PROP_ARC 0x61 +#define MD_PROP_VAL 0x76 +#define MD_PROP_STR 0x73 +#define MD_PROP_DATA 0x64 + u8 name_len; + u16 resv; + u32 name_offset; + union { + struct { + u32 data_len; + u32 data_offset; + } data; + u64 val; + } d; +}; + +struct mdesc_mem_ops { + struct mdesc_handle *(*alloc)(unsigned int mdesc_size); + void (*free)(struct mdesc_handle *handle); +}; + +struct mdesc_handle { + struct list_head list; + struct mdesc_mem_ops *mops; + void *self_base; + atomic_t refcnt; + unsigned int handle_size; + struct mdesc_hdr mdesc; +}; + +static void mdesc_handle_init(struct mdesc_handle *hp, + unsigned int handle_size, + void *base) +{ + BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1)); + + memset(hp, 0, handle_size); + INIT_LIST_HEAD(&hp->list); + hp->self_base = base; + atomic_set(&hp->refcnt, 1); + hp->handle_size = handle_size; +} + +static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size) +{ + unsigned int handle_size, alloc_size; + struct mdesc_handle *hp; + unsigned long paddr; + + handle_size = (sizeof(struct mdesc_handle) - + sizeof(struct mdesc_hdr) + + mdesc_size); + alloc_size = PAGE_ALIGN(handle_size); + + paddr = lmb_alloc(alloc_size, PAGE_SIZE); + + hp = NULL; + if (paddr) { + hp = __va(paddr); + mdesc_handle_init(hp, handle_size, hp); + } + return hp; +} + +static void mdesc_lmb_free(struct mdesc_handle *hp) +{ + unsigned int alloc_size, handle_size = hp->handle_size; + unsigned long start, end; + + BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(!list_empty(&hp->list)); + + alloc_size = PAGE_ALIGN(handle_size); + + start = (unsigned long) hp; + end = start + alloc_size; + + while (start < end) { + struct page *p; + + p = virt_to_page(start); + ClearPageReserved(p); + __free_page(p); + start += PAGE_SIZE; + } +} + +static struct mdesc_mem_ops lmb_mdesc_ops = { + .alloc = mdesc_lmb_alloc, + .free = mdesc_lmb_free, +}; + +static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) +{ + unsigned int handle_size; + void *base; + + handle_size = (sizeof(struct mdesc_handle) - + sizeof(struct mdesc_hdr) + + mdesc_size); + + base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); + if (base) { + struct mdesc_handle *hp; + unsigned long addr; + + addr = (unsigned long)base; + addr = (addr + 15UL) & ~15UL; + hp = (struct mdesc_handle *) addr; + + mdesc_handle_init(hp, handle_size, base); + return hp; + } + + return NULL; +} + +static void mdesc_kfree(struct mdesc_handle *hp) +{ + BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(!list_empty(&hp->list)); + + kfree(hp->self_base); +} + +static struct mdesc_mem_ops kmalloc_mdesc_memops = { + .alloc = mdesc_kmalloc, + .free = mdesc_kfree, +}; + +static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size, + struct mdesc_mem_ops *mops) +{ + struct mdesc_handle *hp = mops->alloc(mdesc_size); + + if (hp) + hp->mops = mops; + + return hp; +} + +static void mdesc_free(struct mdesc_handle *hp) +{ + hp->mops->free(hp); +} + +static struct mdesc_handle *cur_mdesc; +static LIST_HEAD(mdesc_zombie_list); +static DEFINE_SPINLOCK(mdesc_lock); + +struct mdesc_handle *mdesc_grab(void) +{ + struct mdesc_handle *hp; + unsigned long flags; + + spin_lock_irqsave(&mdesc_lock, flags); + hp = cur_mdesc; + if (hp) + atomic_inc(&hp->refcnt); + spin_unlock_irqrestore(&mdesc_lock, flags); + + return hp; +} +EXPORT_SYMBOL(mdesc_grab); + +void mdesc_release(struct mdesc_handle *hp) +{ + unsigned long flags; + + spin_lock_irqsave(&mdesc_lock, flags); + if (atomic_dec_and_test(&hp->refcnt)) { + list_del_init(&hp->list); + hp->mops->free(hp); + } + spin_unlock_irqrestore(&mdesc_lock, flags); +} +EXPORT_SYMBOL(mdesc_release); + +static DEFINE_MUTEX(mdesc_mutex); +static struct mdesc_notifier_client *client_list; + +void mdesc_register_notifier(struct mdesc_notifier_client *client) +{ + u64 node; + + mutex_lock(&mdesc_mutex); + client->next = client_list; + client_list = client; + + mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name) + client->add(cur_mdesc, node); + + mutex_unlock(&mdesc_mutex); +} + +static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node) +{ + const u64 *id; + u64 a; + + id = NULL; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + id = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (id) + break; + } + + return id; +} + +/* Run 'func' on nodes which are in A but not in B. */ +static void invoke_on_missing(const char *name, + struct mdesc_handle *a, + struct mdesc_handle *b, + void (*func)(struct mdesc_handle *, u64)) +{ + u64 node; + + mdesc_for_each_node_by_name(a, node, name) { + int found = 0, is_vdc_port = 0; + const char *name_prop; + const u64 *id; + u64 fnode; + + name_prop = mdesc_get_property(a, node, "name", NULL); + if (name_prop && !strcmp(name_prop, "vdc-port")) { + is_vdc_port = 1; + id = parent_cfg_handle(a, node); + } else + id = mdesc_get_property(a, node, "id", NULL); + + if (!id) { + printk(KERN_ERR "MD: Cannot find ID for %s node.\n", + (name_prop ? name_prop : name)); + continue; + } + + mdesc_for_each_node_by_name(b, fnode, name) { + const u64 *fid; + + if (is_vdc_port) { + name_prop = mdesc_get_property(b, fnode, + "name", NULL); + if (!name_prop || + strcmp(name_prop, "vdc-port")) + continue; + fid = parent_cfg_handle(b, fnode); + if (!fid) { + printk(KERN_ERR "MD: Cannot find ID " + "for vdc-port node.\n"); + continue; + } + } else + fid = mdesc_get_property(b, fnode, + "id", NULL); + + if (*id == *fid) { + found = 1; + break; + } + } + if (!found) + func(a, node); + } +} + +static void notify_one(struct mdesc_notifier_client *p, + struct mdesc_handle *old_hp, + struct mdesc_handle *new_hp) +{ + invoke_on_missing(p->node_name, old_hp, new_hp, p->remove); + invoke_on_missing(p->node_name, new_hp, old_hp, p->add); +} + +static void mdesc_notify_clients(struct mdesc_handle *old_hp, + struct mdesc_handle *new_hp) +{ + struct mdesc_notifier_client *p = client_list; + + while (p) { + notify_one(p, old_hp, new_hp); + p = p->next; + } +} + +void mdesc_update(void) +{ + unsigned long len, real_len, status; + struct mdesc_handle *hp, *orig_hp; + unsigned long flags; + + mutex_lock(&mdesc_mutex); + + (void) sun4v_mach_desc(0UL, 0UL, &len); + + hp = mdesc_alloc(len, &kmalloc_mdesc_memops); + if (!hp) { + printk(KERN_ERR "MD: mdesc alloc fails\n"); + goto out; + } + + status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); + if (status != HV_EOK || real_len > len) { + printk(KERN_ERR "MD: mdesc reread fails with %lu\n", + status); + atomic_dec(&hp->refcnt); + mdesc_free(hp); + goto out; + } + + spin_lock_irqsave(&mdesc_lock, flags); + orig_hp = cur_mdesc; + cur_mdesc = hp; + spin_unlock_irqrestore(&mdesc_lock, flags); + + mdesc_notify_clients(orig_hp, hp); + + spin_lock_irqsave(&mdesc_lock, flags); + if (atomic_dec_and_test(&orig_hp->refcnt)) + mdesc_free(orig_hp); + else + list_add(&orig_hp->list, &mdesc_zombie_list); + spin_unlock_irqrestore(&mdesc_lock, flags); + +out: + mutex_unlock(&mdesc_mutex); +} + +static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) +{ + return (struct mdesc_elem *) (mdesc + 1); +} + +static void *name_block(struct mdesc_hdr *mdesc) +{ + return ((void *) node_block(mdesc)) + mdesc->node_sz; +} + +static void *data_block(struct mdesc_hdr *mdesc) +{ + return ((void *) name_block(mdesc)) + mdesc->name_sz; +} + +u64 mdesc_node_by_name(struct mdesc_handle *hp, + u64 from_node, const char *name) +{ + struct mdesc_elem *ep = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + u64 ret; + + if (from_node == MDESC_NODE_NULL) { + ret = from_node = 0; + } else if (from_node >= last_node) { + return MDESC_NODE_NULL; + } else { + ret = ep[from_node].d.val; + } + + while (ret < last_node) { + if (ep[ret].tag != MD_NODE) + return MDESC_NODE_NULL; + if (!strcmp(names + ep[ret].name_offset, name)) + break; + ret = ep[ret].d.val; + } + if (ret >= last_node) + ret = MDESC_NODE_NULL; + return ret; +} +EXPORT_SYMBOL(mdesc_node_by_name); + +const void *mdesc_get_property(struct mdesc_handle *hp, u64 node, + const char *name, int *lenp) +{ + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + void *data = data_block(&hp->mdesc); + struct mdesc_elem *ep; + + if (node == MDESC_NODE_NULL || node >= last_node) + return NULL; + + ep = node_block(&hp->mdesc) + node; + ep++; + for (; ep->tag != MD_NODE_END; ep++) { + void *val = NULL; + int len = 0; + + switch (ep->tag) { + case MD_PROP_VAL: + val = &ep->d.val; + len = 8; + break; + + case MD_PROP_STR: + case MD_PROP_DATA: + val = data + ep->d.data.data_offset; + len = ep->d.data.data_len; + break; + + default: + break; + } + if (!val) + continue; + + if (!strcmp(names + ep->name_offset, name)) { + if (lenp) + *lenp = len; + return val; + } + } + + return NULL; +} +EXPORT_SYMBOL(mdesc_get_property); + +u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + + if (from == MDESC_NODE_NULL || from >= last_node) + return MDESC_NODE_NULL; + + ep = base + from; + + ep++; + for (; ep->tag != MD_NODE_END; ep++) { + if (ep->tag != MD_PROP_ARC) + continue; + + if (strcmp(names + ep->name_offset, arc_type)) + continue; + + return ep - base; + } + + return MDESC_NODE_NULL; +} +EXPORT_SYMBOL(mdesc_next_arc); + +u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + + ep = base + arc; + + return ep->d.val; +} +EXPORT_SYMBOL(mdesc_arc_target); + +const char *mdesc_node_name(struct mdesc_handle *hp, u64 node) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + + if (node == MDESC_NODE_NULL || node >= last_node) + return NULL; + + ep = base + node; + if (ep->tag != MD_NODE) + return NULL; + + return names + ep->name_offset; +} +EXPORT_SYMBOL(mdesc_node_name); + +static void __init report_platform_properties(void) +{ + struct mdesc_handle *hp = mdesc_grab(); + u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); + const char *s; + const u64 *v; + + if (pn == MDESC_NODE_NULL) { + prom_printf("No platform node in machine-description.\n"); + prom_halt(); + } + + s = mdesc_get_property(hp, pn, "banner-name", NULL); + printk("PLATFORM: banner-name [%s]\n", s); + s = mdesc_get_property(hp, pn, "name", NULL); + printk("PLATFORM: name [%s]\n", s); + + v = mdesc_get_property(hp, pn, "hostid", NULL); + if (v) + printk("PLATFORM: hostid [%08lx]\n", *v); + v = mdesc_get_property(hp, pn, "serial#", NULL); + if (v) + printk("PLATFORM: serial# [%08lx]\n", *v); + v = mdesc_get_property(hp, pn, "stick-frequency", NULL); + printk("PLATFORM: stick-frequency [%08lx]\n", *v); + v = mdesc_get_property(hp, pn, "mac-address", NULL); + if (v) + printk("PLATFORM: mac-address [%lx]\n", *v); + v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL); + if (v) + printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v); + v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL); + if (v) + printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v); + v = mdesc_get_property(hp, pn, "max-cpus", NULL); + if (v) + printk("PLATFORM: max-cpus [%lu]\n", *v); + +#ifdef CONFIG_SMP + { + int max_cpu, i; + + if (v) { + max_cpu = *v; + if (max_cpu > NR_CPUS) + max_cpu = NR_CPUS; + } else { + max_cpu = NR_CPUS; + } + for (i = 0; i < max_cpu; i++) + cpu_set(i, cpu_possible_map); + } +#endif + + mdesc_release(hp); +} + +static void __devinit fill_in_one_cache(cpuinfo_sparc *c, + struct mdesc_handle *hp, + u64 mp) +{ + const u64 *level = mdesc_get_property(hp, mp, "level", NULL); + const u64 *size = mdesc_get_property(hp, mp, "size", NULL); + const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL); + const char *type; + int type_len; + + type = mdesc_get_property(hp, mp, "type", &type_len); + + switch (*level) { + case 1: + if (of_find_in_proplist(type, "instn", type_len)) { + c->icache_size = *size; + c->icache_line_size = *line_size; + } else if (of_find_in_proplist(type, "data", type_len)) { + c->dcache_size = *size; + c->dcache_line_size = *line_size; + } + break; + + case 2: + c->ecache_size = *size; + c->ecache_line_size = *line_size; + break; + + default: + break; + } + + if (*level == 1) { + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(hp, a); + const char *name = mdesc_node_name(hp, target); + + if (!strcmp(name, "cache")) + fill_in_one_cache(c, hp, target); + } + } +} + +static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) +{ + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (!strcmp(name, "cpu")) { + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < NR_CPUS) + cpu_data(*id).core_id = core_id; + } else { + u64 j; + + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; + + n_name = mdesc_node_name(hp, n); + if (strcmp(n_name, "cpu")) + continue; + + id = mdesc_get_property(hp, n, "id", NULL); + if (*id < NR_CPUS) + cpu_data(*id).core_id = core_id; + } + } + } +} + +static void __devinit set_core_ids(struct mdesc_handle *hp) +{ + int idx; + u64 mp; + + idx = 1; + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *level; + const char *type; + int len; + + level = mdesc_get_property(hp, mp, "level", NULL); + if (*level != 1) + continue; + + type = mdesc_get_property(hp, mp, "type", &len); + if (!of_find_in_proplist(type, "instn", len)) + continue; + + mark_core_ids(hp, mp, idx); + + idx++; + } +} + +static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, + int proc_id) +{ + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) + continue; + + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < NR_CPUS) + cpu_data(*id).proc_id = proc_id; + } +} + +static void __devinit __set_proc_ids(struct mdesc_handle *hp, + const char *exec_unit_name) +{ + int idx; + u64 mp; + + idx = 0; + mdesc_for_each_node_by_name(hp, mp, exec_unit_name) { + const char *type; + int len; + + type = mdesc_get_property(hp, mp, "type", &len); + if (!of_find_in_proplist(type, "int", len) && + !of_find_in_proplist(type, "integer", len)) + continue; + + mark_proc_ids(hp, mp, idx); + + idx++; + } +} + +static void __devinit set_proc_ids(struct mdesc_handle *hp) +{ + __set_proc_ids(hp, "exec_unit"); + __set_proc_ids(hp, "exec-unit"); +} + +static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask, + unsigned char def) +{ + u64 val; + + if (!p) + goto use_default; + val = *p; + + if (!val || val >= 64) + goto use_default; + + *mask = ((1U << val) * 64U) - 1U; + return; + +use_default: + *mask = ((1U << def) * 64U) - 1U; +} + +static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp, + struct trap_per_cpu *tb) +{ + const u64 *val; + + val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL); + get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); + + val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL); + get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); + + val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL); + get_one_mondo_bits(val, &tb->resum_qmask, 6); + + val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL); + get_one_mondo_bits(val, &tb->nonresum_qmask, 2); +} + +void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask) +{ + struct mdesc_handle *hp = mdesc_grab(); + u64 mp; + + ncpus_probed = 0; + mdesc_for_each_node_by_name(hp, mp, "cpu") { + const u64 *id = mdesc_get_property(hp, mp, "id", NULL); + const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); + struct trap_per_cpu *tb; + cpuinfo_sparc *c; + int cpuid; + u64 a; + + ncpus_probed++; + + cpuid = *id; + +#ifdef CONFIG_SMP + if (cpuid >= NR_CPUS) { + printk(KERN_WARNING "Ignoring CPU %d which is " + ">= NR_CPUS (%d)\n", + cpuid, NR_CPUS); + continue; + } + if (!cpu_isset(cpuid, mask)) + continue; +#else + /* On uniprocessor we only want the values for the + * real physical cpu the kernel booted onto, however + * cpu_data() only has one entry at index 0. + */ + if (cpuid != real_hard_smp_processor_id()) + continue; + cpuid = 0; +#endif + + c = &cpu_data(cpuid); + c->clock_tick = *cfreq; + + tb = &trap_block[cpuid]; + get_mondo_data(hp, mp, tb); + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 j, t = mdesc_arc_target(hp, a); + const char *t_name; + + t_name = mdesc_node_name(hp, t); + if (!strcmp(t_name, "cache")) { + fill_in_one_cache(c, hp, t); + continue; + } + + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; + + n_name = mdesc_node_name(hp, n); + if (!strcmp(n_name, "cache")) + fill_in_one_cache(c, hp, n); + } + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_present_map); +#endif + + c->core_id = 0; + c->proc_id = -1; + } + +#ifdef CONFIG_SMP + sparc64_multi_core = 1; +#endif + + set_core_ids(hp); + set_proc_ids(hp); + + smp_fill_in_sib_core_maps(); + + mdesc_release(hp); +} + +static ssize_t mdesc_read(struct file *file, char __user *buf, + size_t len, loff_t *offp) +{ + struct mdesc_handle *hp = mdesc_grab(); + int err; + + if (!hp) + return -ENODEV; + + err = hp->handle_size; + if (len < hp->handle_size) + err = -EMSGSIZE; + else if (copy_to_user(buf, &hp->mdesc, hp->handle_size)) + err = -EFAULT; + mdesc_release(hp); + + return err; +} + +static const struct file_operations mdesc_fops = { + .read = mdesc_read, + .owner = THIS_MODULE, +}; + +static struct miscdevice mdesc_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mdesc", + .fops = &mdesc_fops, +}; + +static int __init mdesc_misc_init(void) +{ + return misc_register(&mdesc_misc); +} + +__initcall(mdesc_misc_init); + +void __init sun4v_mdesc_init(void) +{ + struct mdesc_handle *hp; + unsigned long len, real_len, status; + cpumask_t mask; + + (void) sun4v_mach_desc(0UL, 0UL, &len); + + printk("MDESC: Size is %lu bytes.\n", len); + + hp = mdesc_alloc(len, &lmb_mdesc_ops); + if (hp == NULL) { + prom_printf("MDESC: alloc of %lu bytes failed.\n", len); + prom_halt(); + } + + status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); + if (status != HV_EOK || real_len > len) { + prom_printf("sun4v_mach_desc fails, err(%lu), " + "len(%lu), real_len(%lu)\n", + status, len, real_len); + mdesc_free(hp); + prom_halt(); + } + + cur_mdesc = hp; + + report_platform_properties(); + + cpus_setall(mask); + mdesc_fill_in_cpu_data(mask); +} diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S new file mode 100644 index 000000000000..753b4f031bfb --- /dev/null +++ b/arch/sparc/kernel/misctrap.S @@ -0,0 +1,97 @@ +#ifdef CONFIG_KGDB + .globl arch_kgdb_breakpoint + .type arch_kgdb_breakpoint,#function +arch_kgdb_breakpoint: + ta 0x72 + retl + nop + .size arch_kgdb_breakpoint,.-arch_kgdb_breakpoint +#endif + + .type __do_privact,#function +__do_privact: + mov TLB_SFSR, %g3 + stxa %g0, [%g3] ASI_DMMU ! Clear FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + call do_privact + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size __do_privact,.-__do_privact + + .type do_mna,#function +do_mna: + rdpr %tl, %g3 + cmp %g3, 1 + + /* Setup %g4/%g5 now as they are used in the + * winfixup code. + */ + mov TLB_SFSR, %g3 + mov DMMU_SFAR, %g4 + ldxa [%g4] ASI_DMMU, %g4 + ldxa [%g3] ASI_DMMU, %g5 + stxa %g0, [%g3] ASI_DMMU ! Clear FaultValid bit + membar #Sync + bgu,pn %icc, winfix_mna + rdpr %tpc, %g3 + +1: sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call mem_address_unaligned + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size do_mna,.-do_mna + + .type do_lddfmna,#function +do_lddfmna: + sethi %hi(109f), %g7 + mov TLB_SFSR, %g4 + ldxa [%g4] ASI_DMMU, %g5 + stxa %g0, [%g4] ASI_DMMU ! Clear FaultValid bit + membar #Sync + mov DMMU_SFAR, %g4 + ldxa [%g4] ASI_DMMU, %g4 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_lddfmna + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size do_lddfmna,.-do_lddfmna + + .type do_stdfmna,#function +do_stdfmna: + sethi %hi(109f), %g7 + mov TLB_SFSR, %g4 + ldxa [%g4] ASI_DMMU, %g5 + stxa %g0, [%g4] ASI_DMMU ! Clear FaultValid bit + membar #Sync + mov DMMU_SFAR, %g4 + ldxa [%g4] ASI_DMMU, %g4 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_stdfmna + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size do_stdfmna,.-do_stdfmna + + .type breakpoint_trap,#function +breakpoint_trap: + call sparc_breakpoint + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size breakpoint_trap,.-breakpoint_trap diff --git a/arch/sparc/kernel/module_64.c b/arch/sparc/kernel/module_64.c new file mode 100644 index 000000000000..158484bf5999 --- /dev/null +++ b/arch/sparc/kernel/module_64.c @@ -0,0 +1,213 @@ +/* Kernel module help for sparc64. + * + * Copyright (C) 2001 Rusty Russell. + * Copyright (C) 2002 David S. Miller. + */ + +#include <linux/moduleloader.h> +#include <linux/kernel.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#include <asm/processor.h> +#include <asm/spitfire.h> + +static void *module_map(unsigned long size) +{ + struct vm_struct *area; + + size = PAGE_ALIGN(size); + if (!size || size > MODULES_LEN) + return NULL; + + area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); + if (!area) + return NULL; + + return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL); +} + +void *module_alloc(unsigned long size) +{ + void *ret; + + /* We handle the zero case fine, unlike vmalloc */ + if (size == 0) + return NULL; + + ret = module_map(size); + if (!ret) + ret = ERR_PTR(-ENOMEM); + else + memset(ret, 0, size); + + return ret; +} + +/* Free memory returned from module_core_alloc/module_init_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +/* Make generic code ignore STT_REGISTER dummy undefined symbols. */ +int module_frob_arch_sections(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + char *secstrings, + struct module *mod) +{ + unsigned int symidx; + Elf64_Sym *sym; + const char *strtab; + int i; + + for (symidx = 0; sechdrs[symidx].sh_type != SHT_SYMTAB; symidx++) { + if (symidx == hdr->e_shnum-1) { + printk("%s: no symtab found.\n", mod->name); + return -ENOEXEC; + } + } + sym = (Elf64_Sym *)sechdrs[symidx].sh_addr; + strtab = (char *)sechdrs[sechdrs[symidx].sh_link].sh_addr; + + for (i = 1; i < sechdrs[symidx].sh_size / sizeof(Elf_Sym); i++) { + if (sym[i].st_shndx == SHN_UNDEF && + ELF64_ST_TYPE(sym[i].st_info) == STT_REGISTER) + sym[i].st_shndx = SHN_ABS; + } + return 0; +} + +int apply_relocate(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "module %s: non-ADD RELOCATION unsupported\n", + me->name); + return -ENOEXEC; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + u8 *location; + u32 *loc32; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + Elf64_Addr v; + + /* This is where to make the change */ + location = (u8 *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + loc32 = (u32 *) location; + + BUG_ON(((u64)location >> (u64)32) != (u64)0); + + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rel[i].r_info); + v = sym->st_value + rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info) & 0xff) { + case R_SPARC_64: + location[0] = v >> 56; + location[1] = v >> 48; + location[2] = v >> 40; + location[3] = v >> 32; + location[4] = v >> 24; + location[5] = v >> 16; + location[6] = v >> 8; + location[7] = v >> 0; + break; + + case R_SPARC_32: + location[0] = v >> 24; + location[1] = v >> 16; + location[2] = v >> 8; + location[3] = v >> 0; + break; + + case R_SPARC_DISP32: + v -= (Elf64_Addr) location; + *loc32 = v; + break; + + case R_SPARC_WDISP30: + v -= (Elf64_Addr) location; + *loc32 = (*loc32 & ~0x3fffffff) | + ((v >> 2) & 0x3fffffff); + break; + + case R_SPARC_WDISP22: + v -= (Elf64_Addr) location; + *loc32 = (*loc32 & ~0x3fffff) | + ((v >> 2) & 0x3fffff); + break; + + case R_SPARC_WDISP19: + v -= (Elf64_Addr) location; + *loc32 = (*loc32 & ~0x7ffff) | + ((v >> 2) & 0x7ffff); + break; + + case R_SPARC_LO10: + *loc32 = (*loc32 & ~0x3ff) | (v & 0x3ff); + break; + + case R_SPARC_HI22: + *loc32 = (*loc32 & ~0x3fffff) | + ((v >> 10) & 0x3fffff); + break; + + case R_SPARC_OLO10: + *loc32 = (*loc32 & ~0x1fff) | + (((v & 0x3ff) + + (ELF64_R_TYPE(rel[i].r_info) >> 8)) + & 0x1fff); + break; + + default: + printk(KERN_ERR "module %s: Unknown relocation: %x\n", + me->name, + (int) (ELF64_R_TYPE(rel[i].r_info) & 0xff)); + return -ENOEXEC; + }; + } + return 0; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + /* Cheetah's I-cache is fully coherent. */ + if (tlb_type == spitfire) { + unsigned long va; + + flushw_all(); + for (va = 0; va < (PAGE_SIZE << 1); va += 32) + spitfire_put_icache_tag(va, 0x0); + __asm__ __volatile__("flush %g6"); + } + + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ +} diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c new file mode 100644 index 000000000000..0f616ae3246c --- /dev/null +++ b/arch/sparc/kernel/of_device_64.c @@ -0,0 +1,898 @@ +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> + +void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name) +{ + unsigned long ret = res->start + offset; + struct resource *r; + + if (res->flags & IORESOURCE_MEM) + r = request_mem_region(ret, size, name); + else + r = request_region(ret, size, name); + if (!r) + ret = 0; + + return (void __iomem *) ret; +} +EXPORT_SYMBOL(of_ioremap); + +void of_iounmap(struct resource *res, void __iomem *base, unsigned long size) +{ + if (res->flags & IORESOURCE_MEM) + release_mem_region((unsigned long) base, size); + else + release_region((unsigned long) base, size); +} +EXPORT_SYMBOL(of_iounmap); + +static int node_match(struct device *dev, void *data) +{ + struct of_device *op = to_of_device(dev); + struct device_node *dp = data; + + return (op->node == dp); +} + +struct of_device *of_find_device_by_node(struct device_node *dp) +{ + struct device *dev = bus_find_device(&of_platform_bus_type, NULL, + dp, node_match); + + if (dev) + return to_of_device(dev); + + return NULL; +} +EXPORT_SYMBOL(of_find_device_by_node); + +unsigned int irq_of_parse_and_map(struct device_node *node, int index) +{ + struct of_device *op = of_find_device_by_node(node); + + if (!op || index >= op->num_irqs) + return 0; + + return op->irqs[index]; +} +EXPORT_SYMBOL(irq_of_parse_and_map); + +/* Take the archdata values for IOMMU, STC, and HOSTDATA found in + * BUS and propagate to all child of_device objects. + */ +void of_propagate_archdata(struct of_device *bus) +{ + struct dev_archdata *bus_sd = &bus->dev.archdata; + struct device_node *bus_dp = bus->node; + struct device_node *dp; + + for (dp = bus_dp->child; dp; dp = dp->sibling) { + struct of_device *op = of_find_device_by_node(dp); + + op->dev.archdata.iommu = bus_sd->iommu; + op->dev.archdata.stc = bus_sd->stc; + op->dev.archdata.host_controller = bus_sd->host_controller; + op->dev.archdata.numa_node = bus_sd->numa_node; + + if (dp->child) + of_propagate_archdata(op); + } +} + +struct bus_type of_platform_bus_type; +EXPORT_SYMBOL(of_platform_bus_type); + +static inline u64 of_read_addr(const u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + +static void __init get_cells(struct device_node *dp, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = of_n_addr_cells(dp); + if (sizec) + *sizec = of_n_size_cells(dp); +} + +/* Max address size we deal with */ +#define OF_MAX_ADDR_CELLS 4 + +struct of_bus { + const char *name; + const char *addr_prop_name; + int (*match)(struct device_node *parent); + void (*count_cells)(struct device_node *child, + int *addrc, int *sizec); + int (*map)(u32 *addr, const u32 *range, + int na, int ns, int pna); + unsigned long (*get_flags)(const u32 *addr, unsigned long); +}; + +/* + * Default translator (generic bus) + */ + +static void of_bus_default_count_cells(struct device_node *dev, + int *addrc, int *sizec) +{ + get_cells(dev, addrc, sizec); +} + +/* Make sure the least significant 64-bits are in-range. Even + * for 3 or 4 cell values it is a good enough approximation. + */ +static int of_out_of_range(const u32 *addr, const u32 *base, + const u32 *size, int na, int ns) +{ + u64 a = of_read_addr(addr, na); + u64 b = of_read_addr(base, na); + + if (a < b) + return 1; + + b += of_read_addr(size, ns); + if (a >= b) + return 1; + + return 0; +} + +static int of_bus_default_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + if (ns > 2) { + printk("of_device: Cannot handle size cells (%d) > 2.", ns); + return -EINVAL; + } + + if (of_out_of_range(addr, range, range + na + pna, na, ns)) + return -EINVAL; + + /* Start with the parent range base. */ + memcpy(result, range + na, pna * 4); + + /* Add in the child address offset. */ + for (i = 0; i < na; i++) + result[pna - 1 - i] += + (addr[na - 1 - i] - + range[na - 1 - i]); + + memcpy(addr, result, pna * 4); + + return 0; +} + +static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags) +{ + if (flags) + return flags; + return IORESOURCE_MEM; +} + +/* + * PCI bus specific translator + */ + +static int of_bus_pci_match(struct device_node *np) +{ + if (!strcmp(np->name, "pci")) { + const char *model = of_get_property(np, "model", NULL); + + if (model && !strcmp(model, "SUNW,simba")) + return 0; + + /* Do not do PCI specific frobbing if the + * PCI bridge lacks a ranges property. We + * want to pass it through up to the next + * parent as-is, not with the PCI translate + * method which chops off the top address cell. + */ + if (!of_find_property(np, "ranges", NULL)) + return 0; + + return 1; + } + + return 0; +} + +static int of_bus_simba_match(struct device_node *np) +{ + const char *model = of_get_property(np, "model", NULL); + + if (model && !strcmp(model, "SUNW,simba")) + return 1; + + /* Treat PCI busses lacking ranges property just like + * simba. + */ + if (!strcmp(np->name, "pci")) { + if (!of_find_property(np, "ranges", NULL)) + return 1; + } + + return 0; +} + +static int of_bus_simba_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + return 0; +} + +static void of_bus_pci_count_cells(struct device_node *np, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 3; + if (sizec) + *sizec = 2; +} + +static int of_bus_pci_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + /* Check address type match */ + if ((addr[0] ^ range[0]) & 0x03000000) + return -EINVAL; + + if (of_out_of_range(addr + 1, range + 1, range + na + pna, + na - 1, ns)) + return -EINVAL; + + /* Start with the parent range base. */ + memcpy(result, range + na, pna * 4); + + /* Add in the child address offset, skipping high cell. */ + for (i = 0; i < na - 1; i++) + result[pna - 1 - i] += + (addr[na - 1 - i] - + range[na - 1 - i]); + + memcpy(addr, result, pna * 4); + + return 0; +} + +static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags) +{ + u32 w = addr[0]; + + /* For PCI, we override whatever child busses may have used. */ + flags = 0; + switch((w >> 24) & 0x03) { + case 0x01: + flags |= IORESOURCE_IO; + break; + + case 0x02: /* 32 bits */ + case 0x03: /* 64 bits */ + flags |= IORESOURCE_MEM; + break; + } + if (w & 0x40000000) + flags |= IORESOURCE_PREFETCH; + return flags; +} + +/* + * SBUS bus specific translator + */ + +static int of_bus_sbus_match(struct device_node *np) +{ + return !strcmp(np->name, "sbus") || + !strcmp(np->name, "sbi"); +} + +static void of_bus_sbus_count_cells(struct device_node *child, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 2; + if (sizec) + *sizec = 1; +} + +/* + * FHC/Central bus specific translator. + * + * This is just needed to hard-code the address and size cell + * counts. 'fhc' and 'central' nodes lack the #address-cells and + * #size-cells properties, and if you walk to the root on such + * Enterprise boxes all you'll get is a #size-cells of 2 which is + * not what we want to use. + */ +static int of_bus_fhc_match(struct device_node *np) +{ + return !strcmp(np->name, "fhc") || + !strcmp(np->name, "central"); +} + +#define of_bus_fhc_count_cells of_bus_sbus_count_cells + +/* + * Array of bus specific translators + */ + +static struct of_bus of_busses[] = { + /* PCI */ + { + .name = "pci", + .addr_prop_name = "assigned-addresses", + .match = of_bus_pci_match, + .count_cells = of_bus_pci_count_cells, + .map = of_bus_pci_map, + .get_flags = of_bus_pci_get_flags, + }, + /* SIMBA */ + { + .name = "simba", + .addr_prop_name = "assigned-addresses", + .match = of_bus_simba_match, + .count_cells = of_bus_pci_count_cells, + .map = of_bus_simba_map, + .get_flags = of_bus_pci_get_flags, + }, + /* SBUS */ + { + .name = "sbus", + .addr_prop_name = "reg", + .match = of_bus_sbus_match, + .count_cells = of_bus_sbus_count_cells, + .map = of_bus_default_map, + .get_flags = of_bus_default_get_flags, + }, + /* FHC */ + { + .name = "fhc", + .addr_prop_name = "reg", + .match = of_bus_fhc_match, + .count_cells = of_bus_fhc_count_cells, + .map = of_bus_default_map, + .get_flags = of_bus_default_get_flags, + }, + /* Default */ + { + .name = "default", + .addr_prop_name = "reg", + .match = NULL, + .count_cells = of_bus_default_count_cells, + .map = of_bus_default_map, + .get_flags = of_bus_default_get_flags, + }, +}; + +static struct of_bus *of_match_bus(struct device_node *np) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(of_busses); i ++) + if (!of_busses[i].match || of_busses[i].match(np)) + return &of_busses[i]; + BUG(); + return NULL; +} + +static int __init build_one_resource(struct device_node *parent, + struct of_bus *bus, + struct of_bus *pbus, + u32 *addr, + int na, int ns, int pna) +{ + const u32 *ranges; + int rone, rlen; + + ranges = of_get_property(parent, "ranges", &rlen); + if (ranges == NULL || rlen == 0) { + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + memset(result, 0, pna * 4); + for (i = 0; i < na; i++) + result[pna - 1 - i] = + addr[na - 1 - i]; + + memcpy(addr, result, pna * 4); + return 0; + } + + /* Now walk through the ranges */ + rlen /= 4; + rone = na + pna + ns; + for (; rlen >= rone; rlen -= rone, ranges += rone) { + if (!bus->map(addr, ranges, na, ns, pna)) + return 0; + } + + /* When we miss an I/O space match on PCI, just pass it up + * to the next PCI bridge and/or controller. + */ + if (!strcmp(bus->name, "pci") && + (addr[0] & 0x03000000) == 0x01000000) + return 0; + + return 1; +} + +static int __init use_1to1_mapping(struct device_node *pp) +{ + /* If we have a ranges property in the parent, use it. */ + if (of_find_property(pp, "ranges", NULL) != NULL) + return 0; + + /* If the parent is the dma node of an ISA bus, pass + * the translation up to the root. + * + * Some SBUS devices use intermediate nodes to express + * hierarchy within the device itself. These aren't + * real bus nodes, and don't have a 'ranges' property. + * But, we should still pass the translation work up + * to the SBUS itself. + */ + if (!strcmp(pp->name, "dma") || + !strcmp(pp->name, "espdma") || + !strcmp(pp->name, "ledma") || + !strcmp(pp->name, "lebuffer")) + return 0; + + /* Similarly for all PCI bridges, if we get this far + * it lacks a ranges property, and this will include + * cases like Simba. + */ + if (!strcmp(pp->name, "pci")) + return 0; + + return 1; +} + +static int of_resource_verbose; + +static void __init build_device_resources(struct of_device *op, + struct device *parent) +{ + struct of_device *p_op; + struct of_bus *bus; + int na, ns; + int index, num_reg; + const void *preg; + + if (!parent) + return; + + p_op = to_of_device(parent); + bus = of_match_bus(p_op->node); + bus->count_cells(op->node, &na, &ns); + + preg = of_get_property(op->node, bus->addr_prop_name, &num_reg); + if (!preg || num_reg == 0) + return; + + /* Convert to num-cells. */ + num_reg /= 4; + + /* Convert to num-entries. */ + num_reg /= na + ns; + + /* Prevent overrunning the op->resources[] array. */ + if (num_reg > PROMREG_MAX) { + printk(KERN_WARNING "%s: Too many regs (%d), " + "limiting to %d.\n", + op->node->full_name, num_reg, PROMREG_MAX); + num_reg = PROMREG_MAX; + } + + for (index = 0; index < num_reg; index++) { + struct resource *r = &op->resource[index]; + u32 addr[OF_MAX_ADDR_CELLS]; + const u32 *reg = (preg + (index * ((na + ns) * 4))); + struct device_node *dp = op->node; + struct device_node *pp = p_op->node; + struct of_bus *pbus, *dbus; + u64 size, result = OF_BAD_ADDR; + unsigned long flags; + int dna, dns; + int pna, pns; + + size = of_read_addr(reg + na, ns); + memcpy(addr, reg, na * 4); + + flags = bus->get_flags(addr, 0); + + if (use_1to1_mapping(pp)) { + result = of_read_addr(addr, na); + goto build_res; + } + + dna = na; + dns = ns; + dbus = bus; + + while (1) { + dp = pp; + pp = dp->parent; + if (!pp) { + result = of_read_addr(addr, dna); + break; + } + + pbus = of_match_bus(pp); + pbus->count_cells(dp, &pna, &pns); + + if (build_one_resource(dp, dbus, pbus, addr, + dna, dns, pna)) + break; + + flags = pbus->get_flags(addr, flags); + + dna = pna; + dns = pns; + dbus = pbus; + } + + build_res: + memset(r, 0, sizeof(*r)); + + if (of_resource_verbose) + printk("%s reg[%d] -> %lx\n", + op->node->full_name, index, + result); + + if (result != OF_BAD_ADDR) { + if (tlb_type == hypervisor) + result &= 0x0fffffffffffffffUL; + + r->start = result; + r->end = result + size - 1; + r->flags = flags; + } + r->name = op->node->name; + } +} + +static struct device_node * __init +apply_interrupt_map(struct device_node *dp, struct device_node *pp, + const u32 *imap, int imlen, const u32 *imask, + unsigned int *irq_p) +{ + struct device_node *cp; + unsigned int irq = *irq_p; + struct of_bus *bus; + phandle handle; + const u32 *reg; + int na, num_reg, i; + + bus = of_match_bus(pp); + bus->count_cells(dp, &na, NULL); + + reg = of_get_property(dp, "reg", &num_reg); + if (!reg || !num_reg) + return NULL; + + imlen /= ((na + 3) * 4); + handle = 0; + for (i = 0; i < imlen; i++) { + int j; + + for (j = 0; j < na; j++) { + if ((reg[j] & imask[j]) != imap[j]) + goto next; + } + if (imap[na] == irq) { + handle = imap[na + 1]; + irq = imap[na + 2]; + break; + } + + next: + imap += (na + 3); + } + if (i == imlen) { + /* Psycho and Sabre PCI controllers can have 'interrupt-map' + * properties that do not include the on-board device + * interrupts. Instead, the device's 'interrupts' property + * is already a fully specified INO value. + * + * Handle this by deciding that, if we didn't get a + * match in the parent's 'interrupt-map', and the + * parent is an IRQ translater, then use the parent as + * our IRQ controller. + */ + if (pp->irq_trans) + return pp; + + return NULL; + } + + *irq_p = irq; + cp = of_find_node_by_phandle(handle); + + return cp; +} + +static unsigned int __init pci_irq_swizzle(struct device_node *dp, + struct device_node *pp, + unsigned int irq) +{ + const struct linux_prom_pci_registers *regs; + unsigned int bus, devfn, slot, ret; + + if (irq < 1 || irq > 4) + return irq; + + regs = of_get_property(dp, "reg", NULL); + if (!regs) + return irq; + + bus = (regs->phys_hi >> 16) & 0xff; + devfn = (regs->phys_hi >> 8) & 0xff; + slot = (devfn >> 3) & 0x1f; + + if (pp->irq_trans) { + /* Derived from Table 8-3, U2P User's Manual. This branch + * is handling a PCI controller that lacks a proper set of + * interrupt-map and interrupt-map-mask properties. The + * Ultra-E450 is one example. + * + * The bit layout is BSSLL, where: + * B: 0 on bus A, 1 on bus B + * D: 2-bit slot number, derived from PCI device number as + * (dev - 1) for bus A, or (dev - 2) for bus B + * L: 2-bit line number + */ + if (bus & 0x80) { + /* PBM-A */ + bus = 0x00; + slot = (slot - 1) << 2; + } else { + /* PBM-B */ + bus = 0x10; + slot = (slot - 2) << 2; + } + irq -= 1; + + ret = (bus | slot | irq); + } else { + /* Going through a PCI-PCI bridge that lacks a set of + * interrupt-map and interrupt-map-mask properties. + */ + ret = ((irq - 1 + (slot & 3)) & 3) + 1; + } + + return ret; +} + +static int of_irq_verbose; + +static unsigned int __init build_one_device_irq(struct of_device *op, + struct device *parent, + unsigned int irq) +{ + struct device_node *dp = op->node; + struct device_node *pp, *ip; + unsigned int orig_irq = irq; + int nid; + + if (irq == 0xffffffff) + return irq; + + if (dp->irq_trans) { + irq = dp->irq_trans->irq_build(dp, irq, + dp->irq_trans->data); + + if (of_irq_verbose) + printk("%s: direct translate %x --> %x\n", + dp->full_name, orig_irq, irq); + + goto out; + } + + /* Something more complicated. Walk up to the root, applying + * interrupt-map or bus specific translations, until we hit + * an IRQ translator. + * + * If we hit a bus type or situation we cannot handle, we + * stop and assume that the original IRQ number was in a + * format which has special meaning to it's immediate parent. + */ + pp = dp->parent; + ip = NULL; + while (pp) { + const void *imap, *imsk; + int imlen; + + imap = of_get_property(pp, "interrupt-map", &imlen); + imsk = of_get_property(pp, "interrupt-map-mask", NULL); + if (imap && imsk) { + struct device_node *iret; + int this_orig_irq = irq; + + iret = apply_interrupt_map(dp, pp, + imap, imlen, imsk, + &irq); + + if (of_irq_verbose) + printk("%s: Apply [%s:%x] imap --> [%s:%x]\n", + op->node->full_name, + pp->full_name, this_orig_irq, + (iret ? iret->full_name : "NULL"), irq); + + if (!iret) + break; + + if (iret->irq_trans) { + ip = iret; + break; + } + } else { + if (!strcmp(pp->name, "pci")) { + unsigned int this_orig_irq = irq; + + irq = pci_irq_swizzle(dp, pp, irq); + if (of_irq_verbose) + printk("%s: PCI swizzle [%s] " + "%x --> %x\n", + op->node->full_name, + pp->full_name, this_orig_irq, + irq); + + } + + if (pp->irq_trans) { + ip = pp; + break; + } + } + dp = pp; + pp = pp->parent; + } + if (!ip) + return orig_irq; + + irq = ip->irq_trans->irq_build(op->node, irq, + ip->irq_trans->data); + if (of_irq_verbose) + printk("%s: Apply IRQ trans [%s] %x --> %x\n", + op->node->full_name, ip->full_name, orig_irq, irq); + +out: + nid = of_node_to_nid(dp); + if (nid != -1) { + cpumask_t numa_mask = node_to_cpumask(nid); + + irq_set_affinity(irq, numa_mask); + } + + return irq; +} + +static struct of_device * __init scan_one_device(struct device_node *dp, + struct device *parent) +{ + struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL); + const unsigned int *irq; + struct dev_archdata *sd; + int len, i; + + if (!op) + return NULL; + + sd = &op->dev.archdata; + sd->prom_node = dp; + sd->op = op; + + op->node = dp; + + op->clock_freq = of_getintprop_default(dp, "clock-frequency", + (25*1000*1000)); + op->portid = of_getintprop_default(dp, "upa-portid", -1); + if (op->portid == -1) + op->portid = of_getintprop_default(dp, "portid", -1); + + irq = of_get_property(dp, "interrupts", &len); + if (irq) { + memcpy(op->irqs, irq, len); + op->num_irqs = len / 4; + } else { + op->num_irqs = 0; + } + + /* Prevent overrunning the op->irqs[] array. */ + if (op->num_irqs > PROMINTR_MAX) { + printk(KERN_WARNING "%s: Too many irqs (%d), " + "limiting to %d.\n", + dp->full_name, op->num_irqs, PROMINTR_MAX); + op->num_irqs = PROMINTR_MAX; + } + + build_device_resources(op, parent); + for (i = 0; i < op->num_irqs; i++) + op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]); + + op->dev.parent = parent; + op->dev.bus = &of_platform_bus_type; + if (!parent) + dev_set_name(&op->dev, "root"); + else + dev_set_name(&op->dev, "%08x", dp->node); + + if (of_device_register(op)) { + printk("%s: Could not register of device.\n", + dp->full_name); + kfree(op); + op = NULL; + } + + return op; +} + +static void __init scan_tree(struct device_node *dp, struct device *parent) +{ + while (dp) { + struct of_device *op = scan_one_device(dp, parent); + + if (op) + scan_tree(dp->child, &op->dev); + + dp = dp->sibling; + } +} + +static void __init scan_of_devices(void) +{ + struct device_node *root = of_find_node_by_path("/"); + struct of_device *parent; + + parent = scan_one_device(root, NULL); + if (!parent) + return; + + scan_tree(root->child, &parent->dev); +} + +static int __init of_bus_driver_init(void) +{ + int err; + + err = of_bus_type_init(&of_platform_bus_type, "of"); + if (!err) + scan_of_devices(); + + return err; +} + +postcore_initcall(of_bus_driver_init); + +static int __init of_debug(char *str) +{ + int val = 0; + + get_option(&str, &val); + if (val & 1) + of_resource_verbose = 1; + if (val & 2) + of_irq_verbose = 1; + return 1; +} + +__setup("of_debug=", of_debug); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c new file mode 100644 index 000000000000..bdb7c0a6d83d --- /dev/null +++ b/arch/sparc/kernel/pci.c @@ -0,0 +1,1095 @@ +/* pci.c: UltraSparc PCI controller support. + * + * Copyright (C) 1997, 1998, 1999 David S. Miller (davem@redhat.com) + * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) + * + * OF tree based PCI bus probing taken from the PowerPC port + * with minor modifications, see there for credits. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/capability.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/irq.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_device.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/apb.h> + +#include "pci_impl.h" + +/* List of all PCI controllers found in the system. */ +struct pci_pbm_info *pci_pbm_root = NULL; + +/* Each PBM found gets a unique index. */ +int pci_num_pbms = 0; + +volatile int pci_poke_in_progress; +volatile int pci_poke_cpu = -1; +volatile int pci_poke_faulted; + +static DEFINE_SPINLOCK(pci_poke_lock); + +void pci_config_read8(u8 *addr, u8 *ret) +{ + unsigned long flags; + u8 byte; + + spin_lock_irqsave(&pci_poke_lock, flags); + pci_poke_cpu = smp_processor_id(); + pci_poke_in_progress = 1; + pci_poke_faulted = 0; + __asm__ __volatile__("membar #Sync\n\t" + "lduba [%1] %2, %0\n\t" + "membar #Sync" + : "=r" (byte) + : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + pci_poke_in_progress = 0; + pci_poke_cpu = -1; + if (!pci_poke_faulted) + *ret = byte; + spin_unlock_irqrestore(&pci_poke_lock, flags); +} + +void pci_config_read16(u16 *addr, u16 *ret) +{ + unsigned long flags; + u16 word; + + spin_lock_irqsave(&pci_poke_lock, flags); + pci_poke_cpu = smp_processor_id(); + pci_poke_in_progress = 1; + pci_poke_faulted = 0; + __asm__ __volatile__("membar #Sync\n\t" + "lduha [%1] %2, %0\n\t" + "membar #Sync" + : "=r" (word) + : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + pci_poke_in_progress = 0; + pci_poke_cpu = -1; + if (!pci_poke_faulted) + *ret = word; + spin_unlock_irqrestore(&pci_poke_lock, flags); +} + +void pci_config_read32(u32 *addr, u32 *ret) +{ + unsigned long flags; + u32 dword; + + spin_lock_irqsave(&pci_poke_lock, flags); + pci_poke_cpu = smp_processor_id(); + pci_poke_in_progress = 1; + pci_poke_faulted = 0; + __asm__ __volatile__("membar #Sync\n\t" + "lduwa [%1] %2, %0\n\t" + "membar #Sync" + : "=r" (dword) + : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + pci_poke_in_progress = 0; + pci_poke_cpu = -1; + if (!pci_poke_faulted) + *ret = dword; + spin_unlock_irqrestore(&pci_poke_lock, flags); +} + +void pci_config_write8(u8 *addr, u8 val) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_poke_lock, flags); + pci_poke_cpu = smp_processor_id(); + pci_poke_in_progress = 1; + pci_poke_faulted = 0; + __asm__ __volatile__("membar #Sync\n\t" + "stba %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + pci_poke_in_progress = 0; + pci_poke_cpu = -1; + spin_unlock_irqrestore(&pci_poke_lock, flags); +} + +void pci_config_write16(u16 *addr, u16 val) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_poke_lock, flags); + pci_poke_cpu = smp_processor_id(); + pci_poke_in_progress = 1; + pci_poke_faulted = 0; + __asm__ __volatile__("membar #Sync\n\t" + "stha %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + pci_poke_in_progress = 0; + pci_poke_cpu = -1; + spin_unlock_irqrestore(&pci_poke_lock, flags); +} + +void pci_config_write32(u32 *addr, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_poke_lock, flags); + pci_poke_cpu = smp_processor_id(); + pci_poke_in_progress = 1; + pci_poke_faulted = 0; + __asm__ __volatile__("membar #Sync\n\t" + "stwa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + pci_poke_in_progress = 0; + pci_poke_cpu = -1; + spin_unlock_irqrestore(&pci_poke_lock, flags); +} + +static int ofpci_verbose; + +static int __init ofpci_debug(char *str) +{ + int val = 0; + + get_option(&str, &val); + if (val) + ofpci_verbose = 1; + return 1; +} + +__setup("ofpci_debug=", ofpci_debug); + +static unsigned long pci_parse_of_flags(u32 addr0) +{ + unsigned long flags = 0; + + if (addr0 & 0x02000000) { + flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; + flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; + flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; + if (addr0 & 0x40000000) + flags |= IORESOURCE_PREFETCH + | PCI_BASE_ADDRESS_MEM_PREFETCH; + } else if (addr0 & 0x01000000) + flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; + return flags; +} + +/* The of_device layer has translated all of the assigned-address properties + * into physical address resources, we only have to figure out the register + * mapping. + */ +static void pci_parse_of_addrs(struct of_device *op, + struct device_node *node, + struct pci_dev *dev) +{ + struct resource *op_res; + const u32 *addrs; + int proplen; + + addrs = of_get_property(node, "assigned-addresses", &proplen); + if (!addrs) + return; + if (ofpci_verbose) + printk(" parse addresses (%d bytes) @ %p\n", + proplen, addrs); + op_res = &op->resource[0]; + for (; proplen >= 20; proplen -= 20, addrs += 5, op_res++) { + struct resource *res; + unsigned long flags; + int i; + + flags = pci_parse_of_flags(addrs[0]); + if (!flags) + continue; + i = addrs[0] & 0xff; + if (ofpci_verbose) + printk(" start: %lx, end: %lx, i: %x\n", + op_res->start, op_res->end, i); + + if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { + res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; + } else if (i == dev->rom_base_reg) { + res = &dev->resource[PCI_ROM_RESOURCE]; + flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; + } else { + printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); + continue; + } + res->start = op_res->start; + res->end = op_res->end; + res->flags = flags; + res->name = pci_name(dev); + } +} + +static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, + struct device_node *node, + struct pci_bus *bus, int devfn) +{ + struct dev_archdata *sd; + struct of_device *op; + struct pci_dev *dev; + const char *type; + u32 class; + + dev = alloc_pci_dev(); + if (!dev) + return NULL; + + sd = &dev->dev.archdata; + sd->iommu = pbm->iommu; + sd->stc = &pbm->stc; + sd->host_controller = pbm; + sd->prom_node = node; + sd->op = op = of_find_device_by_node(node); + sd->numa_node = pbm->numa_node; + + sd = &op->dev.archdata; + sd->iommu = pbm->iommu; + sd->stc = &pbm->stc; + sd->numa_node = pbm->numa_node; + + if (!strcmp(node->name, "ebus")) + of_propagate_archdata(op); + + type = of_get_property(node, "device_type", NULL); + if (type == NULL) + type = ""; + + if (ofpci_verbose) + printk(" create device, devfn: %x, type: %s\n", + devfn, type); + + dev->bus = bus; + dev->sysdata = node; + dev->dev.parent = bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->devfn = devfn; + dev->multifunction = 0; /* maybe a lie? */ + + dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff); + dev->device = of_getintprop_default(node, "device-id", 0xffff); + dev->subsystem_vendor = + of_getintprop_default(node, "subsystem-vendor-id", 0); + dev->subsystem_device = + of_getintprop_default(node, "subsystem-id", 0); + + dev->cfg_size = pci_cfg_space_size(dev); + + /* We can't actually use the firmware value, we have + * to read what is in the register right now. One + * reason is that in the case of IDE interfaces the + * firmware can sample the value before the the IDE + * interface is programmed into native mode. + */ + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); + dev->class = class >> 8; + dev->revision = class & 0xff; + + dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus), + dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + if (ofpci_verbose) + printk(" class: 0x%x device name: %s\n", + dev->class, pci_name(dev)); + + /* I have seen IDE devices which will not respond to + * the bmdma simplex check reads if bus mastering is + * disabled. + */ + if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE) + pci_set_master(dev); + + dev->current_state = 4; /* unknown power state */ + dev->error_state = pci_channel_io_normal; + + if (!strcmp(node->name, "pci")) { + /* a PCI-PCI bridge */ + dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; + dev->rom_base_reg = PCI_ROM_ADDRESS1; + } else if (!strcmp(type, "cardbus")) { + dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; + } else { + dev->hdr_type = PCI_HEADER_TYPE_NORMAL; + dev->rom_base_reg = PCI_ROM_ADDRESS; + + dev->irq = sd->op->irqs[0]; + if (dev->irq == 0xffffffff) + dev->irq = PCI_IRQ_NONE; + } + + pci_parse_of_addrs(sd->op, node, dev); + + if (ofpci_verbose) + printk(" adding to system ...\n"); + + pci_device_add(dev, bus); + + return dev; +} + +static void __devinit apb_calc_first_last(u8 map, u32 *first_p, u32 *last_p) +{ + u32 idx, first, last; + + first = 8; + last = 0; + for (idx = 0; idx < 8; idx++) { + if ((map & (1 << idx)) != 0) { + if (first > idx) + first = idx; + if (last < idx) + last = idx; + } + } + + *first_p = first; + *last_p = last; +} + +static void pci_resource_adjust(struct resource *res, + struct resource *root) +{ + res->start += root->start; + res->end += root->start; +} + +/* For PCI bus devices which lack a 'ranges' property we interrogate + * the config space values to set the resources, just like the generic + * Linux PCI probing code does. + */ +static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev, + struct pci_bus *bus, + struct pci_pbm_info *pbm) +{ + struct resource *res; + u8 io_base_lo, io_limit_lo; + u16 mem_base_lo, mem_limit_lo; + unsigned long base, limit; + + pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo); + pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo); + base = (io_base_lo & PCI_IO_RANGE_MASK) << 8; + limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8; + + if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) { + u16 io_base_hi, io_limit_hi; + + pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi); + pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi); + base |= (io_base_hi << 16); + limit |= (io_limit_hi << 16); + } + + res = bus->resource[0]; + if (base <= limit) { + res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO; + if (!res->start) + res->start = base; + if (!res->end) + res->end = limit + 0xfff; + pci_resource_adjust(res, &pbm->io_space); + } + + pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo); + pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo); + base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16; + limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; + + res = bus->resource[1]; + if (base <= limit) { + res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | + IORESOURCE_MEM); + res->start = base; + res->end = limit + 0xfffff; + pci_resource_adjust(res, &pbm->mem_space); + } + + pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); + pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo); + base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16; + limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; + + if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { + u32 mem_base_hi, mem_limit_hi; + + pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi); + pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi); + + /* + * Some bridges set the base > limit by default, and some + * (broken) BIOSes do not initialize them. If we find + * this, just assume they are not being used. + */ + if (mem_base_hi <= mem_limit_hi) { + base |= ((long) mem_base_hi) << 32; + limit |= ((long) mem_limit_hi) << 32; + } + } + + res = bus->resource[2]; + if (base <= limit) { + res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | + IORESOURCE_MEM | IORESOURCE_PREFETCH); + res->start = base; + res->end = limit + 0xfffff; + pci_resource_adjust(res, &pbm->mem_space); + } +} + +/* Cook up fake bus resources for SUNW,simba PCI bridges which lack + * a proper 'ranges' property. + */ +static void __devinit apb_fake_ranges(struct pci_dev *dev, + struct pci_bus *bus, + struct pci_pbm_info *pbm) +{ + struct resource *res; + u32 first, last; + u8 map; + + pci_read_config_byte(dev, APB_IO_ADDRESS_MAP, &map); + apb_calc_first_last(map, &first, &last); + res = bus->resource[0]; + res->start = (first << 21); + res->end = (last << 21) + ((1 << 21) - 1); + res->flags = IORESOURCE_IO; + pci_resource_adjust(res, &pbm->io_space); + + pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &map); + apb_calc_first_last(map, &first, &last); + res = bus->resource[1]; + res->start = (first << 21); + res->end = (last << 21) + ((1 << 21) - 1); + res->flags = IORESOURCE_MEM; + pci_resource_adjust(res, &pbm->mem_space); +} + +static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm, + struct device_node *node, + struct pci_bus *bus); + +#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1]) + +static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm, + struct device_node *node, + struct pci_dev *dev) +{ + struct pci_bus *bus; + const u32 *busrange, *ranges; + int len, i, simba; + struct resource *res; + unsigned int flags; + u64 size; + + if (ofpci_verbose) + printk("of_scan_pci_bridge(%s)\n", node->full_name); + + /* parse bus-range property */ + busrange = of_get_property(node, "bus-range", &len); + if (busrange == NULL || len != 8) { + printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", + node->full_name); + return; + } + ranges = of_get_property(node, "ranges", &len); + simba = 0; + if (ranges == NULL) { + const char *model = of_get_property(node, "model", NULL); + if (model && !strcmp(model, "SUNW,simba")) + simba = 1; + } + + bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } + + bus->primary = dev->bus->number; + bus->subordinate = busrange[1]; + bus->bridge_ctl = 0; + + /* parse ranges property, or cook one up by hand for Simba */ + /* PCI #address-cells == 3 and #size-cells == 2 always */ + res = &dev->resource[PCI_BRIDGE_RESOURCES]; + for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) { + res->flags = 0; + bus->resource[i] = res; + ++res; + } + if (simba) { + apb_fake_ranges(dev, bus, pbm); + goto after_ranges; + } else if (ranges == NULL) { + pci_cfg_fake_ranges(dev, bus, pbm); + goto after_ranges; + } + i = 1; + for (; len >= 32; len -= 32, ranges += 8) { + struct resource *root; + + flags = pci_parse_of_flags(ranges[0]); + size = GET_64BIT(ranges, 6); + if (flags == 0 || size == 0) + continue; + if (flags & IORESOURCE_IO) { + res = bus->resource[0]; + if (res->flags) { + printk(KERN_ERR "PCI: ignoring extra I/O range" + " for bridge %s\n", node->full_name); + continue; + } + root = &pbm->io_space; + } else { + if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { + printk(KERN_ERR "PCI: too many memory ranges" + " for bridge %s\n", node->full_name); + continue; + } + res = bus->resource[i]; + ++i; + root = &pbm->mem_space; + } + + res->start = GET_64BIT(ranges, 1); + res->end = res->start + size - 1; + res->flags = flags; + + /* Another way to implement this would be to add an of_device + * layer routine that can calculate a resource for a given + * range property value in a PCI device. + */ + pci_resource_adjust(res, root); + } +after_ranges: + sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), + bus->number); + if (ofpci_verbose) + printk(" bus name: %s\n", bus->name); + + pci_of_scan_bus(pbm, node, bus); +} + +static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm, + struct device_node *node, + struct pci_bus *bus) +{ + struct device_node *child; + const u32 *reg; + int reglen, devfn, prev_devfn; + struct pci_dev *dev; + + if (ofpci_verbose) + printk("PCI: scan_bus[%s] bus no %d\n", + node->full_name, bus->number); + + child = NULL; + prev_devfn = -1; + while ((child = of_get_next_child(node, child)) != NULL) { + if (ofpci_verbose) + printk(" * %s\n", child->full_name); + reg = of_get_property(child, "reg", ®len); + if (reg == NULL || reglen < 20) + continue; + + devfn = (reg[0] >> 8) & 0xff; + + /* This is a workaround for some device trees + * which list PCI devices twice. On the V100 + * for example, device number 3 is listed twice. + * Once as "pm" and once again as "lomp". + */ + if (devfn == prev_devfn) + continue; + prev_devfn = devfn; + + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(pbm, child, bus, devfn); + if (!dev) + continue; + if (ofpci_verbose) + printk("PCI: dev header type: %x\n", + dev->hdr_type); + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + of_scan_pci_bridge(pbm, child, dev); + } +} + +static ssize_t +show_pciobppath_attr(struct device * dev, struct device_attribute * attr, char * buf) +{ + struct pci_dev *pdev; + struct device_node *dp; + + pdev = to_pci_dev(dev); + dp = pdev->dev.archdata.prom_node; + + return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name); +} + +static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, show_pciobppath_attr, NULL); + +static void __devinit pci_bus_register_of_sysfs(struct pci_bus *bus) +{ + struct pci_dev *dev; + struct pci_bus *child_bus; + int err; + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* we don't really care if we can create this file or + * not, but we need to assign the result of the call + * or the world will fall under alien invasion and + * everybody will be frozen on a spaceship ready to be + * eaten on alpha centauri by some green and jelly + * humanoid. + */ + err = sysfs_create_file(&dev->dev.kobj, &dev_attr_obppath.attr); + } + list_for_each_entry(child_bus, &bus->children, node) + pci_bus_register_of_sysfs(child_bus); +} + +struct pci_bus * __devinit pci_scan_one_pbm(struct pci_pbm_info *pbm, + struct device *parent) +{ + struct device_node *node = pbm->op->node; + struct pci_bus *bus; + + printk("PCI: Scanning PBM %s\n", node->full_name); + + bus = pci_create_bus(parent, pbm->pci_first_busno, pbm->pci_ops, pbm); + if (!bus) { + printk(KERN_ERR "Failed to create bus for %s\n", + node->full_name); + return NULL; + } + bus->secondary = pbm->pci_first_busno; + bus->subordinate = pbm->pci_last_busno; + + bus->resource[0] = &pbm->io_space; + bus->resource[1] = &pbm->mem_space; + + pci_of_scan_bus(pbm, node, bus); + pci_bus_add_devices(bus); + pci_bus_register_of_sysfs(bus); + + return bus; +} + +void __devinit pcibios_fixup_bus(struct pci_bus *pbus) +{ + struct pci_pbm_info *pbm = pbus->sysdata; + + /* Generic PCI bus probing sets these to point at + * &io{port,mem}_resouce which is wrong for us. + */ + pbus->resource[0] = &pbm->io_space; + pbus->resource[1] = &pbm->mem_space; +} + +struct resource *pcibios_select_root(struct pci_dev *pdev, struct resource *r) +{ + struct pci_pbm_info *pbm = pdev->bus->sysdata; + struct resource *root = NULL; + + if (r->flags & IORESOURCE_IO) + root = &pbm->io_space; + if (r->flags & IORESOURCE_MEM) + root = &pbm->mem_space; + + return root; +} + +void pcibios_update_irq(struct pci_dev *pdev, int irq) +{ +} + +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) +{ +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, oldcmd; + int i; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + oldcmd = cmd; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + /* Only set up the requested stuff */ + if (!(mask & (1<<i))) + continue; + + if (res->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + + if (cmd != oldcmd) { + printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n", + pci_name(dev), cmd); + /* Enable the appropriate bits in the PCI command register. */ + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +void pcibios_resource_to_bus(struct pci_dev *pdev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_pbm_info *pbm = pdev->bus->sysdata; + struct resource zero_res, *root; + + zero_res.start = 0; + zero_res.end = 0; + zero_res.flags = res->flags; + + if (res->flags & IORESOURCE_IO) + root = &pbm->io_space; + else + root = &pbm->mem_space; + + pci_resource_adjust(&zero_res, root); + + region->start = res->start - zero_res.start; + region->end = res->end - zero_res.start; +} +EXPORT_SYMBOL(pcibios_resource_to_bus); + +void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_pbm_info *pbm = pdev->bus->sysdata; + struct resource *root; + + res->start = region->start; + res->end = region->end; + + if (res->flags & IORESOURCE_IO) + root = &pbm->io_space; + else + root = &pbm->mem_space; + + pci_resource_adjust(res, root); +} +EXPORT_SYMBOL(pcibios_bus_to_resource); + +char * __devinit pcibios_setup(char *str) +{ + return str; +} + +/* Platform support for /proc/bus/pci/X/Y mmap()s. */ + +/* If the user uses a host-bridge as the PCI device, he may use + * this to perform a raw mmap() of the I/O or MEM space behind + * that controller. + * + * This can be useful for execution of x86 PCI bios initialization code + * on a PCI card, like the xfree86 int10 stuff does. + */ +static int __pci_mmap_make_offset_bus(struct pci_dev *pdev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + unsigned long space_size, user_offset, user_size; + + if (mmap_state == pci_mmap_io) { + space_size = (pbm->io_space.end - + pbm->io_space.start) + 1; + } else { + space_size = (pbm->mem_space.end - + pbm->mem_space.start) + 1; + } + + /* Make sure the request is in range. */ + user_offset = vma->vm_pgoff << PAGE_SHIFT; + user_size = vma->vm_end - vma->vm_start; + + if (user_offset >= space_size || + (user_offset + user_size) > space_size) + return -EINVAL; + + if (mmap_state == pci_mmap_io) { + vma->vm_pgoff = (pbm->io_space.start + + user_offset) >> PAGE_SHIFT; + } else { + vma->vm_pgoff = (pbm->mem_space.start + + user_offset) >> PAGE_SHIFT; + } + + return 0; +} + +/* Adjust vm_pgoff of VMA such that it is the physical page offset + * corresponding to the 32-bit pci bus offset for DEV requested by the user. + * + * Basically, the user finds the base address for his device which he wishes + * to mmap. They read the 32-bit value from the config space base register, + * add whatever PAGE_SIZE multiple offset they wish, and feed this into the + * offset parameter of mmap on /proc/bus/pci/XXX for that device. + * + * Returns negative error code on failure, zero on success. + */ +static int __pci_mmap_make_offset(struct pci_dev *pdev, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + unsigned long user_paddr, user_size; + int i, err; + + /* First compute the physical address in vma->vm_pgoff, + * making sure the user offset is within range in the + * appropriate PCI space. + */ + err = __pci_mmap_make_offset_bus(pdev, vma, mmap_state); + if (err) + return err; + + /* If this is a mapping on a host bridge, any address + * is OK. + */ + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + return err; + + /* Otherwise make sure it's in the range for one of the + * device's resources. + */ + user_paddr = vma->vm_pgoff << PAGE_SHIFT; + user_size = vma->vm_end - vma->vm_start; + + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + struct resource *rp = &pdev->resource[i]; + resource_size_t aligned_end; + + /* Active? */ + if (!rp->flags) + continue; + + /* Same type? */ + if (i == PCI_ROM_RESOURCE) { + if (mmap_state != pci_mmap_mem) + continue; + } else { + if ((mmap_state == pci_mmap_io && + (rp->flags & IORESOURCE_IO) == 0) || + (mmap_state == pci_mmap_mem && + (rp->flags & IORESOURCE_MEM) == 0)) + continue; + } + + /* Align the resource end to the next page address. + * PAGE_SIZE intentionally added instead of (PAGE_SIZE - 1), + * because actually we need the address of the next byte + * after rp->end. + */ + aligned_end = (rp->end + PAGE_SIZE) & PAGE_MASK; + + if ((rp->start <= user_paddr) && + (user_paddr + user_size) <= aligned_end) + break; + } + + if (i > PCI_ROM_RESOURCE) + return -EINVAL; + + return 0; +} + +/* Set vm_flags of VMA, as appropriate for this architecture, for a pci device + * mapping. + */ +static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + vma->vm_flags |= (VM_IO | VM_RESERVED); +} + +/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci + * device mapping. + */ +static void __pci_mmap_set_pgprot(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + /* Our io_remap_pfn_range takes care of this, do nothing. */ +} + +/* Perform the actual remap of the pages for a PCI device mapping, as appropriate + * for this architecture. The region in the process to map is described by vm_start + * and vm_end members of VMA, the base physical address is found in vm_pgoff. + * The pci device structure is provided so that architectures may make mapping + * decisions on a per-device or per-bus basis. + * + * Returns a negative error code on failure, zero on success. + */ +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, + int write_combine) +{ + int ret; + + ret = __pci_mmap_make_offset(dev, vma, mmap_state); + if (ret < 0) + return ret; + + __pci_mmap_set_flags(dev, vma, mmap_state); + __pci_mmap_set_pgprot(dev, vma, mmap_state); + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + if (ret) + return ret; + + return 0; +} + +#ifdef CONFIG_NUMA +int pcibus_to_node(struct pci_bus *pbus) +{ + struct pci_pbm_info *pbm = pbus->sysdata; + + return pbm->numa_node; +} +EXPORT_SYMBOL(pcibus_to_node); +#endif + +/* Return the domain number for this pci bus */ + +int pci_domain_nr(struct pci_bus *pbus) +{ + struct pci_pbm_info *pbm = pbus->sysdata; + int ret; + + if (!pbm) { + ret = -ENXIO; + } else { + ret = pbm->index; + } + + return ret; +} +EXPORT_SYMBOL(pci_domain_nr); + +#ifdef CONFIG_PCI_MSI +int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + unsigned int virt_irq; + + if (!pbm->setup_msi_irq) + return -EINVAL; + + return pbm->setup_msi_irq(&virt_irq, pdev, desc); +} + +void arch_teardown_msi_irq(unsigned int virt_irq) +{ + struct msi_desc *entry = get_irq_msi(virt_irq); + struct pci_dev *pdev = entry->dev; + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + + if (pbm->teardown_msi_irq) + pbm->teardown_msi_irq(virt_irq, pdev); +} +#endif /* !(CONFIG_PCI_MSI) */ + +struct device_node *pci_device_to_OF_node(struct pci_dev *pdev) +{ + return pdev->dev.archdata.prom_node; +} +EXPORT_SYMBOL(pci_device_to_OF_node); + +static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit) +{ + struct pci_dev *ali_isa_bridge; + u8 val; + + /* ALI sound chips generate 31-bits of DMA, a special register + * determines what bit 31 is emitted as. + */ + ali_isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, + PCI_DEVICE_ID_AL_M1533, + NULL); + + pci_read_config_byte(ali_isa_bridge, 0x7e, &val); + if (set_bit) + val |= 0x01; + else + val &= ~0x01; + pci_write_config_byte(ali_isa_bridge, 0x7e, val); + pci_dev_put(ali_isa_bridge); +} + +int pci_dma_supported(struct pci_dev *pdev, u64 device_mask) +{ + u64 dma_addr_mask; + + if (pdev == NULL) { + dma_addr_mask = 0xffffffff; + } else { + struct iommu *iommu = pdev->dev.archdata.iommu; + + dma_addr_mask = iommu->dma_addr_mask; + + if (pdev->vendor == PCI_VENDOR_ID_AL && + pdev->device == PCI_DEVICE_ID_AL_M5451 && + device_mask == 0x7fffffff) { + ali_sound_dma_hack(pdev, + (dma_addr_mask & 0x80000000) != 0); + return 1; + } + } + + if (device_mask >= (1UL << 32UL)) + return 0; + + return (device_mask & dma_addr_mask) == dma_addr_mask; +} + +void pci_resource_to_user(const struct pci_dev *pdev, int bar, + const struct resource *rp, resource_size_t *start, + resource_size_t *end) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + unsigned long offset; + + if (rp->flags & IORESOURCE_IO) + offset = pbm->io_space.start; + else + offset = pbm->mem_space.start; + + *start = rp->start - offset; + *end = rp->end - offset; +} diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c new file mode 100644 index 000000000000..23b88082d0b2 --- /dev/null +++ b/arch/sparc/kernel/pci_common.c @@ -0,0 +1,545 @@ +/* pci_common.c: PCI controller common support. + * + * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net) + */ + +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/of_device.h> + +#include <asm/prom.h> +#include <asm/oplib.h> + +#include "pci_impl.h" +#include "pci_sun4v.h" + +static int config_out_of_range(struct pci_pbm_info *pbm, + unsigned long bus, + unsigned long devfn, + unsigned long reg) +{ + if (bus < pbm->pci_first_busno || + bus > pbm->pci_last_busno) + return 1; + return 0; +} + +static void *sun4u_config_mkaddr(struct pci_pbm_info *pbm, + unsigned long bus, + unsigned long devfn, + unsigned long reg) +{ + unsigned long rbits = pbm->config_space_reg_bits; + + if (config_out_of_range(pbm, bus, devfn, reg)) + return NULL; + + reg = (reg & ((1 << rbits) - 1)); + devfn <<= rbits; + bus <<= rbits + 8; + + return (void *) (pbm->config_space | bus | devfn | reg); +} + +/* At least on Sabre, it is necessary to access all PCI host controller + * registers at their natural size, otherwise zeros are returned. + * Strange but true, and I see no language in the UltraSPARC-IIi + * programmer's manual that mentions this even indirectly. + */ +static int sun4u_read_pci_cfg_host(struct pci_pbm_info *pbm, + unsigned char bus, unsigned int devfn, + int where, int size, u32 *value) +{ + u32 tmp32, *addr; + u16 tmp16; + u8 tmp8; + + addr = sun4u_config_mkaddr(pbm, bus, devfn, where); + if (!addr) + return PCIBIOS_SUCCESSFUL; + + switch (size) { + case 1: + if (where < 8) { + unsigned long align = (unsigned long) addr; + + align &= ~1; + pci_config_read16((u16 *)align, &tmp16); + if (where & 1) + *value = tmp16 >> 8; + else + *value = tmp16 & 0xff; + } else { + pci_config_read8((u8 *)addr, &tmp8); + *value = (u32) tmp8; + } + break; + + case 2: + if (where < 8) { + pci_config_read16((u16 *)addr, &tmp16); + *value = (u32) tmp16; + } else { + pci_config_read8((u8 *)addr, &tmp8); + *value = (u32) tmp8; + pci_config_read8(((u8 *)addr) + 1, &tmp8); + *value |= ((u32) tmp8) << 8; + } + break; + + case 4: + tmp32 = 0xffffffff; + sun4u_read_pci_cfg_host(pbm, bus, devfn, + where, 2, &tmp32); + *value = tmp32; + + tmp32 = 0xffffffff; + sun4u_read_pci_cfg_host(pbm, bus, devfn, + where + 2, 2, &tmp32); + *value |= tmp32 << 16; + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int sun4u_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 *value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + unsigned char bus = bus_dev->number; + u32 *addr; + u16 tmp16; + u8 tmp8; + + switch (size) { + case 1: + *value = 0xff; + break; + case 2: + *value = 0xffff; + break; + case 4: + *value = 0xffffffff; + break; + } + + if (!bus_dev->number && !PCI_SLOT(devfn)) + return sun4u_read_pci_cfg_host(pbm, bus, devfn, where, + size, value); + + addr = sun4u_config_mkaddr(pbm, bus, devfn, where); + if (!addr) + return PCIBIOS_SUCCESSFUL; + + switch (size) { + case 1: + pci_config_read8((u8 *)addr, &tmp8); + *value = (u32) tmp8; + break; + + case 2: + if (where & 0x01) { + printk("pci_read_config_word: misaligned reg [%x]\n", + where); + return PCIBIOS_SUCCESSFUL; + } + pci_config_read16((u16 *)addr, &tmp16); + *value = (u32) tmp16; + break; + + case 4: + if (where & 0x03) { + printk("pci_read_config_dword: misaligned reg [%x]\n", + where); + return PCIBIOS_SUCCESSFUL; + } + pci_config_read32(addr, value); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int sun4u_write_pci_cfg_host(struct pci_pbm_info *pbm, + unsigned char bus, unsigned int devfn, + int where, int size, u32 value) +{ + u32 *addr; + + addr = sun4u_config_mkaddr(pbm, bus, devfn, where); + if (!addr) + return PCIBIOS_SUCCESSFUL; + + switch (size) { + case 1: + if (where < 8) { + unsigned long align = (unsigned long) addr; + u16 tmp16; + + align &= ~1; + pci_config_read16((u16 *)align, &tmp16); + if (where & 1) { + tmp16 &= 0x00ff; + tmp16 |= value << 8; + } else { + tmp16 &= 0xff00; + tmp16 |= value; + } + pci_config_write16((u16 *)align, tmp16); + } else + pci_config_write8((u8 *)addr, value); + break; + case 2: + if (where < 8) { + pci_config_write16((u16 *)addr, value); + } else { + pci_config_write8((u8 *)addr, value & 0xff); + pci_config_write8(((u8 *)addr) + 1, value >> 8); + } + break; + case 4: + sun4u_write_pci_cfg_host(pbm, bus, devfn, + where, 2, value & 0xffff); + sun4u_write_pci_cfg_host(pbm, bus, devfn, + where + 2, 2, value >> 16); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int sun4u_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + unsigned char bus = bus_dev->number; + u32 *addr; + + if (!bus_dev->number && !PCI_SLOT(devfn)) + return sun4u_write_pci_cfg_host(pbm, bus, devfn, where, + size, value); + + addr = sun4u_config_mkaddr(pbm, bus, devfn, where); + if (!addr) + return PCIBIOS_SUCCESSFUL; + + switch (size) { + case 1: + pci_config_write8((u8 *)addr, value); + break; + + case 2: + if (where & 0x01) { + printk("pci_write_config_word: misaligned reg [%x]\n", + where); + return PCIBIOS_SUCCESSFUL; + } + pci_config_write16((u16 *)addr, value); + break; + + case 4: + if (where & 0x03) { + printk("pci_write_config_dword: misaligned reg [%x]\n", + where); + return PCIBIOS_SUCCESSFUL; + } + pci_config_write32(addr, value); + } + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops sun4u_pci_ops = { + .read = sun4u_read_pci_cfg, + .write = sun4u_write_pci_cfg, +}; + +static int sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 *value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + u32 devhandle = pbm->devhandle; + unsigned int bus = bus_dev->number; + unsigned int device = PCI_SLOT(devfn); + unsigned int func = PCI_FUNC(devfn); + unsigned long ret; + + if (config_out_of_range(pbm, bus, devfn, where)) { + ret = ~0UL; + } else { + ret = pci_sun4v_config_get(devhandle, + HV_PCI_DEVICE_BUILD(bus, device, func), + where, size); + } + switch (size) { + case 1: + *value = ret & 0xff; + break; + case 2: + *value = ret & 0xffff; + break; + case 4: + *value = ret & 0xffffffff; + break; + }; + + + return PCIBIOS_SUCCESSFUL; +} + +static int sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn, + int where, int size, u32 value) +{ + struct pci_pbm_info *pbm = bus_dev->sysdata; + u32 devhandle = pbm->devhandle; + unsigned int bus = bus_dev->number; + unsigned int device = PCI_SLOT(devfn); + unsigned int func = PCI_FUNC(devfn); + unsigned long ret; + + if (config_out_of_range(pbm, bus, devfn, where)) { + /* Do nothing. */ + } else { + ret = pci_sun4v_config_put(devhandle, + HV_PCI_DEVICE_BUILD(bus, device, func), + where, size, value); + } + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops sun4v_pci_ops = { + .read = sun4v_read_pci_cfg, + .write = sun4v_write_pci_cfg, +}; + +void pci_get_pbm_props(struct pci_pbm_info *pbm) +{ + const u32 *val = of_get_property(pbm->op->node, "bus-range", NULL); + + pbm->pci_first_busno = val[0]; + pbm->pci_last_busno = val[1]; + + val = of_get_property(pbm->op->node, "ino-bitmap", NULL); + if (val) { + pbm->ino_bitmap = (((u64)val[1] << 32UL) | + ((u64)val[0] << 0UL)); + } +} + +static void pci_register_legacy_regions(struct resource *io_res, + struct resource *mem_res) +{ + struct resource *p; + + /* VGA Video RAM. */ + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return; + + p->name = "Video RAM area"; + p->start = mem_res->start + 0xa0000UL; + p->end = p->start + 0x1ffffUL; + p->flags = IORESOURCE_BUSY; + request_resource(mem_res, p); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return; + + p->name = "System ROM"; + p->start = mem_res->start + 0xf0000UL; + p->end = p->start + 0xffffUL; + p->flags = IORESOURCE_BUSY; + request_resource(mem_res, p); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return; + + p->name = "Video ROM"; + p->start = mem_res->start + 0xc0000UL; + p->end = p->start + 0x7fffUL; + p->flags = IORESOURCE_BUSY; + request_resource(mem_res, p); +} + +static void pci_register_iommu_region(struct pci_pbm_info *pbm) +{ + const u32 *vdma = of_get_property(pbm->op->node, "virtual-dma", NULL); + + if (vdma) { + struct resource *rp = kmalloc(sizeof(*rp), GFP_KERNEL); + + if (!rp) { + prom_printf("Cannot allocate IOMMU resource.\n"); + prom_halt(); + } + rp->name = "IOMMU"; + rp->start = pbm->mem_space.start + (unsigned long) vdma[0]; + rp->end = rp->start + (unsigned long) vdma[1] - 1UL; + rp->flags = IORESOURCE_BUSY; + request_resource(&pbm->mem_space, rp); + } +} + +void pci_determine_mem_io_space(struct pci_pbm_info *pbm) +{ + const struct linux_prom_pci_ranges *pbm_ranges; + int i, saw_mem, saw_io; + int num_pbm_ranges; + + saw_mem = saw_io = 0; + pbm_ranges = of_get_property(pbm->op->node, "ranges", &i); + if (!pbm_ranges) { + prom_printf("PCI: Fatal error, missing PBM ranges property " + " for %s\n", + pbm->name); + prom_halt(); + } + + num_pbm_ranges = i / sizeof(*pbm_ranges); + + for (i = 0; i < num_pbm_ranges; i++) { + const struct linux_prom_pci_ranges *pr = &pbm_ranges[i]; + unsigned long a, size; + u32 parent_phys_hi, parent_phys_lo; + u32 size_hi, size_lo; + int type; + + parent_phys_hi = pr->parent_phys_hi; + parent_phys_lo = pr->parent_phys_lo; + if (tlb_type == hypervisor) + parent_phys_hi &= 0x0fffffff; + + size_hi = pr->size_hi; + size_lo = pr->size_lo; + + type = (pr->child_phys_hi >> 24) & 0x3; + a = (((unsigned long)parent_phys_hi << 32UL) | + ((unsigned long)parent_phys_lo << 0UL)); + size = (((unsigned long)size_hi << 32UL) | + ((unsigned long)size_lo << 0UL)); + + switch (type) { + case 0: + /* PCI config space, 16MB */ + pbm->config_space = a; + break; + + case 1: + /* 16-bit IO space, 16MB */ + pbm->io_space.start = a; + pbm->io_space.end = a + size - 1UL; + pbm->io_space.flags = IORESOURCE_IO; + saw_io = 1; + break; + + case 2: + /* 32-bit MEM space, 2GB */ + pbm->mem_space.start = a; + pbm->mem_space.end = a + size - 1UL; + pbm->mem_space.flags = IORESOURCE_MEM; + saw_mem = 1; + break; + + case 3: + /* XXX 64-bit MEM handling XXX */ + + default: + break; + }; + } + + if (!saw_io || !saw_mem) { + prom_printf("%s: Fatal error, missing %s PBM range.\n", + pbm->name, + (!saw_io ? "IO" : "MEM")); + prom_halt(); + } + + printk("%s: PCI IO[%lx] MEM[%lx]\n", + pbm->name, + pbm->io_space.start, + pbm->mem_space.start); + + pbm->io_space.name = pbm->mem_space.name = pbm->name; + + request_resource(&ioport_resource, &pbm->io_space); + request_resource(&iomem_resource, &pbm->mem_space); + + pci_register_legacy_regions(&pbm->io_space, + &pbm->mem_space); + pci_register_iommu_region(pbm); +} + +/* Generic helper routines for PCI error reporting. */ +void pci_scan_for_target_abort(struct pci_pbm_info *pbm, + struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + u16 status, error_bits; + + pci_read_config_word(pdev, PCI_STATUS, &status); + error_bits = + (status & (PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT)); + if (error_bits) { + pci_write_config_word(pdev, PCI_STATUS, error_bits); + printk("%s: Device %s saw Target Abort [%016x]\n", + pbm->name, pci_name(pdev), status); + } + } + + list_for_each_entry(bus, &pbus->children, node) + pci_scan_for_target_abort(pbm, bus); +} + +void pci_scan_for_master_abort(struct pci_pbm_info *pbm, + struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + u16 status, error_bits; + + pci_read_config_word(pdev, PCI_STATUS, &status); + error_bits = + (status & (PCI_STATUS_REC_MASTER_ABORT)); + if (error_bits) { + pci_write_config_word(pdev, PCI_STATUS, error_bits); + printk("%s: Device %s received Master Abort [%016x]\n", + pbm->name, pci_name(pdev), status); + } + } + + list_for_each_entry(bus, &pbus->children, node) + pci_scan_for_master_abort(pbm, bus); +} + +void pci_scan_for_parity_error(struct pci_pbm_info *pbm, + struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + u16 status, error_bits; + + pci_read_config_word(pdev, PCI_STATUS, &status); + error_bits = + (status & (PCI_STATUS_PARITY | + PCI_STATUS_DETECTED_PARITY)); + if (error_bits) { + pci_write_config_word(pdev, PCI_STATUS, error_bits); + printk("%s: Device %s saw Parity Error [%016x]\n", + pbm->name, pci_name(pdev), status); + } + } + + list_for_each_entry(bus, &pbus->children, node) + pci_scan_for_parity_error(pbm, bus); +} diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c new file mode 100644 index 000000000000..9462b68f4894 --- /dev/null +++ b/arch/sparc/kernel/pci_fire.c @@ -0,0 +1,521 @@ +/* pci_fire.c: Sun4u platform PCI-E controller support. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/msi.h> +#include <linux/irq.h> +#include <linux/of_device.h> + +#include <asm/prom.h> +#include <asm/irq.h> +#include <asm/upa.h> + +#include "pci_impl.h" + +#define DRIVER_NAME "fire" +#define PFX DRIVER_NAME ": " + +#define FIRE_IOMMU_CONTROL 0x40000UL +#define FIRE_IOMMU_TSBBASE 0x40008UL +#define FIRE_IOMMU_FLUSH 0x40100UL +#define FIRE_IOMMU_FLUSHINV 0x40108UL + +static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm) +{ + struct iommu *iommu = pbm->iommu; + u32 vdma[2], dma_mask; + u64 control; + int tsbsize, err; + + /* No virtual-dma property on these guys, use largest size. */ + vdma[0] = 0xc0000000; /* base */ + vdma[1] = 0x40000000; /* size */ + dma_mask = 0xffffffff; + tsbsize = 128; + + /* Register addresses. */ + iommu->iommu_control = pbm->pbm_regs + FIRE_IOMMU_CONTROL; + iommu->iommu_tsbbase = pbm->pbm_regs + FIRE_IOMMU_TSBBASE; + iommu->iommu_flush = pbm->pbm_regs + FIRE_IOMMU_FLUSH; + iommu->iommu_flushinv = pbm->pbm_regs + FIRE_IOMMU_FLUSHINV; + + /* We use the main control/status register of FIRE as the write + * completion register. + */ + iommu->write_complete_reg = pbm->controller_regs + 0x410000UL; + + /* + * Invalidate TLB Entries. + */ + upa_writeq(~(u64)0, iommu->iommu_flushinv); + + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask, + pbm->numa_node); + if (err) + return err; + + upa_writeq(__pa(iommu->page_table) | 0x7UL, iommu->iommu_tsbbase); + + control = upa_readq(iommu->iommu_control); + control |= (0x00000400 /* TSB cache snoop enable */ | + 0x00000300 /* Cache mode */ | + 0x00000002 /* Bypass enable */ | + 0x00000001 /* Translation enable */); + upa_writeq(control, iommu->iommu_control); + + return 0; +} + +#ifdef CONFIG_PCI_MSI +struct pci_msiq_entry { + u64 word0; +#define MSIQ_WORD0_RESV 0x8000000000000000UL +#define MSIQ_WORD0_FMT_TYPE 0x7f00000000000000UL +#define MSIQ_WORD0_FMT_TYPE_SHIFT 56 +#define MSIQ_WORD0_LEN 0x00ffc00000000000UL +#define MSIQ_WORD0_LEN_SHIFT 46 +#define MSIQ_WORD0_ADDR0 0x00003fff00000000UL +#define MSIQ_WORD0_ADDR0_SHIFT 32 +#define MSIQ_WORD0_RID 0x00000000ffff0000UL +#define MSIQ_WORD0_RID_SHIFT 16 +#define MSIQ_WORD0_DATA0 0x000000000000ffffUL +#define MSIQ_WORD0_DATA0_SHIFT 0 + +#define MSIQ_TYPE_MSG 0x6 +#define MSIQ_TYPE_MSI32 0xb +#define MSIQ_TYPE_MSI64 0xf + + u64 word1; +#define MSIQ_WORD1_ADDR1 0xffffffffffff0000UL +#define MSIQ_WORD1_ADDR1_SHIFT 16 +#define MSIQ_WORD1_DATA1 0x000000000000ffffUL +#define MSIQ_WORD1_DATA1_SHIFT 0 + + u64 resv[6]; +}; + +/* All MSI registers are offset from pbm->pbm_regs */ +#define EVENT_QUEUE_BASE_ADDR_REG 0x010000UL +#define EVENT_QUEUE_BASE_ADDR_ALL_ONES 0xfffc000000000000UL + +#define EVENT_QUEUE_CONTROL_SET(EQ) (0x011000UL + (EQ) * 0x8UL) +#define EVENT_QUEUE_CONTROL_SET_OFLOW 0x0200000000000000UL +#define EVENT_QUEUE_CONTROL_SET_EN 0x0000100000000000UL + +#define EVENT_QUEUE_CONTROL_CLEAR(EQ) (0x011200UL + (EQ) * 0x8UL) +#define EVENT_QUEUE_CONTROL_CLEAR_OF 0x0200000000000000UL +#define EVENT_QUEUE_CONTROL_CLEAR_E2I 0x0000800000000000UL +#define EVENT_QUEUE_CONTROL_CLEAR_DIS 0x0000100000000000UL + +#define EVENT_QUEUE_STATE(EQ) (0x011400UL + (EQ) * 0x8UL) +#define EVENT_QUEUE_STATE_MASK 0x0000000000000007UL +#define EVENT_QUEUE_STATE_IDLE 0x0000000000000001UL +#define EVENT_QUEUE_STATE_ACTIVE 0x0000000000000002UL +#define EVENT_QUEUE_STATE_ERROR 0x0000000000000004UL + +#define EVENT_QUEUE_TAIL(EQ) (0x011600UL + (EQ) * 0x8UL) +#define EVENT_QUEUE_TAIL_OFLOW 0x0200000000000000UL +#define EVENT_QUEUE_TAIL_VAL 0x000000000000007fUL + +#define EVENT_QUEUE_HEAD(EQ) (0x011800UL + (EQ) * 0x8UL) +#define EVENT_QUEUE_HEAD_VAL 0x000000000000007fUL + +#define MSI_MAP(MSI) (0x020000UL + (MSI) * 0x8UL) +#define MSI_MAP_VALID 0x8000000000000000UL +#define MSI_MAP_EQWR_N 0x4000000000000000UL +#define MSI_MAP_EQNUM 0x000000000000003fUL + +#define MSI_CLEAR(MSI) (0x028000UL + (MSI) * 0x8UL) +#define MSI_CLEAR_EQWR_N 0x4000000000000000UL + +#define IMONDO_DATA0 0x02C000UL +#define IMONDO_DATA0_DATA 0xffffffffffffffc0UL + +#define IMONDO_DATA1 0x02C008UL +#define IMONDO_DATA1_DATA 0xffffffffffffffffUL + +#define MSI_32BIT_ADDR 0x034000UL +#define MSI_32BIT_ADDR_VAL 0x00000000ffff0000UL + +#define MSI_64BIT_ADDR 0x034008UL +#define MSI_64BIT_ADDR_VAL 0xffffffffffff0000UL + +static int pci_fire_get_head(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long *head) +{ + *head = upa_readq(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid)); + return 0; +} + +static int pci_fire_dequeue_msi(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long *head, unsigned long *msi) +{ + unsigned long type_fmt, type, msi_num; + struct pci_msiq_entry *base, *ep; + + base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192)); + ep = &base[*head]; + + if ((ep->word0 & MSIQ_WORD0_FMT_TYPE) == 0) + return 0; + + type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >> + MSIQ_WORD0_FMT_TYPE_SHIFT); + type = (type_fmt >> 3); + if (unlikely(type != MSIQ_TYPE_MSI32 && + type != MSIQ_TYPE_MSI64)) + return -EINVAL; + + *msi = msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >> + MSIQ_WORD0_DATA0_SHIFT); + + upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi_num)); + + /* Clear the entry. */ + ep->word0 &= ~MSIQ_WORD0_FMT_TYPE; + + /* Go to next entry in ring. */ + (*head)++; + if (*head >= pbm->msiq_ent_count) + *head = 0; + + return 1; +} + +static int pci_fire_set_head(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long head) +{ + upa_writeq(head, pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid)); + return 0; +} + +static int pci_fire_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long msi, int is_msi64) +{ + u64 val; + + val = upa_readq(pbm->pbm_regs + MSI_MAP(msi)); + val &= ~(MSI_MAP_EQNUM); + val |= msiqid; + upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi)); + + upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi)); + + val = upa_readq(pbm->pbm_regs + MSI_MAP(msi)); + val |= MSI_MAP_VALID; + upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi)); + + return 0; +} + +static int pci_fire_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi) +{ + unsigned long msiqid; + u64 val; + + val = upa_readq(pbm->pbm_regs + MSI_MAP(msi)); + msiqid = (val & MSI_MAP_EQNUM); + + val &= ~MSI_MAP_VALID; + + upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi)); + + return 0; +} + +static int pci_fire_msiq_alloc(struct pci_pbm_info *pbm) +{ + unsigned long pages, order, i; + + order = get_order(512 * 1024); + pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order); + if (pages == 0UL) { + printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n", + order); + return -ENOMEM; + } + memset((char *)pages, 0, PAGE_SIZE << order); + pbm->msi_queues = (void *) pages; + + upa_writeq((EVENT_QUEUE_BASE_ADDR_ALL_ONES | + __pa(pbm->msi_queues)), + pbm->pbm_regs + EVENT_QUEUE_BASE_ADDR_REG); + + upa_writeq(pbm->portid << 6, pbm->pbm_regs + IMONDO_DATA0); + upa_writeq(0, pbm->pbm_regs + IMONDO_DATA1); + + upa_writeq(pbm->msi32_start, pbm->pbm_regs + MSI_32BIT_ADDR); + upa_writeq(pbm->msi64_start, pbm->pbm_regs + MSI_64BIT_ADDR); + + for (i = 0; i < pbm->msiq_num; i++) { + upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_HEAD(i)); + upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_TAIL(i)); + } + + return 0; +} + +static void pci_fire_msiq_free(struct pci_pbm_info *pbm) +{ + unsigned long pages, order; + + order = get_order(512 * 1024); + pages = (unsigned long) pbm->msi_queues; + + free_pages(pages, order); + + pbm->msi_queues = NULL; +} + +static int pci_fire_msiq_build_irq(struct pci_pbm_info *pbm, + unsigned long msiqid, + unsigned long devino) +{ + unsigned long cregs = (unsigned long) pbm->pbm_regs; + unsigned long imap_reg, iclr_reg, int_ctrlr; + unsigned int virt_irq; + int fixup; + u64 val; + + imap_reg = cregs + (0x001000UL + (devino * 0x08UL)); + iclr_reg = cregs + (0x001400UL + (devino * 0x08UL)); + + /* XXX iterate amongst the 4 IRQ controllers XXX */ + int_ctrlr = (1UL << 6); + + val = upa_readq(imap_reg); + val |= (1UL << 63) | int_ctrlr; + upa_writeq(val, imap_reg); + + fixup = ((pbm->portid << 6) | devino) - int_ctrlr; + + virt_irq = build_irq(fixup, iclr_reg, imap_reg); + if (!virt_irq) + return -ENOMEM; + + upa_writeq(EVENT_QUEUE_CONTROL_SET_EN, + pbm->pbm_regs + EVENT_QUEUE_CONTROL_SET(msiqid)); + + return virt_irq; +} + +static const struct sparc64_msiq_ops pci_fire_msiq_ops = { + .get_head = pci_fire_get_head, + .dequeue_msi = pci_fire_dequeue_msi, + .set_head = pci_fire_set_head, + .msi_setup = pci_fire_msi_setup, + .msi_teardown = pci_fire_msi_teardown, + .msiq_alloc = pci_fire_msiq_alloc, + .msiq_free = pci_fire_msiq_free, + .msiq_build_irq = pci_fire_msiq_build_irq, +}; + +static void pci_fire_msi_init(struct pci_pbm_info *pbm) +{ + sparc64_pbm_msi_init(pbm, &pci_fire_msiq_ops); +} +#else /* CONFIG_PCI_MSI */ +static void pci_fire_msi_init(struct pci_pbm_info *pbm) +{ +} +#endif /* !(CONFIG_PCI_MSI) */ + +/* Based at pbm->controller_regs */ +#define FIRE_PARITY_CONTROL 0x470010UL +#define FIRE_PARITY_ENAB 0x8000000000000000UL +#define FIRE_FATAL_RESET_CTL 0x471028UL +#define FIRE_FATAL_RESET_SPARE 0x0000000004000000UL +#define FIRE_FATAL_RESET_MB 0x0000000002000000UL +#define FIRE_FATAL_RESET_CPE 0x0000000000008000UL +#define FIRE_FATAL_RESET_APE 0x0000000000004000UL +#define FIRE_FATAL_RESET_PIO 0x0000000000000040UL +#define FIRE_FATAL_RESET_JW 0x0000000000000004UL +#define FIRE_FATAL_RESET_JI 0x0000000000000002UL +#define FIRE_FATAL_RESET_JR 0x0000000000000001UL +#define FIRE_CORE_INTR_ENABLE 0x471800UL + +/* Based at pbm->pbm_regs */ +#define FIRE_TLU_CTRL 0x80000UL +#define FIRE_TLU_CTRL_TIM 0x00000000da000000UL +#define FIRE_TLU_CTRL_QDET 0x0000000000000100UL +#define FIRE_TLU_CTRL_CFG 0x0000000000000001UL +#define FIRE_TLU_DEV_CTRL 0x90008UL +#define FIRE_TLU_LINK_CTRL 0x90020UL +#define FIRE_TLU_LINK_CTRL_CLK 0x0000000000000040UL +#define FIRE_LPU_RESET 0xe2008UL +#define FIRE_LPU_LLCFG 0xe2200UL +#define FIRE_LPU_LLCFG_VC0 0x0000000000000100UL +#define FIRE_LPU_FCTRL_UCTRL 0xe2240UL +#define FIRE_LPU_FCTRL_UCTRL_N 0x0000000000000002UL +#define FIRE_LPU_FCTRL_UCTRL_P 0x0000000000000001UL +#define FIRE_LPU_TXL_FIFOP 0xe2430UL +#define FIRE_LPU_LTSSM_CFG2 0xe2788UL +#define FIRE_LPU_LTSSM_CFG3 0xe2790UL +#define FIRE_LPU_LTSSM_CFG4 0xe2798UL +#define FIRE_LPU_LTSSM_CFG5 0xe27a0UL +#define FIRE_DMC_IENAB 0x31800UL +#define FIRE_DMC_DBG_SEL_A 0x53000UL +#define FIRE_DMC_DBG_SEL_B 0x53008UL +#define FIRE_PEC_IENAB 0x51800UL + +static void pci_fire_hw_init(struct pci_pbm_info *pbm) +{ + u64 val; + + upa_writeq(FIRE_PARITY_ENAB, + pbm->controller_regs + FIRE_PARITY_CONTROL); + + upa_writeq((FIRE_FATAL_RESET_SPARE | + FIRE_FATAL_RESET_MB | + FIRE_FATAL_RESET_CPE | + FIRE_FATAL_RESET_APE | + FIRE_FATAL_RESET_PIO | + FIRE_FATAL_RESET_JW | + FIRE_FATAL_RESET_JI | + FIRE_FATAL_RESET_JR), + pbm->controller_regs + FIRE_FATAL_RESET_CTL); + + upa_writeq(~(u64)0, pbm->controller_regs + FIRE_CORE_INTR_ENABLE); + + val = upa_readq(pbm->pbm_regs + FIRE_TLU_CTRL); + val |= (FIRE_TLU_CTRL_TIM | + FIRE_TLU_CTRL_QDET | + FIRE_TLU_CTRL_CFG); + upa_writeq(val, pbm->pbm_regs + FIRE_TLU_CTRL); + upa_writeq(0, pbm->pbm_regs + FIRE_TLU_DEV_CTRL); + upa_writeq(FIRE_TLU_LINK_CTRL_CLK, + pbm->pbm_regs + FIRE_TLU_LINK_CTRL); + + upa_writeq(0, pbm->pbm_regs + FIRE_LPU_RESET); + upa_writeq(FIRE_LPU_LLCFG_VC0, pbm->pbm_regs + FIRE_LPU_LLCFG); + upa_writeq((FIRE_LPU_FCTRL_UCTRL_N | FIRE_LPU_FCTRL_UCTRL_P), + pbm->pbm_regs + FIRE_LPU_FCTRL_UCTRL); + upa_writeq(((0xffff << 16) | (0x0000 << 0)), + pbm->pbm_regs + FIRE_LPU_TXL_FIFOP); + upa_writeq(3000000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG2); + upa_writeq(500000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG3); + upa_writeq((2 << 16) | (140 << 8), + pbm->pbm_regs + FIRE_LPU_LTSSM_CFG4); + upa_writeq(0, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG5); + + upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_DMC_IENAB); + upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_A); + upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_B); + + upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_PEC_IENAB); +} + +static int __init pci_fire_pbm_init(struct pci_pbm_info *pbm, + struct of_device *op, u32 portid) +{ + const struct linux_prom64_registers *regs; + struct device_node *dp = op->node; + int err; + + pbm->numa_node = -1; + + pbm->pci_ops = &sun4u_pci_ops; + pbm->config_space_reg_bits = 12; + + pbm->index = pci_num_pbms++; + + pbm->portid = portid; + pbm->op = op; + pbm->name = dp->full_name; + + regs = of_get_property(dp, "reg", NULL); + pbm->pbm_regs = regs[0].phys_addr; + pbm->controller_regs = regs[1].phys_addr - 0x410000UL; + + printk("%s: SUN4U PCIE Bus Module\n", pbm->name); + + pci_determine_mem_io_space(pbm); + + pci_get_pbm_props(pbm); + + pci_fire_hw_init(pbm); + + err = pci_fire_pbm_iommu_init(pbm); + if (err) + return err; + + pci_fire_msi_init(pbm); + + pbm->pci_bus = pci_scan_one_pbm(pbm, &op->dev); + + /* XXX register error interrupt handlers XXX */ + + pbm->next = pci_pbm_root; + pci_pbm_root = pbm; + + return 0; +} + +static int __devinit fire_probe(struct of_device *op, + const struct of_device_id *match) +{ + struct device_node *dp = op->node; + struct pci_pbm_info *pbm; + struct iommu *iommu; + u32 portid; + int err; + + portid = of_getintprop_default(dp, "portid", 0xff); + + err = -ENOMEM; + pbm = kzalloc(sizeof(*pbm), GFP_KERNEL); + if (!pbm) { + printk(KERN_ERR PFX "Cannot allocate pci_pbminfo.\n"); + goto out_err; + } + + iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL); + if (!iommu) { + printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n"); + goto out_free_controller; + } + + pbm->iommu = iommu; + + err = pci_fire_pbm_init(pbm, op, portid); + if (err) + goto out_free_iommu; + + dev_set_drvdata(&op->dev, pbm); + + return 0; + +out_free_iommu: + kfree(pbm->iommu); + +out_free_controller: + kfree(pbm); + +out_err: + return err; +} + +static struct of_device_id __initdata fire_match[] = { + { + .name = "pci", + .compatible = "pciex108e,80f0", + }, + {}, +}; + +static struct of_platform_driver fire_driver = { + .name = DRIVER_NAME, + .match_table = fire_match, + .probe = fire_probe, +}; + +static int __init fire_init(void) +{ + return of_register_driver(&fire_driver, &of_bus_type); +} + +subsys_initcall(fire_init); diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h new file mode 100644 index 000000000000..03186824327e --- /dev/null +++ b/arch/sparc/kernel/pci_impl.h @@ -0,0 +1,185 @@ +/* pci_impl.h: Helper definitions for PCI controller support. + * + * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net) + */ + +#ifndef PCI_IMPL_H +#define PCI_IMPL_H + +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/of_device.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/iommu.h> + +/* The abstraction used here is that there are PCI controllers, + * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules + * underneath. Each PCI bus module uses an IOMMU (shared by both + * PBMs of a controller, or per-PBM), and if a streaming buffer + * is present, each PCI bus module has it's own. (ie. the IOMMU + * might be shared between PBMs, the STC is never shared) + * Furthermore, each PCI bus module controls it's own autonomous + * PCI bus. + */ + +#define PCI_STC_FLUSHFLAG_INIT(STC) \ + (*((STC)->strbuf_flushflag) = 0UL) +#define PCI_STC_FLUSHFLAG_SET(STC) \ + (*((STC)->strbuf_flushflag) != 0UL) + +#ifdef CONFIG_PCI_MSI +struct pci_pbm_info; +struct sparc64_msiq_ops { + int (*get_head)(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long *head); + int (*dequeue_msi)(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long *head, unsigned long *msi); + int (*set_head)(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long head); + int (*msi_setup)(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long msi, int is_msi64); + int (*msi_teardown)(struct pci_pbm_info *pbm, unsigned long msi); + int (*msiq_alloc)(struct pci_pbm_info *pbm); + void (*msiq_free)(struct pci_pbm_info *pbm); + int (*msiq_build_irq)(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long devino); +}; + +extern void sparc64_pbm_msi_init(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops); + +struct sparc64_msiq_cookie { + struct pci_pbm_info *pbm; + unsigned long msiqid; +}; +#endif + +struct pci_pbm_info { + struct pci_pbm_info *next; + struct pci_pbm_info *sibling; + int index; + + /* Physical address base of controller registers. */ + unsigned long controller_regs; + + /* Physical address base of PBM registers. */ + unsigned long pbm_regs; + + /* Physical address of DMA sync register, if any. */ + unsigned long sync_reg; + + /* Opaque 32-bit system bus Port ID. */ + u32 portid; + + /* Opaque 32-bit handle used for hypervisor calls. */ + u32 devhandle; + + /* Chipset version information. */ + int chip_type; +#define PBM_CHIP_TYPE_SABRE 1 +#define PBM_CHIP_TYPE_PSYCHO 2 +#define PBM_CHIP_TYPE_SCHIZO 3 +#define PBM_CHIP_TYPE_SCHIZO_PLUS 4 +#define PBM_CHIP_TYPE_TOMATILLO 5 + int chip_version; + int chip_revision; + + /* Name used for top-level resources. */ + char *name; + + /* OBP specific information. */ + struct of_device *op; + u64 ino_bitmap; + + /* PBM I/O and Memory space resources. */ + struct resource io_space; + struct resource mem_space; + + /* Base of PCI Config space, can be per-PBM or shared. */ + unsigned long config_space; + + /* This will be 12 on PCI-E controllers, 8 elsewhere. */ + unsigned long config_space_reg_bits; + + unsigned long pci_afsr; + unsigned long pci_afar; + unsigned long pci_csr; + + /* State of 66MHz capabilities on this PBM. */ + int is_66mhz_capable; + int all_devs_66mhz; + +#ifdef CONFIG_PCI_MSI + /* MSI info. */ + u32 msiq_num; + u32 msiq_ent_count; + u32 msiq_first; + u32 msiq_first_devino; + u32 msiq_rotor; + struct sparc64_msiq_cookie *msiq_irq_cookies; + u32 msi_num; + u32 msi_first; + u32 msi_data_mask; + u32 msix_data_width; + u64 msi32_start; + u64 msi64_start; + u32 msi32_len; + u32 msi64_len; + void *msi_queues; + unsigned long *msi_bitmap; + unsigned int *msi_irq_table; + int (*setup_msi_irq)(unsigned int *virt_irq_p, struct pci_dev *pdev, + struct msi_desc *entry); + void (*teardown_msi_irq)(unsigned int virt_irq, struct pci_dev *pdev); + const struct sparc64_msiq_ops *msi_ops; +#endif /* !(CONFIG_PCI_MSI) */ + + /* This PBM's streaming buffer. */ + struct strbuf stc; + + /* IOMMU state, potentially shared by both PBM segments. */ + struct iommu *iommu; + + /* Now things for the actual PCI bus probes. */ + unsigned int pci_first_busno; + unsigned int pci_last_busno; + struct pci_bus *pci_bus; + struct pci_ops *pci_ops; + + int numa_node; +}; + +extern struct pci_pbm_info *pci_pbm_root; + +extern int pci_num_pbms; + +/* PCI bus scanning and fixup support. */ +extern void pci_get_pbm_props(struct pci_pbm_info *pbm); +extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, + struct device *parent); +extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm); + +/* Error reporting support. */ +extern void pci_scan_for_target_abort(struct pci_pbm_info *, struct pci_bus *); +extern void pci_scan_for_master_abort(struct pci_pbm_info *, struct pci_bus *); +extern void pci_scan_for_parity_error(struct pci_pbm_info *, struct pci_bus *); + +/* Configuration space access. */ +extern void pci_config_read8(u8 *addr, u8 *ret); +extern void pci_config_read16(u16 *addr, u16 *ret); +extern void pci_config_read32(u32 *addr, u32 *ret); +extern void pci_config_write8(u8 *addr, u8 val); +extern void pci_config_write16(u16 *addr, u16 val); +extern void pci_config_write32(u32 *addr, u32 val); + +extern struct pci_ops sun4u_pci_ops; +extern struct pci_ops sun4v_pci_ops; + +extern volatile int pci_poke_in_progress; +extern volatile int pci_poke_cpu; +extern volatile int pci_poke_faulted; + +#endif /* !(PCI_IMPL_H) */ diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c new file mode 100644 index 000000000000..2e680f34f727 --- /dev/null +++ b/arch/sparc/kernel/pci_msi.c @@ -0,0 +1,447 @@ +/* pci_msi.c: Sparc64 MSI support common layer. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/irq.h> + +#include "pci_impl.h" + +static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie) +{ + struct sparc64_msiq_cookie *msiq_cookie = cookie; + struct pci_pbm_info *pbm = msiq_cookie->pbm; + unsigned long msiqid = msiq_cookie->msiqid; + const struct sparc64_msiq_ops *ops; + unsigned long orig_head, head; + int err; + + ops = pbm->msi_ops; + + err = ops->get_head(pbm, msiqid, &head); + if (unlikely(err < 0)) + goto err_get_head; + + orig_head = head; + for (;;) { + unsigned long msi; + + err = ops->dequeue_msi(pbm, msiqid, &head, &msi); + if (likely(err > 0)) { + struct irq_desc *desc; + unsigned int virt_irq; + + virt_irq = pbm->msi_irq_table[msi - pbm->msi_first]; + desc = irq_desc + virt_irq; + + desc->handle_irq(virt_irq, desc); + } + + if (unlikely(err < 0)) + goto err_dequeue; + + if (err == 0) + break; + } + if (likely(head != orig_head)) { + err = ops->set_head(pbm, msiqid, head); + if (unlikely(err < 0)) + goto err_set_head; + } + return IRQ_HANDLED; + +err_get_head: + printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n", + msiqid, err); + goto err_out; + +err_dequeue: + printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] " + "gives error %d\n", + head, msiqid, err); + goto err_out; + +err_set_head: + printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] " + "gives error %d\n", + head, msiqid, err); + goto err_out; + +err_out: + return IRQ_NONE; +} + +static u32 pick_msiq(struct pci_pbm_info *pbm) +{ + static DEFINE_SPINLOCK(rotor_lock); + unsigned long flags; + u32 ret, rotor; + + spin_lock_irqsave(&rotor_lock, flags); + + rotor = pbm->msiq_rotor; + ret = pbm->msiq_first + rotor; + + if (++rotor >= pbm->msiq_num) + rotor = 0; + pbm->msiq_rotor = rotor; + + spin_unlock_irqrestore(&rotor_lock, flags); + + return ret; +} + + +static int alloc_msi(struct pci_pbm_info *pbm) +{ + int i; + + for (i = 0; i < pbm->msi_num; i++) { + if (!test_and_set_bit(i, pbm->msi_bitmap)) + return i + pbm->msi_first; + } + + return -ENOENT; +} + +static void free_msi(struct pci_pbm_info *pbm, int msi_num) +{ + msi_num -= pbm->msi_first; + clear_bit(msi_num, pbm->msi_bitmap); +} + +static struct irq_chip msi_irq = { + .typename = "PCI-MSI", + .mask = mask_msi_irq, + .unmask = unmask_msi_irq, + .enable = unmask_msi_irq, + .disable = mask_msi_irq, + /* XXX affinity XXX */ +}; + +static int sparc64_setup_msi_irq(unsigned int *virt_irq_p, + struct pci_dev *pdev, + struct msi_desc *entry) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + const struct sparc64_msiq_ops *ops = pbm->msi_ops; + struct msi_msg msg; + int msi, err; + u32 msiqid; + + *virt_irq_p = virt_irq_alloc(0, 0); + err = -ENOMEM; + if (!*virt_irq_p) + goto out_err; + + set_irq_chip_and_handler_name(*virt_irq_p, &msi_irq, + handle_simple_irq, "MSI"); + + err = alloc_msi(pbm); + if (unlikely(err < 0)) + goto out_virt_irq_free; + + msi = err; + + msiqid = pick_msiq(pbm); + + err = ops->msi_setup(pbm, msiqid, msi, + (entry->msi_attrib.is_64 ? 1 : 0)); + if (err) + goto out_msi_free; + + pbm->msi_irq_table[msi - pbm->msi_first] = *virt_irq_p; + + if (entry->msi_attrib.is_64) { + msg.address_hi = pbm->msi64_start >> 32; + msg.address_lo = pbm->msi64_start & 0xffffffff; + } else { + msg.address_hi = 0; + msg.address_lo = pbm->msi32_start; + } + msg.data = msi; + + set_irq_msi(*virt_irq_p, entry); + write_msi_msg(*virt_irq_p, &msg); + + return 0; + +out_msi_free: + free_msi(pbm, msi); + +out_virt_irq_free: + set_irq_chip(*virt_irq_p, NULL); + virt_irq_free(*virt_irq_p); + *virt_irq_p = 0; + +out_err: + return err; +} + +static void sparc64_teardown_msi_irq(unsigned int virt_irq, + struct pci_dev *pdev) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + const struct sparc64_msiq_ops *ops = pbm->msi_ops; + unsigned int msi_num; + int i, err; + + for (i = 0; i < pbm->msi_num; i++) { + if (pbm->msi_irq_table[i] == virt_irq) + break; + } + if (i >= pbm->msi_num) { + printk(KERN_ERR "%s: teardown: No MSI for irq %u\n", + pbm->name, virt_irq); + return; + } + + msi_num = pbm->msi_first + i; + pbm->msi_irq_table[i] = ~0U; + + err = ops->msi_teardown(pbm, msi_num); + if (err) { + printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, " + "irq %u, gives error %d\n", + pbm->name, msi_num, virt_irq, err); + return; + } + + free_msi(pbm, msi_num); + + set_irq_chip(virt_irq, NULL); + virt_irq_free(virt_irq); +} + +static int msi_bitmap_alloc(struct pci_pbm_info *pbm) +{ + unsigned long size, bits_per_ulong; + + bits_per_ulong = sizeof(unsigned long) * 8; + size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1); + size /= 8; + BUG_ON(size % sizeof(unsigned long)); + + pbm->msi_bitmap = kzalloc(size, GFP_KERNEL); + if (!pbm->msi_bitmap) + return -ENOMEM; + + return 0; +} + +static void msi_bitmap_free(struct pci_pbm_info *pbm) +{ + kfree(pbm->msi_bitmap); + pbm->msi_bitmap = NULL; +} + +static int msi_table_alloc(struct pci_pbm_info *pbm) +{ + int size, i; + + size = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie); + pbm->msiq_irq_cookies = kzalloc(size, GFP_KERNEL); + if (!pbm->msiq_irq_cookies) + return -ENOMEM; + + for (i = 0; i < pbm->msiq_num; i++) { + struct sparc64_msiq_cookie *p; + + p = &pbm->msiq_irq_cookies[i]; + p->pbm = pbm; + p->msiqid = pbm->msiq_first + i; + } + + size = pbm->msi_num * sizeof(unsigned int); + pbm->msi_irq_table = kzalloc(size, GFP_KERNEL); + if (!pbm->msi_irq_table) { + kfree(pbm->msiq_irq_cookies); + pbm->msiq_irq_cookies = NULL; + return -ENOMEM; + } + + return 0; +} + +static void msi_table_free(struct pci_pbm_info *pbm) +{ + kfree(pbm->msiq_irq_cookies); + pbm->msiq_irq_cookies = NULL; + + kfree(pbm->msi_irq_table); + pbm->msi_irq_table = NULL; +} + +static int bringup_one_msi_queue(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops, + unsigned long msiqid, + unsigned long devino) +{ + int irq = ops->msiq_build_irq(pbm, msiqid, devino); + int err, nid; + + if (irq < 0) + return irq; + + nid = pbm->numa_node; + if (nid != -1) { + cpumask_t numa_mask = node_to_cpumask(nid); + + irq_set_affinity(irq, numa_mask); + } + err = request_irq(irq, sparc64_msiq_interrupt, 0, + "MSIQ", + &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]); + if (err) + return err; + + return 0; +} + +static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops) +{ + int i; + + for (i = 0; i < pbm->msiq_num; i++) { + unsigned long msiqid = i + pbm->msiq_first; + unsigned long devino = i + pbm->msiq_first_devino; + int err; + + err = bringup_one_msi_queue(pbm, ops, msiqid, devino); + if (err) + return err; + } + + return 0; +} + +void sparc64_pbm_msi_init(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops) +{ + const u32 *val; + int len; + + val = of_get_property(pbm->op->node, "#msi-eqs", &len); + if (!val || len != 4) + goto no_msi; + pbm->msiq_num = *val; + if (pbm->msiq_num) { + const struct msiq_prop { + u32 first_msiq; + u32 num_msiq; + u32 first_devino; + } *mqp; + const struct msi_range_prop { + u32 first_msi; + u32 num_msi; + } *mrng; + const struct addr_range_prop { + u32 msi32_high; + u32 msi32_low; + u32 msi32_len; + u32 msi64_high; + u32 msi64_low; + u32 msi64_len; + } *arng; + + val = of_get_property(pbm->op->node, "msi-eq-size", &len); + if (!val || len != 4) + goto no_msi; + + pbm->msiq_ent_count = *val; + + mqp = of_get_property(pbm->op->node, + "msi-eq-to-devino", &len); + if (!mqp) + mqp = of_get_property(pbm->op->node, + "msi-eq-devino", &len); + if (!mqp || len != sizeof(struct msiq_prop)) + goto no_msi; + + pbm->msiq_first = mqp->first_msiq; + pbm->msiq_first_devino = mqp->first_devino; + + val = of_get_property(pbm->op->node, "#msi", &len); + if (!val || len != 4) + goto no_msi; + pbm->msi_num = *val; + + mrng = of_get_property(pbm->op->node, "msi-ranges", &len); + if (!mrng || len != sizeof(struct msi_range_prop)) + goto no_msi; + pbm->msi_first = mrng->first_msi; + + val = of_get_property(pbm->op->node, "msi-data-mask", &len); + if (!val || len != 4) + goto no_msi; + pbm->msi_data_mask = *val; + + val = of_get_property(pbm->op->node, "msix-data-width", &len); + if (!val || len != 4) + goto no_msi; + pbm->msix_data_width = *val; + + arng = of_get_property(pbm->op->node, "msi-address-ranges", + &len); + if (!arng || len != sizeof(struct addr_range_prop)) + goto no_msi; + pbm->msi32_start = ((u64)arng->msi32_high << 32) | + (u64) arng->msi32_low; + pbm->msi64_start = ((u64)arng->msi64_high << 32) | + (u64) arng->msi64_low; + pbm->msi32_len = arng->msi32_len; + pbm->msi64_len = arng->msi64_len; + + if (msi_bitmap_alloc(pbm)) + goto no_msi; + + if (msi_table_alloc(pbm)) { + msi_bitmap_free(pbm); + goto no_msi; + } + + if (ops->msiq_alloc(pbm)) { + msi_table_free(pbm); + msi_bitmap_free(pbm); + goto no_msi; + } + + if (sparc64_bringup_msi_queues(pbm, ops)) { + ops->msiq_free(pbm); + msi_table_free(pbm); + msi_bitmap_free(pbm); + goto no_msi; + } + + printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] " + "devino[0x%x]\n", + pbm->name, + pbm->msiq_first, pbm->msiq_num, + pbm->msiq_ent_count, + pbm->msiq_first_devino); + printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] " + "width[%u]\n", + pbm->name, + pbm->msi_first, pbm->msi_num, pbm->msi_data_mask, + pbm->msix_data_width); + printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] " + "addr64[0x%lx:0x%x]\n", + pbm->name, + pbm->msi32_start, pbm->msi32_len, + pbm->msi64_start, pbm->msi64_len); + printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n", + pbm->name, + __pa(pbm->msi_queues)); + + pbm->msi_ops = ops; + pbm->setup_msi_irq = sparc64_setup_msi_irq; + pbm->teardown_msi_irq = sparc64_teardown_msi_irq; + } + return; + +no_msi: + pbm->msiq_num = 0; + printk(KERN_INFO "%s: No MSI support.\n", pbm->name); +} diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c new file mode 100644 index 000000000000..dfb3ec892987 --- /dev/null +++ b/arch/sparc/kernel/pci_psycho.c @@ -0,0 +1,618 @@ +/* pci_psycho.c: PSYCHO/U2P specific PCI controller support. + * + * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/of_device.h> + +#include <asm/iommu.h> +#include <asm/irq.h> +#include <asm/starfire.h> +#include <asm/prom.h> +#include <asm/upa.h> + +#include "pci_impl.h" +#include "iommu_common.h" +#include "psycho_common.h" + +#define DRIVER_NAME "psycho" +#define PFX DRIVER_NAME ": " + +/* Misc. PSYCHO PCI controller register offsets and definitions. */ +#define PSYCHO_CONTROL 0x0010UL +#define PSYCHO_CONTROL_IMPL 0xf000000000000000UL /* Implementation of this PSYCHO*/ +#define PSYCHO_CONTROL_VER 0x0f00000000000000UL /* Version of this PSYCHO */ +#define PSYCHO_CONTROL_MID 0x00f8000000000000UL /* UPA Module ID of PSYCHO */ +#define PSYCHO_CONTROL_IGN 0x0007c00000000000UL /* Interrupt Group Number */ +#define PSYCHO_CONTROL_RESV 0x00003ffffffffff0UL /* Reserved */ +#define PSYCHO_CONTROL_APCKEN 0x0000000000000008UL /* Address Parity Check Enable */ +#define PSYCHO_CONTROL_APERR 0x0000000000000004UL /* Incoming System Addr Parerr */ +#define PSYCHO_CONTROL_IAP 0x0000000000000002UL /* Invert UPA Parity */ +#define PSYCHO_CONTROL_MODE 0x0000000000000001UL /* PSYCHO clock mode */ +#define PSYCHO_PCIA_CTRL 0x2000UL +#define PSYCHO_PCIB_CTRL 0x4000UL +#define PSYCHO_PCICTRL_RESV1 0xfffffff000000000UL /* Reserved */ +#define PSYCHO_PCICTRL_SBH_ERR 0x0000000800000000UL /* Streaming byte hole error */ +#define PSYCHO_PCICTRL_SERR 0x0000000400000000UL /* SERR signal asserted */ +#define PSYCHO_PCICTRL_SPEED 0x0000000200000000UL /* PCI speed (1 is U2P clock) */ +#define PSYCHO_PCICTRL_RESV2 0x00000001ffc00000UL /* Reserved */ +#define PSYCHO_PCICTRL_ARB_PARK 0x0000000000200000UL /* PCI arbitration parking */ +#define PSYCHO_PCICTRL_RESV3 0x00000000001ff800UL /* Reserved */ +#define PSYCHO_PCICTRL_SBH_INT 0x0000000000000400UL /* Streaming byte hole int enab */ +#define PSYCHO_PCICTRL_WEN 0x0000000000000200UL /* Power Mgmt Wake Enable */ +#define PSYCHO_PCICTRL_EEN 0x0000000000000100UL /* PCI Error Interrupt Enable */ +#define PSYCHO_PCICTRL_RESV4 0x00000000000000c0UL /* Reserved */ +#define PSYCHO_PCICTRL_AEN 0x000000000000003fUL /* PCI DVMA Arbitration Enable */ + +/* PSYCHO error handling support. */ + +/* Helper function of IOMMU error checking, which checks out + * the state of the streaming buffers. The IOMMU lock is + * held when this is called. + * + * For the PCI error case we know which PBM (and thus which + * streaming buffer) caused the error, but for the uncorrectable + * error case we do not. So we always check both streaming caches. + */ +#define PSYCHO_STRBUF_CONTROL_A 0x2800UL +#define PSYCHO_STRBUF_CONTROL_B 0x4800UL +#define PSYCHO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */ +#define PSYCHO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */ +#define PSYCHO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */ +#define PSYCHO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */ +#define PSYCHO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */ +#define PSYCHO_STRBUF_FLUSH_A 0x2808UL +#define PSYCHO_STRBUF_FLUSH_B 0x4808UL +#define PSYCHO_STRBUF_FSYNC_A 0x2810UL +#define PSYCHO_STRBUF_FSYNC_B 0x4810UL +#define PSYCHO_STC_DATA_A 0xb000UL +#define PSYCHO_STC_DATA_B 0xc000UL +#define PSYCHO_STC_ERR_A 0xb400UL +#define PSYCHO_STC_ERR_B 0xc400UL +#define PSYCHO_STC_TAG_A 0xb800UL +#define PSYCHO_STC_TAG_B 0xc800UL +#define PSYCHO_STC_LINE_A 0xb900UL +#define PSYCHO_STC_LINE_B 0xc900UL + +/* When an Uncorrectable Error or a PCI Error happens, we + * interrogate the IOMMU state to see if it is the cause. + */ +#define PSYCHO_IOMMU_CONTROL 0x0200UL +#define PSYCHO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */ +#define PSYCHO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */ +#define PSYCHO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */ +#define PSYCHO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */ +#define PSYCHO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */ +#define PSYCHO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */ +#define PSYCHO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */ +#define PSYCHO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */ +#define PSYCHO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */ +#define PSYCHO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */ +#define PSYCHO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */ +#define PSYCHO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */ +#define PSYCHO_IOMMU_TSBBASE 0x0208UL +#define PSYCHO_IOMMU_FLUSH 0x0210UL +#define PSYCHO_IOMMU_TAG 0xa580UL +#define PSYCHO_IOMMU_DATA 0xa600UL + +/* Uncorrectable Errors. Cause of the error and the address are + * recorded in the UE_AFSR and UE_AFAR of PSYCHO. They are errors + * relating to UPA interface transactions. + */ +#define PSYCHO_UE_AFSR 0x0030UL +#define PSYCHO_UEAFSR_PPIO 0x8000000000000000UL /* Primary PIO is cause */ +#define PSYCHO_UEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read is cause */ +#define PSYCHO_UEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write is cause */ +#define PSYCHO_UEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */ +#define PSYCHO_UEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read is cause */ +#define PSYCHO_UEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write is cause*/ +#define PSYCHO_UEAFSR_RESV1 0x03ff000000000000UL /* Reserved */ +#define PSYCHO_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */ +#define PSYCHO_UEAFSR_DOFF 0x00000000e0000000UL /* Doubleword Offset */ +#define PSYCHO_UEAFSR_MID 0x000000001f000000UL /* UPA MID causing the fault */ +#define PSYCHO_UEAFSR_BLK 0x0000000000800000UL /* Trans was block operation */ +#define PSYCHO_UEAFSR_RESV2 0x00000000007fffffUL /* Reserved */ +#define PSYCHO_UE_AFAR 0x0038UL + +static irqreturn_t psycho_ue_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg = pbm->controller_regs + PSYCHO_UE_AFSR; + unsigned long afar_reg = pbm->controller_regs + PSYCHO_UE_AFAR; + unsigned long afsr, afar, error_bits; + int reported; + + /* Latch uncorrectable error status. */ + afar = upa_readq(afar_reg); + afsr = upa_readq(afsr_reg); + + /* Clear the primary/secondary error status bits. */ + error_bits = afsr & + (PSYCHO_UEAFSR_PPIO | PSYCHO_UEAFSR_PDRD | PSYCHO_UEAFSR_PDWR | + PSYCHO_UEAFSR_SPIO | PSYCHO_UEAFSR_SDRD | PSYCHO_UEAFSR_SDWR); + if (!error_bits) + return IRQ_NONE; + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: Uncorrectable Error, primary error type[%s]\n", + pbm->name, + (((error_bits & PSYCHO_UEAFSR_PPIO) ? + "PIO" : + ((error_bits & PSYCHO_UEAFSR_PDRD) ? + "DMA Read" : + ((error_bits & PSYCHO_UEAFSR_PDWR) ? + "DMA Write" : "???"))))); + printk("%s: bytemask[%04lx] dword_offset[%lx] UPA_MID[%02lx] was_block(%d)\n", + pbm->name, + (afsr & PSYCHO_UEAFSR_BMSK) >> 32UL, + (afsr & PSYCHO_UEAFSR_DOFF) >> 29UL, + (afsr & PSYCHO_UEAFSR_MID) >> 24UL, + ((afsr & PSYCHO_UEAFSR_BLK) ? 1 : 0)); + printk("%s: UE AFAR [%016lx]\n", pbm->name, afar); + printk("%s: UE Secondary errors [", pbm->name); + reported = 0; + if (afsr & PSYCHO_UEAFSR_SPIO) { + reported++; + printk("(PIO)"); + } + if (afsr & PSYCHO_UEAFSR_SDRD) { + reported++; + printk("(DMA Read)"); + } + if (afsr & PSYCHO_UEAFSR_SDWR) { + reported++; + printk("(DMA Write)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + /* Interrogate both IOMMUs for error status. */ + psycho_check_iommu_error(pbm, afsr, afar, UE_ERR); + if (pbm->sibling) + psycho_check_iommu_error(pbm->sibling, afsr, afar, UE_ERR); + + return IRQ_HANDLED; +} + +/* Correctable Errors. */ +#define PSYCHO_CE_AFSR 0x0040UL +#define PSYCHO_CEAFSR_PPIO 0x8000000000000000UL /* Primary PIO is cause */ +#define PSYCHO_CEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read is cause */ +#define PSYCHO_CEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write is cause */ +#define PSYCHO_CEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */ +#define PSYCHO_CEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read is cause */ +#define PSYCHO_CEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write is cause*/ +#define PSYCHO_CEAFSR_RESV1 0x0300000000000000UL /* Reserved */ +#define PSYCHO_CEAFSR_ESYND 0x00ff000000000000UL /* Syndrome Bits */ +#define PSYCHO_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */ +#define PSYCHO_CEAFSR_DOFF 0x00000000e0000000UL /* Double Offset */ +#define PSYCHO_CEAFSR_MID 0x000000001f000000UL /* UPA MID causing the fault */ +#define PSYCHO_CEAFSR_BLK 0x0000000000800000UL /* Trans was block operation */ +#define PSYCHO_CEAFSR_RESV2 0x00000000007fffffUL /* Reserved */ +#define PSYCHO_CE_AFAR 0x0040UL + +static irqreturn_t psycho_ce_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg = pbm->controller_regs + PSYCHO_CE_AFSR; + unsigned long afar_reg = pbm->controller_regs + PSYCHO_CE_AFAR; + unsigned long afsr, afar, error_bits; + int reported; + + /* Latch error status. */ + afar = upa_readq(afar_reg); + afsr = upa_readq(afsr_reg); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (PSYCHO_CEAFSR_PPIO | PSYCHO_CEAFSR_PDRD | PSYCHO_CEAFSR_PDWR | + PSYCHO_CEAFSR_SPIO | PSYCHO_CEAFSR_SDRD | PSYCHO_CEAFSR_SDWR); + if (!error_bits) + return IRQ_NONE; + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: Correctable Error, primary error type[%s]\n", + pbm->name, + (((error_bits & PSYCHO_CEAFSR_PPIO) ? + "PIO" : + ((error_bits & PSYCHO_CEAFSR_PDRD) ? + "DMA Read" : + ((error_bits & PSYCHO_CEAFSR_PDWR) ? + "DMA Write" : "???"))))); + + /* XXX Use syndrome and afar to print out module string just like + * XXX UDB CE trap handler does... -DaveM + */ + printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] " + "UPA_MID[%02lx] was_block(%d)\n", + pbm->name, + (afsr & PSYCHO_CEAFSR_ESYND) >> 48UL, + (afsr & PSYCHO_CEAFSR_BMSK) >> 32UL, + (afsr & PSYCHO_CEAFSR_DOFF) >> 29UL, + (afsr & PSYCHO_CEAFSR_MID) >> 24UL, + ((afsr & PSYCHO_CEAFSR_BLK) ? 1 : 0)); + printk("%s: CE AFAR [%016lx]\n", pbm->name, afar); + printk("%s: CE Secondary errors [", pbm->name); + reported = 0; + if (afsr & PSYCHO_CEAFSR_SPIO) { + reported++; + printk("(PIO)"); + } + if (afsr & PSYCHO_CEAFSR_SDRD) { + reported++; + printk("(DMA Read)"); + } + if (afsr & PSYCHO_CEAFSR_SDWR) { + reported++; + printk("(DMA Write)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + return IRQ_HANDLED; +} + +/* PCI Errors. They are signalled by the PCI bus module since they + * are associated with a specific bus segment. + */ +#define PSYCHO_PCI_AFSR_A 0x2010UL +#define PSYCHO_PCI_AFSR_B 0x4010UL +#define PSYCHO_PCI_AFAR_A 0x2018UL +#define PSYCHO_PCI_AFAR_B 0x4018UL + +/* XXX What about PowerFail/PowerManagement??? -DaveM */ +#define PSYCHO_ECC_CTRL 0x0020 +#define PSYCHO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */ +#define PSYCHO_ECCCTRL_UE 0x4000000000000000UL /* Enable UE Interrupts */ +#define PSYCHO_ECCCTRL_CE 0x2000000000000000UL /* Enable CE INterrupts */ +static void psycho_register_error_handlers(struct pci_pbm_info *pbm) +{ + struct of_device *op = of_find_device_by_node(pbm->op->node); + unsigned long base = pbm->controller_regs; + u64 tmp; + int err; + + if (!op) + return; + + /* Psycho interrupt property order is: + * 0: PCIERR INO for this PBM + * 1: UE ERR + * 2: CE ERR + * 3: POWER FAIL + * 4: SPARE HARDWARE + * 5: POWER MANAGEMENT + */ + + if (op->num_irqs < 6) + return; + + /* We really mean to ignore the return result here. Two + * PCI controller share the same interrupt numbers and + * drive the same front-end hardware. Whichever of the + * two get in here first will register the IRQ handler + * the second will just error out since we do not pass in + * IRQF_SHARED. + */ + err = request_irq(op->irqs[1], psycho_ue_intr, IRQF_SHARED, + "PSYCHO_UE", pbm); + err = request_irq(op->irqs[2], psycho_ce_intr, IRQF_SHARED, + "PSYCHO_CE", pbm); + + /* This one, however, ought not to fail. We can just warn + * about it since the system can still operate properly even + * if this fails. + */ + err = request_irq(op->irqs[0], psycho_pcierr_intr, IRQF_SHARED, + "PSYCHO_PCIERR", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register PCIERR, " + "err=%d\n", pbm->name, err); + + /* Enable UE and CE interrupts for controller. */ + upa_writeq((PSYCHO_ECCCTRL_EE | + PSYCHO_ECCCTRL_UE | + PSYCHO_ECCCTRL_CE), base + PSYCHO_ECC_CTRL); + + /* Enable PCI Error interrupts and clear error + * bits for each PBM. + */ + tmp = upa_readq(base + PSYCHO_PCIA_CTRL); + tmp |= (PSYCHO_PCICTRL_SERR | + PSYCHO_PCICTRL_SBH_ERR | + PSYCHO_PCICTRL_EEN); + tmp &= ~(PSYCHO_PCICTRL_SBH_INT); + upa_writeq(tmp, base + PSYCHO_PCIA_CTRL); + + tmp = upa_readq(base + PSYCHO_PCIB_CTRL); + tmp |= (PSYCHO_PCICTRL_SERR | + PSYCHO_PCICTRL_SBH_ERR | + PSYCHO_PCICTRL_EEN); + tmp &= ~(PSYCHO_PCICTRL_SBH_INT); + upa_writeq(tmp, base + PSYCHO_PCIB_CTRL); +} + +/* PSYCHO boot time probing and initialization. */ +static void pbm_config_busmastering(struct pci_pbm_info *pbm) +{ + u8 *addr; + + /* Set cache-line size to 64 bytes, this is actually + * a nop but I do it for completeness. + */ + addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno, + 0, PCI_CACHE_LINE_SIZE); + pci_config_write8(addr, 64 / sizeof(u32)); + + /* Set PBM latency timer to 64 PCI clocks. */ + addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno, + 0, PCI_LATENCY_TIMER); + pci_config_write8(addr, 64); +} + +static void __init psycho_scan_bus(struct pci_pbm_info *pbm, + struct device *parent) +{ + pbm_config_busmastering(pbm); + pbm->is_66mhz_capable = 0; + pbm->pci_bus = pci_scan_one_pbm(pbm, parent); + + /* After the PCI bus scan is complete, we can register + * the error interrupt handlers. + */ + psycho_register_error_handlers(pbm); +} + +#define PSYCHO_IRQ_RETRY 0x1a00UL +#define PSYCHO_PCIA_DIAG 0x2020UL +#define PSYCHO_PCIB_DIAG 0x4020UL +#define PSYCHO_PCIDIAG_RESV 0xffffffffffffff80UL /* Reserved */ +#define PSYCHO_PCIDIAG_DRETRY 0x0000000000000040UL /* Disable retry limit */ +#define PSYCHO_PCIDIAG_DISYNC 0x0000000000000020UL /* Disable DMA wr / irq sync */ +#define PSYCHO_PCIDIAG_DDWSYNC 0x0000000000000010UL /* Disable DMA wr / PIO rd sync */ +#define PSYCHO_PCIDIAG_IDDPAR 0x0000000000000008UL /* Invert DMA data parity */ +#define PSYCHO_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO data parity */ +#define PSYCHO_PCIDIAG_IPAPAR 0x0000000000000002UL /* Invert PIO address parity */ +#define PSYCHO_PCIDIAG_LPBACK 0x0000000000000001UL /* Enable loopback mode */ + +static void psycho_controller_hwinit(struct pci_pbm_info *pbm) +{ + u64 tmp; + + upa_writeq(5, pbm->controller_regs + PSYCHO_IRQ_RETRY); + + /* Enable arbiter for all PCI slots. */ + tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIA_CTRL); + tmp |= PSYCHO_PCICTRL_AEN; + upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIA_CTRL); + + tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIB_CTRL); + tmp |= PSYCHO_PCICTRL_AEN; + upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIB_CTRL); + + /* Disable DMA write / PIO read synchronization on + * both PCI bus segments. + * [ U2P Erratum 1243770, STP2223BGA data sheet ] + */ + tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIA_DIAG); + tmp |= PSYCHO_PCIDIAG_DDWSYNC; + upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIA_DIAG); + + tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIB_DIAG); + tmp |= PSYCHO_PCIDIAG_DDWSYNC; + upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIB_DIAG); +} + +static void psycho_pbm_strbuf_init(struct pci_pbm_info *pbm, + int is_pbm_a) +{ + unsigned long base = pbm->controller_regs; + u64 control; + + if (is_pbm_a) { + pbm->stc.strbuf_control = base + PSYCHO_STRBUF_CONTROL_A; + pbm->stc.strbuf_pflush = base + PSYCHO_STRBUF_FLUSH_A; + pbm->stc.strbuf_fsync = base + PSYCHO_STRBUF_FSYNC_A; + pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_A; + pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_A; + pbm->stc.strbuf_line_diag= base + PSYCHO_STC_LINE_A; + } else { + pbm->stc.strbuf_control = base + PSYCHO_STRBUF_CONTROL_B; + pbm->stc.strbuf_pflush = base + PSYCHO_STRBUF_FLUSH_B; + pbm->stc.strbuf_fsync = base + PSYCHO_STRBUF_FSYNC_B; + pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_B; + pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_B; + pbm->stc.strbuf_line_diag= base + PSYCHO_STC_LINE_B; + } + /* PSYCHO's streaming buffer lacks ctx flushing. */ + pbm->stc.strbuf_ctxflush = 0; + pbm->stc.strbuf_ctxmatch_base = 0; + + pbm->stc.strbuf_flushflag = (volatile unsigned long *) + ((((unsigned long)&pbm->stc.__flushflag_buf[0]) + + 63UL) + & ~63UL); + pbm->stc.strbuf_flushflag_pa = (unsigned long) + __pa(pbm->stc.strbuf_flushflag); + + /* Enable the streaming buffer. We have to be careful + * just in case OBP left it with LRU locking enabled. + * + * It is possible to control if PBM will be rerun on + * line misses. Currently I just retain whatever setting + * OBP left us with. All checks so far show it having + * a value of zero. + */ +#undef PSYCHO_STRBUF_RERUN_ENABLE +#undef PSYCHO_STRBUF_RERUN_DISABLE + control = upa_readq(pbm->stc.strbuf_control); + control |= PSYCHO_STRBUF_CTRL_ENAB; + control &= ~(PSYCHO_STRBUF_CTRL_LENAB | PSYCHO_STRBUF_CTRL_LPTR); +#ifdef PSYCHO_STRBUF_RERUN_ENABLE + control &= ~(PSYCHO_STRBUF_CTRL_RRDIS); +#else +#ifdef PSYCHO_STRBUF_RERUN_DISABLE + control |= PSYCHO_STRBUF_CTRL_RRDIS; +#endif +#endif + upa_writeq(control, pbm->stc.strbuf_control); + + pbm->stc.strbuf_enabled = 1; +} + +#define PSYCHO_IOSPACE_A 0x002000000UL +#define PSYCHO_IOSPACE_B 0x002010000UL +#define PSYCHO_IOSPACE_SIZE 0x00000ffffUL +#define PSYCHO_MEMSPACE_A 0x100000000UL +#define PSYCHO_MEMSPACE_B 0x180000000UL +#define PSYCHO_MEMSPACE_SIZE 0x07fffffffUL + +static void __init psycho_pbm_init(struct pci_pbm_info *pbm, + struct of_device *op, int is_pbm_a) +{ + psycho_pbm_init_common(pbm, op, "PSYCHO", PBM_CHIP_TYPE_PSYCHO); + psycho_pbm_strbuf_init(pbm, is_pbm_a); + psycho_scan_bus(pbm, &op->dev); +} + +static struct pci_pbm_info * __devinit psycho_find_sibling(u32 upa_portid) +{ + struct pci_pbm_info *pbm; + + for (pbm = pci_pbm_root; pbm; pbm = pbm->next) { + if (pbm->portid == upa_portid) + return pbm; + } + return NULL; +} + +#define PSYCHO_CONFIGSPACE 0x001000000UL + +static int __devinit psycho_probe(struct of_device *op, + const struct of_device_id *match) +{ + const struct linux_prom64_registers *pr_regs; + struct device_node *dp = op->node; + struct pci_pbm_info *pbm; + struct iommu *iommu; + int is_pbm_a, err; + u32 upa_portid; + + upa_portid = of_getintprop_default(dp, "upa-portid", 0xff); + + err = -ENOMEM; + pbm = kzalloc(sizeof(*pbm), GFP_KERNEL); + if (!pbm) { + printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n"); + goto out_err; + } + + pbm->sibling = psycho_find_sibling(upa_portid); + if (pbm->sibling) { + iommu = pbm->sibling->iommu; + } else { + iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL); + if (!iommu) { + printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n"); + goto out_free_controller; + } + } + + pbm->iommu = iommu; + pbm->portid = upa_portid; + + pr_regs = of_get_property(dp, "reg", NULL); + err = -ENODEV; + if (!pr_regs) { + printk(KERN_ERR PFX "No reg property.\n"); + goto out_free_iommu; + } + + is_pbm_a = ((pr_regs[0].phys_addr & 0x6000) == 0x2000); + + pbm->controller_regs = pr_regs[2].phys_addr; + pbm->config_space = (pr_regs[2].phys_addr + PSYCHO_CONFIGSPACE); + + if (is_pbm_a) { + pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_A; + pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_A; + pbm->pci_csr = pbm->controller_regs + PSYCHO_PCIA_CTRL; + } else { + pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_B; + pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_B; + pbm->pci_csr = pbm->controller_regs + PSYCHO_PCIB_CTRL; + } + + psycho_controller_hwinit(pbm); + if (!pbm->sibling) { + err = psycho_iommu_init(pbm, 128, 0xc0000000, + 0xffffffff, PSYCHO_CONTROL); + if (err) + goto out_free_iommu; + + /* If necessary, hook us up for starfire IRQ translations. */ + if (this_is_starfire) + starfire_hookup(pbm->portid); + } + + psycho_pbm_init(pbm, op, is_pbm_a); + + pbm->next = pci_pbm_root; + pci_pbm_root = pbm; + + if (pbm->sibling) + pbm->sibling->sibling = pbm; + + dev_set_drvdata(&op->dev, pbm); + + return 0; + +out_free_iommu: + if (!pbm->sibling) + kfree(pbm->iommu); + +out_free_controller: + kfree(pbm); + +out_err: + return err; +} + +static struct of_device_id __initdata psycho_match[] = { + { + .name = "pci", + .compatible = "pci108e,8000", + }, + {}, +}; + +static struct of_platform_driver psycho_driver = { + .name = DRIVER_NAME, + .match_table = psycho_match, + .probe = psycho_probe, +}; + +static int __init psycho_init(void) +{ + return of_register_driver(&psycho_driver, &of_bus_type); +} + +subsys_initcall(psycho_init); diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c new file mode 100644 index 000000000000..713257b6963c --- /dev/null +++ b/arch/sparc/kernel/pci_sabre.c @@ -0,0 +1,609 @@ +/* pci_sabre.c: Sabre specific PCI controller support. + * + * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1998, 1999 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/of_device.h> + +#include <asm/apb.h> +#include <asm/iommu.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/upa.h> + +#include "pci_impl.h" +#include "iommu_common.h" +#include "psycho_common.h" + +#define DRIVER_NAME "sabre" +#define PFX DRIVER_NAME ": " + +/* SABRE PCI controller register offsets and definitions. */ +#define SABRE_UE_AFSR 0x0030UL +#define SABRE_UEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */ +#define SABRE_UEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */ +#define SABRE_UEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */ +#define SABRE_UEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */ +#define SABRE_UEAFSR_SDTE 0x0200000000000000UL /* Secondary DMA Translation Error */ +#define SABRE_UEAFSR_PDTE 0x0100000000000000UL /* Primary DMA Translation Error */ +#define SABRE_UEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */ +#define SABRE_UEAFSR_OFF 0x00000000e0000000UL /* Offset (AFAR bits [5:3] */ +#define SABRE_UEAFSR_BLK 0x0000000000800000UL /* Was block operation */ +#define SABRE_UECE_AFAR 0x0038UL +#define SABRE_CE_AFSR 0x0040UL +#define SABRE_CEAFSR_PDRD 0x4000000000000000UL /* Primary PCI DMA Read */ +#define SABRE_CEAFSR_PDWR 0x2000000000000000UL /* Primary PCI DMA Write */ +#define SABRE_CEAFSR_SDRD 0x0800000000000000UL /* Secondary PCI DMA Read */ +#define SABRE_CEAFSR_SDWR 0x0400000000000000UL /* Secondary PCI DMA Write */ +#define SABRE_CEAFSR_ESYND 0x00ff000000000000UL /* ECC Syndrome */ +#define SABRE_CEAFSR_BMSK 0x0000ffff00000000UL /* Bytemask */ +#define SABRE_CEAFSR_OFF 0x00000000e0000000UL /* Offset */ +#define SABRE_CEAFSR_BLK 0x0000000000800000UL /* Was block operation */ +#define SABRE_UECE_AFAR_ALIAS 0x0048UL /* Aliases to 0x0038 */ +#define SABRE_IOMMU_CONTROL 0x0200UL +#define SABRE_IOMMUCTRL_ERRSTS 0x0000000006000000UL /* Error status bits */ +#define SABRE_IOMMUCTRL_ERR 0x0000000001000000UL /* Error present in IOTLB */ +#define SABRE_IOMMUCTRL_LCKEN 0x0000000000800000UL /* IOTLB lock enable */ +#define SABRE_IOMMUCTRL_LCKPTR 0x0000000000780000UL /* IOTLB lock pointer */ +#define SABRE_IOMMUCTRL_TSBSZ 0x0000000000070000UL /* TSB Size */ +#define SABRE_IOMMU_TSBSZ_1K 0x0000000000000000 +#define SABRE_IOMMU_TSBSZ_2K 0x0000000000010000 +#define SABRE_IOMMU_TSBSZ_4K 0x0000000000020000 +#define SABRE_IOMMU_TSBSZ_8K 0x0000000000030000 +#define SABRE_IOMMU_TSBSZ_16K 0x0000000000040000 +#define SABRE_IOMMU_TSBSZ_32K 0x0000000000050000 +#define SABRE_IOMMU_TSBSZ_64K 0x0000000000060000 +#define SABRE_IOMMU_TSBSZ_128K 0x0000000000070000 +#define SABRE_IOMMUCTRL_TBWSZ 0x0000000000000004UL /* TSB assumed page size */ +#define SABRE_IOMMUCTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */ +#define SABRE_IOMMUCTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */ +#define SABRE_IOMMU_TSBBASE 0x0208UL +#define SABRE_IOMMU_FLUSH 0x0210UL +#define SABRE_IMAP_A_SLOT0 0x0c00UL +#define SABRE_IMAP_B_SLOT0 0x0c20UL +#define SABRE_IMAP_SCSI 0x1000UL +#define SABRE_IMAP_ETH 0x1008UL +#define SABRE_IMAP_BPP 0x1010UL +#define SABRE_IMAP_AU_REC 0x1018UL +#define SABRE_IMAP_AU_PLAY 0x1020UL +#define SABRE_IMAP_PFAIL 0x1028UL +#define SABRE_IMAP_KMS 0x1030UL +#define SABRE_IMAP_FLPY 0x1038UL +#define SABRE_IMAP_SHW 0x1040UL +#define SABRE_IMAP_KBD 0x1048UL +#define SABRE_IMAP_MS 0x1050UL +#define SABRE_IMAP_SER 0x1058UL +#define SABRE_IMAP_UE 0x1070UL +#define SABRE_IMAP_CE 0x1078UL +#define SABRE_IMAP_PCIERR 0x1080UL +#define SABRE_IMAP_GFX 0x1098UL +#define SABRE_IMAP_EUPA 0x10a0UL +#define SABRE_ICLR_A_SLOT0 0x1400UL +#define SABRE_ICLR_B_SLOT0 0x1480UL +#define SABRE_ICLR_SCSI 0x1800UL +#define SABRE_ICLR_ETH 0x1808UL +#define SABRE_ICLR_BPP 0x1810UL +#define SABRE_ICLR_AU_REC 0x1818UL +#define SABRE_ICLR_AU_PLAY 0x1820UL +#define SABRE_ICLR_PFAIL 0x1828UL +#define SABRE_ICLR_KMS 0x1830UL +#define SABRE_ICLR_FLPY 0x1838UL +#define SABRE_ICLR_SHW 0x1840UL +#define SABRE_ICLR_KBD 0x1848UL +#define SABRE_ICLR_MS 0x1850UL +#define SABRE_ICLR_SER 0x1858UL +#define SABRE_ICLR_UE 0x1870UL +#define SABRE_ICLR_CE 0x1878UL +#define SABRE_ICLR_PCIERR 0x1880UL +#define SABRE_WRSYNC 0x1c20UL +#define SABRE_PCICTRL 0x2000UL +#define SABRE_PCICTRL_MRLEN 0x0000001000000000UL /* Use MemoryReadLine for block loads/stores */ +#define SABRE_PCICTRL_SERR 0x0000000400000000UL /* Set when SERR asserted on PCI bus */ +#define SABRE_PCICTRL_ARBPARK 0x0000000000200000UL /* Bus Parking 0=Ultra-IIi 1=prev-bus-owner */ +#define SABRE_PCICTRL_CPUPRIO 0x0000000000100000UL /* Ultra-IIi granted every other bus cycle */ +#define SABRE_PCICTRL_ARBPRIO 0x00000000000f0000UL /* Slot which is granted every other bus cycle */ +#define SABRE_PCICTRL_ERREN 0x0000000000000100UL /* PCI Error Interrupt Enable */ +#define SABRE_PCICTRL_RTRYWE 0x0000000000000080UL /* DMA Flow Control 0=wait-if-possible 1=retry */ +#define SABRE_PCICTRL_AEN 0x000000000000000fUL /* Slot PCI arbitration enables */ +#define SABRE_PIOAFSR 0x2010UL +#define SABRE_PIOAFSR_PMA 0x8000000000000000UL /* Primary Master Abort */ +#define SABRE_PIOAFSR_PTA 0x4000000000000000UL /* Primary Target Abort */ +#define SABRE_PIOAFSR_PRTRY 0x2000000000000000UL /* Primary Excessive Retries */ +#define SABRE_PIOAFSR_PPERR 0x1000000000000000UL /* Primary Parity Error */ +#define SABRE_PIOAFSR_SMA 0x0800000000000000UL /* Secondary Master Abort */ +#define SABRE_PIOAFSR_STA 0x0400000000000000UL /* Secondary Target Abort */ +#define SABRE_PIOAFSR_SRTRY 0x0200000000000000UL /* Secondary Excessive Retries */ +#define SABRE_PIOAFSR_SPERR 0x0100000000000000UL /* Secondary Parity Error */ +#define SABRE_PIOAFSR_BMSK 0x0000ffff00000000UL /* Byte Mask */ +#define SABRE_PIOAFSR_BLK 0x0000000080000000UL /* Was Block Operation */ +#define SABRE_PIOAFAR 0x2018UL +#define SABRE_PCIDIAG 0x2020UL +#define SABRE_PCIDIAG_DRTRY 0x0000000000000040UL /* Disable PIO Retry Limit */ +#define SABRE_PCIDIAG_IPAPAR 0x0000000000000008UL /* Invert PIO Address Parity */ +#define SABRE_PCIDIAG_IPDPAR 0x0000000000000004UL /* Invert PIO Data Parity */ +#define SABRE_PCIDIAG_IDDPAR 0x0000000000000002UL /* Invert DMA Data Parity */ +#define SABRE_PCIDIAG_ELPBK 0x0000000000000001UL /* Loopback Enable - not supported */ +#define SABRE_PCITASR 0x2028UL +#define SABRE_PCITASR_EF 0x0000000000000080UL /* Respond to 0xe0000000-0xffffffff */ +#define SABRE_PCITASR_CD 0x0000000000000040UL /* Respond to 0xc0000000-0xdfffffff */ +#define SABRE_PCITASR_AB 0x0000000000000020UL /* Respond to 0xa0000000-0xbfffffff */ +#define SABRE_PCITASR_89 0x0000000000000010UL /* Respond to 0x80000000-0x9fffffff */ +#define SABRE_PCITASR_67 0x0000000000000008UL /* Respond to 0x60000000-0x7fffffff */ +#define SABRE_PCITASR_45 0x0000000000000004UL /* Respond to 0x40000000-0x5fffffff */ +#define SABRE_PCITASR_23 0x0000000000000002UL /* Respond to 0x20000000-0x3fffffff */ +#define SABRE_PCITASR_01 0x0000000000000001UL /* Respond to 0x00000000-0x1fffffff */ +#define SABRE_PIOBUF_DIAG 0x5000UL +#define SABRE_DMABUF_DIAGLO 0x5100UL +#define SABRE_DMABUF_DIAGHI 0x51c0UL +#define SABRE_IMAP_GFX_ALIAS 0x6000UL /* Aliases to 0x1098 */ +#define SABRE_IMAP_EUPA_ALIAS 0x8000UL /* Aliases to 0x10a0 */ +#define SABRE_IOMMU_VADIAG 0xa400UL +#define SABRE_IOMMU_TCDIAG 0xa408UL +#define SABRE_IOMMU_TAG 0xa580UL +#define SABRE_IOMMUTAG_ERRSTS 0x0000000001800000UL /* Error status bits */ +#define SABRE_IOMMUTAG_ERR 0x0000000000400000UL /* Error present */ +#define SABRE_IOMMUTAG_WRITE 0x0000000000200000UL /* Page is writable */ +#define SABRE_IOMMUTAG_STREAM 0x0000000000100000UL /* Streamable bit - unused */ +#define SABRE_IOMMUTAG_SIZE 0x0000000000080000UL /* 0=8k 1=16k */ +#define SABRE_IOMMUTAG_VPN 0x000000000007ffffUL /* Virtual Page Number [31:13] */ +#define SABRE_IOMMU_DATA 0xa600UL +#define SABRE_IOMMUDATA_VALID 0x0000000040000000UL /* Valid */ +#define SABRE_IOMMUDATA_USED 0x0000000020000000UL /* Used (for LRU algorithm) */ +#define SABRE_IOMMUDATA_CACHE 0x0000000010000000UL /* Cacheable */ +#define SABRE_IOMMUDATA_PPN 0x00000000001fffffUL /* Physical Page Number [33:13] */ +#define SABRE_PCI_IRQSTATE 0xa800UL +#define SABRE_OBIO_IRQSTATE 0xa808UL +#define SABRE_FFBCFG 0xf000UL +#define SABRE_FFBCFG_SPRQS 0x000000000f000000 /* Slave P_RQST queue size */ +#define SABRE_FFBCFG_ONEREAD 0x0000000000004000 /* Slave supports one outstanding read */ +#define SABRE_MCCTRL0 0xf010UL +#define SABRE_MCCTRL0_RENAB 0x0000000080000000 /* Refresh Enable */ +#define SABRE_MCCTRL0_EENAB 0x0000000010000000 /* Enable all ECC functions */ +#define SABRE_MCCTRL0_11BIT 0x0000000000001000 /* Enable 11-bit column addressing */ +#define SABRE_MCCTRL0_DPP 0x0000000000000f00 /* DIMM Pair Present Bits */ +#define SABRE_MCCTRL0_RINTVL 0x00000000000000ff /* Refresh Interval */ +#define SABRE_MCCTRL1 0xf018UL +#define SABRE_MCCTRL1_AMDC 0x0000000038000000 /* Advance Memdata Clock */ +#define SABRE_MCCTRL1_ARDC 0x0000000007000000 /* Advance DRAM Read Data Clock */ +#define SABRE_MCCTRL1_CSR 0x0000000000e00000 /* CAS to RAS delay for CBR refresh */ +#define SABRE_MCCTRL1_CASRW 0x00000000001c0000 /* CAS length for read/write */ +#define SABRE_MCCTRL1_RCD 0x0000000000038000 /* RAS to CAS delay */ +#define SABRE_MCCTRL1_CP 0x0000000000007000 /* CAS Precharge */ +#define SABRE_MCCTRL1_RP 0x0000000000000e00 /* RAS Precharge */ +#define SABRE_MCCTRL1_RAS 0x00000000000001c0 /* Length of RAS for refresh */ +#define SABRE_MCCTRL1_CASRW2 0x0000000000000038 /* Must be same as CASRW */ +#define SABRE_MCCTRL1_RSC 0x0000000000000007 /* RAS after CAS hold time */ +#define SABRE_RESETCTRL 0xf020UL + +#define SABRE_CONFIGSPACE 0x001000000UL +#define SABRE_IOSPACE 0x002000000UL +#define SABRE_IOSPACE_SIZE 0x000ffffffUL +#define SABRE_MEMSPACE 0x100000000UL +#define SABRE_MEMSPACE_SIZE 0x07fffffffUL + +static int hummingbird_p; +static struct pci_bus *sabre_root_bus; + +static irqreturn_t sabre_ue_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg = pbm->controller_regs + SABRE_UE_AFSR; + unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR; + unsigned long afsr, afar, error_bits; + int reported; + + /* Latch uncorrectable error status. */ + afar = upa_readq(afar_reg); + afsr = upa_readq(afsr_reg); + + /* Clear the primary/secondary error status bits. */ + error_bits = afsr & + (SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR | + SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR | + SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE); + if (!error_bits) + return IRQ_NONE; + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: Uncorrectable Error, primary error type[%s%s]\n", + pbm->name, + ((error_bits & SABRE_UEAFSR_PDRD) ? + "DMA Read" : + ((error_bits & SABRE_UEAFSR_PDWR) ? + "DMA Write" : "???")), + ((error_bits & SABRE_UEAFSR_PDTE) ? + ":Translation Error" : "")); + printk("%s: bytemask[%04lx] dword_offset[%lx] was_block(%d)\n", + pbm->name, + (afsr & SABRE_UEAFSR_BMSK) >> 32UL, + (afsr & SABRE_UEAFSR_OFF) >> 29UL, + ((afsr & SABRE_UEAFSR_BLK) ? 1 : 0)); + printk("%s: UE AFAR [%016lx]\n", pbm->name, afar); + printk("%s: UE Secondary errors [", pbm->name); + reported = 0; + if (afsr & SABRE_UEAFSR_SDRD) { + reported++; + printk("(DMA Read)"); + } + if (afsr & SABRE_UEAFSR_SDWR) { + reported++; + printk("(DMA Write)"); + } + if (afsr & SABRE_UEAFSR_SDTE) { + reported++; + printk("(Translation Error)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + /* Interrogate IOMMU for error status. */ + psycho_check_iommu_error(pbm, afsr, afar, UE_ERR); + + return IRQ_HANDLED; +} + +static irqreturn_t sabre_ce_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg = pbm->controller_regs + SABRE_CE_AFSR; + unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR; + unsigned long afsr, afar, error_bits; + int reported; + + /* Latch error status. */ + afar = upa_readq(afar_reg); + afsr = upa_readq(afsr_reg); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR | + SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR); + if (!error_bits) + return IRQ_NONE; + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: Correctable Error, primary error type[%s]\n", + pbm->name, + ((error_bits & SABRE_CEAFSR_PDRD) ? + "DMA Read" : + ((error_bits & SABRE_CEAFSR_PDWR) ? + "DMA Write" : "???"))); + + /* XXX Use syndrome and afar to print out module string just like + * XXX UDB CE trap handler does... -DaveM + */ + printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] " + "was_block(%d)\n", + pbm->name, + (afsr & SABRE_CEAFSR_ESYND) >> 48UL, + (afsr & SABRE_CEAFSR_BMSK) >> 32UL, + (afsr & SABRE_CEAFSR_OFF) >> 29UL, + ((afsr & SABRE_CEAFSR_BLK) ? 1 : 0)); + printk("%s: CE AFAR [%016lx]\n", pbm->name, afar); + printk("%s: CE Secondary errors [", pbm->name); + reported = 0; + if (afsr & SABRE_CEAFSR_SDRD) { + reported++; + printk("(DMA Read)"); + } + if (afsr & SABRE_CEAFSR_SDWR) { + reported++; + printk("(DMA Write)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + return IRQ_HANDLED; +} + +static void sabre_register_error_handlers(struct pci_pbm_info *pbm) +{ + struct device_node *dp = pbm->op->node; + struct of_device *op; + unsigned long base = pbm->controller_regs; + u64 tmp; + int err; + + if (pbm->chip_type == PBM_CHIP_TYPE_SABRE) + dp = dp->parent; + + op = of_find_device_by_node(dp); + if (!op) + return; + + /* Sabre/Hummingbird IRQ property layout is: + * 0: PCI ERR + * 1: UE ERR + * 2: CE ERR + * 3: POWER FAIL + */ + if (op->num_irqs < 4) + return; + + /* We clear the error bits in the appropriate AFSR before + * registering the handler so that we don't get spurious + * interrupts. + */ + upa_writeq((SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR | + SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR | + SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE), + base + SABRE_UE_AFSR); + + err = request_irq(op->irqs[1], sabre_ue_intr, 0, "SABRE_UE", pbm); + if (err) + printk(KERN_WARNING "%s: Couldn't register UE, err=%d.\n", + pbm->name, err); + + upa_writeq((SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR | + SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR), + base + SABRE_CE_AFSR); + + + err = request_irq(op->irqs[2], sabre_ce_intr, 0, "SABRE_CE", pbm); + if (err) + printk(KERN_WARNING "%s: Couldn't register CE, err=%d.\n", + pbm->name, err); + err = request_irq(op->irqs[0], psycho_pcierr_intr, 0, + "SABRE_PCIERR", pbm); + if (err) + printk(KERN_WARNING "%s: Couldn't register PCIERR, err=%d.\n", + pbm->name, err); + + tmp = upa_readq(base + SABRE_PCICTRL); + tmp |= SABRE_PCICTRL_ERREN; + upa_writeq(tmp, base + SABRE_PCICTRL); +} + +static void apb_init(struct pci_bus *sabre_bus) +{ + struct pci_dev *pdev; + + list_for_each_entry(pdev, &sabre_bus->devices, bus_list) { + if (pdev->vendor == PCI_VENDOR_ID_SUN && + pdev->device == PCI_DEVICE_ID_SUN_SIMBA) { + u16 word16; + + pci_read_config_word(pdev, PCI_COMMAND, &word16); + word16 |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY | + PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY | + PCI_COMMAND_IO; + pci_write_config_word(pdev, PCI_COMMAND, word16); + + /* Status register bits are "write 1 to clear". */ + pci_write_config_word(pdev, PCI_STATUS, 0xffff); + pci_write_config_word(pdev, PCI_SEC_STATUS, 0xffff); + + /* Use a primary/seconday latency timer value + * of 64. + */ + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64); + pci_write_config_byte(pdev, PCI_SEC_LATENCY_TIMER, 64); + + /* Enable reporting/forwarding of master aborts, + * parity, and SERR. + */ + pci_write_config_byte(pdev, PCI_BRIDGE_CONTROL, + (PCI_BRIDGE_CTL_PARITY | + PCI_BRIDGE_CTL_SERR | + PCI_BRIDGE_CTL_MASTER_ABORT)); + } + } +} + +static void __init sabre_scan_bus(struct pci_pbm_info *pbm, + struct device *parent) +{ + static int once; + + /* The APB bridge speaks to the Sabre host PCI bridge + * at 66Mhz, but the front side of APB runs at 33Mhz + * for both segments. + * + * Hummingbird systems do not use APB, so they run + * at 66MHZ. + */ + if (hummingbird_p) + pbm->is_66mhz_capable = 1; + else + pbm->is_66mhz_capable = 0; + + /* This driver has not been verified to handle + * multiple SABREs yet, so trap this. + * + * Also note that the SABRE host bridge is hardwired + * to live at bus 0. + */ + if (once != 0) { + printk(KERN_ERR PFX "Multiple controllers unsupported.\n"); + return; + } + once++; + + pbm->pci_bus = pci_scan_one_pbm(pbm, parent); + if (!pbm->pci_bus) + return; + + sabre_root_bus = pbm->pci_bus; + + apb_init(pbm->pci_bus); + + sabre_register_error_handlers(pbm); +} + +static void __init sabre_pbm_init(struct pci_pbm_info *pbm, + struct of_device *op) +{ + psycho_pbm_init_common(pbm, op, "SABRE", PBM_CHIP_TYPE_SABRE); + pbm->pci_afsr = pbm->controller_regs + SABRE_PIOAFSR; + pbm->pci_afar = pbm->controller_regs + SABRE_PIOAFAR; + pbm->pci_csr = pbm->controller_regs + SABRE_PCICTRL; + sabre_scan_bus(pbm, &op->dev); +} + +static int __devinit sabre_probe(struct of_device *op, + const struct of_device_id *match) +{ + const struct linux_prom64_registers *pr_regs; + struct device_node *dp = op->node; + struct pci_pbm_info *pbm; + u32 upa_portid, dma_mask; + struct iommu *iommu; + int tsbsize, err; + const u32 *vdma; + u64 clear_irq; + + hummingbird_p = (match->data != NULL); + if (!hummingbird_p) { + struct device_node *cpu_dp; + + /* Of course, Sun has to encode things a thousand + * different ways, inconsistently. + */ + for_each_node_by_type(cpu_dp, "cpu") { + if (!strcmp(cpu_dp->name, "SUNW,UltraSPARC-IIe")) + hummingbird_p = 1; + } + } + + err = -ENOMEM; + pbm = kzalloc(sizeof(*pbm), GFP_KERNEL); + if (!pbm) { + printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n"); + goto out_err; + } + + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + if (!iommu) { + printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n"); + goto out_free_controller; + } + + pbm->iommu = iommu; + + upa_portid = of_getintprop_default(dp, "upa-portid", 0xff); + + pbm->portid = upa_portid; + + /* + * Map in SABRE register set and report the presence of this SABRE. + */ + + pr_regs = of_get_property(dp, "reg", NULL); + err = -ENODEV; + if (!pr_regs) { + printk(KERN_ERR PFX "No reg property\n"); + goto out_free_iommu; + } + + /* + * First REG in property is base of entire SABRE register space. + */ + pbm->controller_regs = pr_regs[0].phys_addr; + + /* Clear interrupts */ + + /* PCI first */ + for (clear_irq = SABRE_ICLR_A_SLOT0; clear_irq < SABRE_ICLR_B_SLOT0 + 0x80; clear_irq += 8) + upa_writeq(0x0UL, pbm->controller_regs + clear_irq); + + /* Then OBIO */ + for (clear_irq = SABRE_ICLR_SCSI; clear_irq < SABRE_ICLR_SCSI + 0x80; clear_irq += 8) + upa_writeq(0x0UL, pbm->controller_regs + clear_irq); + + /* Error interrupts are enabled later after the bus scan. */ + upa_writeq((SABRE_PCICTRL_MRLEN | SABRE_PCICTRL_SERR | + SABRE_PCICTRL_ARBPARK | SABRE_PCICTRL_AEN), + pbm->controller_regs + SABRE_PCICTRL); + + /* Now map in PCI config space for entire SABRE. */ + pbm->config_space = pbm->controller_regs + SABRE_CONFIGSPACE; + + vdma = of_get_property(dp, "virtual-dma", NULL); + if (!vdma) { + printk(KERN_ERR PFX "No virtual-dma property\n"); + goto out_free_iommu; + } + + dma_mask = vdma[0]; + switch(vdma[1]) { + case 0x20000000: + dma_mask |= 0x1fffffff; + tsbsize = 64; + break; + case 0x40000000: + dma_mask |= 0x3fffffff; + tsbsize = 128; + break; + + case 0x80000000: + dma_mask |= 0x7fffffff; + tsbsize = 128; + break; + default: + printk(KERN_ERR PFX "Strange virtual-dma size.\n"); + goto out_free_iommu; + } + + err = psycho_iommu_init(pbm, tsbsize, vdma[0], dma_mask, SABRE_WRSYNC); + if (err) + goto out_free_iommu; + + /* + * Look for APB underneath. + */ + sabre_pbm_init(pbm, op); + + pbm->next = pci_pbm_root; + pci_pbm_root = pbm; + + dev_set_drvdata(&op->dev, pbm); + + return 0; + +out_free_iommu: + kfree(pbm->iommu); + +out_free_controller: + kfree(pbm); + +out_err: + return err; +} + +static struct of_device_id __initdata sabre_match[] = { + { + .name = "pci", + .compatible = "pci108e,a001", + .data = (void *) 1, + }, + { + .name = "pci", + .compatible = "pci108e,a000", + }, + {}, +}; + +static struct of_platform_driver sabre_driver = { + .name = DRIVER_NAME, + .match_table = sabre_match, + .probe = sabre_probe, +}; + +static int __init sabre_init(void) +{ + return of_register_driver(&sabre_driver, &of_bus_type); +} + +subsys_initcall(sabre_init); diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c new file mode 100644 index 000000000000..45d9dba1ba11 --- /dev/null +++ b/arch/sparc/kernel/pci_schizo.c @@ -0,0 +1,1504 @@ +/* pci_schizo.c: SCHIZO/TOMATILLO specific PCI controller support. + * + * Copyright (C) 2001, 2002, 2003, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/of_device.h> + +#include <asm/iommu.h> +#include <asm/irq.h> +#include <asm/pstate.h> +#include <asm/prom.h> +#include <asm/upa.h> + +#include "pci_impl.h" +#include "iommu_common.h" + +#define DRIVER_NAME "schizo" +#define PFX DRIVER_NAME ": " + +/* This is a convention that at least Excalibur and Merlin + * follow. I suppose the SCHIZO used in Starcat and friends + * will do similar. + * + * The only way I could see this changing is if the newlink + * block requires more space in Schizo's address space than + * they predicted, thus requiring an address space reorg when + * the newer Schizo is taped out. + */ + +/* Streaming buffer control register. */ +#define SCHIZO_STRBUF_CTRL_LPTR 0x00000000000000f0UL /* LRU Lock Pointer */ +#define SCHIZO_STRBUF_CTRL_LENAB 0x0000000000000008UL /* LRU Lock Enable */ +#define SCHIZO_STRBUF_CTRL_RRDIS 0x0000000000000004UL /* Rerun Disable */ +#define SCHIZO_STRBUF_CTRL_DENAB 0x0000000000000002UL /* Diagnostic Mode Enable */ +#define SCHIZO_STRBUF_CTRL_ENAB 0x0000000000000001UL /* Streaming Buffer Enable */ + +/* IOMMU control register. */ +#define SCHIZO_IOMMU_CTRL_RESV 0xfffffffff9000000UL /* Reserved */ +#define SCHIZO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status */ +#define SCHIZO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL /* Translation Error encountered */ +#define SCHIZO_IOMMU_CTRL_LCKEN 0x0000000000800000UL /* Enable translation locking */ +#define SCHIZO_IOMMU_CTRL_LCKPTR 0x0000000000780000UL /* Translation lock pointer */ +#define SCHIZO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL /* TSB Size */ +#define SCHIZO_IOMMU_TSBSZ_1K 0x0000000000000000UL /* TSB Table 1024 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_2K 0x0000000000010000UL /* TSB Table 2048 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_4K 0x0000000000020000UL /* TSB Table 4096 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_8K 0x0000000000030000UL /* TSB Table 8192 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_16K 0x0000000000040000UL /* TSB Table 16k 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_32K 0x0000000000050000UL /* TSB Table 32k 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_64K 0x0000000000060000UL /* TSB Table 64k 8-byte entries */ +#define SCHIZO_IOMMU_TSBSZ_128K 0x0000000000070000UL /* TSB Table 128k 8-byte entries */ +#define SCHIZO_IOMMU_CTRL_RESV2 0x000000000000fff8UL /* Reserved */ +#define SCHIZO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */ +#define SCHIZO_IOMMU_CTRL_DENAB 0x0000000000000002UL /* Diagnostic mode enable */ +#define SCHIZO_IOMMU_CTRL_ENAB 0x0000000000000001UL /* IOMMU Enable */ + +/* Schizo config space address format is nearly identical to + * that of PSYCHO: + * + * 32 24 23 16 15 11 10 8 7 2 1 0 + * --------------------------------------------------------- + * |0 0 0 0 0 0 0 0 0| bus | device | function | reg | 0 0 | + * --------------------------------------------------------- + */ +#define SCHIZO_CONFIG_BASE(PBM) ((PBM)->config_space) +#define SCHIZO_CONFIG_ENCODE(BUS, DEVFN, REG) \ + (((unsigned long)(BUS) << 16) | \ + ((unsigned long)(DEVFN) << 8) | \ + ((unsigned long)(REG))) + +static void *schizo_pci_config_mkaddr(struct pci_pbm_info *pbm, + unsigned char bus, + unsigned int devfn, + int where) +{ + if (!pbm) + return NULL; + bus -= pbm->pci_first_busno; + return (void *) + (SCHIZO_CONFIG_BASE(pbm) | + SCHIZO_CONFIG_ENCODE(bus, devfn, where)); +} + +/* SCHIZO error handling support. */ +enum schizo_error_type { + UE_ERR, CE_ERR, PCI_ERR, SAFARI_ERR +}; + +static DEFINE_SPINLOCK(stc_buf_lock); +static unsigned long stc_error_buf[128]; +static unsigned long stc_tag_buf[16]; +static unsigned long stc_line_buf[16]; + +#define SCHIZO_UE_INO 0x30 /* Uncorrectable ECC error */ +#define SCHIZO_CE_INO 0x31 /* Correctable ECC error */ +#define SCHIZO_PCIERR_A_INO 0x32 /* PBM A PCI bus error */ +#define SCHIZO_PCIERR_B_INO 0x33 /* PBM B PCI bus error */ +#define SCHIZO_SERR_INO 0x34 /* Safari interface error */ + +#define SCHIZO_STC_ERR 0xb800UL /* --> 0xba00 */ +#define SCHIZO_STC_TAG 0xba00UL /* --> 0xba80 */ +#define SCHIZO_STC_LINE 0xbb00UL /* --> 0xbb80 */ + +#define SCHIZO_STCERR_WRITE 0x2UL +#define SCHIZO_STCERR_READ 0x1UL + +#define SCHIZO_STCTAG_PPN 0x3fffffff00000000UL +#define SCHIZO_STCTAG_VPN 0x00000000ffffe000UL +#define SCHIZO_STCTAG_VALID 0x8000000000000000UL +#define SCHIZO_STCTAG_READ 0x4000000000000000UL + +#define SCHIZO_STCLINE_LINDX 0x0000000007800000UL +#define SCHIZO_STCLINE_SPTR 0x000000000007e000UL +#define SCHIZO_STCLINE_LADDR 0x0000000000001fc0UL +#define SCHIZO_STCLINE_EPTR 0x000000000000003fUL +#define SCHIZO_STCLINE_VALID 0x0000000000600000UL +#define SCHIZO_STCLINE_FOFN 0x0000000000180000UL + +static void __schizo_check_stc_error_pbm(struct pci_pbm_info *pbm, + enum schizo_error_type type) +{ + struct strbuf *strbuf = &pbm->stc; + unsigned long regbase = pbm->pbm_regs; + unsigned long err_base, tag_base, line_base; + u64 control; + int i; + + err_base = regbase + SCHIZO_STC_ERR; + tag_base = regbase + SCHIZO_STC_TAG; + line_base = regbase + SCHIZO_STC_LINE; + + spin_lock(&stc_buf_lock); + + /* This is __REALLY__ dangerous. When we put the + * streaming buffer into diagnostic mode to probe + * it's tags and error status, we _must_ clear all + * of the line tag valid bits before re-enabling + * the streaming buffer. If any dirty data lives + * in the STC when we do this, we will end up + * invalidating it before it has a chance to reach + * main memory. + */ + control = upa_readq(strbuf->strbuf_control); + upa_writeq((control | SCHIZO_STRBUF_CTRL_DENAB), + strbuf->strbuf_control); + for (i = 0; i < 128; i++) { + unsigned long val; + + val = upa_readq(err_base + (i * 8UL)); + upa_writeq(0UL, err_base + (i * 8UL)); + stc_error_buf[i] = val; + } + for (i = 0; i < 16; i++) { + stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL)); + stc_line_buf[i] = upa_readq(line_base + (i * 8UL)); + upa_writeq(0UL, tag_base + (i * 8UL)); + upa_writeq(0UL, line_base + (i * 8UL)); + } + + /* OK, state is logged, exit diagnostic mode. */ + upa_writeq(control, strbuf->strbuf_control); + + for (i = 0; i < 16; i++) { + int j, saw_error, first, last; + + saw_error = 0; + first = i * 8; + last = first + 8; + for (j = first; j < last; j++) { + unsigned long errval = stc_error_buf[j]; + if (errval != 0) { + saw_error++; + printk("%s: STC_ERR(%d)[wr(%d)rd(%d)]\n", + pbm->name, + j, + (errval & SCHIZO_STCERR_WRITE) ? 1 : 0, + (errval & SCHIZO_STCERR_READ) ? 1 : 0); + } + } + if (saw_error != 0) { + unsigned long tagval = stc_tag_buf[i]; + unsigned long lineval = stc_line_buf[i]; + printk("%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)V(%d)R(%d)]\n", + pbm->name, + i, + ((tagval & SCHIZO_STCTAG_PPN) >> 19UL), + (tagval & SCHIZO_STCTAG_VPN), + ((tagval & SCHIZO_STCTAG_VALID) ? 1 : 0), + ((tagval & SCHIZO_STCTAG_READ) ? 1 : 0)); + + /* XXX Should spit out per-bank error information... -DaveM */ + printk("%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)LADDR(%lx)EP(%lx)" + "V(%d)FOFN(%d)]\n", + pbm->name, + i, + ((lineval & SCHIZO_STCLINE_LINDX) >> 23UL), + ((lineval & SCHIZO_STCLINE_SPTR) >> 13UL), + ((lineval & SCHIZO_STCLINE_LADDR) >> 6UL), + ((lineval & SCHIZO_STCLINE_EPTR) >> 0UL), + ((lineval & SCHIZO_STCLINE_VALID) ? 1 : 0), + ((lineval & SCHIZO_STCLINE_FOFN) ? 1 : 0)); + } + } + + spin_unlock(&stc_buf_lock); +} + +/* IOMMU is per-PBM in Schizo, so interrogate both for anonymous + * controller level errors. + */ + +#define SCHIZO_IOMMU_TAG 0xa580UL +#define SCHIZO_IOMMU_DATA 0xa600UL + +#define SCHIZO_IOMMU_TAG_CTXT 0x0000001ffe000000UL +#define SCHIZO_IOMMU_TAG_ERRSTS 0x0000000001800000UL +#define SCHIZO_IOMMU_TAG_ERR 0x0000000000400000UL +#define SCHIZO_IOMMU_TAG_WRITE 0x0000000000200000UL +#define SCHIZO_IOMMU_TAG_STREAM 0x0000000000100000UL +#define SCHIZO_IOMMU_TAG_SIZE 0x0000000000080000UL +#define SCHIZO_IOMMU_TAG_VPAGE 0x000000000007ffffUL + +#define SCHIZO_IOMMU_DATA_VALID 0x0000000100000000UL +#define SCHIZO_IOMMU_DATA_CACHE 0x0000000040000000UL +#define SCHIZO_IOMMU_DATA_PPAGE 0x000000003fffffffUL + +static void schizo_check_iommu_error_pbm(struct pci_pbm_info *pbm, + enum schizo_error_type type) +{ + struct iommu *iommu = pbm->iommu; + unsigned long iommu_tag[16]; + unsigned long iommu_data[16]; + unsigned long flags; + u64 control; + int i; + + spin_lock_irqsave(&iommu->lock, flags); + control = upa_readq(iommu->iommu_control); + if (control & SCHIZO_IOMMU_CTRL_XLTEERR) { + unsigned long base; + char *type_string; + + /* Clear the error encountered bit. */ + control &= ~SCHIZO_IOMMU_CTRL_XLTEERR; + upa_writeq(control, iommu->iommu_control); + + switch((control & SCHIZO_IOMMU_CTRL_XLTESTAT) >> 25UL) { + case 0: + type_string = "Protection Error"; + break; + case 1: + type_string = "Invalid Error"; + break; + case 2: + type_string = "TimeOut Error"; + break; + case 3: + default: + type_string = "ECC Error"; + break; + }; + printk("%s: IOMMU Error, type[%s]\n", + pbm->name, type_string); + + /* Put the IOMMU into diagnostic mode and probe + * it's TLB for entries with error status. + * + * It is very possible for another DVMA to occur + * while we do this probe, and corrupt the system + * further. But we are so screwed at this point + * that we are likely to crash hard anyways, so + * get as much diagnostic information to the + * console as we can. + */ + upa_writeq(control | SCHIZO_IOMMU_CTRL_DENAB, + iommu->iommu_control); + + base = pbm->pbm_regs; + + for (i = 0; i < 16; i++) { + iommu_tag[i] = + upa_readq(base + SCHIZO_IOMMU_TAG + (i * 8UL)); + iommu_data[i] = + upa_readq(base + SCHIZO_IOMMU_DATA + (i * 8UL)); + + /* Now clear out the entry. */ + upa_writeq(0, base + SCHIZO_IOMMU_TAG + (i * 8UL)); + upa_writeq(0, base + SCHIZO_IOMMU_DATA + (i * 8UL)); + } + + /* Leave diagnostic mode. */ + upa_writeq(control, iommu->iommu_control); + + for (i = 0; i < 16; i++) { + unsigned long tag, data; + + tag = iommu_tag[i]; + if (!(tag & SCHIZO_IOMMU_TAG_ERR)) + continue; + + data = iommu_data[i]; + switch((tag & SCHIZO_IOMMU_TAG_ERRSTS) >> 23UL) { + case 0: + type_string = "Protection Error"; + break; + case 1: + type_string = "Invalid Error"; + break; + case 2: + type_string = "TimeOut Error"; + break; + case 3: + default: + type_string = "ECC Error"; + break; + }; + printk("%s: IOMMU TAG(%d)[error(%s) ctx(%x) wr(%d) str(%d) " + "sz(%dK) vpg(%08lx)]\n", + pbm->name, i, type_string, + (int)((tag & SCHIZO_IOMMU_TAG_CTXT) >> 25UL), + ((tag & SCHIZO_IOMMU_TAG_WRITE) ? 1 : 0), + ((tag & SCHIZO_IOMMU_TAG_STREAM) ? 1 : 0), + ((tag & SCHIZO_IOMMU_TAG_SIZE) ? 64 : 8), + (tag & SCHIZO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT); + printk("%s: IOMMU DATA(%d)[valid(%d) cache(%d) ppg(%016lx)]\n", + pbm->name, i, + ((data & SCHIZO_IOMMU_DATA_VALID) ? 1 : 0), + ((data & SCHIZO_IOMMU_DATA_CACHE) ? 1 : 0), + (data & SCHIZO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT); + } + } + if (pbm->stc.strbuf_enabled) + __schizo_check_stc_error_pbm(pbm, type); + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void schizo_check_iommu_error(struct pci_pbm_info *pbm, + enum schizo_error_type type) +{ + schizo_check_iommu_error_pbm(pbm, type); + if (pbm->sibling) + schizo_check_iommu_error_pbm(pbm->sibling, type); +} + +/* Uncorrectable ECC error status gathering. */ +#define SCHIZO_UE_AFSR 0x10030UL +#define SCHIZO_UE_AFAR 0x10038UL + +#define SCHIZO_UEAFSR_PPIO 0x8000000000000000UL /* Safari */ +#define SCHIZO_UEAFSR_PDRD 0x4000000000000000UL /* Safari/Tomatillo */ +#define SCHIZO_UEAFSR_PDWR 0x2000000000000000UL /* Safari */ +#define SCHIZO_UEAFSR_SPIO 0x1000000000000000UL /* Safari */ +#define SCHIZO_UEAFSR_SDMA 0x0800000000000000UL /* Safari/Tomatillo */ +#define SCHIZO_UEAFSR_ERRPNDG 0x0300000000000000UL /* Safari */ +#define SCHIZO_UEAFSR_BMSK 0x000003ff00000000UL /* Safari */ +#define SCHIZO_UEAFSR_QOFF 0x00000000c0000000UL /* Safari/Tomatillo */ +#define SCHIZO_UEAFSR_AID 0x000000001f000000UL /* Safari/Tomatillo */ +#define SCHIZO_UEAFSR_PARTIAL 0x0000000000800000UL /* Safari */ +#define SCHIZO_UEAFSR_OWNEDIN 0x0000000000400000UL /* Safari */ +#define SCHIZO_UEAFSR_MTAGSYND 0x00000000000f0000UL /* Safari */ +#define SCHIZO_UEAFSR_MTAG 0x000000000000e000UL /* Safari */ +#define SCHIZO_UEAFSR_ECCSYND 0x00000000000001ffUL /* Safari */ + +static irqreturn_t schizo_ue_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg = pbm->controller_regs + SCHIZO_UE_AFSR; + unsigned long afar_reg = pbm->controller_regs + SCHIZO_UE_AFAR; + unsigned long afsr, afar, error_bits; + int reported, limit; + + /* Latch uncorrectable error status. */ + afar = upa_readq(afar_reg); + + /* If either of the error pending bits are set in the + * AFSR, the error status is being actively updated by + * the hardware and we must re-read to get a clean value. + */ + limit = 1000; + do { + afsr = upa_readq(afsr_reg); + } while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit); + + /* Clear the primary/secondary error status bits. */ + error_bits = afsr & + (SCHIZO_UEAFSR_PPIO | SCHIZO_UEAFSR_PDRD | SCHIZO_UEAFSR_PDWR | + SCHIZO_UEAFSR_SPIO | SCHIZO_UEAFSR_SDMA); + if (!error_bits) + return IRQ_NONE; + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: Uncorrectable Error, primary error type[%s]\n", + pbm->name, + (((error_bits & SCHIZO_UEAFSR_PPIO) ? + "PIO" : + ((error_bits & SCHIZO_UEAFSR_PDRD) ? + "DMA Read" : + ((error_bits & SCHIZO_UEAFSR_PDWR) ? + "DMA Write" : "???"))))); + printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n", + pbm->name, + (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL, + (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL, + (afsr & SCHIZO_UEAFSR_AID) >> 24UL); + printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n", + pbm->name, + (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0, + (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0, + (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL, + (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL, + (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL); + printk("%s: UE AFAR [%016lx]\n", pbm->name, afar); + printk("%s: UE Secondary errors [", pbm->name); + reported = 0; + if (afsr & SCHIZO_UEAFSR_SPIO) { + reported++; + printk("(PIO)"); + } + if (afsr & SCHIZO_UEAFSR_SDMA) { + reported++; + printk("(DMA)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + /* Interrogate IOMMU for error status. */ + schizo_check_iommu_error(pbm, UE_ERR); + + return IRQ_HANDLED; +} + +#define SCHIZO_CE_AFSR 0x10040UL +#define SCHIZO_CE_AFAR 0x10048UL + +#define SCHIZO_CEAFSR_PPIO 0x8000000000000000UL +#define SCHIZO_CEAFSR_PDRD 0x4000000000000000UL +#define SCHIZO_CEAFSR_PDWR 0x2000000000000000UL +#define SCHIZO_CEAFSR_SPIO 0x1000000000000000UL +#define SCHIZO_CEAFSR_SDMA 0x0800000000000000UL +#define SCHIZO_CEAFSR_ERRPNDG 0x0300000000000000UL +#define SCHIZO_CEAFSR_BMSK 0x000003ff00000000UL +#define SCHIZO_CEAFSR_QOFF 0x00000000c0000000UL +#define SCHIZO_CEAFSR_AID 0x000000001f000000UL +#define SCHIZO_CEAFSR_PARTIAL 0x0000000000800000UL +#define SCHIZO_CEAFSR_OWNEDIN 0x0000000000400000UL +#define SCHIZO_CEAFSR_MTAGSYND 0x00000000000f0000UL +#define SCHIZO_CEAFSR_MTAG 0x000000000000e000UL +#define SCHIZO_CEAFSR_ECCSYND 0x00000000000001ffUL + +static irqreturn_t schizo_ce_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg = pbm->controller_regs + SCHIZO_CE_AFSR; + unsigned long afar_reg = pbm->controller_regs + SCHIZO_CE_AFAR; + unsigned long afsr, afar, error_bits; + int reported, limit; + + /* Latch error status. */ + afar = upa_readq(afar_reg); + + /* If either of the error pending bits are set in the + * AFSR, the error status is being actively updated by + * the hardware and we must re-read to get a clean value. + */ + limit = 1000; + do { + afsr = upa_readq(afsr_reg); + } while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (SCHIZO_CEAFSR_PPIO | SCHIZO_CEAFSR_PDRD | SCHIZO_CEAFSR_PDWR | + SCHIZO_CEAFSR_SPIO | SCHIZO_CEAFSR_SDMA); + if (!error_bits) + return IRQ_NONE; + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: Correctable Error, primary error type[%s]\n", + pbm->name, + (((error_bits & SCHIZO_CEAFSR_PPIO) ? + "PIO" : + ((error_bits & SCHIZO_CEAFSR_PDRD) ? + "DMA Read" : + ((error_bits & SCHIZO_CEAFSR_PDWR) ? + "DMA Write" : "???"))))); + + /* XXX Use syndrome and afar to print out module string just like + * XXX UDB CE trap handler does... -DaveM + */ + printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n", + pbm->name, + (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL, + (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL, + (afsr & SCHIZO_UEAFSR_AID) >> 24UL); + printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_sync[%lx]\n", + pbm->name, + (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0, + (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0, + (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL, + (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL, + (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL); + printk("%s: CE AFAR [%016lx]\n", pbm->name, afar); + printk("%s: CE Secondary errors [", pbm->name); + reported = 0; + if (afsr & SCHIZO_CEAFSR_SPIO) { + reported++; + printk("(PIO)"); + } + if (afsr & SCHIZO_CEAFSR_SDMA) { + reported++; + printk("(DMA)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + return IRQ_HANDLED; +} + +#define SCHIZO_PCI_AFSR 0x2010UL +#define SCHIZO_PCI_AFAR 0x2018UL + +#define SCHIZO_PCIAFSR_PMA 0x8000000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_PTA 0x4000000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_PRTRY 0x2000000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_PPERR 0x1000000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_PTTO 0x0800000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_PUNUS 0x0400000000000000UL /* Schizo */ +#define SCHIZO_PCIAFSR_SMA 0x0200000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_STA 0x0100000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_SRTRY 0x0080000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_SPERR 0x0040000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_STTO 0x0020000000000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_SUNUS 0x0010000000000000UL /* Schizo */ +#define SCHIZO_PCIAFSR_BMSK 0x000003ff00000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_BLK 0x0000000080000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_CFG 0x0000000040000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_MEM 0x0000000020000000UL /* Schizo/Tomatillo */ +#define SCHIZO_PCIAFSR_IO 0x0000000010000000UL /* Schizo/Tomatillo */ + +#define SCHIZO_PCI_CTRL (0x2000UL) +#define SCHIZO_PCICTRL_BUS_UNUS (1UL << 63UL) /* Safari */ +#define SCHIZO_PCICTRL_DTO_INT (1UL << 61UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_ARB_PRIO (0x1ff << 52UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_ESLCK (1UL << 51UL) /* Safari */ +#define SCHIZO_PCICTRL_ERRSLOT (7UL << 48UL) /* Safari */ +#define SCHIZO_PCICTRL_TTO_ERR (1UL << 38UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_RTRY_ERR (1UL << 37UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_DTO_ERR (1UL << 36UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_SBH_ERR (1UL << 35UL) /* Safari */ +#define SCHIZO_PCICTRL_SERR (1UL << 34UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_PCISPD (1UL << 33UL) /* Safari */ +#define SCHIZO_PCICTRL_MRM_PREF (1UL << 30UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_RDO_PREF (1UL << 29UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_RDL_PREF (1UL << 28UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_PTO (3UL << 24UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_PTO_SHIFT 24UL +#define SCHIZO_PCICTRL_TRWSW (7UL << 21UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_F_TGT_A (1UL << 20UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_S_DTO_INT (1UL << 19UL) /* Safari */ +#define SCHIZO_PCICTRL_F_TGT_RT (1UL << 19UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_SBH_INT (1UL << 18UL) /* Safari */ +#define SCHIZO_PCICTRL_T_DTO_INT (1UL << 18UL) /* Tomatillo */ +#define SCHIZO_PCICTRL_EEN (1UL << 17UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_PARK (1UL << 16UL) /* Safari/Tomatillo */ +#define SCHIZO_PCICTRL_PCIRST (1UL << 8UL) /* Safari */ +#define SCHIZO_PCICTRL_ARB_S (0x3fUL << 0UL) /* Safari */ +#define SCHIZO_PCICTRL_ARB_T (0xffUL << 0UL) /* Tomatillo */ + +static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) +{ + unsigned long csr_reg, csr, csr_error_bits; + irqreturn_t ret = IRQ_NONE; + u16 stat; + + csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL; + csr = upa_readq(csr_reg); + csr_error_bits = + csr & (SCHIZO_PCICTRL_BUS_UNUS | + SCHIZO_PCICTRL_TTO_ERR | + SCHIZO_PCICTRL_RTRY_ERR | + SCHIZO_PCICTRL_DTO_ERR | + SCHIZO_PCICTRL_SBH_ERR | + SCHIZO_PCICTRL_SERR); + if (csr_error_bits) { + /* Clear the errors. */ + upa_writeq(csr, csr_reg); + + /* Log 'em. */ + if (csr_error_bits & SCHIZO_PCICTRL_BUS_UNUS) + printk("%s: Bus unusable error asserted.\n", + pbm->name); + if (csr_error_bits & SCHIZO_PCICTRL_TTO_ERR) + printk("%s: PCI TRDY# timeout error asserted.\n", + pbm->name); + if (csr_error_bits & SCHIZO_PCICTRL_RTRY_ERR) + printk("%s: PCI excessive retry error asserted.\n", + pbm->name); + if (csr_error_bits & SCHIZO_PCICTRL_DTO_ERR) + printk("%s: PCI discard timeout error asserted.\n", + pbm->name); + if (csr_error_bits & SCHIZO_PCICTRL_SBH_ERR) + printk("%s: PCI streaming byte hole error asserted.\n", + pbm->name); + if (csr_error_bits & SCHIZO_PCICTRL_SERR) + printk("%s: PCI SERR signal asserted.\n", + pbm->name); + ret = IRQ_HANDLED; + } + pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat); + if (stat & (PCI_STATUS_PARITY | + PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR)) { + printk("%s: PCI bus error, PCI_STATUS[%04x]\n", + pbm->name, stat); + pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff); + ret = IRQ_HANDLED; + } + return ret; +} + +static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + unsigned long afsr_reg, afar_reg, base; + unsigned long afsr, afar, error_bits; + int reported; + + base = pbm->pbm_regs; + + afsr_reg = base + SCHIZO_PCI_AFSR; + afar_reg = base + SCHIZO_PCI_AFAR; + + /* Latch error status. */ + afar = upa_readq(afar_reg); + afsr = upa_readq(afsr_reg); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA | + SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR | + SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS | + SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA | + SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR | + SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS); + if (!error_bits) + return schizo_pcierr_intr_other(pbm); + upa_writeq(error_bits, afsr_reg); + + /* Log the error. */ + printk("%s: PCI Error, primary error type[%s]\n", + pbm->name, + (((error_bits & SCHIZO_PCIAFSR_PMA) ? + "Master Abort" : + ((error_bits & SCHIZO_PCIAFSR_PTA) ? + "Target Abort" : + ((error_bits & SCHIZO_PCIAFSR_PRTRY) ? + "Excessive Retries" : + ((error_bits & SCHIZO_PCIAFSR_PPERR) ? + "Parity Error" : + ((error_bits & SCHIZO_PCIAFSR_PTTO) ? + "Timeout" : + ((error_bits & SCHIZO_PCIAFSR_PUNUS) ? + "Bus Unusable" : "???")))))))); + printk("%s: bytemask[%04lx] was_block(%d) space(%s)\n", + pbm->name, + (afsr & SCHIZO_PCIAFSR_BMSK) >> 32UL, + (afsr & SCHIZO_PCIAFSR_BLK) ? 1 : 0, + ((afsr & SCHIZO_PCIAFSR_CFG) ? + "Config" : + ((afsr & SCHIZO_PCIAFSR_MEM) ? + "Memory" : + ((afsr & SCHIZO_PCIAFSR_IO) ? + "I/O" : "???")))); + printk("%s: PCI AFAR [%016lx]\n", + pbm->name, afar); + printk("%s: PCI Secondary errors [", + pbm->name); + reported = 0; + if (afsr & SCHIZO_PCIAFSR_SMA) { + reported++; + printk("(Master Abort)"); + } + if (afsr & SCHIZO_PCIAFSR_STA) { + reported++; + printk("(Target Abort)"); + } + if (afsr & SCHIZO_PCIAFSR_SRTRY) { + reported++; + printk("(Excessive Retries)"); + } + if (afsr & SCHIZO_PCIAFSR_SPERR) { + reported++; + printk("(Parity Error)"); + } + if (afsr & SCHIZO_PCIAFSR_STTO) { + reported++; + printk("(Timeout)"); + } + if (afsr & SCHIZO_PCIAFSR_SUNUS) { + reported++; + printk("(Bus Unusable)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + /* For the error types shown, scan PBM's PCI bus for devices + * which have logged that error type. + */ + + /* If we see a Target Abort, this could be the result of an + * IOMMU translation error of some sort. It is extremely + * useful to log this information as usually it indicates + * a bug in the IOMMU support code or a PCI device driver. + */ + if (error_bits & (SCHIZO_PCIAFSR_PTA | SCHIZO_PCIAFSR_STA)) { + schizo_check_iommu_error(pbm, PCI_ERR); + pci_scan_for_target_abort(pbm, pbm->pci_bus); + } + if (error_bits & (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_SMA)) + pci_scan_for_master_abort(pbm, pbm->pci_bus); + + /* For excessive retries, PSYCHO/PBM will abort the device + * and there is no way to specifically check for excessive + * retries in the config space status registers. So what + * we hope is that we'll catch it via the master/target + * abort events. + */ + + if (error_bits & (SCHIZO_PCIAFSR_PPERR | SCHIZO_PCIAFSR_SPERR)) + pci_scan_for_parity_error(pbm, pbm->pci_bus); + + return IRQ_HANDLED; +} + +#define SCHIZO_SAFARI_ERRLOG 0x10018UL + +#define SAFARI_ERRLOG_ERROUT 0x8000000000000000UL + +#define BUS_ERROR_BADCMD 0x4000000000000000UL /* Schizo/Tomatillo */ +#define BUS_ERROR_SSMDIS 0x2000000000000000UL /* Safari */ +#define BUS_ERROR_BADMA 0x1000000000000000UL /* Safari */ +#define BUS_ERROR_BADMB 0x0800000000000000UL /* Safari */ +#define BUS_ERROR_BADMC 0x0400000000000000UL /* Safari */ +#define BUS_ERROR_SNOOP_GR 0x0000000000200000UL /* Tomatillo */ +#define BUS_ERROR_SNOOP_PCI 0x0000000000100000UL /* Tomatillo */ +#define BUS_ERROR_SNOOP_RD 0x0000000000080000UL /* Tomatillo */ +#define BUS_ERROR_SNOOP_RDS 0x0000000000020000UL /* Tomatillo */ +#define BUS_ERROR_SNOOP_RDSA 0x0000000000010000UL /* Tomatillo */ +#define BUS_ERROR_SNOOP_OWN 0x0000000000008000UL /* Tomatillo */ +#define BUS_ERROR_SNOOP_RDO 0x0000000000004000UL /* Tomatillo */ +#define BUS_ERROR_CPU1PS 0x0000000000002000UL /* Safari */ +#define BUS_ERROR_WDATA_PERR 0x0000000000002000UL /* Tomatillo */ +#define BUS_ERROR_CPU1PB 0x0000000000001000UL /* Safari */ +#define BUS_ERROR_CTRL_PERR 0x0000000000001000UL /* Tomatillo */ +#define BUS_ERROR_CPU0PS 0x0000000000000800UL /* Safari */ +#define BUS_ERROR_SNOOP_ERR 0x0000000000000800UL /* Tomatillo */ +#define BUS_ERROR_CPU0PB 0x0000000000000400UL /* Safari */ +#define BUS_ERROR_JBUS_ILL_B 0x0000000000000400UL /* Tomatillo */ +#define BUS_ERROR_CIQTO 0x0000000000000200UL /* Safari */ +#define BUS_ERROR_LPQTO 0x0000000000000100UL /* Safari */ +#define BUS_ERROR_JBUS_ILL_C 0x0000000000000100UL /* Tomatillo */ +#define BUS_ERROR_SFPQTO 0x0000000000000080UL /* Safari */ +#define BUS_ERROR_UFPQTO 0x0000000000000040UL /* Safari */ +#define BUS_ERROR_RD_PERR 0x0000000000000040UL /* Tomatillo */ +#define BUS_ERROR_APERR 0x0000000000000020UL /* Safari/Tomatillo */ +#define BUS_ERROR_UNMAP 0x0000000000000010UL /* Safari/Tomatillo */ +#define BUS_ERROR_BUSERR 0x0000000000000004UL /* Safari/Tomatillo */ +#define BUS_ERROR_TIMEOUT 0x0000000000000002UL /* Safari/Tomatillo */ +#define BUS_ERROR_ILL 0x0000000000000001UL /* Safari */ + +/* We only expect UNMAP errors here. The rest of the Safari errors + * are marked fatal and thus cause a system reset. + */ +static irqreturn_t schizo_safarierr_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + u64 errlog; + + errlog = upa_readq(pbm->controller_regs + SCHIZO_SAFARI_ERRLOG); + upa_writeq(errlog & ~(SAFARI_ERRLOG_ERROUT), + pbm->controller_regs + SCHIZO_SAFARI_ERRLOG); + + if (!(errlog & BUS_ERROR_UNMAP)) { + printk("%s: Unexpected Safari/JBUS error interrupt, errlog[%016lx]\n", + pbm->name, errlog); + + return IRQ_HANDLED; + } + + printk("%s: Safari/JBUS interrupt, UNMAPPED error, interrogating IOMMUs.\n", + pbm->name); + schizo_check_iommu_error(pbm, SAFARI_ERR); + + return IRQ_HANDLED; +} + +/* Nearly identical to PSYCHO equivalents... */ +#define SCHIZO_ECC_CTRL 0x10020UL +#define SCHIZO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */ +#define SCHIZO_ECCCTRL_UE 0x4000000000000000UL /* Enable UE Interrupts */ +#define SCHIZO_ECCCTRL_CE 0x2000000000000000UL /* Enable CE INterrupts */ + +#define SCHIZO_SAFARI_ERRCTRL 0x10008UL +#define SCHIZO_SAFERRCTRL_EN 0x8000000000000000UL +#define SCHIZO_SAFARI_IRQCTRL 0x10010UL +#define SCHIZO_SAFIRQCTRL_EN 0x8000000000000000UL + +static int pbm_routes_this_ino(struct pci_pbm_info *pbm, u32 ino) +{ + ino &= IMAP_INO; + + if (pbm->ino_bitmap & (1UL << ino)) + return 1; + + return 0; +} + +/* How the Tomatillo IRQs are routed around is pure guesswork here. + * + * All the Tomatillo devices I see in prtconf dumps seem to have only + * a single PCI bus unit attached to it. It would seem they are separate + * devices because their PortID (ie. JBUS ID) values are all different + * and thus the registers are mapped to totally different locations. + * + * However, two Tomatillo's look "similar" in that the only difference + * in their PortID is the lowest bit. + * + * So if we were to ignore this lower bit, it certainly looks like two + * PCI bus units of the same Tomatillo. I still have not really + * figured this out... + */ +static void tomatillo_register_error_handlers(struct pci_pbm_info *pbm) +{ + struct of_device *op = of_find_device_by_node(pbm->op->node); + u64 tmp, err_mask, err_no_mask; + int err; + + /* Tomatillo IRQ property layout is: + * 0: PCIERR + * 1: UE ERR + * 2: CE ERR + * 3: SERR + * 4: POWER FAIL? + */ + + if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) { + err = request_irq(op->irqs[1], schizo_ue_intr, 0, + "TOMATILLO_UE", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register UE, " + "err=%d\n", pbm->name, err); + } + if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) { + err = request_irq(op->irqs[2], schizo_ce_intr, 0, + "TOMATILLO_CE", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register CE, " + "err=%d\n", pbm->name, err); + } + err = 0; + if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO)) { + err = request_irq(op->irqs[0], schizo_pcierr_intr, 0, + "TOMATILLO_PCIERR", pbm); + } else if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) { + err = request_irq(op->irqs[0], schizo_pcierr_intr, 0, + "TOMATILLO_PCIERR", pbm); + } + if (err) + printk(KERN_WARNING "%s: Could not register PCIERR, " + "err=%d\n", pbm->name, err); + + if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) { + err = request_irq(op->irqs[3], schizo_safarierr_intr, 0, + "TOMATILLO_SERR", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register SERR, " + "err=%d\n", pbm->name, err); + } + + /* Enable UE and CE interrupts for controller. */ + upa_writeq((SCHIZO_ECCCTRL_EE | + SCHIZO_ECCCTRL_UE | + SCHIZO_ECCCTRL_CE), pbm->controller_regs + SCHIZO_ECC_CTRL); + + /* Enable PCI Error interrupts and clear error + * bits. + */ + err_mask = (SCHIZO_PCICTRL_BUS_UNUS | + SCHIZO_PCICTRL_TTO_ERR | + SCHIZO_PCICTRL_RTRY_ERR | + SCHIZO_PCICTRL_SERR | + SCHIZO_PCICTRL_EEN); + + err_no_mask = SCHIZO_PCICTRL_DTO_ERR; + + tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL); + tmp |= err_mask; + tmp &= ~err_no_mask; + upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL); + + err_mask = (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA | + SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR | + SCHIZO_PCIAFSR_PTTO | + SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA | + SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR | + SCHIZO_PCIAFSR_STTO); + + upa_writeq(err_mask, pbm->pbm_regs + SCHIZO_PCI_AFSR); + + err_mask = (BUS_ERROR_BADCMD | BUS_ERROR_SNOOP_GR | + BUS_ERROR_SNOOP_PCI | BUS_ERROR_SNOOP_RD | + BUS_ERROR_SNOOP_RDS | BUS_ERROR_SNOOP_RDSA | + BUS_ERROR_SNOOP_OWN | BUS_ERROR_SNOOP_RDO | + BUS_ERROR_WDATA_PERR | BUS_ERROR_CTRL_PERR | + BUS_ERROR_SNOOP_ERR | BUS_ERROR_JBUS_ILL_B | + BUS_ERROR_JBUS_ILL_C | BUS_ERROR_RD_PERR | + BUS_ERROR_APERR | BUS_ERROR_UNMAP | + BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT); + + upa_writeq((SCHIZO_SAFERRCTRL_EN | err_mask), + pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL); + + upa_writeq((SCHIZO_SAFIRQCTRL_EN | (BUS_ERROR_UNMAP)), + pbm->controller_regs + SCHIZO_SAFARI_IRQCTRL); +} + +static void schizo_register_error_handlers(struct pci_pbm_info *pbm) +{ + struct of_device *op = of_find_device_by_node(pbm->op->node); + u64 tmp, err_mask, err_no_mask; + int err; + + /* Schizo IRQ property layout is: + * 0: PCIERR + * 1: UE ERR + * 2: CE ERR + * 3: SERR + * 4: POWER FAIL? + */ + + if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) { + err = request_irq(op->irqs[1], schizo_ue_intr, 0, + "SCHIZO_UE", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register UE, " + "err=%d\n", pbm->name, err); + } + if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) { + err = request_irq(op->irqs[2], schizo_ce_intr, 0, + "SCHIZO_CE", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register CE, " + "err=%d\n", pbm->name, err); + } + err = 0; + if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO)) { + err = request_irq(op->irqs[0], schizo_pcierr_intr, 0, + "SCHIZO_PCIERR", pbm); + } else if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) { + err = request_irq(op->irqs[0], schizo_pcierr_intr, 0, + "SCHIZO_PCIERR", pbm); + } + if (err) + printk(KERN_WARNING "%s: Could not register PCIERR, " + "err=%d\n", pbm->name, err); + + if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) { + err = request_irq(op->irqs[3], schizo_safarierr_intr, 0, + "SCHIZO_SERR", pbm); + if (err) + printk(KERN_WARNING "%s: Could not register SERR, " + "err=%d\n", pbm->name, err); + } + + /* Enable UE and CE interrupts for controller. */ + upa_writeq((SCHIZO_ECCCTRL_EE | + SCHIZO_ECCCTRL_UE | + SCHIZO_ECCCTRL_CE), pbm->controller_regs + SCHIZO_ECC_CTRL); + + err_mask = (SCHIZO_PCICTRL_BUS_UNUS | + SCHIZO_PCICTRL_ESLCK | + SCHIZO_PCICTRL_TTO_ERR | + SCHIZO_PCICTRL_RTRY_ERR | + SCHIZO_PCICTRL_SBH_ERR | + SCHIZO_PCICTRL_SERR | + SCHIZO_PCICTRL_EEN); + + err_no_mask = (SCHIZO_PCICTRL_DTO_ERR | + SCHIZO_PCICTRL_SBH_INT); + + /* Enable PCI Error interrupts and clear error + * bits for each PBM. + */ + tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL); + tmp |= err_mask; + tmp &= ~err_no_mask; + upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL); + + upa_writeq((SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA | + SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR | + SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS | + SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA | + SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR | + SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS), + pbm->pbm_regs + SCHIZO_PCI_AFSR); + + /* Make all Safari error conditions fatal except unmapped + * errors which we make generate interrupts. + */ + err_mask = (BUS_ERROR_BADCMD | BUS_ERROR_SSMDIS | + BUS_ERROR_BADMA | BUS_ERROR_BADMB | + BUS_ERROR_BADMC | + BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB | + BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB | + BUS_ERROR_CIQTO | + BUS_ERROR_LPQTO | BUS_ERROR_SFPQTO | + BUS_ERROR_UFPQTO | BUS_ERROR_APERR | + BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT | + BUS_ERROR_ILL); +#if 1 + /* XXX Something wrong with some Excalibur systems + * XXX Sun is shipping. The behavior on a 2-cpu + * XXX machine is that both CPU1 parity error bits + * XXX are set and are immediately set again when + * XXX their error status bits are cleared. Just + * XXX ignore them for now. -DaveM + */ + err_mask &= ~(BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB | + BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB); +#endif + + upa_writeq((SCHIZO_SAFERRCTRL_EN | err_mask), + pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL); +} + +static void pbm_config_busmastering(struct pci_pbm_info *pbm) +{ + u8 *addr; + + /* Set cache-line size to 64 bytes, this is actually + * a nop but I do it for completeness. + */ + addr = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno, + 0, PCI_CACHE_LINE_SIZE); + pci_config_write8(addr, 64 / sizeof(u32)); + + /* Set PBM latency timer to 64 PCI clocks. */ + addr = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno, + 0, PCI_LATENCY_TIMER); + pci_config_write8(addr, 64); +} + +static void __devinit schizo_scan_bus(struct pci_pbm_info *pbm, + struct device *parent) +{ + pbm_config_busmastering(pbm); + pbm->is_66mhz_capable = + (of_find_property(pbm->op->node, "66mhz-capable", NULL) + != NULL); + + pbm->pci_bus = pci_scan_one_pbm(pbm, parent); + + if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) + tomatillo_register_error_handlers(pbm); + else + schizo_register_error_handlers(pbm); +} + +#define SCHIZO_STRBUF_CONTROL (0x02800UL) +#define SCHIZO_STRBUF_FLUSH (0x02808UL) +#define SCHIZO_STRBUF_FSYNC (0x02810UL) +#define SCHIZO_STRBUF_CTXFLUSH (0x02818UL) +#define SCHIZO_STRBUF_CTXMATCH (0x10000UL) + +static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm) +{ + unsigned long base = pbm->pbm_regs; + u64 control; + + if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) { + /* TOMATILLO lacks streaming cache. */ + return; + } + + /* SCHIZO has context flushing. */ + pbm->stc.strbuf_control = base + SCHIZO_STRBUF_CONTROL; + pbm->stc.strbuf_pflush = base + SCHIZO_STRBUF_FLUSH; + pbm->stc.strbuf_fsync = base + SCHIZO_STRBUF_FSYNC; + pbm->stc.strbuf_ctxflush = base + SCHIZO_STRBUF_CTXFLUSH; + pbm->stc.strbuf_ctxmatch_base = base + SCHIZO_STRBUF_CTXMATCH; + + pbm->stc.strbuf_flushflag = (volatile unsigned long *) + ((((unsigned long)&pbm->stc.__flushflag_buf[0]) + + 63UL) + & ~63UL); + pbm->stc.strbuf_flushflag_pa = (unsigned long) + __pa(pbm->stc.strbuf_flushflag); + + /* Turn off LRU locking and diag mode, enable the + * streaming buffer and leave the rerun-disable + * setting however OBP set it. + */ + control = upa_readq(pbm->stc.strbuf_control); + control &= ~(SCHIZO_STRBUF_CTRL_LPTR | + SCHIZO_STRBUF_CTRL_LENAB | + SCHIZO_STRBUF_CTRL_DENAB); + control |= SCHIZO_STRBUF_CTRL_ENAB; + upa_writeq(control, pbm->stc.strbuf_control); + + pbm->stc.strbuf_enabled = 1; +} + +#define SCHIZO_IOMMU_CONTROL (0x00200UL) +#define SCHIZO_IOMMU_TSBBASE (0x00208UL) +#define SCHIZO_IOMMU_FLUSH (0x00210UL) +#define SCHIZO_IOMMU_CTXFLUSH (0x00218UL) + +static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm) +{ + static const u32 vdma_default[] = { 0xc0000000, 0x40000000 }; + unsigned long i, tagbase, database; + struct iommu *iommu = pbm->iommu; + int tsbsize, err; + const u32 *vdma; + u32 dma_mask; + u64 control; + + vdma = of_get_property(pbm->op->node, "virtual-dma", NULL); + if (!vdma) + vdma = vdma_default; + + dma_mask = vdma[0]; + switch (vdma[1]) { + case 0x20000000: + dma_mask |= 0x1fffffff; + tsbsize = 64; + break; + + case 0x40000000: + dma_mask |= 0x3fffffff; + tsbsize = 128; + break; + + case 0x80000000: + dma_mask |= 0x7fffffff; + tsbsize = 128; + break; + + default: + printk(KERN_ERR PFX "Strange virtual-dma size.\n"); + return -EINVAL; + } + + /* Register addresses, SCHIZO has iommu ctx flushing. */ + iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; + iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE; + iommu->iommu_flush = pbm->pbm_regs + SCHIZO_IOMMU_FLUSH; + iommu->iommu_tags = iommu->iommu_flush + (0xa580UL - 0x0210UL); + iommu->iommu_ctxflush = pbm->pbm_regs + SCHIZO_IOMMU_CTXFLUSH; + + /* We use the main control/status register of SCHIZO as the write + * completion register. + */ + iommu->write_complete_reg = pbm->controller_regs + 0x10000UL; + + /* + * Invalidate TLB Entries. + */ + control = upa_readq(iommu->iommu_control); + control |= SCHIZO_IOMMU_CTRL_DENAB; + upa_writeq(control, iommu->iommu_control); + + tagbase = SCHIZO_IOMMU_TAG, database = SCHIZO_IOMMU_DATA; + + for (i = 0; i < 16; i++) { + upa_writeq(0, pbm->pbm_regs + tagbase + (i * 8UL)); + upa_writeq(0, pbm->pbm_regs + database + (i * 8UL)); + } + + /* Leave diag mode enabled for full-flushing done + * in pci_iommu.c + */ + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask, + pbm->numa_node); + if (err) { + printk(KERN_ERR PFX "iommu_table_init() fails with %d\n", err); + return err; + } + + upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase); + + control = upa_readq(iommu->iommu_control); + control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ); + switch (tsbsize) { + case 64: + control |= SCHIZO_IOMMU_TSBSZ_64K; + break; + case 128: + control |= SCHIZO_IOMMU_TSBSZ_128K; + break; + } + + control |= SCHIZO_IOMMU_CTRL_ENAB; + upa_writeq(control, iommu->iommu_control); + + return 0; +} + +#define SCHIZO_PCI_IRQ_RETRY (0x1a00UL) +#define SCHIZO_IRQ_RETRY_INF 0xffUL + +#define SCHIZO_PCI_DIAG (0x2020UL) +#define SCHIZO_PCIDIAG_D_BADECC (1UL << 10UL) /* Disable BAD ECC errors (Schizo) */ +#define SCHIZO_PCIDIAG_D_BYPASS (1UL << 9UL) /* Disable MMU bypass mode (Schizo/Tomatillo) */ +#define SCHIZO_PCIDIAG_D_TTO (1UL << 8UL) /* Disable TTO errors (Schizo/Tomatillo) */ +#define SCHIZO_PCIDIAG_D_RTRYARB (1UL << 7UL) /* Disable retry arbitration (Schizo) */ +#define SCHIZO_PCIDIAG_D_RETRY (1UL << 6UL) /* Disable retry limit (Schizo/Tomatillo) */ +#define SCHIZO_PCIDIAG_D_INTSYNC (1UL << 5UL) /* Disable interrupt/DMA synch (Schizo/Tomatillo) */ +#define SCHIZO_PCIDIAG_I_DMA_PARITY (1UL << 3UL) /* Invert DMA parity (Schizo/Tomatillo) */ +#define SCHIZO_PCIDIAG_I_PIOD_PARITY (1UL << 2UL) /* Invert PIO data parity (Schizo/Tomatillo) */ +#define SCHIZO_PCIDIAG_I_PIOA_PARITY (1UL << 1UL) /* Invert PIO address parity (Schizo/Tomatillo) */ + +#define TOMATILLO_PCI_IOC_CSR (0x2248UL) +#define TOMATILLO_IOC_PART_WPENAB 0x0000000000080000UL +#define TOMATILLO_IOC_RDMULT_PENAB 0x0000000000040000UL +#define TOMATILLO_IOC_RDONE_PENAB 0x0000000000020000UL +#define TOMATILLO_IOC_RDLINE_PENAB 0x0000000000010000UL +#define TOMATILLO_IOC_RDMULT_PLEN 0x000000000000c000UL +#define TOMATILLO_IOC_RDMULT_PLEN_SHIFT 14UL +#define TOMATILLO_IOC_RDONE_PLEN 0x0000000000003000UL +#define TOMATILLO_IOC_RDONE_PLEN_SHIFT 12UL +#define TOMATILLO_IOC_RDLINE_PLEN 0x0000000000000c00UL +#define TOMATILLO_IOC_RDLINE_PLEN_SHIFT 10UL +#define TOMATILLO_IOC_PREF_OFF 0x00000000000003f8UL +#define TOMATILLO_IOC_PREF_OFF_SHIFT 3UL +#define TOMATILLO_IOC_RDMULT_CPENAB 0x0000000000000004UL +#define TOMATILLO_IOC_RDONE_CPENAB 0x0000000000000002UL +#define TOMATILLO_IOC_RDLINE_CPENAB 0x0000000000000001UL + +#define TOMATILLO_PCI_IOC_TDIAG (0x2250UL) +#define TOMATILLO_PCI_IOC_DDIAG (0x2290UL) + +static void schizo_pbm_hw_init(struct pci_pbm_info *pbm) +{ + u64 tmp; + + upa_writeq(5, pbm->pbm_regs + SCHIZO_PCI_IRQ_RETRY); + + tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL); + + /* Enable arbiter for all PCI slots. */ + tmp |= 0xff; + + if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO && + pbm->chip_version >= 0x2) + tmp |= 0x3UL << SCHIZO_PCICTRL_PTO_SHIFT; + + if (!of_find_property(pbm->op->node, "no-bus-parking", NULL)) + tmp |= SCHIZO_PCICTRL_PARK; + else + tmp &= ~SCHIZO_PCICTRL_PARK; + + if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO && + pbm->chip_version <= 0x1) + tmp |= SCHIZO_PCICTRL_DTO_INT; + else + tmp &= ~SCHIZO_PCICTRL_DTO_INT; + + if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) + tmp |= (SCHIZO_PCICTRL_MRM_PREF | + SCHIZO_PCICTRL_RDO_PREF | + SCHIZO_PCICTRL_RDL_PREF); + + upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL); + + tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_DIAG); + tmp &= ~(SCHIZO_PCIDIAG_D_RTRYARB | + SCHIZO_PCIDIAG_D_RETRY | + SCHIZO_PCIDIAG_D_INTSYNC); + upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_DIAG); + + if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) { + /* Clear prefetch lengths to workaround a bug in + * Jalapeno... + */ + tmp = (TOMATILLO_IOC_PART_WPENAB | + (1 << TOMATILLO_IOC_PREF_OFF_SHIFT) | + TOMATILLO_IOC_RDMULT_CPENAB | + TOMATILLO_IOC_RDONE_CPENAB | + TOMATILLO_IOC_RDLINE_CPENAB); + + upa_writeq(tmp, pbm->pbm_regs + TOMATILLO_PCI_IOC_CSR); + } +} + +static int __devinit schizo_pbm_init(struct pci_pbm_info *pbm, + struct of_device *op, u32 portid, + int chip_type) +{ + const struct linux_prom64_registers *regs; + struct device_node *dp = op->node; + const char *chipset_name; + int is_pbm_a, err; + + switch (chip_type) { + case PBM_CHIP_TYPE_TOMATILLO: + chipset_name = "TOMATILLO"; + break; + + case PBM_CHIP_TYPE_SCHIZO_PLUS: + chipset_name = "SCHIZO+"; + break; + + case PBM_CHIP_TYPE_SCHIZO: + default: + chipset_name = "SCHIZO"; + break; + }; + + /* For SCHIZO, three OBP regs: + * 1) PBM controller regs + * 2) Schizo front-end controller regs (same for both PBMs) + * 3) PBM PCI config space + * + * For TOMATILLO, four OBP regs: + * 1) PBM controller regs + * 2) Tomatillo front-end controller regs + * 3) PBM PCI config space + * 4) Ichip regs + */ + regs = of_get_property(dp, "reg", NULL); + + is_pbm_a = ((regs[0].phys_addr & 0x00700000) == 0x00600000); + + pbm->next = pci_pbm_root; + pci_pbm_root = pbm; + + pbm->numa_node = -1; + + pbm->pci_ops = &sun4u_pci_ops; + pbm->config_space_reg_bits = 8; + + pbm->index = pci_num_pbms++; + + pbm->portid = portid; + pbm->op = op; + + pbm->chip_type = chip_type; + pbm->chip_version = of_getintprop_default(dp, "version#", 0); + pbm->chip_revision = of_getintprop_default(dp, "module-version#", 0); + + pbm->pbm_regs = regs[0].phys_addr; + pbm->controller_regs = regs[1].phys_addr - 0x10000UL; + + if (chip_type == PBM_CHIP_TYPE_TOMATILLO) + pbm->sync_reg = regs[3].phys_addr + 0x1a18UL; + + pbm->name = dp->full_name; + + printk("%s: %s PCI Bus Module ver[%x:%x]\n", + pbm->name, chipset_name, + pbm->chip_version, pbm->chip_revision); + + schizo_pbm_hw_init(pbm); + + pci_determine_mem_io_space(pbm); + + pci_get_pbm_props(pbm); + + err = schizo_pbm_iommu_init(pbm); + if (err) + return err; + + schizo_pbm_strbuf_init(pbm); + + schizo_scan_bus(pbm, &op->dev); + + return 0; +} + +static inline int portid_compare(u32 x, u32 y, int chip_type) +{ + if (chip_type == PBM_CHIP_TYPE_TOMATILLO) { + if (x == (y ^ 1)) + return 1; + return 0; + } + return (x == y); +} + +static struct pci_pbm_info * __devinit schizo_find_sibling(u32 portid, + int chip_type) +{ + struct pci_pbm_info *pbm; + + for (pbm = pci_pbm_root; pbm; pbm = pbm->next) { + if (portid_compare(pbm->portid, portid, chip_type)) + return pbm; + } + return NULL; +} + +static int __devinit __schizo_init(struct of_device *op, unsigned long chip_type) +{ + struct device_node *dp = op->node; + struct pci_pbm_info *pbm; + struct iommu *iommu; + u32 portid; + int err; + + portid = of_getintprop_default(dp, "portid", 0xff); + + err = -ENOMEM; + pbm = kzalloc(sizeof(*pbm), GFP_KERNEL); + if (!pbm) { + printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n"); + goto out_err; + } + + pbm->sibling = schizo_find_sibling(portid, chip_type); + + iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL); + if (!iommu) { + printk(KERN_ERR PFX "Cannot allocate PBM A iommu.\n"); + goto out_free_pbm; + } + + pbm->iommu = iommu; + + if (schizo_pbm_init(pbm, op, portid, chip_type)) + goto out_free_iommu; + + if (pbm->sibling) + pbm->sibling->sibling = pbm; + + dev_set_drvdata(&op->dev, pbm); + + return 0; + +out_free_iommu: + kfree(pbm->iommu); + +out_free_pbm: + kfree(pbm); + +out_err: + return err; +} + +static int __devinit schizo_probe(struct of_device *op, + const struct of_device_id *match) +{ + return __schizo_init(op, (unsigned long) match->data); +} + +/* The ordering of this table is very important. Some Tomatillo + * nodes announce that they are compatible with both pci108e,a801 + * and pci108e,8001. So list the chips in reverse chronological + * order. + */ +static struct of_device_id __initdata schizo_match[] = { + { + .name = "pci", + .compatible = "pci108e,a801", + .data = (void *) PBM_CHIP_TYPE_TOMATILLO, + }, + { + .name = "pci", + .compatible = "pci108e,8002", + .data = (void *) PBM_CHIP_TYPE_SCHIZO_PLUS, + }, + { + .name = "pci", + .compatible = "pci108e,8001", + .data = (void *) PBM_CHIP_TYPE_SCHIZO, + }, + {}, +}; + +static struct of_platform_driver schizo_driver = { + .name = DRIVER_NAME, + .match_table = schizo_match, + .probe = schizo_probe, +}; + +static int __init schizo_init(void) +{ + return of_register_driver(&schizo_driver, &of_bus_type); +} + +subsys_initcall(schizo_init); diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c new file mode 100644 index 000000000000..34a1fded3941 --- /dev/null +++ b/arch/sparc/kernel/pci_sun4v.c @@ -0,0 +1,1033 @@ +/* pci_sun4v.c: SUN4V specific PCI controller support. + * + * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <linux/log2.h> +#include <linux/of_device.h> + +#include <asm/iommu.h> +#include <asm/irq.h> +#include <asm/hypervisor.h> +#include <asm/prom.h> + +#include "pci_impl.h" +#include "iommu_common.h" + +#include "pci_sun4v.h" + +#define DRIVER_NAME "pci_sun4v" +#define PFX DRIVER_NAME ": " + +static unsigned long vpci_major = 1; +static unsigned long vpci_minor = 1; + +#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) + +struct iommu_batch { + struct device *dev; /* Device mapping is for. */ + unsigned long prot; /* IOMMU page protections */ + unsigned long entry; /* Index into IOTSB. */ + u64 *pglist; /* List of physical pages */ + unsigned long npages; /* Number of pages in list. */ +}; + +static DEFINE_PER_CPU(struct iommu_batch, iommu_batch); +static int iommu_batch_initialized; + +/* Interrupts must be disabled. */ +static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry) +{ + struct iommu_batch *p = &__get_cpu_var(iommu_batch); + + p->dev = dev; + p->prot = prot; + p->entry = entry; + p->npages = 0; +} + +/* Interrupts must be disabled. */ +static long iommu_batch_flush(struct iommu_batch *p) +{ + struct pci_pbm_info *pbm = p->dev->archdata.host_controller; + unsigned long devhandle = pbm->devhandle; + unsigned long prot = p->prot; + unsigned long entry = p->entry; + u64 *pglist = p->pglist; + unsigned long npages = p->npages; + + while (npages != 0) { + long num; + + num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist)); + if (unlikely(num < 0)) { + if (printk_ratelimit()) + printk("iommu_batch_flush: IOMMU map of " + "[%08lx:%08lx:%lx:%lx:%lx] failed with " + "status %ld\n", + devhandle, HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist), num); + return -1; + } + + entry += num; + npages -= num; + pglist += num; + } + + p->entry = entry; + p->npages = 0; + + return 0; +} + +static inline void iommu_batch_new_entry(unsigned long entry) +{ + struct iommu_batch *p = &__get_cpu_var(iommu_batch); + + if (p->entry + p->npages == entry) + return; + if (p->entry != ~0UL) + iommu_batch_flush(p); + p->entry = entry; +} + +/* Interrupts must be disabled. */ +static inline long iommu_batch_add(u64 phys_page) +{ + struct iommu_batch *p = &__get_cpu_var(iommu_batch); + + BUG_ON(p->npages >= PGLIST_NENTS); + + p->pglist[p->npages++] = phys_page; + if (p->npages == PGLIST_NENTS) + return iommu_batch_flush(p); + + return 0; +} + +/* Interrupts must be disabled. */ +static inline long iommu_batch_end(void) +{ + struct iommu_batch *p = &__get_cpu_var(iommu_batch); + + BUG_ON(p->npages >= PGLIST_NENTS); + + return iommu_batch_flush(p); +} + +static void *dma_4v_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp) +{ + unsigned long flags, order, first_page, npages, n; + struct iommu *iommu; + struct page *page; + void *ret; + long entry; + int nid; + + size = IO_PAGE_ALIGN(size); + order = get_order(size); + if (unlikely(order >= MAX_ORDER)) + return NULL; + + npages = size >> IO_PAGE_SHIFT; + + nid = dev->archdata.numa_node; + page = alloc_pages_node(nid, gfp, order); + if (unlikely(!page)) + return NULL; + + first_page = (unsigned long) page_address(page); + memset((char *)first_page, 0, PAGE_SIZE << order); + + iommu = dev->archdata.iommu; + + spin_lock_irqsave(&iommu->lock, flags); + entry = iommu_range_alloc(dev, iommu, npages, NULL); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry == DMA_ERROR_CODE)) + goto range_alloc_fail; + + *dma_addrp = (iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT)); + ret = (void *) first_page; + first_page = __pa(first_page); + + local_irq_save(flags); + + iommu_batch_start(dev, + (HV_PCI_MAP_ATTR_READ | + HV_PCI_MAP_ATTR_WRITE), + entry); + + for (n = 0; n < npages; n++) { + long err = iommu_batch_add(first_page + (n * PAGE_SIZE)); + if (unlikely(err < 0L)) + goto iommu_map_fail; + } + + if (unlikely(iommu_batch_end() < 0L)) + goto iommu_map_fail; + + local_irq_restore(flags); + + return ret; + +iommu_map_fail: + /* Interrupts are disabled. */ + spin_lock(&iommu->lock); + iommu_range_free(iommu, *dma_addrp, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + +range_alloc_fail: + free_pages(first_page, order); + return NULL; +} + +static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, + dma_addr_t dvma) +{ + struct pci_pbm_info *pbm; + struct iommu *iommu; + unsigned long flags, order, npages, entry; + u32 devhandle; + + npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; + iommu = dev->archdata.iommu; + pbm = dev->archdata.host_controller; + devhandle = pbm->devhandle; + entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + spin_lock_irqsave(&iommu->lock, flags); + + iommu_range_free(iommu, dvma, npages); + + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); + + order = get_order(size); + if (order < 10) + free_pages((unsigned long)cpu, order); +} + +static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz, + enum dma_data_direction direction) +{ + struct iommu *iommu; + unsigned long flags, npages, oaddr; + unsigned long i, base_paddr; + u32 bus_addr, ret; + unsigned long prot; + long entry; + + iommu = dev->archdata.iommu; + + if (unlikely(direction == DMA_NONE)) + goto bad; + + oaddr = (unsigned long)ptr; + npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + + spin_lock_irqsave(&iommu->lock, flags); + entry = iommu_range_alloc(dev, iommu, npages, NULL); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(entry == DMA_ERROR_CODE)) + goto bad; + + bus_addr = (iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT)); + ret = bus_addr | (oaddr & ~IO_PAGE_MASK); + base_paddr = __pa(oaddr & IO_PAGE_MASK); + prot = HV_PCI_MAP_ATTR_READ; + if (direction != DMA_TO_DEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; + + local_irq_save(flags); + + iommu_batch_start(dev, prot, entry); + + for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) { + long err = iommu_batch_add(base_paddr); + if (unlikely(err < 0L)) + goto iommu_map_fail; + } + if (unlikely(iommu_batch_end() < 0L)) + goto iommu_map_fail; + + local_irq_restore(flags); + + return ret; + +bad: + if (printk_ratelimit()) + WARN_ON(1); + return DMA_ERROR_CODE; + +iommu_map_fail: + /* Interrupts are disabled. */ + spin_lock(&iommu->lock); + iommu_range_free(iommu, bus_addr, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + return DMA_ERROR_CODE; +} + +static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr, + size_t sz, enum dma_data_direction direction) +{ + struct pci_pbm_info *pbm; + struct iommu *iommu; + unsigned long flags, npages; + long entry; + u32 devhandle; + + if (unlikely(direction == DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } + + iommu = dev->archdata.iommu; + pbm = dev->archdata.host_controller; + devhandle = pbm->devhandle; + + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); + npages >>= IO_PAGE_SHIFT; + bus_addr &= IO_PAGE_MASK; + + spin_lock_irqsave(&iommu->lock, flags); + + iommu_range_free(iommu, bus_addr, npages); + + entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; + do { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } while (npages != 0); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + struct scatterlist *s, *outs, *segstart; + unsigned long flags, handle, prot; + dma_addr_t dma_next = 0, dma_addr; + unsigned int max_seg_size; + unsigned long seg_boundary_size; + int outcount, incount, i; + struct iommu *iommu; + unsigned long base_shift; + long err; + + BUG_ON(direction == DMA_NONE); + + iommu = dev->archdata.iommu; + if (nelems == 0 || !iommu) + return 0; + + prot = HV_PCI_MAP_ATTR_READ; + if (direction != DMA_TO_DEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; + + outs = s = segstart = &sglist[0]; + outcount = 1; + incount = nelems; + handle = 0; + + /* Init first segment length for backout at failure */ + outs->dma_length = 0; + + spin_lock_irqsave(&iommu->lock, flags); + + iommu_batch_start(dev, prot, ~0UL); + + max_seg_size = dma_get_max_seg_size(dev); + seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; + for_each_sg(sglist, s, nelems, i) { + unsigned long paddr, npages, entry, out_entry = 0, slen; + + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); + npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); + entry = iommu_range_alloc(dev, iommu, npages, &handle); + + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" + " npages %lx\n", iommu, paddr, npages); + goto iommu_map_failed; + } + + iommu_batch_new_entry(entry); + + /* Convert entry to a dma_addr_t */ + dma_addr = iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT); + dma_addr |= (s->offset & ~IO_PAGE_MASK); + + /* Insert into HW table */ + paddr &= IO_PAGE_MASK; + while (npages--) { + err = iommu_batch_add(paddr); + if (unlikely(err < 0L)) + goto iommu_map_failed; + paddr += IO_PAGE_SIZE; + } + + /* If we are in an open segment, try merging */ + if (segstart != s) { + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if ((dma_addr != dma_next) || + (outs->dma_length + s->length > max_seg_size) || + (is_span_boundary(out_entry, base_shift, + seg_boundary_size, outs, s))) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; + outs = sg_next(outs); + } else { + outs->dma_length += s->length; + } + } + + if (segstart == s) { + /* This is a new segment, fill entries */ + outs->dma_address = dma_addr; + outs->dma_length = slen; + out_entry = entry; + } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; + } + + err = iommu_batch_end(); + + if (unlikely(err < 0L)) + goto iommu_map_failed; + + spin_unlock_irqrestore(&iommu->lock, flags); + + if (outcount < incount) { + outs = sg_next(outs); + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + + return outcount; + +iommu_map_failed: + for_each_sg(sglist, s, nelems, i) { + if (s->dma_length != 0) { + unsigned long vaddr, npages; + + vaddr = s->dma_address & IO_PAGE_MASK; + npages = iommu_num_pages(s->dma_address, s->dma_length, + IO_PAGE_SIZE); + iommu_range_free(iommu, vaddr, npages); + /* XXX demap? XXX */ + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + } + if (s == outs) + break; + } + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction) +{ + struct pci_pbm_info *pbm; + struct scatterlist *sg; + struct iommu *iommu; + unsigned long flags; + u32 devhandle; + + BUG_ON(direction == DMA_NONE); + + iommu = dev->archdata.iommu; + pbm = dev->archdata.host_controller; + devhandle = pbm->devhandle; + + spin_lock_irqsave(&iommu->lock, flags); + + sg = sglist; + while (nelems--) { + dma_addr_t dma_handle = sg->dma_address; + unsigned int len = sg->dma_length; + unsigned long npages, entry; + + if (!len) + break; + npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); + iommu_range_free(iommu, dma_handle, npages); + + entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + while (npages) { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } + + sg = sg_next(sg); + } + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void dma_4v_sync_single_for_cpu(struct device *dev, + dma_addr_t bus_addr, size_t sz, + enum dma_data_direction direction) +{ + /* Nothing to do... */ +} + +static void dma_4v_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) +{ + /* Nothing to do... */ +} + +static const struct dma_ops sun4v_dma_ops = { + .alloc_coherent = dma_4v_alloc_coherent, + .free_coherent = dma_4v_free_coherent, + .map_single = dma_4v_map_single, + .unmap_single = dma_4v_unmap_single, + .map_sg = dma_4v_map_sg, + .unmap_sg = dma_4v_unmap_sg, + .sync_single_for_cpu = dma_4v_sync_single_for_cpu, + .sync_sg_for_cpu = dma_4v_sync_sg_for_cpu, +}; + +static void __init pci_sun4v_scan_bus(struct pci_pbm_info *pbm, + struct device *parent) +{ + struct property *prop; + struct device_node *dp; + + dp = pbm->op->node; + prop = of_find_property(dp, "66mhz-capable", NULL); + pbm->is_66mhz_capable = (prop != NULL); + pbm->pci_bus = pci_scan_one_pbm(pbm, parent); + + /* XXX register error interrupt handlers XXX */ +} + +static unsigned long __init probe_existing_entries(struct pci_pbm_info *pbm, + struct iommu *iommu) +{ + struct iommu_arena *arena = &iommu->arena; + unsigned long i, cnt = 0; + u32 devhandle; + + devhandle = pbm->devhandle; + for (i = 0; i < arena->limit; i++) { + unsigned long ret, io_attrs, ra; + + ret = pci_sun4v_iommu_getmap(devhandle, + HV_PCI_TSBID(0, i), + &io_attrs, &ra); + if (ret == HV_EOK) { + if (page_in_phys_avail(ra)) { + pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, i), 1); + } else { + cnt++; + __set_bit(i, arena->map); + } + } + } + + return cnt; +} + +static int __init pci_sun4v_iommu_init(struct pci_pbm_info *pbm) +{ + static const u32 vdma_default[] = { 0x80000000, 0x80000000 }; + struct iommu *iommu = pbm->iommu; + unsigned long num_tsb_entries, sz, tsbsize; + u32 dma_mask, dma_offset; + const u32 *vdma; + + vdma = of_get_property(pbm->op->node, "virtual-dma", NULL); + if (!vdma) + vdma = vdma_default; + + if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) { + printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", + vdma[0], vdma[1]); + return -EINVAL; + }; + + dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); + num_tsb_entries = vdma[1] / IO_PAGE_SIZE; + tsbsize = num_tsb_entries * sizeof(iopte_t); + + dma_offset = vdma[0]; + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_mask; + + /* Allocate and initialize the free area map. */ + sz = (num_tsb_entries + 7) / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kzalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n"); + return -ENOMEM; + } + iommu->arena.limit = num_tsb_entries; + + sz = probe_existing_entries(pbm, iommu); + if (sz) + printk("%s: Imported %lu TSB entries from OBP\n", + pbm->name, sz); + + return 0; +} + +#ifdef CONFIG_PCI_MSI +struct pci_sun4v_msiq_entry { + u64 version_type; +#define MSIQ_VERSION_MASK 0xffffffff00000000UL +#define MSIQ_VERSION_SHIFT 32 +#define MSIQ_TYPE_MASK 0x00000000000000ffUL +#define MSIQ_TYPE_SHIFT 0 +#define MSIQ_TYPE_NONE 0x00 +#define MSIQ_TYPE_MSG 0x01 +#define MSIQ_TYPE_MSI32 0x02 +#define MSIQ_TYPE_MSI64 0x03 +#define MSIQ_TYPE_INTX 0x08 +#define MSIQ_TYPE_NONE2 0xff + + u64 intx_sysino; + u64 reserved1; + u64 stick; + u64 req_id; /* bus/device/func */ +#define MSIQ_REQID_BUS_MASK 0xff00UL +#define MSIQ_REQID_BUS_SHIFT 8 +#define MSIQ_REQID_DEVICE_MASK 0x00f8UL +#define MSIQ_REQID_DEVICE_SHIFT 3 +#define MSIQ_REQID_FUNC_MASK 0x0007UL +#define MSIQ_REQID_FUNC_SHIFT 0 + + u64 msi_address; + + /* The format of this value is message type dependent. + * For MSI bits 15:0 are the data from the MSI packet. + * For MSI-X bits 31:0 are the data from the MSI packet. + * For MSG, the message code and message routing code where: + * bits 39:32 is the bus/device/fn of the msg target-id + * bits 18:16 is the message routing code + * bits 7:0 is the message code + * For INTx the low order 2-bits are: + * 00 - INTA + * 01 - INTB + * 10 - INTC + * 11 - INTD + */ + u64 msi_data; + + u64 reserved2; +}; + +static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long *head) +{ + unsigned long err, limit; + + err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head); + if (unlikely(err)) + return -ENXIO; + + limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry); + if (unlikely(*head >= limit)) + return -EFBIG; + + return 0; +} + +static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm, + unsigned long msiqid, unsigned long *head, + unsigned long *msi) +{ + struct pci_sun4v_msiq_entry *ep; + unsigned long err, type; + + /* Note: void pointer arithmetic, 'head' is a byte offset */ + ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * + (pbm->msiq_ent_count * + sizeof(struct pci_sun4v_msiq_entry))) + + *head); + + if ((ep->version_type & MSIQ_TYPE_MASK) == 0) + return 0; + + type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT; + if (unlikely(type != MSIQ_TYPE_MSI32 && + type != MSIQ_TYPE_MSI64)) + return -EINVAL; + + *msi = ep->msi_data; + + err = pci_sun4v_msi_setstate(pbm->devhandle, + ep->msi_data /* msi_num */, + HV_MSISTATE_IDLE); + if (unlikely(err)) + return -ENXIO; + + /* Clear the entry. */ + ep->version_type &= ~MSIQ_TYPE_MASK; + + (*head) += sizeof(struct pci_sun4v_msiq_entry); + if (*head >= + (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))) + *head = 0; + + return 1; +} + +static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long head) +{ + unsigned long err; + + err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head); + if (unlikely(err)) + return -EINVAL; + + return 0; +} + +static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid, + unsigned long msi, int is_msi64) +{ + if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid, + (is_msi64 ? + HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32))) + return -ENXIO; + if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE)) + return -ENXIO; + if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID)) + return -ENXIO; + return 0; +} + +static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi) +{ + unsigned long err, msiqid; + + err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid); + if (err) + return -ENXIO; + + pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID); + + return 0; +} + +static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm) +{ + unsigned long q_size, alloc_size, pages, order; + int i; + + q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry); + alloc_size = (pbm->msiq_num * q_size); + order = get_order(alloc_size); + pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order); + if (pages == 0UL) { + printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n", + order); + return -ENOMEM; + } + memset((char *)pages, 0, PAGE_SIZE << order); + pbm->msi_queues = (void *) pages; + + for (i = 0; i < pbm->msiq_num; i++) { + unsigned long err, base = __pa(pages + (i * q_size)); + unsigned long ret1, ret2; + + err = pci_sun4v_msiq_conf(pbm->devhandle, + pbm->msiq_first + i, + base, pbm->msiq_ent_count); + if (err) { + printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n", + err); + goto h_error; + } + + err = pci_sun4v_msiq_info(pbm->devhandle, + pbm->msiq_first + i, + &ret1, &ret2); + if (err) { + printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n", + err); + goto h_error; + } + if (ret1 != base || ret2 != pbm->msiq_ent_count) { + printk(KERN_ERR "MSI: Bogus qconf " + "expected[%lx:%x] got[%lx:%lx]\n", + base, pbm->msiq_ent_count, + ret1, ret2); + goto h_error; + } + } + + return 0; + +h_error: + free_pages(pages, order); + return -EINVAL; +} + +static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm) +{ + unsigned long q_size, alloc_size, pages, order; + int i; + + for (i = 0; i < pbm->msiq_num; i++) { + unsigned long msiqid = pbm->msiq_first + i; + + (void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0); + } + + q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry); + alloc_size = (pbm->msiq_num * q_size); + order = get_order(alloc_size); + + pages = (unsigned long) pbm->msi_queues; + + free_pages(pages, order); + + pbm->msi_queues = NULL; +} + +static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm, + unsigned long msiqid, + unsigned long devino) +{ + unsigned int virt_irq = sun4v_build_irq(pbm->devhandle, devino); + + if (!virt_irq) + return -ENOMEM; + + if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE)) + return -EINVAL; + if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID)) + return -EINVAL; + + return virt_irq; +} + +static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = { + .get_head = pci_sun4v_get_head, + .dequeue_msi = pci_sun4v_dequeue_msi, + .set_head = pci_sun4v_set_head, + .msi_setup = pci_sun4v_msi_setup, + .msi_teardown = pci_sun4v_msi_teardown, + .msiq_alloc = pci_sun4v_msiq_alloc, + .msiq_free = pci_sun4v_msiq_free, + .msiq_build_irq = pci_sun4v_msiq_build_irq, +}; + +static void pci_sun4v_msi_init(struct pci_pbm_info *pbm) +{ + sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops); +} +#else /* CONFIG_PCI_MSI */ +static void pci_sun4v_msi_init(struct pci_pbm_info *pbm) +{ +} +#endif /* !(CONFIG_PCI_MSI) */ + +static int __init pci_sun4v_pbm_init(struct pci_pbm_info *pbm, + struct of_device *op, u32 devhandle) +{ + struct device_node *dp = op->node; + int err; + + pbm->numa_node = of_node_to_nid(dp); + + pbm->pci_ops = &sun4v_pci_ops; + pbm->config_space_reg_bits = 12; + + pbm->index = pci_num_pbms++; + + pbm->op = op; + + pbm->devhandle = devhandle; + + pbm->name = dp->full_name; + + printk("%s: SUN4V PCI Bus Module\n", pbm->name); + printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node); + + pci_determine_mem_io_space(pbm); + + pci_get_pbm_props(pbm); + + err = pci_sun4v_iommu_init(pbm); + if (err) + return err; + + pci_sun4v_msi_init(pbm); + + pci_sun4v_scan_bus(pbm, &op->dev); + + pbm->next = pci_pbm_root; + pci_pbm_root = pbm; + + return 0; +} + +static int __devinit pci_sun4v_probe(struct of_device *op, + const struct of_device_id *match) +{ + const struct linux_prom64_registers *regs; + static int hvapi_negotiated = 0; + struct pci_pbm_info *pbm; + struct device_node *dp; + struct iommu *iommu; + u32 devhandle; + int i, err; + + dp = op->node; + + if (!hvapi_negotiated++) { + err = sun4v_hvapi_register(HV_GRP_PCI, + vpci_major, + &vpci_minor); + + if (err) { + printk(KERN_ERR PFX "Could not register hvapi, " + "err=%d\n", err); + return err; + } + printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n", + vpci_major, vpci_minor); + + dma_ops = &sun4v_dma_ops; + } + + regs = of_get_property(dp, "reg", NULL); + err = -ENODEV; + if (!regs) { + printk(KERN_ERR PFX "Could not find config registers\n"); + goto out_err; + } + devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff; + + err = -ENOMEM; + if (!iommu_batch_initialized) { + for_each_possible_cpu(i) { + unsigned long page = get_zeroed_page(GFP_KERNEL); + + if (!page) + goto out_err; + + per_cpu(iommu_batch, i).pglist = (u64 *) page; + } + iommu_batch_initialized = 1; + } + + pbm = kzalloc(sizeof(*pbm), GFP_KERNEL); + if (!pbm) { + printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n"); + goto out_err; + } + + iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL); + if (!iommu) { + printk(KERN_ERR PFX "Could not allocate pbm iommu\n"); + goto out_free_controller; + } + + pbm->iommu = iommu; + + err = pci_sun4v_pbm_init(pbm, op, devhandle); + if (err) + goto out_free_iommu; + + dev_set_drvdata(&op->dev, pbm); + + return 0; + +out_free_iommu: + kfree(pbm->iommu); + +out_free_controller: + kfree(pbm); + +out_err: + return err; +} + +static struct of_device_id __initdata pci_sun4v_match[] = { + { + .name = "pci", + .compatible = "SUNW,sun4v-pci", + }, + {}, +}; + +static struct of_platform_driver pci_sun4v_driver = { + .name = DRIVER_NAME, + .match_table = pci_sun4v_match, + .probe = pci_sun4v_probe, +}; + +static int __init pci_sun4v_init(void) +{ + return of_register_driver(&pci_sun4v_driver, &of_bus_type); +} + +subsys_initcall(pci_sun4v_init); diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h new file mode 100644 index 000000000000..8e9fc3a5b4f5 --- /dev/null +++ b/arch/sparc/kernel/pci_sun4v.h @@ -0,0 +1,92 @@ +/* pci_sun4v.h: SUN4V specific PCI controller support. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#ifndef _PCI_SUN4V_H +#define _PCI_SUN4V_H + +extern long pci_sun4v_iommu_map(unsigned long devhandle, + unsigned long tsbid, + unsigned long num_ttes, + unsigned long io_attributes, + unsigned long io_page_list_pa); +extern unsigned long pci_sun4v_iommu_demap(unsigned long devhandle, + unsigned long tsbid, + unsigned long num_ttes); +extern unsigned long pci_sun4v_iommu_getmap(unsigned long devhandle, + unsigned long tsbid, + unsigned long *io_attributes, + unsigned long *real_address); +extern unsigned long pci_sun4v_config_get(unsigned long devhandle, + unsigned long pci_device, + unsigned long config_offset, + unsigned long size); +extern int pci_sun4v_config_put(unsigned long devhandle, + unsigned long pci_device, + unsigned long config_offset, + unsigned long size, + unsigned long data); + +extern unsigned long pci_sun4v_msiq_conf(unsigned long devhandle, + unsigned long msiqid, + unsigned long msiq_paddr, + unsigned long num_entries); +extern unsigned long pci_sun4v_msiq_info(unsigned long devhandle, + unsigned long msiqid, + unsigned long *msiq_paddr, + unsigned long *num_entries); +extern unsigned long pci_sun4v_msiq_getvalid(unsigned long devhandle, + unsigned long msiqid, + unsigned long *valid); +extern unsigned long pci_sun4v_msiq_setvalid(unsigned long devhandle, + unsigned long msiqid, + unsigned long valid); +extern unsigned long pci_sun4v_msiq_getstate(unsigned long devhandle, + unsigned long msiqid, + unsigned long *state); +extern unsigned long pci_sun4v_msiq_setstate(unsigned long devhandle, + unsigned long msiqid, + unsigned long state); +extern unsigned long pci_sun4v_msiq_gethead(unsigned long devhandle, + unsigned long msiqid, + unsigned long *head); +extern unsigned long pci_sun4v_msiq_sethead(unsigned long devhandle, + unsigned long msiqid, + unsigned long head); +extern unsigned long pci_sun4v_msiq_gettail(unsigned long devhandle, + unsigned long msiqid, + unsigned long *head); +extern unsigned long pci_sun4v_msi_getvalid(unsigned long devhandle, + unsigned long msinum, + unsigned long *valid); +extern unsigned long pci_sun4v_msi_setvalid(unsigned long devhandle, + unsigned long msinum, + unsigned long valid); +extern unsigned long pci_sun4v_msi_getmsiq(unsigned long devhandle, + unsigned long msinum, + unsigned long *msiq); +extern unsigned long pci_sun4v_msi_setmsiq(unsigned long devhandle, + unsigned long msinum, + unsigned long msiq, + unsigned long msitype); +extern unsigned long pci_sun4v_msi_getstate(unsigned long devhandle, + unsigned long msinum, + unsigned long *state); +extern unsigned long pci_sun4v_msi_setstate(unsigned long devhandle, + unsigned long msinum, + unsigned long state); +extern unsigned long pci_sun4v_msg_getmsiq(unsigned long devhandle, + unsigned long msinum, + unsigned long *msiq); +extern unsigned long pci_sun4v_msg_setmsiq(unsigned long devhandle, + unsigned long msinum, + unsigned long msiq); +extern unsigned long pci_sun4v_msg_getvalid(unsigned long devhandle, + unsigned long msinum, + unsigned long *valid); +extern unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle, + unsigned long msinum, + unsigned long valid); + +#endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S new file mode 100644 index 000000000000..e606d46c6815 --- /dev/null +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -0,0 +1,362 @@ +/* pci_sun4v_asm: Hypervisor calls for PCI support. + * + * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net> + */ + +#include <linux/linkage.h> +#include <asm/hypervisor.h> + + /* %o0: devhandle + * %o1: tsbid + * %o2: num ttes + * %o3: io_attributes + * %o4: io_page_list phys address + * + * returns %o0: -status if status was non-zero, else + * %o0: num pages mapped + */ +ENTRY(pci_sun4v_iommu_map) + mov %o5, %g1 + mov HV_FAST_PCI_IOMMU_MAP, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 1f + sub %g0, %o0, %o0 + mov %o1, %o0 +1: retl + nop +ENDPROC(pci_sun4v_iommu_map) + + /* %o0: devhandle + * %o1: tsbid + * %o2: num ttes + * + * returns %o0: num ttes demapped + */ +ENTRY(pci_sun4v_iommu_demap) + mov HV_FAST_PCI_IOMMU_DEMAP, %o5 + ta HV_FAST_TRAP + retl + mov %o1, %o0 +ENDPROC(pci_sun4v_iommu_demap) + + /* %o0: devhandle + * %o1: tsbid + * %o2: &io_attributes + * %o3: &real_address + * + * returns %o0: status + */ +ENTRY(pci_sun4v_iommu_getmap) + mov %o2, %o4 + mov HV_FAST_PCI_IOMMU_GETMAP, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + stx %o2, [%o3] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_iommu_getmap) + + /* %o0: devhandle + * %o1: pci_device + * %o2: pci_config_offset + * %o3: size + * + * returns %o0: data + * + * If there is an error, the data will be returned + * as all 1's. + */ +ENTRY(pci_sun4v_config_get) + mov HV_FAST_PCI_CONFIG_GET, %o5 + ta HV_FAST_TRAP + brnz,a,pn %o1, 1f + mov -1, %o2 +1: retl + mov %o2, %o0 +ENDPROC(pci_sun4v_config_get) + + /* %o0: devhandle + * %o1: pci_device + * %o2: pci_config_offset + * %o3: size + * %o4: data + * + * returns %o0: status + * + * status will be zero if the operation completed + * successfully, else -1 if not + */ +ENTRY(pci_sun4v_config_put) + mov HV_FAST_PCI_CONFIG_PUT, %o5 + ta HV_FAST_TRAP + brnz,a,pn %o1, 1f + mov -1, %o1 +1: retl + mov %o1, %o0 +ENDPROC(pci_sun4v_config_put) + + /* %o0: devhandle + * %o1: msiqid + * %o2: msiq phys address + * %o3: num entries + * + * returns %o0: status + * + * status will be zero if the operation completed + * successfully, else -1 if not + */ +ENTRY(pci_sun4v_msiq_conf) + mov HV_FAST_PCI_MSIQ_CONF, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_conf) + + /* %o0: devhandle + * %o1: msiqid + * %o2: &msiq_phys_addr + * %o3: &msiq_num_entries + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_info) + mov %o2, %o4 + mov HV_FAST_PCI_MSIQ_INFO, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + stx %o2, [%o3] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_info) + + /* %o0: devhandle + * %o1: msiqid + * %o2: &valid + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_getvalid) + mov HV_FAST_PCI_MSIQ_GETVALID, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_getvalid) + + /* %o0: devhandle + * %o1: msiqid + * %o2: valid + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_setvalid) + mov HV_FAST_PCI_MSIQ_SETVALID, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_setvalid) + + /* %o0: devhandle + * %o1: msiqid + * %o2: &state + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_getstate) + mov HV_FAST_PCI_MSIQ_GETSTATE, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_getstate) + + /* %o0: devhandle + * %o1: msiqid + * %o2: state + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_setstate) + mov HV_FAST_PCI_MSIQ_SETSTATE, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_setstate) + + /* %o0: devhandle + * %o1: msiqid + * %o2: &head + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_gethead) + mov HV_FAST_PCI_MSIQ_GETHEAD, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_gethead) + + /* %o0: devhandle + * %o1: msiqid + * %o2: head + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_sethead) + mov HV_FAST_PCI_MSIQ_SETHEAD, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_sethead) + + /* %o0: devhandle + * %o1: msiqid + * %o2: &tail + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msiq_gettail) + mov HV_FAST_PCI_MSIQ_GETTAIL, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msiq_gettail) + + /* %o0: devhandle + * %o1: msinum + * %o2: &valid + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msi_getvalid) + mov HV_FAST_PCI_MSI_GETVALID, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msi_getvalid) + + /* %o0: devhandle + * %o1: msinum + * %o2: valid + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msi_setvalid) + mov HV_FAST_PCI_MSI_SETVALID, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msi_setvalid) + + /* %o0: devhandle + * %o1: msinum + * %o2: &msiq + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msi_getmsiq) + mov HV_FAST_PCI_MSI_GETMSIQ, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msi_getmsiq) + + /* %o0: devhandle + * %o1: msinum + * %o2: msitype + * %o3: msiq + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msi_setmsiq) + mov HV_FAST_PCI_MSI_SETMSIQ, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msi_setmsiq) + + /* %o0: devhandle + * %o1: msinum + * %o2: &state + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msi_getstate) + mov HV_FAST_PCI_MSI_GETSTATE, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msi_getstate) + + /* %o0: devhandle + * %o1: msinum + * %o2: state + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msi_setstate) + mov HV_FAST_PCI_MSI_SETSTATE, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msi_setstate) + + /* %o0: devhandle + * %o1: msinum + * %o2: &msiq + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msg_getmsiq) + mov HV_FAST_PCI_MSG_GETMSIQ, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msg_getmsiq) + + /* %o0: devhandle + * %o1: msinum + * %o2: msiq + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msg_setmsiq) + mov HV_FAST_PCI_MSG_SETMSIQ, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msg_setmsiq) + + /* %o0: devhandle + * %o1: msinum + * %o2: &valid + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msg_getvalid) + mov HV_FAST_PCI_MSG_GETVALID, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msg_getvalid) + + /* %o0: devhandle + * %o1: msinum + * %o2: valid + * + * returns %o0: status + */ +ENTRY(pci_sun4v_msg_setvalid) + mov HV_FAST_PCI_MSG_SETVALID, %o5 + ta HV_FAST_TRAP + retl + mov %o0, %o0 +ENDPROC(pci_sun4v_msg_setvalid) + diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c new file mode 100644 index 000000000000..076cad7f9757 --- /dev/null +++ b/arch/sparc/kernel/power.c @@ -0,0 +1,75 @@ +/* power.c: Power management driver. + * + * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/reboot.h> +#include <linux/of_device.h> + +#include <asm/prom.h> +#include <asm/io.h> + +static void __iomem *power_reg; + +static irqreturn_t power_handler(int irq, void *dev_id) +{ + orderly_poweroff(true); + + /* FIXME: Check registers for status... */ + return IRQ_HANDLED; +} + +static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) +{ + if (irq == 0xffffffff) + return 0; + if (!of_find_property(dp, "button", NULL)) + return 0; + + return 1; +} + +static int __devinit power_probe(struct of_device *op, const struct of_device_id *match) +{ + struct resource *res = &op->resource[0]; + unsigned int irq= op->irqs[0]; + + power_reg = of_ioremap(res, 0, 0x4, "power"); + + printk(KERN_INFO "%s: Control reg at %lx\n", + op->node->name, res->start); + + if (has_button_interrupt(irq, op->node)) { + if (request_irq(irq, + power_handler, 0, "power", NULL) < 0) + printk(KERN_ERR "power: Cannot setup IRQ handler.\n"); + } + + return 0; +} + +static struct of_device_id __initdata power_match[] = { + { + .name = "power", + }, + {}, +}; + +static struct of_platform_driver power_driver = { + .match_table = power_match, + .probe = power_probe, + .driver = { + .name = "power", + }, +}; + +static int __init power_init(void) +{ + return of_register_driver(&power_driver, &of_platform_bus_type); +} + +device_initcall(power_init); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c new file mode 100644 index 000000000000..d5e2acef9877 --- /dev/null +++ b/arch/sparc/kernel/process_64.c @@ -0,0 +1,812 @@ +/* arch/sparc64/kernel/process.c + * + * Copyright (C) 1995, 1996, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +#include <stdarg.h> + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/delay.h> +#include <linux/compat.h> +#include <linux/tick.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/elfcore.h> +#include <linux/sysrq.h> + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/pstate.h> +#include <asm/elf.h> +#include <asm/fpumacro.h> +#include <asm/head.h> +#include <asm/cpudata.h> +#include <asm/mmu_context.h> +#include <asm/unistd.h> +#include <asm/hypervisor.h> +#include <asm/syscalls.h> +#include <asm/irq_regs.h> +#include <asm/smp.h> + +#include "kstack.h" + +static void sparc64_yield(int cpu) +{ + if (tlb_type != hypervisor) + return; + + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + + while (!need_resched() && !cpu_is_offline(cpu)) { + unsigned long pstate; + + /* Disable interrupts. */ + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "andn %0, %1, %0\n\t" + "wrpr %0, %%g0, %%pstate" + : "=&r" (pstate) + : "i" (PSTATE_IE)); + + if (!need_resched() && !cpu_is_offline(cpu)) + sun4v_cpu_yield(); + + /* Re-enable interrupts. */ + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "or %0, %1, %0\n\t" + "wrpr %0, %%g0, %%pstate" + : "=&r" (pstate) + : "i" (PSTATE_IE)); + } + + set_thread_flag(TIF_POLLING_NRFLAG); +} + +/* The idle loop on sparc64. */ +void cpu_idle(void) +{ + int cpu = smp_processor_id(); + + set_thread_flag(TIF_POLLING_NRFLAG); + + while(1) { + tick_nohz_stop_sched_tick(1); + + while (!need_resched() && !cpu_is_offline(cpu)) + sparc64_yield(cpu); + + tick_nohz_restart_sched_tick(); + + preempt_enable_no_resched(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_is_offline(cpu)) + cpu_play_dead(); +#endif + + schedule(); + preempt_disable(); + } +} + +#ifdef CONFIG_COMPAT +static void show_regwindow32(struct pt_regs *regs) +{ + struct reg_window32 __user *rw; + struct reg_window32 r_w; + mm_segment_t old_fs; + + __asm__ __volatile__ ("flushw"); + rw = compat_ptr((unsigned)regs->u_regs[14]); + old_fs = get_fs(); + set_fs (USER_DS); + if (copy_from_user (&r_w, rw, sizeof(r_w))) { + set_fs (old_fs); + return; + } + + set_fs (old_fs); + printk("l0: %08x l1: %08x l2: %08x l3: %08x " + "l4: %08x l5: %08x l6: %08x l7: %08x\n", + r_w.locals[0], r_w.locals[1], r_w.locals[2], r_w.locals[3], + r_w.locals[4], r_w.locals[5], r_w.locals[6], r_w.locals[7]); + printk("i0: %08x i1: %08x i2: %08x i3: %08x " + "i4: %08x i5: %08x i6: %08x i7: %08x\n", + r_w.ins[0], r_w.ins[1], r_w.ins[2], r_w.ins[3], + r_w.ins[4], r_w.ins[5], r_w.ins[6], r_w.ins[7]); +} +#else +#define show_regwindow32(regs) do { } while (0) +#endif + +static void show_regwindow(struct pt_regs *regs) +{ + struct reg_window __user *rw; + struct reg_window *rwk; + struct reg_window r_w; + mm_segment_t old_fs; + + if ((regs->tstate & TSTATE_PRIV) || !(test_thread_flag(TIF_32BIT))) { + __asm__ __volatile__ ("flushw"); + rw = (struct reg_window __user *) + (regs->u_regs[14] + STACK_BIAS); + rwk = (struct reg_window *) + (regs->u_regs[14] + STACK_BIAS); + if (!(regs->tstate & TSTATE_PRIV)) { + old_fs = get_fs(); + set_fs (USER_DS); + if (copy_from_user (&r_w, rw, sizeof(r_w))) { + set_fs (old_fs); + return; + } + rwk = &r_w; + set_fs (old_fs); + } + } else { + show_regwindow32(regs); + return; + } + printk("l0: %016lx l1: %016lx l2: %016lx l3: %016lx\n", + rwk->locals[0], rwk->locals[1], rwk->locals[2], rwk->locals[3]); + printk("l4: %016lx l5: %016lx l6: %016lx l7: %016lx\n", + rwk->locals[4], rwk->locals[5], rwk->locals[6], rwk->locals[7]); + printk("i0: %016lx i1: %016lx i2: %016lx i3: %016lx\n", + rwk->ins[0], rwk->ins[1], rwk->ins[2], rwk->ins[3]); + printk("i4: %016lx i5: %016lx i6: %016lx i7: %016lx\n", + rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]); + if (regs->tstate & TSTATE_PRIV) + printk("I7: <%pS>\n", (void *) rwk->ins[7]); +} + +void show_regs(struct pt_regs *regs) +{ + printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, + regs->tpc, regs->tnpc, regs->y, print_tainted()); + printk("TPC: <%pS>\n", (void *) regs->tpc); + printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n", + regs->u_regs[0], regs->u_regs[1], regs->u_regs[2], + regs->u_regs[3]); + printk("g4: %016lx g5: %016lx g6: %016lx g7: %016lx\n", + regs->u_regs[4], regs->u_regs[5], regs->u_regs[6], + regs->u_regs[7]); + printk("o0: %016lx o1: %016lx o2: %016lx o3: %016lx\n", + regs->u_regs[8], regs->u_regs[9], regs->u_regs[10], + regs->u_regs[11]); + printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n", + regs->u_regs[12], regs->u_regs[13], regs->u_regs[14], + regs->u_regs[15]); + printk("RPC: <%pS>\n", (void *) regs->u_regs[15]); + show_regwindow(regs); +} + +struct global_reg_snapshot global_reg_snapshot[NR_CPUS]; +static DEFINE_SPINLOCK(global_reg_snapshot_lock); + +static void __global_reg_self(struct thread_info *tp, struct pt_regs *regs, + int this_cpu) +{ + flushw_all(); + + global_reg_snapshot[this_cpu].tstate = regs->tstate; + global_reg_snapshot[this_cpu].tpc = regs->tpc; + global_reg_snapshot[this_cpu].tnpc = regs->tnpc; + global_reg_snapshot[this_cpu].o7 = regs->u_regs[UREG_I7]; + + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *rw; + + rw = (struct reg_window *) + (regs->u_regs[UREG_FP] + STACK_BIAS); + if (kstack_valid(tp, (unsigned long) rw)) { + global_reg_snapshot[this_cpu].i7 = rw->ins[7]; + rw = (struct reg_window *) + (rw->ins[6] + STACK_BIAS); + if (kstack_valid(tp, (unsigned long) rw)) + global_reg_snapshot[this_cpu].rpc = rw->ins[7]; + } + } else { + global_reg_snapshot[this_cpu].i7 = 0; + global_reg_snapshot[this_cpu].rpc = 0; + } + global_reg_snapshot[this_cpu].thread = tp; +} + +/* In order to avoid hangs we do not try to synchronize with the + * global register dump client cpus. The last store they make is to + * the thread pointer, so do a short poll waiting for that to become + * non-NULL. + */ +static void __global_reg_poll(struct global_reg_snapshot *gp) +{ + int limit = 0; + + while (!gp->thread && ++limit < 100) { + barrier(); + udelay(1); + } +} + +void __trigger_all_cpu_backtrace(void) +{ + struct thread_info *tp = current_thread_info(); + struct pt_regs *regs = get_irq_regs(); + unsigned long flags; + int this_cpu, cpu; + + if (!regs) + regs = tp->kregs; + + spin_lock_irqsave(&global_reg_snapshot_lock, flags); + + memset(global_reg_snapshot, 0, sizeof(global_reg_snapshot)); + + this_cpu = raw_smp_processor_id(); + + __global_reg_self(tp, regs, this_cpu); + + smp_fetch_global_regs(); + + for_each_online_cpu(cpu) { + struct global_reg_snapshot *gp = &global_reg_snapshot[cpu]; + + __global_reg_poll(gp); + + tp = gp->thread; + printk("%c CPU[%3d]: TSTATE[%016lx] TPC[%016lx] TNPC[%016lx] TASK[%s:%d]\n", + (cpu == this_cpu ? '*' : ' '), cpu, + gp->tstate, gp->tpc, gp->tnpc, + ((tp && tp->task) ? tp->task->comm : "NULL"), + ((tp && tp->task) ? tp->task->pid : -1)); + + if (gp->tstate & TSTATE_PRIV) { + printk(" TPC[%pS] O7[%pS] I7[%pS] RPC[%pS]\n", + (void *) gp->tpc, + (void *) gp->o7, + (void *) gp->i7, + (void *) gp->rpc); + } else { + printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n", + gp->tpc, gp->o7, gp->i7, gp->rpc); + } + } + + memset(global_reg_snapshot, 0, sizeof(global_reg_snapshot)); + + spin_unlock_irqrestore(&global_reg_snapshot_lock, flags); +} + +#ifdef CONFIG_MAGIC_SYSRQ + +static void sysrq_handle_globreg(int key, struct tty_struct *tty) +{ + __trigger_all_cpu_backtrace(); +} + +static struct sysrq_key_op sparc_globalreg_op = { + .handler = sysrq_handle_globreg, + .help_msg = "Globalregs", + .action_msg = "Show Global CPU Regs", +}; + +static int __init sparc_globreg_init(void) +{ + return register_sysrq_key('y', &sparc_globalreg_op); +} + +core_initcall(sparc_globreg_init); + +#endif + +unsigned long thread_saved_pc(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + unsigned long ret = 0xdeadbeefUL; + + if (ti && ti->ksp) { + unsigned long *sp; + sp = (unsigned long *)(ti->ksp + STACK_BIAS); + if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL && + sp[14]) { + unsigned long *fp; + fp = (unsigned long *)(sp[14] + STACK_BIAS); + if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL) + ret = fp[15]; + } + } + return ret; +} + +/* Free current thread data structures etc.. */ +void exit_thread(void) +{ + struct thread_info *t = current_thread_info(); + + if (t->utraps) { + if (t->utraps[0] < 2) + kfree (t->utraps); + else + t->utraps[0]--; + } + + if (test_and_clear_thread_flag(TIF_PERFCTR)) { + t->user_cntd0 = t->user_cntd1 = NULL; + t->pcr_reg = 0; + write_pcr(0); + } +} + +void flush_thread(void) +{ + struct thread_info *t = current_thread_info(); + struct mm_struct *mm; + + if (test_ti_thread_flag(t, TIF_ABI_PENDING)) { + clear_ti_thread_flag(t, TIF_ABI_PENDING); + if (test_ti_thread_flag(t, TIF_32BIT)) + clear_ti_thread_flag(t, TIF_32BIT); + else + set_ti_thread_flag(t, TIF_32BIT); + } + + mm = t->task->mm; + if (mm) + tsb_context_switch(mm); + + set_thread_wsaved(0); + + /* Turn off performance counters if on. */ + if (test_and_clear_thread_flag(TIF_PERFCTR)) { + t->user_cntd0 = t->user_cntd1 = NULL; + t->pcr_reg = 0; + write_pcr(0); + } + + /* Clear FPU register state. */ + t->fpsaved[0] = 0; + + if (get_thread_current_ds() != ASI_AIUS) + set_fs(USER_DS); +} + +/* It's a bit more tricky when 64-bit tasks are involved... */ +static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) +{ + unsigned long fp, distance, rval; + + if (!(test_thread_flag(TIF_32BIT))) { + csp += STACK_BIAS; + psp += STACK_BIAS; + __get_user(fp, &(((struct reg_window __user *)psp)->ins[6])); + fp += STACK_BIAS; + } else + __get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6])); + + /* Now 8-byte align the stack as this is mandatory in the + * Sparc ABI due to how register windows work. This hides + * the restriction from thread libraries etc. -DaveM + */ + csp &= ~7UL; + + distance = fp - psp; + rval = (csp - distance); + if (copy_in_user((void __user *) rval, (void __user *) psp, distance)) + rval = 0; + else if (test_thread_flag(TIF_32BIT)) { + if (put_user(((u32)csp), + &(((struct reg_window32 __user *)rval)->ins[6]))) + rval = 0; + } else { + if (put_user(((u64)csp - STACK_BIAS), + &(((struct reg_window __user *)rval)->ins[6]))) + rval = 0; + else + rval = rval - STACK_BIAS; + } + + return rval; +} + +/* Standard stuff. */ +static inline void shift_window_buffer(int first_win, int last_win, + struct thread_info *t) +{ + int i; + + for (i = first_win; i < last_win; i++) { + t->rwbuf_stkptrs[i] = t->rwbuf_stkptrs[i+1]; + memcpy(&t->reg_window[i], &t->reg_window[i+1], + sizeof(struct reg_window)); + } +} + +void synchronize_user_stack(void) +{ + struct thread_info *t = current_thread_info(); + unsigned long window; + + flush_user_windows(); + if ((window = get_thread_wsaved()) != 0) { + int winsize = sizeof(struct reg_window); + int bias = 0; + + if (test_thread_flag(TIF_32BIT)) + winsize = sizeof(struct reg_window32); + else + bias = STACK_BIAS; + + window -= 1; + do { + unsigned long sp = (t->rwbuf_stkptrs[window] + bias); + struct reg_window *rwin = &t->reg_window[window]; + + if (!copy_to_user((char __user *)sp, rwin, winsize)) { + shift_window_buffer(window, get_thread_wsaved() - 1, t); + set_thread_wsaved(get_thread_wsaved() - 1); + } + } while (window--); + } +} + +static void stack_unaligned(unsigned long sp) +{ + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *) sp; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + +void fault_in_user_windows(void) +{ + struct thread_info *t = current_thread_info(); + unsigned long window; + int winsize = sizeof(struct reg_window); + int bias = 0; + + if (test_thread_flag(TIF_32BIT)) + winsize = sizeof(struct reg_window32); + else + bias = STACK_BIAS; + + flush_user_windows(); + window = get_thread_wsaved(); + + if (likely(window != 0)) { + window -= 1; + do { + unsigned long sp = (t->rwbuf_stkptrs[window] + bias); + struct reg_window *rwin = &t->reg_window[window]; + + if (unlikely(sp & 0x7UL)) + stack_unaligned(sp); + + if (unlikely(copy_to_user((char __user *)sp, + rwin, winsize))) + goto barf; + } while (window--); + } + set_thread_wsaved(0); + return; + +barf: + set_thread_wsaved(window + 1); + do_exit(SIGILL); +} + +asmlinkage long sparc_do_fork(unsigned long clone_flags, + unsigned long stack_start, + struct pt_regs *regs, + unsigned long stack_size) +{ + int __user *parent_tid_ptr, *child_tid_ptr; + unsigned long orig_i1 = regs->u_regs[UREG_I1]; + long ret; + +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_32BIT)) { + parent_tid_ptr = compat_ptr(regs->u_regs[UREG_I2]); + child_tid_ptr = compat_ptr(regs->u_regs[UREG_I4]); + } else +#endif + { + parent_tid_ptr = (int __user *) regs->u_regs[UREG_I2]; + child_tid_ptr = (int __user *) regs->u_regs[UREG_I4]; + } + + ret = do_fork(clone_flags, stack_start, + regs, stack_size, + parent_tid_ptr, child_tid_ptr); + + /* If we get an error and potentially restart the system + * call, we're screwed because copy_thread() clobbered + * the parent's %o1. So detect that case and restore it + * here. + */ + if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK) + regs->u_regs[UREG_I1] = orig_i1; + + return ret; +} + +/* Copy a Sparc thread. The fork() return value conventions + * under SunOS are nothing short of bletcherous: + * Parent --> %o0 == childs pid, %o1 == 0 + * Child --> %o0 == parents pid, %o1 == 1 + */ +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) +{ + struct thread_info *t = task_thread_info(p); + struct sparc_stackf *parent_sf; + unsigned long child_stack_sz; + char *child_trap_frame; + int kernel_thread; + + kernel_thread = (regs->tstate & TSTATE_PRIV) ? 1 : 0; + parent_sf = ((struct sparc_stackf *) regs) - 1; + + /* Calculate offset to stack_frame & pt_regs */ + child_stack_sz = ((STACKFRAME_SZ + TRACEREG_SZ) + + (kernel_thread ? STACKFRAME_SZ : 0)); + child_trap_frame = (task_stack_page(p) + + (THREAD_SIZE - child_stack_sz)); + memcpy(child_trap_frame, parent_sf, child_stack_sz); + + t->flags = (t->flags & ~((0xffUL << TI_FLAG_CWP_SHIFT) | + (0xffUL << TI_FLAG_CURRENT_DS_SHIFT))) | + (((regs->tstate + 1) & TSTATE_CWP) << TI_FLAG_CWP_SHIFT); + t->new_child = 1; + t->ksp = ((unsigned long) child_trap_frame) - STACK_BIAS; + t->kregs = (struct pt_regs *) (child_trap_frame + + sizeof(struct sparc_stackf)); + t->fpsaved[0] = 0; + + if (kernel_thread) { + struct sparc_stackf *child_sf = (struct sparc_stackf *) + (child_trap_frame + (STACKFRAME_SZ + TRACEREG_SZ)); + + /* Zero terminate the stack backtrace. */ + child_sf->fp = NULL; + t->kregs->u_regs[UREG_FP] = + ((unsigned long) child_sf) - STACK_BIAS; + + /* Special case, if we are spawning a kernel thread from + * a userspace task (usermode helper, NFS or similar), we + * must disable performance counters in the child because + * the address space and protection realm are changing. + */ + if (t->flags & _TIF_PERFCTR) { + t->user_cntd0 = t->user_cntd1 = NULL; + t->pcr_reg = 0; + t->flags &= ~_TIF_PERFCTR; + } + t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT); + t->kregs->u_regs[UREG_G6] = (unsigned long) t; + t->kregs->u_regs[UREG_G4] = (unsigned long) t->task; + } else { + if (t->flags & _TIF_32BIT) { + sp &= 0x00000000ffffffffUL; + regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; + } + t->kregs->u_regs[UREG_FP] = sp; + t->flags |= ((long)ASI_AIUS << TI_FLAG_CURRENT_DS_SHIFT); + if (sp != regs->u_regs[UREG_FP]) { + unsigned long csp; + + csp = clone_stackframe(sp, regs->u_regs[UREG_FP]); + if (!csp) + return -EFAULT; + t->kregs->u_regs[UREG_FP] = csp; + } + if (t->utraps) + t->utraps[0]++; + } + + /* Set the return value for the child. */ + t->kregs->u_regs[UREG_I0] = current->pid; + t->kregs->u_regs[UREG_I1] = 1; + + /* Set the second return value for the parent. */ + regs->u_regs[UREG_I1] = 0; + + if (clone_flags & CLONE_SETTLS) + t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3]; + + return 0; +} + +/* + * This is the mechanism for creating a new kernel thread. + * + * NOTE! Only a kernel-only process(ie the swapper or direct descendants + * who haven't done an "execve()") should use this: it will work within + * a system call from a "real" process, but the process memory space will + * not be freed until both the parent and the child have exited. + */ +pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval; + + /* If the parent runs before fn(arg) is called by the child, + * the input registers of this function can be clobbered. + * So we stash 'fn' and 'arg' into global registers which + * will not be modified by the parent. + */ + __asm__ __volatile__("mov %4, %%g2\n\t" /* Save FN into global */ + "mov %5, %%g3\n\t" /* Save ARG into global */ + "mov %1, %%g1\n\t" /* Clone syscall nr. */ + "mov %2, %%o0\n\t" /* Clone flags. */ + "mov 0, %%o1\n\t" /* usp arg == 0 */ + "t 0x6d\n\t" /* Linux/Sparc clone(). */ + "brz,a,pn %%o1, 1f\n\t" /* Parent, just return. */ + " mov %%o0, %0\n\t" + "jmpl %%g2, %%o7\n\t" /* Call the function. */ + " mov %%g3, %%o0\n\t" /* Set arg in delay. */ + "mov %3, %%g1\n\t" + "t 0x6d\n\t" /* Linux/Sparc exit(). */ + /* Notreached by child. */ + "1:" : + "=r" (retval) : + "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), + "i" (__NR_exit), "r" (fn), "r" (arg) : + "g1", "g2", "g3", "o0", "o1", "memory", "cc"); + return retval; +} + +typedef struct { + union { + unsigned int pr_regs[32]; + unsigned long pr_dregs[16]; + } pr_fr; + unsigned int __unused; + unsigned int pr_fsr; + unsigned char pr_qcnt; + unsigned char pr_q_entrysize; + unsigned char pr_en; + unsigned int pr_q[64]; +} elf_fpregset_t32; + +/* + * fill in the fpu structure for a core dump. + */ +int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs) +{ + unsigned long *kfpregs = current_thread_info()->fpregs; + unsigned long fprs = current_thread_info()->fpsaved[0]; + + if (test_thread_flag(TIF_32BIT)) { + elf_fpregset_t32 *fpregs32 = (elf_fpregset_t32 *)fpregs; + + if (fprs & FPRS_DL) + memcpy(&fpregs32->pr_fr.pr_regs[0], kfpregs, + sizeof(unsigned int) * 32); + else + memset(&fpregs32->pr_fr.pr_regs[0], 0, + sizeof(unsigned int) * 32); + fpregs32->pr_qcnt = 0; + fpregs32->pr_q_entrysize = 8; + memset(&fpregs32->pr_q[0], 0, + (sizeof(unsigned int) * 64)); + if (fprs & FPRS_FEF) { + fpregs32->pr_fsr = (unsigned int) current_thread_info()->xfsr[0]; + fpregs32->pr_en = 1; + } else { + fpregs32->pr_fsr = 0; + fpregs32->pr_en = 0; + } + } else { + if(fprs & FPRS_DL) + memcpy(&fpregs->pr_regs[0], kfpregs, + sizeof(unsigned int) * 32); + else + memset(&fpregs->pr_regs[0], 0, + sizeof(unsigned int) * 32); + if(fprs & FPRS_DU) + memcpy(&fpregs->pr_regs[16], kfpregs+16, + sizeof(unsigned int) * 32); + else + memset(&fpregs->pr_regs[16], 0, + sizeof(unsigned int) * 32); + if(fprs & FPRS_FEF) { + fpregs->pr_fsr = current_thread_info()->xfsr[0]; + fpregs->pr_gsr = current_thread_info()->gsr[0]; + } else { + fpregs->pr_fsr = fpregs->pr_gsr = 0; + } + fpregs->pr_fprs = fprs; + } + return 1; +} + +/* + * sparc_execve() executes a new program after the asm stub has set + * things up for us. This should basically do what I want it to. + */ +asmlinkage int sparc_execve(struct pt_regs *regs) +{ + int error, base = 0; + char *filename; + + /* User register window flush is done by entry.S */ + + /* Check for indirect call. */ + if (regs->u_regs[UREG_G1] == 0) + base = 1; + + filename = getname((char __user *)regs->u_regs[base + UREG_I0]); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, + (char __user * __user *) + regs->u_regs[base + UREG_I1], + (char __user * __user *) + regs->u_regs[base + UREG_I2], regs); + putname(filename); + if (!error) { + fprs_write(0); + current_thread_info()->xfsr[0] = 0; + current_thread_info()->fpsaved[0] = 0; + regs->tstate &= ~TSTATE_PEF; + } +out: + return error; +} + +unsigned long get_wchan(struct task_struct *task) +{ + unsigned long pc, fp, bias = 0; + struct thread_info *tp; + struct reg_window *rw; + unsigned long ret = 0; + int count = 0; + + if (!task || task == current || + task->state == TASK_RUNNING) + goto out; + + tp = task_thread_info(task); + bias = STACK_BIAS; + fp = task_thread_info(task)->ksp + bias; + + do { + if (!kstack_valid(tp, fp)) + break; + rw = (struct reg_window *) fp; + pc = rw->ins[7]; + if (!in_sched_functions(pc)) { + ret = pc; + goto out; + } + fp = rw->ins[6] + bias; + } while (++count < 16); + +out: + return ret; +} diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c new file mode 100644 index 000000000000..dbba82f9b142 --- /dev/null +++ b/arch/sparc/kernel/prom_64.c @@ -0,0 +1,1684 @@ +/* + * Procedures for creating, accessing and interpreting the device tree. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. + * {engebret|bergner}@us.ibm.com + * + * Adapted for sparc64 by David S. Miller davem@davemloft.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/lmb.h> +#include <linux/of_device.h> + +#include <asm/prom.h> +#include <asm/oplib.h> +#include <asm/irq.h> +#include <asm/asi.h> +#include <asm/upa.h> +#include <asm/smp.h> + +extern struct device_node *allnodes; /* temporary while merging */ + +extern rwlock_t devtree_lock; /* temporary while merging */ + +struct device_node *of_find_node_by_phandle(phandle handle) +{ + struct device_node *np; + + for (np = allnodes; np; np = np->allnext) + if (np->node == handle) + break; + + return np; +} +EXPORT_SYMBOL(of_find_node_by_phandle); + +int of_getintprop_default(struct device_node *np, const char *name, int def) +{ + struct property *prop; + int len; + + prop = of_find_property(np, name, &len); + if (!prop || len != 4) + return def; + + return *(int *) prop->value; +} +EXPORT_SYMBOL(of_getintprop_default); + +DEFINE_MUTEX(of_set_property_mutex); +EXPORT_SYMBOL(of_set_property_mutex); + +int of_set_property(struct device_node *dp, const char *name, void *val, int len) +{ + struct property **prevp; + void *new_val; + int err; + + new_val = kmalloc(len, GFP_KERNEL); + if (!new_val) + return -ENOMEM; + + memcpy(new_val, val, len); + + err = -ENODEV; + + write_lock(&devtree_lock); + prevp = &dp->properties; + while (*prevp) { + struct property *prop = *prevp; + + if (!strcasecmp(prop->name, name)) { + void *old_val = prop->value; + int ret; + + mutex_lock(&of_set_property_mutex); + ret = prom_setprop(dp->node, name, val, len); + mutex_unlock(&of_set_property_mutex); + + err = -EINVAL; + if (ret >= 0) { + prop->value = new_val; + prop->length = len; + + if (OF_IS_DYNAMIC(prop)) + kfree(old_val); + + OF_MARK_DYNAMIC(prop); + + err = 0; + } + break; + } + prevp = &(*prevp)->next; + } + write_unlock(&devtree_lock); + + /* XXX Upate procfs if necessary... */ + + return err; +} +EXPORT_SYMBOL(of_set_property); + +int of_find_in_proplist(const char *list, const char *match, int len) +{ + while (len > 0) { + int l; + + if (!strcmp(list, match)) + return 1; + l = strlen(list) + 1; + list += l; + len -= l; + } + return 0; +} +EXPORT_SYMBOL(of_find_in_proplist); + +static unsigned int prom_early_allocated __initdata; + +static void * __init prom_early_alloc(unsigned long size) +{ + unsigned long paddr = lmb_alloc(size, SMP_CACHE_BYTES); + void *ret; + + if (!paddr) { + prom_printf("prom_early_alloc(%lu) failed\n"); + prom_halt(); + } + + ret = __va(paddr); + memset(ret, 0, size); + prom_early_allocated += size; + + return ret; +} + +#ifdef CONFIG_PCI +/* PSYCHO interrupt mapping support. */ +#define PSYCHO_IMAP_A_SLOT0 0x0c00UL +#define PSYCHO_IMAP_B_SLOT0 0x0c20UL +static unsigned long psycho_pcislot_imap_offset(unsigned long ino) +{ + unsigned int bus = (ino & 0x10) >> 4; + unsigned int slot = (ino & 0x0c) >> 2; + + if (bus == 0) + return PSYCHO_IMAP_A_SLOT0 + (slot * 8); + else + return PSYCHO_IMAP_B_SLOT0 + (slot * 8); +} + +#define PSYCHO_OBIO_IMAP_BASE 0x1000UL + +#define PSYCHO_ONBOARD_IRQ_BASE 0x20 +#define psycho_onboard_imap_offset(__ino) \ + (PSYCHO_OBIO_IMAP_BASE + (((__ino) & 0x1f) << 3)) + +#define PSYCHO_ICLR_A_SLOT0 0x1400UL +#define PSYCHO_ICLR_SCSI 0x1800UL + +#define psycho_iclr_offset(ino) \ + ((ino & 0x20) ? (PSYCHO_ICLR_SCSI + (((ino) & 0x1f) << 3)) : \ + (PSYCHO_ICLR_A_SLOT0 + (((ino) & 0x1f)<<3))) + +static unsigned int psycho_irq_build(struct device_node *dp, + unsigned int ino, + void *_data) +{ + unsigned long controller_regs = (unsigned long) _data; + unsigned long imap, iclr; + unsigned long imap_off, iclr_off; + int inofixup = 0; + + ino &= 0x3f; + if (ino < PSYCHO_ONBOARD_IRQ_BASE) { + /* PCI slot */ + imap_off = psycho_pcislot_imap_offset(ino); + } else { + /* Onboard device */ + imap_off = psycho_onboard_imap_offset(ino); + } + + /* Now build the IRQ bucket. */ + imap = controller_regs + imap_off; + + iclr_off = psycho_iclr_offset(ino); + iclr = controller_regs + iclr_off; + + if ((ino & 0x20) == 0) + inofixup = ino & 0x03; + + return build_irq(inofixup, iclr, imap); +} + +static void __init psycho_irq_trans_init(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = psycho_irq_build; + + regs = of_get_property(dp, "reg", NULL); + dp->irq_trans->data = (void *) regs[2].phys_addr; +} + +#define sabre_read(__reg) \ +({ u64 __ret; \ + __asm__ __volatile__("ldxa [%1] %2, %0" \ + : "=r" (__ret) \ + : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \ + : "memory"); \ + __ret; \ +}) + +struct sabre_irq_data { + unsigned long controller_regs; + unsigned int pci_first_busno; +}; +#define SABRE_CONFIGSPACE 0x001000000UL +#define SABRE_WRSYNC 0x1c20UL + +#define SABRE_CONFIG_BASE(CONFIG_SPACE) \ + (CONFIG_SPACE | (1UL << 24)) +#define SABRE_CONFIG_ENCODE(BUS, DEVFN, REG) \ + (((unsigned long)(BUS) << 16) | \ + ((unsigned long)(DEVFN) << 8) | \ + ((unsigned long)(REG))) + +/* When a device lives behind a bridge deeper in the PCI bus topology + * than APB, a special sequence must run to make sure all pending DMA + * transfers at the time of IRQ delivery are visible in the coherency + * domain by the cpu. This sequence is to perform a read on the far + * side of the non-APB bridge, then perform a read of Sabre's DMA + * write-sync register. + */ +static void sabre_wsync_handler(unsigned int ino, void *_arg1, void *_arg2) +{ + unsigned int phys_hi = (unsigned int) (unsigned long) _arg1; + struct sabre_irq_data *irq_data = _arg2; + unsigned long controller_regs = irq_data->controller_regs; + unsigned long sync_reg = controller_regs + SABRE_WRSYNC; + unsigned long config_space = controller_regs + SABRE_CONFIGSPACE; + unsigned int bus, devfn; + u16 _unused; + + config_space = SABRE_CONFIG_BASE(config_space); + + bus = (phys_hi >> 16) & 0xff; + devfn = (phys_hi >> 8) & 0xff; + + config_space |= SABRE_CONFIG_ENCODE(bus, devfn, 0x00); + + __asm__ __volatile__("membar #Sync\n\t" + "lduha [%1] %2, %0\n\t" + "membar #Sync" + : "=r" (_unused) + : "r" ((u16 *) config_space), + "i" (ASI_PHYS_BYPASS_EC_E_L) + : "memory"); + + sabre_read(sync_reg); +} + +#define SABRE_IMAP_A_SLOT0 0x0c00UL +#define SABRE_IMAP_B_SLOT0 0x0c20UL +#define SABRE_ICLR_A_SLOT0 0x1400UL +#define SABRE_ICLR_B_SLOT0 0x1480UL +#define SABRE_ICLR_SCSI 0x1800UL +#define SABRE_ICLR_ETH 0x1808UL +#define SABRE_ICLR_BPP 0x1810UL +#define SABRE_ICLR_AU_REC 0x1818UL +#define SABRE_ICLR_AU_PLAY 0x1820UL +#define SABRE_ICLR_PFAIL 0x1828UL +#define SABRE_ICLR_KMS 0x1830UL +#define SABRE_ICLR_FLPY 0x1838UL +#define SABRE_ICLR_SHW 0x1840UL +#define SABRE_ICLR_KBD 0x1848UL +#define SABRE_ICLR_MS 0x1850UL +#define SABRE_ICLR_SER 0x1858UL +#define SABRE_ICLR_UE 0x1870UL +#define SABRE_ICLR_CE 0x1878UL +#define SABRE_ICLR_PCIERR 0x1880UL + +static unsigned long sabre_pcislot_imap_offset(unsigned long ino) +{ + unsigned int bus = (ino & 0x10) >> 4; + unsigned int slot = (ino & 0x0c) >> 2; + + if (bus == 0) + return SABRE_IMAP_A_SLOT0 + (slot * 8); + else + return SABRE_IMAP_B_SLOT0 + (slot * 8); +} + +#define SABRE_OBIO_IMAP_BASE 0x1000UL +#define SABRE_ONBOARD_IRQ_BASE 0x20 +#define sabre_onboard_imap_offset(__ino) \ + (SABRE_OBIO_IMAP_BASE + (((__ino) & 0x1f) << 3)) + +#define sabre_iclr_offset(ino) \ + ((ino & 0x20) ? (SABRE_ICLR_SCSI + (((ino) & 0x1f) << 3)) : \ + (SABRE_ICLR_A_SLOT0 + (((ino) & 0x1f)<<3))) + +static int sabre_device_needs_wsync(struct device_node *dp) +{ + struct device_node *parent = dp->parent; + const char *parent_model, *parent_compat; + + /* This traversal up towards the root is meant to + * handle two cases: + * + * 1) non-PCI bus sitting under PCI, such as 'ebus' + * 2) the PCI controller interrupts themselves, which + * will use the sabre_irq_build but do not need + * the DMA synchronization handling + */ + while (parent) { + if (!strcmp(parent->type, "pci")) + break; + parent = parent->parent; + } + + if (!parent) + return 0; + + parent_model = of_get_property(parent, + "model", NULL); + if (parent_model && + (!strcmp(parent_model, "SUNW,sabre") || + !strcmp(parent_model, "SUNW,simba"))) + return 0; + + parent_compat = of_get_property(parent, + "compatible", NULL); + if (parent_compat && + (!strcmp(parent_compat, "pci108e,a000") || + !strcmp(parent_compat, "pci108e,a001"))) + return 0; + + return 1; +} + +static unsigned int sabre_irq_build(struct device_node *dp, + unsigned int ino, + void *_data) +{ + struct sabre_irq_data *irq_data = _data; + unsigned long controller_regs = irq_data->controller_regs; + const struct linux_prom_pci_registers *regs; + unsigned long imap, iclr; + unsigned long imap_off, iclr_off; + int inofixup = 0; + int virt_irq; + + ino &= 0x3f; + if (ino < SABRE_ONBOARD_IRQ_BASE) { + /* PCI slot */ + imap_off = sabre_pcislot_imap_offset(ino); + } else { + /* onboard device */ + imap_off = sabre_onboard_imap_offset(ino); + } + + /* Now build the IRQ bucket. */ + imap = controller_regs + imap_off; + + iclr_off = sabre_iclr_offset(ino); + iclr = controller_regs + iclr_off; + + if ((ino & 0x20) == 0) + inofixup = ino & 0x03; + + virt_irq = build_irq(inofixup, iclr, imap); + + /* If the parent device is a PCI<->PCI bridge other than + * APB, we have to install a pre-handler to ensure that + * all pending DMA is drained before the interrupt handler + * is run. + */ + regs = of_get_property(dp, "reg", NULL); + if (regs && sabre_device_needs_wsync(dp)) { + irq_install_pre_handler(virt_irq, + sabre_wsync_handler, + (void *) (long) regs->phys_hi, + (void *) irq_data); + } + + return virt_irq; +} + +static void __init sabre_irq_trans_init(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + struct sabre_irq_data *irq_data; + const u32 *busrange; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = sabre_irq_build; + + irq_data = prom_early_alloc(sizeof(struct sabre_irq_data)); + + regs = of_get_property(dp, "reg", NULL); + irq_data->controller_regs = regs[0].phys_addr; + + busrange = of_get_property(dp, "bus-range", NULL); + irq_data->pci_first_busno = busrange[0]; + + dp->irq_trans->data = irq_data; +} + +/* SCHIZO interrupt mapping support. Unlike Psycho, for this controller the + * imap/iclr registers are per-PBM. + */ +#define SCHIZO_IMAP_BASE 0x1000UL +#define SCHIZO_ICLR_BASE 0x1400UL + +static unsigned long schizo_imap_offset(unsigned long ino) +{ + return SCHIZO_IMAP_BASE + (ino * 8UL); +} + +static unsigned long schizo_iclr_offset(unsigned long ino) +{ + return SCHIZO_ICLR_BASE + (ino * 8UL); +} + +static unsigned long schizo_ino_to_iclr(unsigned long pbm_regs, + unsigned int ino) +{ + + return pbm_regs + schizo_iclr_offset(ino); +} + +static unsigned long schizo_ino_to_imap(unsigned long pbm_regs, + unsigned int ino) +{ + return pbm_regs + schizo_imap_offset(ino); +} + +#define schizo_read(__reg) \ +({ u64 __ret; \ + __asm__ __volatile__("ldxa [%1] %2, %0" \ + : "=r" (__ret) \ + : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \ + : "memory"); \ + __ret; \ +}) +#define schizo_write(__reg, __val) \ + __asm__ __volatile__("stxa %0, [%1] %2" \ + : /* no outputs */ \ + : "r" (__val), "r" (__reg), \ + "i" (ASI_PHYS_BYPASS_EC_E) \ + : "memory") + +static void tomatillo_wsync_handler(unsigned int ino, void *_arg1, void *_arg2) +{ + unsigned long sync_reg = (unsigned long) _arg2; + u64 mask = 1UL << (ino & IMAP_INO); + u64 val; + int limit; + + schizo_write(sync_reg, mask); + + limit = 100000; + val = 0; + while (--limit) { + val = schizo_read(sync_reg); + if (!(val & mask)) + break; + } + if (limit <= 0) { + printk("tomatillo_wsync_handler: DMA won't sync [%lx:%lx]\n", + val, mask); + } + + if (_arg1) { + static unsigned char cacheline[64] + __attribute__ ((aligned (64))); + + __asm__ __volatile__("rd %%fprs, %0\n\t" + "or %0, %4, %1\n\t" + "wr %1, 0x0, %%fprs\n\t" + "stda %%f0, [%5] %6\n\t" + "wr %0, 0x0, %%fprs\n\t" + "membar #Sync" + : "=&r" (mask), "=&r" (val) + : "0" (mask), "1" (val), + "i" (FPRS_FEF), "r" (&cacheline[0]), + "i" (ASI_BLK_COMMIT_P)); + } +} + +struct schizo_irq_data { + unsigned long pbm_regs; + unsigned long sync_reg; + u32 portid; + int chip_version; +}; + +static unsigned int schizo_irq_build(struct device_node *dp, + unsigned int ino, + void *_data) +{ + struct schizo_irq_data *irq_data = _data; + unsigned long pbm_regs = irq_data->pbm_regs; + unsigned long imap, iclr; + int ign_fixup; + int virt_irq; + int is_tomatillo; + + ino &= 0x3f; + + /* Now build the IRQ bucket. */ + imap = schizo_ino_to_imap(pbm_regs, ino); + iclr = schizo_ino_to_iclr(pbm_regs, ino); + + /* On Schizo, no inofixup occurs. This is because each + * INO has it's own IMAP register. On Psycho and Sabre + * there is only one IMAP register for each PCI slot even + * though four different INOs can be generated by each + * PCI slot. + * + * But, for JBUS variants (essentially, Tomatillo), we have + * to fixup the lowest bit of the interrupt group number. + */ + ign_fixup = 0; + + is_tomatillo = (irq_data->sync_reg != 0UL); + + if (is_tomatillo) { + if (irq_data->portid & 1) + ign_fixup = (1 << 6); + } + + virt_irq = build_irq(ign_fixup, iclr, imap); + + if (is_tomatillo) { + irq_install_pre_handler(virt_irq, + tomatillo_wsync_handler, + ((irq_data->chip_version <= 4) ? + (void *) 1 : (void *) 0), + (void *) irq_data->sync_reg); + } + + return virt_irq; +} + +static void __init __schizo_irq_trans_init(struct device_node *dp, + int is_tomatillo) +{ + const struct linux_prom64_registers *regs; + struct schizo_irq_data *irq_data; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = schizo_irq_build; + + irq_data = prom_early_alloc(sizeof(struct schizo_irq_data)); + + regs = of_get_property(dp, "reg", NULL); + dp->irq_trans->data = irq_data; + + irq_data->pbm_regs = regs[0].phys_addr; + if (is_tomatillo) + irq_data->sync_reg = regs[3].phys_addr + 0x1a18UL; + else + irq_data->sync_reg = 0UL; + irq_data->portid = of_getintprop_default(dp, "portid", 0); + irq_data->chip_version = of_getintprop_default(dp, "version#", 0); +} + +static void __init schizo_irq_trans_init(struct device_node *dp) +{ + __schizo_irq_trans_init(dp, 0); +} + +static void __init tomatillo_irq_trans_init(struct device_node *dp) +{ + __schizo_irq_trans_init(dp, 1); +} + +static unsigned int pci_sun4v_irq_build(struct device_node *dp, + unsigned int devino, + void *_data) +{ + u32 devhandle = (u32) (unsigned long) _data; + + return sun4v_build_irq(devhandle, devino); +} + +static void __init pci_sun4v_irq_trans_init(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = pci_sun4v_irq_build; + + regs = of_get_property(dp, "reg", NULL); + dp->irq_trans->data = (void *) (unsigned long) + ((regs->phys_addr >> 32UL) & 0x0fffffff); +} + +struct fire_irq_data { + unsigned long pbm_regs; + u32 portid; +}; + +#define FIRE_IMAP_BASE 0x001000 +#define FIRE_ICLR_BASE 0x001400 + +static unsigned long fire_imap_offset(unsigned long ino) +{ + return FIRE_IMAP_BASE + (ino * 8UL); +} + +static unsigned long fire_iclr_offset(unsigned long ino) +{ + return FIRE_ICLR_BASE + (ino * 8UL); +} + +static unsigned long fire_ino_to_iclr(unsigned long pbm_regs, + unsigned int ino) +{ + return pbm_regs + fire_iclr_offset(ino); +} + +static unsigned long fire_ino_to_imap(unsigned long pbm_regs, + unsigned int ino) +{ + return pbm_regs + fire_imap_offset(ino); +} + +static unsigned int fire_irq_build(struct device_node *dp, + unsigned int ino, + void *_data) +{ + struct fire_irq_data *irq_data = _data; + unsigned long pbm_regs = irq_data->pbm_regs; + unsigned long imap, iclr; + unsigned long int_ctrlr; + + ino &= 0x3f; + + /* Now build the IRQ bucket. */ + imap = fire_ino_to_imap(pbm_regs, ino); + iclr = fire_ino_to_iclr(pbm_regs, ino); + + /* Set the interrupt controller number. */ + int_ctrlr = 1 << 6; + upa_writeq(int_ctrlr, imap); + + /* The interrupt map registers do not have an INO field + * like other chips do. They return zero in the INO + * field, and the interrupt controller number is controlled + * in bits 6 to 9. So in order for build_irq() to get + * the INO right we pass it in as part of the fixup + * which will get added to the map register zero value + * read by build_irq(). + */ + ino |= (irq_data->portid << 6); + ino -= int_ctrlr; + return build_irq(ino, iclr, imap); +} + +static void __init fire_irq_trans_init(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + struct fire_irq_data *irq_data; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = fire_irq_build; + + irq_data = prom_early_alloc(sizeof(struct fire_irq_data)); + + regs = of_get_property(dp, "reg", NULL); + dp->irq_trans->data = irq_data; + + irq_data->pbm_regs = regs[0].phys_addr; + irq_data->portid = of_getintprop_default(dp, "portid", 0); +} +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_SBUS +/* INO number to IMAP register offset for SYSIO external IRQ's. + * This should conform to both Sunfire/Wildfire server and Fusion + * desktop designs. + */ +#define SYSIO_IMAP_SLOT0 0x2c00UL +#define SYSIO_IMAP_SLOT1 0x2c08UL +#define SYSIO_IMAP_SLOT2 0x2c10UL +#define SYSIO_IMAP_SLOT3 0x2c18UL +#define SYSIO_IMAP_SCSI 0x3000UL +#define SYSIO_IMAP_ETH 0x3008UL +#define SYSIO_IMAP_BPP 0x3010UL +#define SYSIO_IMAP_AUDIO 0x3018UL +#define SYSIO_IMAP_PFAIL 0x3020UL +#define SYSIO_IMAP_KMS 0x3028UL +#define SYSIO_IMAP_FLPY 0x3030UL +#define SYSIO_IMAP_SHW 0x3038UL +#define SYSIO_IMAP_KBD 0x3040UL +#define SYSIO_IMAP_MS 0x3048UL +#define SYSIO_IMAP_SER 0x3050UL +#define SYSIO_IMAP_TIM0 0x3060UL +#define SYSIO_IMAP_TIM1 0x3068UL +#define SYSIO_IMAP_UE 0x3070UL +#define SYSIO_IMAP_CE 0x3078UL +#define SYSIO_IMAP_SBERR 0x3080UL +#define SYSIO_IMAP_PMGMT 0x3088UL +#define SYSIO_IMAP_GFX 0x3090UL +#define SYSIO_IMAP_EUPA 0x3098UL + +#define bogon ((unsigned long) -1) +static unsigned long sysio_irq_offsets[] = { + /* SBUS Slot 0 --> 3, level 1 --> 7 */ + SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, + SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, + SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, + SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, + SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, + SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, + SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, + SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, + + /* Onboard devices (not relevant/used on SunFire). */ + SYSIO_IMAP_SCSI, + SYSIO_IMAP_ETH, + SYSIO_IMAP_BPP, + bogon, + SYSIO_IMAP_AUDIO, + SYSIO_IMAP_PFAIL, + bogon, + bogon, + SYSIO_IMAP_KMS, + SYSIO_IMAP_FLPY, + SYSIO_IMAP_SHW, + SYSIO_IMAP_KBD, + SYSIO_IMAP_MS, + SYSIO_IMAP_SER, + bogon, + bogon, + SYSIO_IMAP_TIM0, + SYSIO_IMAP_TIM1, + bogon, + bogon, + SYSIO_IMAP_UE, + SYSIO_IMAP_CE, + SYSIO_IMAP_SBERR, + SYSIO_IMAP_PMGMT, + SYSIO_IMAP_GFX, + SYSIO_IMAP_EUPA, +}; + +#undef bogon + +#define NUM_SYSIO_OFFSETS ARRAY_SIZE(sysio_irq_offsets) + +/* Convert Interrupt Mapping register pointer to associated + * Interrupt Clear register pointer, SYSIO specific version. + */ +#define SYSIO_ICLR_UNUSED0 0x3400UL +#define SYSIO_ICLR_SLOT0 0x3408UL +#define SYSIO_ICLR_SLOT1 0x3448UL +#define SYSIO_ICLR_SLOT2 0x3488UL +#define SYSIO_ICLR_SLOT3 0x34c8UL +static unsigned long sysio_imap_to_iclr(unsigned long imap) +{ + unsigned long diff = SYSIO_ICLR_UNUSED0 - SYSIO_IMAP_SLOT0; + return imap + diff; +} + +static unsigned int sbus_of_build_irq(struct device_node *dp, + unsigned int ino, + void *_data) +{ + unsigned long reg_base = (unsigned long) _data; + const struct linux_prom_registers *regs; + unsigned long imap, iclr; + int sbus_slot = 0; + int sbus_level = 0; + + ino &= 0x3f; + + regs = of_get_property(dp, "reg", NULL); + if (regs) + sbus_slot = regs->which_io; + + if (ino < 0x20) + ino += (sbus_slot * 8); + + imap = sysio_irq_offsets[ino]; + if (imap == ((unsigned long)-1)) { + prom_printf("get_irq_translations: Bad SYSIO INO[%x]\n", + ino); + prom_halt(); + } + imap += reg_base; + + /* SYSIO inconsistency. For external SLOTS, we have to select + * the right ICLR register based upon the lower SBUS irq level + * bits. + */ + if (ino >= 0x20) { + iclr = sysio_imap_to_iclr(imap); + } else { + sbus_level = ino & 0x7; + + switch(sbus_slot) { + case 0: + iclr = reg_base + SYSIO_ICLR_SLOT0; + break; + case 1: + iclr = reg_base + SYSIO_ICLR_SLOT1; + break; + case 2: + iclr = reg_base + SYSIO_ICLR_SLOT2; + break; + default: + case 3: + iclr = reg_base + SYSIO_ICLR_SLOT3; + break; + }; + + iclr += ((unsigned long)sbus_level - 1UL) * 8UL; + } + return build_irq(sbus_level, iclr, imap); +} + +static void __init sbus_irq_trans_init(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = sbus_of_build_irq; + + regs = of_get_property(dp, "reg", NULL); + dp->irq_trans->data = (void *) (unsigned long) regs->phys_addr; +} +#endif /* CONFIG_SBUS */ + + +static unsigned int central_build_irq(struct device_node *dp, + unsigned int ino, + void *_data) +{ + struct device_node *central_dp = _data; + struct of_device *central_op = of_find_device_by_node(central_dp); + struct resource *res; + unsigned long imap, iclr; + u32 tmp; + + if (!strcmp(dp->name, "eeprom")) { + res = ¢ral_op->resource[5]; + } else if (!strcmp(dp->name, "zs")) { + res = ¢ral_op->resource[4]; + } else if (!strcmp(dp->name, "clock-board")) { + res = ¢ral_op->resource[3]; + } else { + return ino; + } + + imap = res->start + 0x00UL; + iclr = res->start + 0x10UL; + + /* Set the INO state to idle, and disable. */ + upa_writel(0, iclr); + upa_readl(iclr); + + tmp = upa_readl(imap); + tmp &= ~0x80000000; + upa_writel(tmp, imap); + + return build_irq(0, iclr, imap); +} + +static void __init central_irq_trans_init(struct device_node *dp) +{ + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = central_build_irq; + + dp->irq_trans->data = dp; +} + +struct irq_trans { + const char *name; + void (*init)(struct device_node *); +}; + +#ifdef CONFIG_PCI +static struct irq_trans __initdata pci_irq_trans_table[] = { + { "SUNW,sabre", sabre_irq_trans_init }, + { "pci108e,a000", sabre_irq_trans_init }, + { "pci108e,a001", sabre_irq_trans_init }, + { "SUNW,psycho", psycho_irq_trans_init }, + { "pci108e,8000", psycho_irq_trans_init }, + { "SUNW,schizo", schizo_irq_trans_init }, + { "pci108e,8001", schizo_irq_trans_init }, + { "SUNW,schizo+", schizo_irq_trans_init }, + { "pci108e,8002", schizo_irq_trans_init }, + { "SUNW,tomatillo", tomatillo_irq_trans_init }, + { "pci108e,a801", tomatillo_irq_trans_init }, + { "SUNW,sun4v-pci", pci_sun4v_irq_trans_init }, + { "pciex108e,80f0", fire_irq_trans_init }, +}; +#endif + +static unsigned int sun4v_vdev_irq_build(struct device_node *dp, + unsigned int devino, + void *_data) +{ + u32 devhandle = (u32) (unsigned long) _data; + + return sun4v_build_irq(devhandle, devino); +} + +static void __init sun4v_vdev_irq_trans_init(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + + dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller)); + dp->irq_trans->irq_build = sun4v_vdev_irq_build; + + regs = of_get_property(dp, "reg", NULL); + dp->irq_trans->data = (void *) (unsigned long) + ((regs->phys_addr >> 32UL) & 0x0fffffff); +} + +static void __init irq_trans_init(struct device_node *dp) +{ +#ifdef CONFIG_PCI + const char *model; + int i; +#endif + +#ifdef CONFIG_PCI + model = of_get_property(dp, "model", NULL); + if (!model) + model = of_get_property(dp, "compatible", NULL); + if (model) { + for (i = 0; i < ARRAY_SIZE(pci_irq_trans_table); i++) { + struct irq_trans *t = &pci_irq_trans_table[i]; + + if (!strcmp(model, t->name)) { + t->init(dp); + return; + } + } + } +#endif +#ifdef CONFIG_SBUS + if (!strcmp(dp->name, "sbus") || + !strcmp(dp->name, "sbi")) { + sbus_irq_trans_init(dp); + return; + } +#endif + if (!strcmp(dp->name, "fhc") && + !strcmp(dp->parent->name, "central")) { + central_irq_trans_init(dp); + return; + } + if (!strcmp(dp->name, "virtual-devices") || + !strcmp(dp->name, "niu")) { + sun4v_vdev_irq_trans_init(dp); + return; + } +} + +static int is_root_node(const struct device_node *dp) +{ + if (!dp) + return 0; + + return (dp->parent == NULL); +} + +/* The following routines deal with the black magic of fully naming a + * node. + * + * Certain well known named nodes are just the simple name string. + * + * Actual devices have an address specifier appended to the base name + * string, like this "foo@addr". The "addr" can be in any number of + * formats, and the platform plus the type of the node determine the + * format and how it is constructed. + * + * For children of the ROOT node, the naming convention is fixed and + * determined by whether this is a sun4u or sun4v system. + * + * For children of other nodes, it is bus type specific. So + * we walk up the tree until we discover a "device_type" property + * we recognize and we go from there. + * + * As an example, the boot device on my workstation has a full path: + * + * /pci@1e,600000/ide@d/disk@0,0:c + */ +static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) +{ + struct linux_prom64_registers *regs; + struct property *rprop; + u32 high_bits, low_bits, type; + + rprop = of_find_property(dp, "reg", NULL); + if (!rprop) + return; + + regs = rprop->value; + if (!is_root_node(dp->parent)) { + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + (unsigned int) (regs->phys_addr >> 32UL), + (unsigned int) (regs->phys_addr & 0xffffffffUL)); + return; + } + + type = regs->phys_addr >> 60UL; + high_bits = (regs->phys_addr >> 32UL) & 0x0fffffffUL; + low_bits = (regs->phys_addr & 0xffffffffUL); + + if (type == 0 || type == 8) { + const char *prefix = (type == 0) ? "m" : "i"; + + if (low_bits) + sprintf(tmp_buf, "%s@%s%x,%x", + dp->name, prefix, + high_bits, low_bits); + else + sprintf(tmp_buf, "%s@%s%x", + dp->name, + prefix, + high_bits); + } else if (type == 12) { + sprintf(tmp_buf, "%s@%x", + dp->name, high_bits); + } +} + +static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) +{ + struct linux_prom64_registers *regs; + struct property *prop; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + if (!is_root_node(dp->parent)) { + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + (unsigned int) (regs->phys_addr >> 32UL), + (unsigned int) (regs->phys_addr & 0xffffffffUL)); + return; + } + + prop = of_find_property(dp, "upa-portid", NULL); + if (!prop) + prop = of_find_property(dp, "portid", NULL); + if (prop) { + unsigned long mask = 0xffffffffUL; + + if (tlb_type >= cheetah) + mask = 0x7fffff; + + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + *(u32 *)prop->value, + (unsigned int) (regs->phys_addr & mask)); + } +} + +/* "name@slot,offset" */ +static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) +{ + struct linux_prom_registers *regs; + struct property *prop; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + regs->which_io, + regs->phys_addr); +} + +/* "name@devnum[,func]" */ +static void __init pci_path_component(struct device_node *dp, char *tmp_buf) +{ + struct linux_prom_pci_registers *regs; + struct property *prop; + unsigned int devfn; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + devfn = (regs->phys_hi >> 8) & 0xff; + if (devfn & 0x07) { + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + devfn >> 3, + devfn & 0x07); + } else { + sprintf(tmp_buf, "%s@%x", + dp->name, + devfn >> 3); + } +} + +/* "name@UPA_PORTID,offset" */ +static void __init upa_path_component(struct device_node *dp, char *tmp_buf) +{ + struct linux_prom64_registers *regs; + struct property *prop; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + + prop = of_find_property(dp, "upa-portid", NULL); + if (!prop) + return; + + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + *(u32 *) prop->value, + (unsigned int) (regs->phys_addr & 0xffffffffUL)); +} + +/* "name@reg" */ +static void __init vdev_path_component(struct device_node *dp, char *tmp_buf) +{ + struct property *prop; + u32 *regs; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + + sprintf(tmp_buf, "%s@%x", dp->name, *regs); +} + +/* "name@addrhi,addrlo" */ +static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) +{ + struct linux_prom64_registers *regs; + struct property *prop; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + + sprintf(tmp_buf, "%s@%x,%x", + dp->name, + (unsigned int) (regs->phys_addr >> 32UL), + (unsigned int) (regs->phys_addr & 0xffffffffUL)); +} + +/* "name@bus,addr" */ +static void __init i2c_path_component(struct device_node *dp, char *tmp_buf) +{ + struct property *prop; + u32 *regs; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + + /* This actually isn't right... should look at the #address-cells + * property of the i2c bus node etc. etc. + */ + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs[0], regs[1]); +} + +/* "name@reg0[,reg1]" */ +static void __init usb_path_component(struct device_node *dp, char *tmp_buf) +{ + struct property *prop; + u32 *regs; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + + if (prop->length == sizeof(u32) || regs[1] == 1) { + sprintf(tmp_buf, "%s@%x", + dp->name, regs[0]); + } else { + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs[0], regs[1]); + } +} + +/* "name@reg0reg1[,reg2reg3]" */ +static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf) +{ + struct property *prop; + u32 *regs; + + prop = of_find_property(dp, "reg", NULL); + if (!prop) + return; + + regs = prop->value; + + if (regs[2] || regs[3]) { + sprintf(tmp_buf, "%s@%08x%08x,%04x%08x", + dp->name, regs[0], regs[1], regs[2], regs[3]); + } else { + sprintf(tmp_buf, "%s@%08x%08x", + dp->name, regs[0], regs[1]); + } +} + +static void __init __build_path_component(struct device_node *dp, char *tmp_buf) +{ + struct device_node *parent = dp->parent; + + if (parent != NULL) { + if (!strcmp(parent->type, "pci") || + !strcmp(parent->type, "pciex")) { + pci_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->type, "sbus")) { + sbus_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->type, "upa")) { + upa_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->type, "ebus")) { + ebus_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->name, "usb") || + !strcmp(parent->name, "hub")) { + usb_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->type, "i2c")) { + i2c_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->type, "firewire")) { + ieee1394_path_component(dp, tmp_buf); + return; + } + if (!strcmp(parent->type, "virtual-devices")) { + vdev_path_component(dp, tmp_buf); + return; + } + /* "isa" is handled with platform naming */ + } + + /* Use platform naming convention. */ + if (tlb_type == hypervisor) { + sun4v_path_component(dp, tmp_buf); + return; + } else { + sun4u_path_component(dp, tmp_buf); + } +} + +static char * __init build_path_component(struct device_node *dp) +{ + char tmp_buf[64], *n; + + tmp_buf[0] = '\0'; + __build_path_component(dp, tmp_buf); + if (tmp_buf[0] == '\0') + strcpy(tmp_buf, dp->name); + + n = prom_early_alloc(strlen(tmp_buf) + 1); + strcpy(n, tmp_buf); + + return n; +} + +static char * __init build_full_name(struct device_node *dp) +{ + int len, ourlen, plen; + char *n; + + plen = strlen(dp->parent->full_name); + ourlen = strlen(dp->path_component_name); + len = ourlen + plen + 2; + + n = prom_early_alloc(len); + strcpy(n, dp->parent->full_name); + if (!is_root_node(dp->parent)) { + strcpy(n + plen, "/"); + plen++; + } + strcpy(n + plen, dp->path_component_name); + + return n; +} + +static unsigned int unique_id; + +static struct property * __init build_one_prop(phandle node, char *prev, char *special_name, void *special_val, int special_len) +{ + static struct property *tmp = NULL; + struct property *p; + + if (tmp) { + p = tmp; + memset(p, 0, sizeof(*p) + 32); + tmp = NULL; + } else { + p = prom_early_alloc(sizeof(struct property) + 32); + p->unique_id = unique_id++; + } + + p->name = (char *) (p + 1); + if (special_name) { + strcpy(p->name, special_name); + p->length = special_len; + p->value = prom_early_alloc(special_len); + memcpy(p->value, special_val, special_len); + } else { + if (prev == NULL) { + prom_firstprop(node, p->name); + } else { + prom_nextprop(node, prev, p->name); + } + if (strlen(p->name) == 0) { + tmp = p; + return NULL; + } + p->length = prom_getproplen(node, p->name); + if (p->length <= 0) { + p->length = 0; + } else { + p->value = prom_early_alloc(p->length + 1); + prom_getproperty(node, p->name, p->value, p->length); + ((unsigned char *)p->value)[p->length] = '\0'; + } + } + return p; +} + +static struct property * __init build_prop_list(phandle node) +{ + struct property *head, *tail; + + head = tail = build_one_prop(node, NULL, + ".node", &node, sizeof(node)); + + tail->next = build_one_prop(node, NULL, NULL, NULL, 0); + tail = tail->next; + while(tail) { + tail->next = build_one_prop(node, tail->name, + NULL, NULL, 0); + tail = tail->next; + } + + return head; +} + +static char * __init get_one_property(phandle node, const char *name) +{ + char *buf = "<NULL>"; + int len; + + len = prom_getproplen(node, name); + if (len > 0) { + buf = prom_early_alloc(len); + prom_getproperty(node, name, buf, len); + } + + return buf; +} + +static struct device_node * __init create_node(phandle node, struct device_node *parent) +{ + struct device_node *dp; + + if (!node) + return NULL; + + dp = prom_early_alloc(sizeof(*dp)); + dp->unique_id = unique_id++; + dp->parent = parent; + + kref_init(&dp->kref); + + dp->name = get_one_property(node, "name"); + dp->type = get_one_property(node, "device_type"); + dp->node = node; + + dp->properties = build_prop_list(node); + + irq_trans_init(dp); + + return dp; +} + +static struct device_node * __init build_tree(struct device_node *parent, phandle node, struct device_node ***nextp) +{ + struct device_node *ret = NULL, *prev_sibling = NULL; + struct device_node *dp; + + while (1) { + dp = create_node(node, parent); + if (!dp) + break; + + if (prev_sibling) + prev_sibling->sibling = dp; + + if (!ret) + ret = dp; + prev_sibling = dp; + + *(*nextp) = dp; + *nextp = &dp->allnext; + + dp->path_component_name = build_path_component(dp); + dp->full_name = build_full_name(dp); + + dp->child = build_tree(dp, prom_getchild(node), nextp); + + node = prom_getsibling(node); + } + + return ret; +} + +static const char *get_mid_prop(void) +{ + return (tlb_type == spitfire ? "upa-portid" : "portid"); +} + +struct device_node *of_find_node_by_cpuid(int cpuid) +{ + struct device_node *dp; + const char *mid_prop = get_mid_prop(); + + for_each_node_by_type(dp, "cpu") { + int id = of_getintprop_default(dp, mid_prop, -1); + const char *this_mid_prop = mid_prop; + + if (id < 0) { + this_mid_prop = "cpuid"; + id = of_getintprop_default(dp, this_mid_prop, -1); + } + + if (id < 0) { + prom_printf("OF: Serious problem, cpu lacks " + "%s property", this_mid_prop); + prom_halt(); + } + if (cpuid == id) + return dp; + } + return NULL; +} + +static void __init of_fill_in_cpu_data(void) +{ + struct device_node *dp; + const char *mid_prop = get_mid_prop(); + + ncpus_probed = 0; + for_each_node_by_type(dp, "cpu") { + int cpuid = of_getintprop_default(dp, mid_prop, -1); + const char *this_mid_prop = mid_prop; + struct device_node *portid_parent; + int portid = -1; + + portid_parent = NULL; + if (cpuid < 0) { + this_mid_prop = "cpuid"; + cpuid = of_getintprop_default(dp, this_mid_prop, -1); + if (cpuid >= 0) { + int limit = 2; + + portid_parent = dp; + while (limit--) { + portid_parent = portid_parent->parent; + if (!portid_parent) + break; + portid = of_getintprop_default(portid_parent, + "portid", -1); + if (portid >= 0) + break; + } + } + } + + if (cpuid < 0) { + prom_printf("OF: Serious problem, cpu lacks " + "%s property", this_mid_prop); + prom_halt(); + } + + ncpus_probed++; + +#ifdef CONFIG_SMP + if (cpuid >= NR_CPUS) { + printk(KERN_WARNING "Ignoring CPU %d which is " + ">= NR_CPUS (%d)\n", + cpuid, NR_CPUS); + continue; + } +#else + /* On uniprocessor we only want the values for the + * real physical cpu the kernel booted onto, however + * cpu_data() only has one entry at index 0. + */ + if (cpuid != real_hard_smp_processor_id()) + continue; + cpuid = 0; +#endif + + cpu_data(cpuid).clock_tick = + of_getintprop_default(dp, "clock-frequency", 0); + + if (portid_parent) { + cpu_data(cpuid).dcache_size = + of_getintprop_default(dp, "l1-dcache-size", + 16 * 1024); + cpu_data(cpuid).dcache_line_size = + of_getintprop_default(dp, "l1-dcache-line-size", + 32); + cpu_data(cpuid).icache_size = + of_getintprop_default(dp, "l1-icache-size", + 8 * 1024); + cpu_data(cpuid).icache_line_size = + of_getintprop_default(dp, "l1-icache-line-size", + 32); + cpu_data(cpuid).ecache_size = + of_getintprop_default(dp, "l2-cache-size", 0); + cpu_data(cpuid).ecache_line_size = + of_getintprop_default(dp, "l2-cache-line-size", 0); + if (!cpu_data(cpuid).ecache_size || + !cpu_data(cpuid).ecache_line_size) { + cpu_data(cpuid).ecache_size = + of_getintprop_default(portid_parent, + "l2-cache-size", + (4 * 1024 * 1024)); + cpu_data(cpuid).ecache_line_size = + of_getintprop_default(portid_parent, + "l2-cache-line-size", 64); + } + + cpu_data(cpuid).core_id = portid + 1; + cpu_data(cpuid).proc_id = portid; +#ifdef CONFIG_SMP + sparc64_multi_core = 1; +#endif + } else { + cpu_data(cpuid).dcache_size = + of_getintprop_default(dp, "dcache-size", 16 * 1024); + cpu_data(cpuid).dcache_line_size = + of_getintprop_default(dp, "dcache-line-size", 32); + + cpu_data(cpuid).icache_size = + of_getintprop_default(dp, "icache-size", 16 * 1024); + cpu_data(cpuid).icache_line_size = + of_getintprop_default(dp, "icache-line-size", 32); + + cpu_data(cpuid).ecache_size = + of_getintprop_default(dp, "ecache-size", + (4 * 1024 * 1024)); + cpu_data(cpuid).ecache_line_size = + of_getintprop_default(dp, "ecache-line-size", 64); + + cpu_data(cpuid).core_id = 0; + cpu_data(cpuid).proc_id = -1; + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_present_map); + cpu_set(cpuid, cpu_possible_map); +#endif + } + + smp_fill_in_sib_core_maps(); +} + +struct device_node *of_console_device; +EXPORT_SYMBOL(of_console_device); + +char *of_console_path; +EXPORT_SYMBOL(of_console_path); + +char *of_console_options; +EXPORT_SYMBOL(of_console_options); + +static void __init of_console_init(void) +{ + char *msg = "OF stdout device is: %s\n"; + struct device_node *dp; + const char *type; + phandle node; + + of_console_path = prom_early_alloc(256); + if (prom_ihandle2path(prom_stdout, of_console_path, 256) < 0) { + prom_printf("Cannot obtain path of stdout.\n"); + prom_halt(); + } + of_console_options = strrchr(of_console_path, ':'); + if (of_console_options) { + of_console_options++; + if (*of_console_options == '\0') + of_console_options = NULL; + } + + node = prom_inst2pkg(prom_stdout); + if (!node) { + prom_printf("Cannot resolve stdout node from " + "instance %08x.\n", prom_stdout); + prom_halt(); + } + + dp = of_find_node_by_phandle(node); + type = of_get_property(dp, "device_type", NULL); + if (!type) { + prom_printf("Console stdout lacks device_type property.\n"); + prom_halt(); + } + + if (strcmp(type, "display") && strcmp(type, "serial")) { + prom_printf("Console device_type is neither display " + "nor serial.\n"); + prom_halt(); + } + + of_console_device = dp; + + printk(msg, of_console_path); +} + +void __init prom_build_devicetree(void) +{ + struct device_node **nextp; + + allnodes = create_node(prom_root_node, NULL); + allnodes->path_component_name = ""; + allnodes->full_name = "/"; + + nextp = &allnodes->allnext; + allnodes->child = build_tree(allnodes, + prom_getchild(allnodes->node), + &nextp); + of_console_init(); + + printk("PROM: Built device tree with %u bytes of memory.\n", + prom_early_allocated); + + if (tlb_type != hypervisor) + of_fill_in_cpu_data(); +} diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c new file mode 100644 index 000000000000..790996428c14 --- /dev/null +++ b/arch/sparc/kernel/psycho_common.c @@ -0,0 +1,470 @@ +/* psycho_common.c: Code common to PSYCHO and derivative PCI controllers. + * + * Copyright (C) 2008 David S. Miller <davem@davemloft.net> + */ +#include <linux/kernel.h> +#include <linux/interrupt.h> + +#include <asm/upa.h> + +#include "pci_impl.h" +#include "iommu_common.h" +#include "psycho_common.h" + +#define PSYCHO_STRBUF_CTRL_DENAB 0x0000000000000002UL +#define PSYCHO_STCERR_WRITE 0x0000000000000002UL +#define PSYCHO_STCERR_READ 0x0000000000000001UL +#define PSYCHO_STCTAG_PPN 0x0fffffff00000000UL +#define PSYCHO_STCTAG_VPN 0x00000000ffffe000UL +#define PSYCHO_STCTAG_VALID 0x0000000000000002UL +#define PSYCHO_STCTAG_WRITE 0x0000000000000001UL +#define PSYCHO_STCLINE_LINDX 0x0000000001e00000UL +#define PSYCHO_STCLINE_SPTR 0x00000000001f8000UL +#define PSYCHO_STCLINE_LADDR 0x0000000000007f00UL +#define PSYCHO_STCLINE_EPTR 0x00000000000000fcUL +#define PSYCHO_STCLINE_VALID 0x0000000000000002UL +#define PSYCHO_STCLINE_FOFN 0x0000000000000001UL + +static DEFINE_SPINLOCK(stc_buf_lock); +static unsigned long stc_error_buf[128]; +static unsigned long stc_tag_buf[16]; +static unsigned long stc_line_buf[16]; + +static void psycho_check_stc_error(struct pci_pbm_info *pbm) +{ + unsigned long err_base, tag_base, line_base; + struct strbuf *strbuf = &pbm->stc; + u64 control; + int i; + + if (!strbuf->strbuf_control) + return; + + err_base = strbuf->strbuf_err_stat; + tag_base = strbuf->strbuf_tag_diag; + line_base = strbuf->strbuf_line_diag; + + spin_lock(&stc_buf_lock); + + /* This is __REALLY__ dangerous. When we put the streaming + * buffer into diagnostic mode to probe it's tags and error + * status, we _must_ clear all of the line tag valid bits + * before re-enabling the streaming buffer. If any dirty data + * lives in the STC when we do this, we will end up + * invalidating it before it has a chance to reach main + * memory. + */ + control = upa_readq(strbuf->strbuf_control); + upa_writeq(control | PSYCHO_STRBUF_CTRL_DENAB, strbuf->strbuf_control); + for (i = 0; i < 128; i++) { + u64 val; + + val = upa_readq(err_base + (i * 8UL)); + upa_writeq(0UL, err_base + (i * 8UL)); + stc_error_buf[i] = val; + } + for (i = 0; i < 16; i++) { + stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL)); + stc_line_buf[i] = upa_readq(line_base + (i * 8UL)); + upa_writeq(0UL, tag_base + (i * 8UL)); + upa_writeq(0UL, line_base + (i * 8UL)); + } + + /* OK, state is logged, exit diagnostic mode. */ + upa_writeq(control, strbuf->strbuf_control); + + for (i = 0; i < 16; i++) { + int j, saw_error, first, last; + + saw_error = 0; + first = i * 8; + last = first + 8; + for (j = first; j < last; j++) { + u64 errval = stc_error_buf[j]; + if (errval != 0) { + saw_error++; + printk(KERN_ERR "%s: STC_ERR(%d)[wr(%d)" + "rd(%d)]\n", + pbm->name, + j, + (errval & PSYCHO_STCERR_WRITE) ? 1 : 0, + (errval & PSYCHO_STCERR_READ) ? 1 : 0); + } + } + if (saw_error != 0) { + u64 tagval = stc_tag_buf[i]; + u64 lineval = stc_line_buf[i]; + printk(KERN_ERR "%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)" + "V(%d)W(%d)]\n", + pbm->name, + i, + ((tagval & PSYCHO_STCTAG_PPN) >> 19UL), + (tagval & PSYCHO_STCTAG_VPN), + ((tagval & PSYCHO_STCTAG_VALID) ? 1 : 0), + ((tagval & PSYCHO_STCTAG_WRITE) ? 1 : 0)); + printk(KERN_ERR "%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)" + "LADDR(%lx)EP(%lx)V(%d)FOFN(%d)]\n", + pbm->name, + i, + ((lineval & PSYCHO_STCLINE_LINDX) >> 21UL), + ((lineval & PSYCHO_STCLINE_SPTR) >> 15UL), + ((lineval & PSYCHO_STCLINE_LADDR) >> 8UL), + ((lineval & PSYCHO_STCLINE_EPTR) >> 2UL), + ((lineval & PSYCHO_STCLINE_VALID) ? 1 : 0), + ((lineval & PSYCHO_STCLINE_FOFN) ? 1 : 0)); + } + } + + spin_unlock(&stc_buf_lock); +} + +#define PSYCHO_IOMMU_TAG 0xa580UL +#define PSYCHO_IOMMU_DATA 0xa600UL + +static void psycho_record_iommu_tags_and_data(struct pci_pbm_info *pbm, + u64 *tag, u64 *data) +{ + int i; + + for (i = 0; i < 16; i++) { + unsigned long base = pbm->controller_regs; + unsigned long off = i * 8UL; + + tag[i] = upa_readq(base + PSYCHO_IOMMU_TAG+off); + data[i] = upa_readq(base + PSYCHO_IOMMU_DATA+off); + + /* Now clear out the entry. */ + upa_writeq(0, base + PSYCHO_IOMMU_TAG + off); + upa_writeq(0, base + PSYCHO_IOMMU_DATA + off); + } +} + +#define PSYCHO_IOMMU_TAG_ERRSTS (0x3UL << 23UL) +#define PSYCHO_IOMMU_TAG_ERR (0x1UL << 22UL) +#define PSYCHO_IOMMU_TAG_WRITE (0x1UL << 21UL) +#define PSYCHO_IOMMU_TAG_STREAM (0x1UL << 20UL) +#define PSYCHO_IOMMU_TAG_SIZE (0x1UL << 19UL) +#define PSYCHO_IOMMU_TAG_VPAGE 0x7ffffUL +#define PSYCHO_IOMMU_DATA_VALID (1UL << 30UL) +#define PSYCHO_IOMMU_DATA_CACHE (1UL << 28UL) +#define PSYCHO_IOMMU_DATA_PPAGE 0xfffffffUL + +static void psycho_dump_iommu_tags_and_data(struct pci_pbm_info *pbm, + u64 *tag, u64 *data) +{ + int i; + + for (i = 0; i < 16; i++) { + u64 tag_val, data_val; + const char *type_str; + tag_val = tag[i]; + if (!(tag_val & PSYCHO_IOMMU_TAG_ERR)) + continue; + + data_val = data[i]; + switch((tag_val & PSYCHO_IOMMU_TAG_ERRSTS) >> 23UL) { + case 0: + type_str = "Protection Error"; + break; + case 1: + type_str = "Invalid Error"; + break; + case 2: + type_str = "TimeOut Error"; + break; + case 3: + default: + type_str = "ECC Error"; + break; + } + + printk(KERN_ERR "%s: IOMMU TAG(%d)[error(%s) wr(%d) " + "str(%d) sz(%dK) vpg(%08lx)]\n", + pbm->name, i, type_str, + ((tag_val & PSYCHO_IOMMU_TAG_WRITE) ? 1 : 0), + ((tag_val & PSYCHO_IOMMU_TAG_STREAM) ? 1 : 0), + ((tag_val & PSYCHO_IOMMU_TAG_SIZE) ? 64 : 8), + (tag_val & PSYCHO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT); + printk(KERN_ERR "%s: IOMMU DATA(%d)[valid(%d) cache(%d) " + "ppg(%016lx)]\n", + pbm->name, i, + ((data_val & PSYCHO_IOMMU_DATA_VALID) ? 1 : 0), + ((data_val & PSYCHO_IOMMU_DATA_CACHE) ? 1 : 0), + (data_val & PSYCHO_IOMMU_DATA_PPAGE)<<IOMMU_PAGE_SHIFT); + } +} + +#define PSYCHO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL +#define PSYCHO_IOMMU_CTRL_XLTEERR 0x0000000001000000UL + +void psycho_check_iommu_error(struct pci_pbm_info *pbm, + unsigned long afsr, + unsigned long afar, + enum psycho_error_type type) +{ + u64 control, iommu_tag[16], iommu_data[16]; + struct iommu *iommu = pbm->iommu; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + control = upa_readq(iommu->iommu_control); + if (control & PSYCHO_IOMMU_CTRL_XLTEERR) { + const char *type_str; + + control &= ~PSYCHO_IOMMU_CTRL_XLTEERR; + upa_writeq(control, iommu->iommu_control); + + switch ((control & PSYCHO_IOMMU_CTRL_XLTESTAT) >> 25UL) { + case 0: + type_str = "Protection Error"; + break; + case 1: + type_str = "Invalid Error"; + break; + case 2: + type_str = "TimeOut Error"; + break; + case 3: + default: + type_str = "ECC Error"; + break; + }; + printk(KERN_ERR "%s: IOMMU Error, type[%s]\n", + pbm->name, type_str); + + /* It is very possible for another DVMA to occur while + * we do this probe, and corrupt the system further. + * But we are so screwed at this point that we are + * likely to crash hard anyways, so get as much + * diagnostic information to the console as we can. + */ + psycho_record_iommu_tags_and_data(pbm, iommu_tag, iommu_data); + psycho_dump_iommu_tags_and_data(pbm, iommu_tag, iommu_data); + } + psycho_check_stc_error(pbm); + spin_unlock_irqrestore(&iommu->lock, flags); +} + +#define PSYCHO_PCICTRL_SBH_ERR 0x0000000800000000UL +#define PSYCHO_PCICTRL_SERR 0x0000000400000000UL + +static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm) +{ + irqreturn_t ret = IRQ_NONE; + u64 csr, csr_error_bits; + u16 stat, *addr; + + csr = upa_readq(pbm->pci_csr); + csr_error_bits = csr & (PSYCHO_PCICTRL_SBH_ERR | PSYCHO_PCICTRL_SERR); + if (csr_error_bits) { + /* Clear the errors. */ + upa_writeq(csr, pbm->pci_csr); + + /* Log 'em. */ + if (csr_error_bits & PSYCHO_PCICTRL_SBH_ERR) + printk(KERN_ERR "%s: PCI streaming byte hole " + "error asserted.\n", pbm->name); + if (csr_error_bits & PSYCHO_PCICTRL_SERR) + printk(KERN_ERR "%s: PCI SERR signal asserted.\n", + pbm->name); + ret = IRQ_HANDLED; + } + addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno, + 0, PCI_STATUS); + pci_config_read16(addr, &stat); + if (stat & (PCI_STATUS_PARITY | + PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR)) { + printk(KERN_ERR "%s: PCI bus error, PCI_STATUS[%04x]\n", + pbm->name, stat); + pci_config_write16(addr, 0xffff); + ret = IRQ_HANDLED; + } + return ret; +} + +#define PSYCHO_PCIAFSR_PMA 0x8000000000000000UL +#define PSYCHO_PCIAFSR_PTA 0x4000000000000000UL +#define PSYCHO_PCIAFSR_PRTRY 0x2000000000000000UL +#define PSYCHO_PCIAFSR_PPERR 0x1000000000000000UL +#define PSYCHO_PCIAFSR_SMA 0x0800000000000000UL +#define PSYCHO_PCIAFSR_STA 0x0400000000000000UL +#define PSYCHO_PCIAFSR_SRTRY 0x0200000000000000UL +#define PSYCHO_PCIAFSR_SPERR 0x0100000000000000UL +#define PSYCHO_PCIAFSR_RESV1 0x00ff000000000000UL +#define PSYCHO_PCIAFSR_BMSK 0x0000ffff00000000UL +#define PSYCHO_PCIAFSR_BLK 0x0000000080000000UL +#define PSYCHO_PCIAFSR_RESV2 0x0000000040000000UL +#define PSYCHO_PCIAFSR_MID 0x000000003e000000UL +#define PSYCHO_PCIAFSR_RESV3 0x0000000001ffffffUL + +irqreturn_t psycho_pcierr_intr(int irq, void *dev_id) +{ + struct pci_pbm_info *pbm = dev_id; + u64 afsr, afar, error_bits; + int reported; + + afsr = upa_readq(pbm->pci_afsr); + afar = upa_readq(pbm->pci_afar); + error_bits = afsr & + (PSYCHO_PCIAFSR_PMA | PSYCHO_PCIAFSR_PTA | + PSYCHO_PCIAFSR_PRTRY | PSYCHO_PCIAFSR_PPERR | + PSYCHO_PCIAFSR_SMA | PSYCHO_PCIAFSR_STA | + PSYCHO_PCIAFSR_SRTRY | PSYCHO_PCIAFSR_SPERR); + if (!error_bits) + return psycho_pcierr_intr_other(pbm); + upa_writeq(error_bits, pbm->pci_afsr); + printk(KERN_ERR "%s: PCI Error, primary error type[%s]\n", + pbm->name, + (((error_bits & PSYCHO_PCIAFSR_PMA) ? + "Master Abort" : + ((error_bits & PSYCHO_PCIAFSR_PTA) ? + "Target Abort" : + ((error_bits & PSYCHO_PCIAFSR_PRTRY) ? + "Excessive Retries" : + ((error_bits & PSYCHO_PCIAFSR_PPERR) ? + "Parity Error" : "???")))))); + printk(KERN_ERR "%s: bytemask[%04lx] UPA_MID[%02lx] was_block(%d)\n", + pbm->name, + (afsr & PSYCHO_PCIAFSR_BMSK) >> 32UL, + (afsr & PSYCHO_PCIAFSR_MID) >> 25UL, + (afsr & PSYCHO_PCIAFSR_BLK) ? 1 : 0); + printk(KERN_ERR "%s: PCI AFAR [%016lx]\n", pbm->name, afar); + printk(KERN_ERR "%s: PCI Secondary errors [", pbm->name); + reported = 0; + if (afsr & PSYCHO_PCIAFSR_SMA) { + reported++; + printk("(Master Abort)"); + } + if (afsr & PSYCHO_PCIAFSR_STA) { + reported++; + printk("(Target Abort)"); + } + if (afsr & PSYCHO_PCIAFSR_SRTRY) { + reported++; + printk("(Excessive Retries)"); + } + if (afsr & PSYCHO_PCIAFSR_SPERR) { + reported++; + printk("(Parity Error)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + if (error_bits & (PSYCHO_PCIAFSR_PTA | PSYCHO_PCIAFSR_STA)) { + psycho_check_iommu_error(pbm, afsr, afar, PCI_ERR); + pci_scan_for_target_abort(pbm, pbm->pci_bus); + } + if (error_bits & (PSYCHO_PCIAFSR_PMA | PSYCHO_PCIAFSR_SMA)) + pci_scan_for_master_abort(pbm, pbm->pci_bus); + + if (error_bits & (PSYCHO_PCIAFSR_PPERR | PSYCHO_PCIAFSR_SPERR)) + pci_scan_for_parity_error(pbm, pbm->pci_bus); + + return IRQ_HANDLED; +} + +static void psycho_iommu_flush(struct pci_pbm_info *pbm) +{ + int i; + + for (i = 0; i < 16; i++) { + unsigned long off = i * 8; + + upa_writeq(0, pbm->controller_regs + PSYCHO_IOMMU_TAG + off); + upa_writeq(0, pbm->controller_regs + PSYCHO_IOMMU_DATA + off); + } +} + +#define PSYCHO_IOMMU_CONTROL 0x0200UL +#define PSYCHO_IOMMU_CTRL_TSBSZ 0x0000000000070000UL +#define PSYCHO_IOMMU_TSBSZ_1K 0x0000000000000000UL +#define PSYCHO_IOMMU_TSBSZ_2K 0x0000000000010000UL +#define PSYCHO_IOMMU_TSBSZ_4K 0x0000000000020000UL +#define PSYCHO_IOMMU_TSBSZ_8K 0x0000000000030000UL +#define PSYCHO_IOMMU_TSBSZ_16K 0x0000000000040000UL +#define PSYCHO_IOMMU_TSBSZ_32K 0x0000000000050000UL +#define PSYCHO_IOMMU_TSBSZ_64K 0x0000000000060000UL +#define PSYCHO_IOMMU_TSBSZ_128K 0x0000000000070000UL +#define PSYCHO_IOMMU_CTRL_TBWSZ 0x0000000000000004UL +#define PSYCHO_IOMMU_CTRL_DENAB 0x0000000000000002UL +#define PSYCHO_IOMMU_CTRL_ENAB 0x0000000000000001UL +#define PSYCHO_IOMMU_FLUSH 0x0210UL +#define PSYCHO_IOMMU_TSBBASE 0x0208UL + +int psycho_iommu_init(struct pci_pbm_info *pbm, int tsbsize, + u32 dvma_offset, u32 dma_mask, + unsigned long write_complete_offset) +{ + struct iommu *iommu = pbm->iommu; + u64 control; + int err; + + iommu->iommu_control = pbm->controller_regs + PSYCHO_IOMMU_CONTROL; + iommu->iommu_tsbbase = pbm->controller_regs + PSYCHO_IOMMU_TSBBASE; + iommu->iommu_flush = pbm->controller_regs + PSYCHO_IOMMU_FLUSH; + iommu->iommu_tags = pbm->controller_regs + PSYCHO_IOMMU_TAG; + iommu->write_complete_reg = (pbm->controller_regs + + write_complete_offset); + + iommu->iommu_ctxflush = 0; + + control = upa_readq(iommu->iommu_control); + control |= PSYCHO_IOMMU_CTRL_DENAB; + upa_writeq(control, iommu->iommu_control); + + psycho_iommu_flush(pbm); + + /* Leave diag mode enabled for full-flushing done in pci_iommu.c */ + err = iommu_table_init(iommu, tsbsize * 1024 * 8, + dvma_offset, dma_mask, pbm->numa_node); + if (err) + return err; + + upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase); + + control = upa_readq(iommu->iommu_control); + control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ); + control |= PSYCHO_IOMMU_CTRL_ENAB; + + switch (tsbsize) { + case 64: + control |= PSYCHO_IOMMU_TSBSZ_64K; + break; + case 128: + control |= PSYCHO_IOMMU_TSBSZ_128K; + break; + default: + return -EINVAL; + } + + upa_writeq(control, iommu->iommu_control); + + return 0; + +} + +void psycho_pbm_init_common(struct pci_pbm_info *pbm, struct of_device *op, + const char *chip_name, int chip_type) +{ + struct device_node *dp = op->node; + + pbm->name = dp->full_name; + pbm->numa_node = -1; + pbm->chip_type = chip_type; + pbm->chip_version = of_getintprop_default(dp, "version#", 0); + pbm->chip_revision = of_getintprop_default(dp, "module-revision#", 0); + pbm->op = op; + pbm->pci_ops = &sun4u_pci_ops; + pbm->config_space_reg_bits = 8; + pbm->index = pci_num_pbms++; + pci_get_pbm_props(pbm); + pci_determine_mem_io_space(pbm); + + printk(KERN_INFO "%s: %s PCI Bus Module ver[%x:%x]\n", + pbm->name, chip_name, + pbm->chip_version, pbm->chip_revision); +} diff --git a/arch/sparc/kernel/psycho_common.h b/arch/sparc/kernel/psycho_common.h new file mode 100644 index 000000000000..092c278ef28d --- /dev/null +++ b/arch/sparc/kernel/psycho_common.h @@ -0,0 +1,48 @@ +#ifndef _PSYCHO_COMMON_H +#define _PSYCHO_COMMON_H + +/* U2P Programmer's Manual, page 13-55, configuration space + * address format: + * + * 32 24 23 16 15 11 10 8 7 2 1 0 + * --------------------------------------------------------- + * |0 0 0 0 0 0 0 0 1| bus | device | function | reg | 0 0 | + * --------------------------------------------------------- + */ +#define PSYCHO_CONFIG_BASE(PBM) \ + ((PBM)->config_space | (1UL << 24)) +#define PSYCHO_CONFIG_ENCODE(BUS, DEVFN, REG) \ + (((unsigned long)(BUS) << 16) | \ + ((unsigned long)(DEVFN) << 8) | \ + ((unsigned long)(REG))) + +static inline void *psycho_pci_config_mkaddr(struct pci_pbm_info *pbm, + unsigned char bus, + unsigned int devfn, + int where) +{ + return (void *) + (PSYCHO_CONFIG_BASE(pbm) | + PSYCHO_CONFIG_ENCODE(bus, devfn, where)); +} + +enum psycho_error_type { + UE_ERR, CE_ERR, PCI_ERR +}; + +extern void psycho_check_iommu_error(struct pci_pbm_info *pbm, + unsigned long afsr, + unsigned long afar, + enum psycho_error_type type); + +extern irqreturn_t psycho_pcierr_intr(int irq, void *dev_id); + +extern int psycho_iommu_init(struct pci_pbm_info *pbm, int tsbsize, + u32 dvma_offset, u32 dma_mask, + unsigned long write_complete_offset); + +extern void psycho_pbm_init_common(struct pci_pbm_info *pbm, + struct of_device *op, + const char *chip_name, int chip_type); + +#endif /* _PSYCHO_COMMON_H */ diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c new file mode 100644 index 000000000000..a941c610e7ce --- /dev/null +++ b/arch/sparc/kernel/ptrace_64.c @@ -0,0 +1,1090 @@ +/* ptrace.c: Sparc process tracing support. + * + * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * + * Based upon code written by Ross Biro, Linus Torvalds, Bob Manson, + * and David Mosberger. + * + * Added Linux support -miguel (weird, eh?, the original code was meant + * to emulate SunOS). + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/security.h> +#include <linux/seccomp.h> +#include <linux/audit.h> +#include <linux/signal.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/compat.h> +#include <linux/elf.h> + +#include <asm/asi.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/psrcompat.h> +#include <asm/visasm.h> +#include <asm/spitfire.h> +#include <asm/page.h> +#include <asm/cpudata.h> +#include <asm/cacheflush.h> + +#include "entry.h" + +/* #define ALLOW_INIT_TRACING */ + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* nothing to do */ +} + +/* To get the necessary page struct, access_process_vm() first calls + * get_user_pages(). This has done a flush_dcache_page() on the + * accessed page. Then our caller (copy_{to,from}_user_page()) did + * to memcpy to read/write the data from that page. + * + * Now, the only thing we have to do is: + * 1) flush the D-cache if it's possible than an illegal alias + * has been created + * 2) flush the I-cache if this is pre-cheetah and we did a write + */ +void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *kaddr, + unsigned long len, int write) +{ + BUG_ON(len > PAGE_SIZE); + + if (tlb_type == hypervisor) + return; + + preempt_disable(); + +#ifdef DCACHE_ALIASING_POSSIBLE + /* If bit 13 of the kernel address we used to access the + * user page is the same as the virtual address that page + * is mapped to in the user's address space, we can skip the + * D-cache flush. + */ + if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) { + unsigned long start = __pa(kaddr); + unsigned long end = start + len; + unsigned long dcache_line_size; + + dcache_line_size = local_cpu_data().dcache_line_size; + + if (tlb_type == spitfire) { + for (; start < end; start += dcache_line_size) + spitfire_put_dcache_tag(start & 0x3fe0, 0x0); + } else { + start &= ~(dcache_line_size - 1); + for (; start < end; start += dcache_line_size) + __asm__ __volatile__( + "stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (start), + "i" (ASI_DCACHE_INVALIDATE)); + } + } +#endif + if (write && tlb_type == spitfire) { + unsigned long start = (unsigned long) kaddr; + unsigned long end = start + len; + unsigned long icache_line_size; + + icache_line_size = local_cpu_data().icache_line_size; + + for (; start < end; start += icache_line_size) + flushi(start); + } + + preempt_enable(); +} + +static int get_from_target(struct task_struct *target, unsigned long uaddr, + void *kbuf, int len) +{ + if (target == current) { + if (copy_from_user(kbuf, (void __user *) uaddr, len)) + return -EFAULT; + } else { + int len2 = access_process_vm(target, uaddr, kbuf, len, 0); + if (len2 != len) + return -EFAULT; + } + return 0; +} + +static int set_to_target(struct task_struct *target, unsigned long uaddr, + void *kbuf, int len) +{ + if (target == current) { + if (copy_to_user((void __user *) uaddr, kbuf, len)) + return -EFAULT; + } else { + int len2 = access_process_vm(target, uaddr, kbuf, len, 1); + if (len2 != len) + return -EFAULT; + } + return 0; +} + +static int regwindow64_get(struct task_struct *target, + const struct pt_regs *regs, + struct reg_window *wbuf) +{ + unsigned long rw_addr = regs->u_regs[UREG_I6]; + + if (test_tsk_thread_flag(current, TIF_32BIT)) { + struct reg_window32 win32; + int i; + + if (get_from_target(target, rw_addr, &win32, sizeof(win32))) + return -EFAULT; + for (i = 0; i < 8; i++) + wbuf->locals[i] = win32.locals[i]; + for (i = 0; i < 8; i++) + wbuf->ins[i] = win32.ins[i]; + } else { + rw_addr += STACK_BIAS; + if (get_from_target(target, rw_addr, wbuf, sizeof(*wbuf))) + return -EFAULT; + } + + return 0; +} + +static int regwindow64_set(struct task_struct *target, + const struct pt_regs *regs, + struct reg_window *wbuf) +{ + unsigned long rw_addr = regs->u_regs[UREG_I6]; + + if (test_tsk_thread_flag(current, TIF_32BIT)) { + struct reg_window32 win32; + int i; + + for (i = 0; i < 8; i++) + win32.locals[i] = wbuf->locals[i]; + for (i = 0; i < 8; i++) + win32.ins[i] = wbuf->ins[i]; + + if (set_to_target(target, rw_addr, &win32, sizeof(win32))) + return -EFAULT; + } else { + rw_addr += STACK_BIAS; + if (set_to_target(target, rw_addr, wbuf, sizeof(*wbuf))) + return -EFAULT; + } + + return 0; +} + +enum sparc_regset { + REGSET_GENERAL, + REGSET_FP, +}; + +static int genregs64_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + int ret; + + if (target == current) + flushw_user(); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->u_regs, + 0, 16 * sizeof(u64)); + if (!ret && count && pos < (32 * sizeof(u64))) { + struct reg_window window; + + if (regwindow64_get(target, regs, &window)) + return -EFAULT; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &window, + 16 * sizeof(u64), + 32 * sizeof(u64)); + } + + if (!ret) { + /* TSTATE, TPC, TNPC */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->tstate, + 32 * sizeof(u64), + 35 * sizeof(u64)); + } + + if (!ret) { + unsigned long y = regs->y; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &y, + 35 * sizeof(u64), + 36 * sizeof(u64)); + } + + if (!ret) { + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 36 * sizeof(u64), -1); + + } + return ret; +} + +static int genregs64_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + int ret; + + if (target == current) + flushw_user(); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->u_regs, + 0, 16 * sizeof(u64)); + if (!ret && count && pos < (32 * sizeof(u64))) { + struct reg_window window; + + if (regwindow64_get(target, regs, &window)) + return -EFAULT; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &window, + 16 * sizeof(u64), + 32 * sizeof(u64)); + + if (!ret && + regwindow64_set(target, regs, &window)) + return -EFAULT; + } + + if (!ret && count > 0) { + unsigned long tstate; + + /* TSTATE */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &tstate, + 32 * sizeof(u64), + 33 * sizeof(u64)); + if (!ret) { + /* Only the condition codes and the "in syscall" + * state can be modified in the %tstate register. + */ + tstate &= (TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL); + regs->tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL); + regs->tstate |= tstate; + } + } + + if (!ret) { + /* TPC, TNPC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->tpc, + 33 * sizeof(u64), + 35 * sizeof(u64)); + } + + if (!ret) { + unsigned long y; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &y, + 35 * sizeof(u64), + 36 * sizeof(u64)); + if (!ret) + regs->y = y; + } + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + 36 * sizeof(u64), -1); + + return ret; +} + +static int fpregs64_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const unsigned long *fpregs = task_thread_info(target)->fpregs; + unsigned long fprs, fsr, gsr; + int ret; + + if (target == current) + save_and_clear_fpu(); + + fprs = task_thread_info(target)->fpsaved[0]; + + if (fprs & FPRS_DL) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + fpregs, + 0, 16 * sizeof(u64)); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, + 16 * sizeof(u64)); + + if (!ret) { + if (fprs & FPRS_DU) + ret = user_regset_copyout(&pos, &count, + &kbuf, &ubuf, + fpregs + 16, + 16 * sizeof(u64), + 32 * sizeof(u64)); + else + ret = user_regset_copyout_zero(&pos, &count, + &kbuf, &ubuf, + 16 * sizeof(u64), + 32 * sizeof(u64)); + } + + if (fprs & FPRS_FEF) { + fsr = task_thread_info(target)->xfsr[0]; + gsr = task_thread_info(target)->gsr[0]; + } else { + fsr = gsr = 0; + } + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fsr, + 32 * sizeof(u64), + 33 * sizeof(u64)); + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &gsr, + 33 * sizeof(u64), + 34 * sizeof(u64)); + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fprs, + 34 * sizeof(u64), + 35 * sizeof(u64)); + + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 35 * sizeof(u64), -1); + + return ret; +} + +static int fpregs64_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long *fpregs = task_thread_info(target)->fpregs; + unsigned long fprs; + int ret; + + if (target == current) + save_and_clear_fpu(); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, + 0, 32 * sizeof(u64)); + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + task_thread_info(target)->xfsr, + 32 * sizeof(u64), + 33 * sizeof(u64)); + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + task_thread_info(target)->gsr, + 33 * sizeof(u64), + 34 * sizeof(u64)); + + fprs = task_thread_info(target)->fpsaved[0]; + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fprs, + 34 * sizeof(u64), + 35 * sizeof(u64)); + } + + fprs |= (FPRS_FEF | FPRS_DL | FPRS_DU); + task_thread_info(target)->fpsaved[0] = fprs; + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + 35 * sizeof(u64), -1); + return ret; +} + +static const struct user_regset sparc64_regsets[] = { + /* Format is: + * G0 --> G7 + * O0 --> O7 + * L0 --> L7 + * I0 --> I7 + * TSTATE, TPC, TNPC, Y + */ + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = 36, + .size = sizeof(u64), .align = sizeof(u64), + .get = genregs64_get, .set = genregs64_set + }, + /* Format is: + * F0 --> F63 + * FSR + * GSR + * FPRS + */ + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = 35, + .size = sizeof(u64), .align = sizeof(u64), + .get = fpregs64_get, .set = fpregs64_set + }, +}; + +static const struct user_regset_view user_sparc64_view = { + .name = "sparc64", .e_machine = EM_SPARCV9, + .regsets = sparc64_regsets, .n = ARRAY_SIZE(sparc64_regsets) +}; + +#ifdef CONFIG_COMPAT +static int genregs32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + compat_ulong_t __user *reg_window; + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + if (target == current) + flushw_user(); + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) { + for (; count > 0 && pos < 16; count--) + *k++ = regs->u_regs[pos++]; + + reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6]; + if (target == current) { + for (; count > 0 && pos < 32; count--) { + if (get_user(*k++, ®_window[pos++])) + return -EFAULT; + } + } else { + for (; count > 0 && pos < 32; count--) { + if (access_process_vm(target, + (unsigned long) + ®_window[pos], + k, sizeof(*k), 0) + != sizeof(*k)) + return -EFAULT; + k++; + pos++; + } + } + } else { + for (; count > 0 && pos < 16; count--) { + if (put_user((compat_ulong_t) regs->u_regs[pos++], u++)) + return -EFAULT; + } + + reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6]; + if (target == current) { + for (; count > 0 && pos < 32; count--) { + if (get_user(reg, ®_window[pos++]) || + put_user(reg, u++)) + return -EFAULT; + } + } else { + for (; count > 0 && pos < 32; count--) { + if (access_process_vm(target, + (unsigned long) + ®_window[pos], + ®, sizeof(reg), 0) + != sizeof(reg)) + return -EFAULT; + if (access_process_vm(target, + (unsigned long) u, + ®, sizeof(reg), 1) + != sizeof(reg)) + return -EFAULT; + pos++; + u++; + } + } + } + while (count > 0) { + switch (pos) { + case 32: /* PSR */ + reg = tstate_to_psr(regs->tstate); + break; + case 33: /* PC */ + reg = regs->tpc; + break; + case 34: /* NPC */ + reg = regs->tnpc; + break; + case 35: /* Y */ + reg = regs->y; + break; + case 36: /* WIM */ + case 37: /* TBR */ + reg = 0; + break; + default: + goto finish; + } + + if (kbuf) + *k++ = reg; + else if (put_user(reg, u++)) + return -EFAULT; + pos++; + count--; + } +finish: + pos *= sizeof(reg); + count *= sizeof(reg); + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 38 * sizeof(reg), -1); +} + +static int genregs32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + compat_ulong_t __user *reg_window; + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + if (target == current) + flushw_user(); + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) { + for (; count > 0 && pos < 16; count--) + regs->u_regs[pos++] = *k++; + + reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6]; + if (target == current) { + for (; count > 0 && pos < 32; count--) { + if (put_user(*k++, ®_window[pos++])) + return -EFAULT; + } + } else { + for (; count > 0 && pos < 32; count--) { + if (access_process_vm(target, + (unsigned long) + ®_window[pos], + (void *) k, + sizeof(*k), 1) + != sizeof(*k)) + return -EFAULT; + k++; + pos++; + } + } + } else { + for (; count > 0 && pos < 16; count--) { + if (get_user(reg, u++)) + return -EFAULT; + regs->u_regs[pos++] = reg; + } + + reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6]; + if (target == current) { + for (; count > 0 && pos < 32; count--) { + if (get_user(reg, u++) || + put_user(reg, ®_window[pos++])) + return -EFAULT; + } + } else { + for (; count > 0 && pos < 32; count--) { + if (access_process_vm(target, + (unsigned long) + u, + ®, sizeof(reg), 0) + != sizeof(reg)) + return -EFAULT; + if (access_process_vm(target, + (unsigned long) + ®_window[pos], + ®, sizeof(reg), 1) + != sizeof(reg)) + return -EFAULT; + pos++; + u++; + } + } + } + while (count > 0) { + unsigned long tstate; + + if (kbuf) + reg = *k++; + else if (get_user(reg, u++)) + return -EFAULT; + + switch (pos) { + case 32: /* PSR */ + tstate = regs->tstate; + tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL); + tstate |= psr_to_tstate_icc(reg); + if (reg & PSR_SYSCALL) + tstate |= TSTATE_SYSCALL; + regs->tstate = tstate; + break; + case 33: /* PC */ + regs->tpc = reg; + break; + case 34: /* NPC */ + regs->tnpc = reg; + break; + case 35: /* Y */ + regs->y = reg; + break; + case 36: /* WIM */ + case 37: /* TBR */ + break; + default: + goto finish; + } + + pos++; + count--; + } +finish: + pos *= sizeof(reg); + count *= sizeof(reg); + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + 38 * sizeof(reg), -1); +} + +static int fpregs32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const unsigned long *fpregs = task_thread_info(target)->fpregs; + compat_ulong_t enabled; + unsigned long fprs; + compat_ulong_t fsr; + int ret = 0; + + if (target == current) + save_and_clear_fpu(); + + fprs = task_thread_info(target)->fpsaved[0]; + if (fprs & FPRS_FEF) { + fsr = task_thread_info(target)->xfsr[0]; + enabled = 1; + } else { + fsr = 0; + enabled = 0; + } + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + fpregs, + 0, 32 * sizeof(u32)); + + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 32 * sizeof(u32), + 33 * sizeof(u32)); + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fsr, + 33 * sizeof(u32), + 34 * sizeof(u32)); + + if (!ret) { + compat_ulong_t val; + + val = (enabled << 8) | (8 << 16); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &val, + 34 * sizeof(u32), + 35 * sizeof(u32)); + } + + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 35 * sizeof(u32), -1); + + return ret; +} + +static int fpregs32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long *fpregs = task_thread_info(target)->fpregs; + unsigned long fprs; + int ret; + + if (target == current) + save_and_clear_fpu(); + + fprs = task_thread_info(target)->fpsaved[0]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, + 0, 32 * sizeof(u32)); + if (!ret) + user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + 32 * sizeof(u32), + 33 * sizeof(u32)); + if (!ret && count > 0) { + compat_ulong_t fsr; + unsigned long val; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fsr, + 33 * sizeof(u32), + 34 * sizeof(u32)); + if (!ret) { + val = task_thread_info(target)->xfsr[0]; + val &= 0xffffffff00000000UL; + val |= fsr; + task_thread_info(target)->xfsr[0] = val; + } + } + + fprs |= (FPRS_FEF | FPRS_DL); + task_thread_info(target)->fpsaved[0] = fprs; + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + 34 * sizeof(u32), -1); + return ret; +} + +static const struct user_regset sparc32_regsets[] = { + /* Format is: + * G0 --> G7 + * O0 --> O7 + * L0 --> L7 + * I0 --> I7 + * PSR, PC, nPC, Y, WIM, TBR + */ + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = 38, + .size = sizeof(u32), .align = sizeof(u32), + .get = genregs32_get, .set = genregs32_set + }, + /* Format is: + * F0 --> F31 + * empty 32-bit word + * FSR (32--bit word) + * FPU QUEUE COUNT (8-bit char) + * FPU QUEUE ENTRYSIZE (8-bit char) + * FPU ENABLED (8-bit char) + * empty 8-bit char + * FPU QUEUE (64 32-bit ints) + */ + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = 99, + .size = sizeof(u32), .align = sizeof(u32), + .get = fpregs32_get, .set = fpregs32_set + }, +}; + +static const struct user_regset_view user_sparc32_view = { + .name = "sparc", .e_machine = EM_SPARC, + .regsets = sparc32_regsets, .n = ARRAY_SIZE(sparc32_regsets) +}; +#endif /* CONFIG_COMPAT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_32BIT)) + return &user_sparc32_view; +#endif + return &user_sparc64_view; +} + +#ifdef CONFIG_COMPAT +struct compat_fps { + unsigned int regs[32]; + unsigned int fsr; + unsigned int flags; + unsigned int extra; + unsigned int fpqd; + struct compat_fq { + unsigned int insnaddr; + unsigned int insn; + } fpq[16]; +}; + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + const struct user_regset_view *view = task_user_regset_view(current); + compat_ulong_t caddr2 = task_pt_regs(current)->u_regs[UREG_I4]; + struct pt_regs32 __user *pregs; + struct compat_fps __user *fps; + unsigned long addr2 = caddr2; + unsigned long addr = caddr; + unsigned long data = cdata; + int ret; + + pregs = (struct pt_regs32 __user *) addr; + fps = (struct compat_fps __user *) addr; + + switch (request) { + case PTRACE_PEEKUSR: + ret = (addr != 0) ? -EIO : 0; + break; + + case PTRACE_GETREGS: + ret = copy_regset_to_user(child, view, REGSET_GENERAL, + 32 * sizeof(u32), + 4 * sizeof(u32), + &pregs->psr); + if (!ret) + ret = copy_regset_to_user(child, view, REGSET_GENERAL, + 1 * sizeof(u32), + 15 * sizeof(u32), + &pregs->u_regs[0]); + break; + + case PTRACE_SETREGS: + ret = copy_regset_from_user(child, view, REGSET_GENERAL, + 32 * sizeof(u32), + 4 * sizeof(u32), + &pregs->psr); + if (!ret) + ret = copy_regset_from_user(child, view, REGSET_GENERAL, + 1 * sizeof(u32), + 15 * sizeof(u32), + &pregs->u_regs[0]); + break; + + case PTRACE_GETFPREGS: + ret = copy_regset_to_user(child, view, REGSET_FP, + 0 * sizeof(u32), + 32 * sizeof(u32), + &fps->regs[0]); + if (!ret) + ret = copy_regset_to_user(child, view, REGSET_FP, + 33 * sizeof(u32), + 1 * sizeof(u32), + &fps->fsr); + if (!ret) { + if (__put_user(0, &fps->flags) || + __put_user(0, &fps->extra) || + __put_user(0, &fps->fpqd) || + clear_user(&fps->fpq[0], 32 * sizeof(unsigned int))) + ret = -EFAULT; + } + break; + + case PTRACE_SETFPREGS: + ret = copy_regset_from_user(child, view, REGSET_FP, + 0 * sizeof(u32), + 32 * sizeof(u32), + &fps->regs[0]); + if (!ret) + ret = copy_regset_from_user(child, view, REGSET_FP, + 33 * sizeof(u32), + 1 * sizeof(u32), + &fps->fsr); + break; + + case PTRACE_READTEXT: + case PTRACE_READDATA: + ret = ptrace_readdata(child, addr, + (char __user *)addr2, data); + if (ret == data) + ret = 0; + else if (ret >= 0) + ret = -EIO; + break; + + case PTRACE_WRITETEXT: + case PTRACE_WRITEDATA: + ret = ptrace_writedata(child, (char __user *) addr2, + addr, data); + if (ret == data) + ret = 0; + else if (ret >= 0) + ret = -EIO; + break; + + default: + if (request == PTRACE_SPARC_DETACH) + request = PTRACE_DETACH; + ret = compat_ptrace_request(child, request, addr, data); + break; + } + + return ret; +} +#endif /* CONFIG_COMPAT */ + +struct fps { + unsigned int regs[64]; + unsigned long fsr; +}; + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + const struct user_regset_view *view = task_user_regset_view(current); + unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4]; + struct pt_regs __user *pregs; + struct fps __user *fps; + int ret; + + pregs = (struct pt_regs __user *) (unsigned long) addr; + fps = (struct fps __user *) (unsigned long) addr; + + switch (request) { + case PTRACE_PEEKUSR: + ret = (addr != 0) ? -EIO : 0; + break; + + case PTRACE_GETREGS64: + ret = copy_regset_to_user(child, view, REGSET_GENERAL, + 1 * sizeof(u64), + 15 * sizeof(u64), + &pregs->u_regs[0]); + if (!ret) { + /* XXX doesn't handle 'y' register correctly XXX */ + ret = copy_regset_to_user(child, view, REGSET_GENERAL, + 32 * sizeof(u64), + 4 * sizeof(u64), + &pregs->tstate); + } + break; + + case PTRACE_SETREGS64: + ret = copy_regset_from_user(child, view, REGSET_GENERAL, + 1 * sizeof(u64), + 15 * sizeof(u64), + &pregs->u_regs[0]); + if (!ret) { + /* XXX doesn't handle 'y' register correctly XXX */ + ret = copy_regset_from_user(child, view, REGSET_GENERAL, + 32 * sizeof(u64), + 4 * sizeof(u64), + &pregs->tstate); + } + break; + + case PTRACE_GETFPREGS64: + ret = copy_regset_to_user(child, view, REGSET_FP, + 0 * sizeof(u64), + 33 * sizeof(u64), + fps); + break; + + case PTRACE_SETFPREGS64: + ret = copy_regset_from_user(child, view, REGSET_FP, + 0 * sizeof(u64), + 33 * sizeof(u64), + fps); + break; + + case PTRACE_READTEXT: + case PTRACE_READDATA: + ret = ptrace_readdata(child, addr, + (char __user *)addr2, data); + if (ret == data) + ret = 0; + else if (ret >= 0) + ret = -EIO; + break; + + case PTRACE_WRITETEXT: + case PTRACE_WRITEDATA: + ret = ptrace_writedata(child, (char __user *) addr2, + addr, data); + if (ret == data) + ret = 0; + else if (ret >= 0) + ret = -EIO; + break; + + default: + if (request == PTRACE_SPARC_DETACH) + request = PTRACE_DETACH; + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + int ret = 0; + + /* do the secure computing check first */ + secure_computing(regs->u_regs[UREG_G1]); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + ret = tracehook_report_syscall_entry(regs); + + if (unlikely(current->audit_context) && !ret) + audit_syscall_entry((test_thread_flag(TIF_32BIT) ? + AUDIT_ARCH_SPARC : + AUDIT_ARCH_SPARC64), + regs->u_regs[UREG_G1], + regs->u_regs[UREG_I0], + regs->u_regs[UREG_I1], + regs->u_regs[UREG_I2], + regs->u_regs[UREG_I3]); + + return ret; +} + +asmlinkage void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) { + unsigned long tstate = regs->tstate; + int result = AUDITSC_SUCCESS; + + if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) + result = AUDITSC_FAILURE; + + audit_syscall_exit(result, regs->u_regs[UREG_I0]); + } + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} diff --git a/arch/sparc/kernel/reboot.c b/arch/sparc/kernel/reboot.c new file mode 100644 index 000000000000..ef89d3d69748 --- /dev/null +++ b/arch/sparc/kernel/reboot.c @@ -0,0 +1,53 @@ +/* reboot.c: reboot/shutdown/halt/poweroff handling + * + * Copyright (C) 2008 David S. Miller <davem@davemloft.net> + */ +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/module.h> +#include <linux/pm.h> + +#include <asm/system.h> +#include <asm/oplib.h> +#include <asm/prom.h> + +/* sysctl - toggle power-off restriction for serial console + * systems in machine_power_off() + */ +int scons_pwroff = 1; + +/* This isn't actually used, it exists merely to satisfy the + * reference in kernel/sys.c + */ +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); + +void machine_power_off(void) +{ + if (strcmp(of_console_device->type, "serial") || scons_pwroff) + prom_halt_power_off(); + + prom_halt(); +} + +void machine_halt(void) +{ + prom_halt(); + panic("Halt failed!"); +} + +void machine_restart(char *cmd) +{ + char *p; + + p = strchr(reboot_command, '\n'); + if (p) + *p = 0; + if (cmd) + prom_reboot(cmd); + if (*reboot_command) + prom_reboot(reboot_command); + prom_reboot(""); + panic("Reboot failed!"); +} + diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S new file mode 100644 index 000000000000..fd3cee4d117c --- /dev/null +++ b/arch/sparc/kernel/rtrap_64.S @@ -0,0 +1,450 @@ +/* + * rtrap.S: Preparing for return from trap on Sparc V9. + * + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + */ + + +#include <asm/asi.h> +#include <asm/pstate.h> +#include <asm/ptrace.h> +#include <asm/spitfire.h> +#include <asm/head.h> +#include <asm/visasm.h> +#include <asm/processor.h> + +#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE) +#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV) +#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) + + .text + .align 32 +__handle_softirq: + call do_softirq + nop + ba,a,pt %xcc, __handle_softirq_continue + nop +__handle_preemption: + call schedule + wrpr %g0, RTRAP_PSTATE, %pstate + ba,pt %xcc, __handle_preemption_continue + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + +__handle_user_windows: + call fault_in_user_windows + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + /* Redo sched+sig checks */ + ldx [%g6 + TI_FLAGS], %l0 + andcc %l0, _TIF_NEED_RESCHED, %g0 + + be,pt %xcc, 1f + nop + call schedule + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + ldx [%g6 + TI_FLAGS], %l0 + +1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0 + be,pt %xcc, __handle_user_windows_continue + nop + mov %l5, %o1 + add %sp, PTREGS_OFF, %o0 + mov %l0, %o2 + + call do_notify_resume + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + /* Signal delivery can modify pt_regs tstate, so we must + * reload it. + */ + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 + sethi %hi(0xf << 20), %l4 + and %l1, %l4, %l4 + ba,pt %xcc, __handle_user_windows_continue + + andn %l1, %l4, %l1 +__handle_perfctrs: + call update_perfctrs + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + ldub [%g6 + TI_WSAVED], %o2 + brz,pt %o2, 1f + nop + /* Redo userwin+sched+sig checks */ + call fault_in_user_windows + + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + ldx [%g6 + TI_FLAGS], %l0 + andcc %l0, _TIF_NEED_RESCHED, %g0 + be,pt %xcc, 1f + + nop + call schedule + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + ldx [%g6 + TI_FLAGS], %l0 +1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0 + + be,pt %xcc, __handle_perfctrs_continue + sethi %hi(TSTATE_PEF), %o0 + mov %l5, %o1 + add %sp, PTREGS_OFF, %o0 + mov %l0, %o2 + call do_notify_resume + + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + /* Signal delivery can modify pt_regs tstate, so we must + * reload it. + */ + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 + sethi %hi(0xf << 20), %l4 + and %l1, %l4, %l4 + andn %l1, %l4, %l1 + ba,pt %xcc, __handle_perfctrs_continue + + sethi %hi(TSTATE_PEF), %o0 +__handle_userfpu: + rd %fprs, %l5 + andcc %l5, FPRS_FEF, %g0 + sethi %hi(TSTATE_PEF), %o0 + be,a,pn %icc, __handle_userfpu_continue + andn %l1, %o0, %l1 + ba,a,pt %xcc, __handle_userfpu_continue + +__handle_signal: + mov %l5, %o1 + add %sp, PTREGS_OFF, %o0 + mov %l0, %o2 + call do_notify_resume + wrpr %g0, RTRAP_PSTATE, %pstate + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + + /* Signal delivery can modify pt_regs tstate, so we must + * reload it. + */ + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 + sethi %hi(0xf << 20), %l4 + and %l1, %l4, %l4 + ba,pt %xcc, __handle_signal_continue + andn %l1, %l4, %l1 + + /* When returning from a NMI (%pil==15) interrupt we want to + * avoid running softirqs, doing IRQ tracing, preempting, etc. + */ + .globl rtrap_nmi +rtrap_nmi: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 + sethi %hi(0xf << 20), %l4 + and %l1, %l4, %l4 + andn %l1, %l4, %l1 + srl %l4, 20, %l4 + ba,pt %xcc, rtrap_no_irq_enable + wrpr %l4, %pil + + .align 64 + .globl rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall +rtrap_irq: +rtrap: +#ifndef CONFIG_SMP + sethi %hi(per_cpu____cpu_data), %l0 + lduw [%l0 + %lo(per_cpu____cpu_data)], %l1 +#else + sethi %hi(per_cpu____cpu_data), %l0 + or %l0, %lo(per_cpu____cpu_data), %l0 + lduw [%l0 + %g5], %l1 +#endif + cmp %l1, 0 + + /* mm/ultra.S:xcall_report_regs KNOWS about this load. */ + bne,pn %icc, __handle_softirq + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 +__handle_softirq_continue: +rtrap_xcall: + sethi %hi(0xf << 20), %l4 + and %l1, %l4, %l4 + andn %l1, %l4, %l1 + srl %l4, 20, %l4 +#ifdef CONFIG_TRACE_IRQFLAGS + brnz,pn %l4, rtrap_no_irq_enable + nop + call trace_hardirqs_on + nop + wrpr %l4, %pil +#endif +rtrap_no_irq_enable: + andcc %l1, TSTATE_PRIV, %l3 + bne,pn %icc, to_kernel + nop + + /* We must hold IRQs off and atomically test schedule+signal + * state, then hold them off all the way back to userspace. + * If we are returning to kernel, none of this matters. Note + * that we are disabling interrupts via PSTATE_IE, not using + * %pil. + * + * If we do not do this, there is a window where we would do + * the tests, later the signal/resched event arrives but we do + * not process it since we are still in kernel mode. It would + * take until the next local IRQ before the signal/resched + * event would be handled. + * + * This also means that if we have to deal with performance + * counters or user windows, we have to redo all of these + * sched+signal checks with IRQs disabled. + */ +to_user: wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + wrpr 0, %pil +__handle_preemption_continue: + ldx [%g6 + TI_FLAGS], %l0 + sethi %hi(_TIF_USER_WORK_MASK), %o0 + or %o0, %lo(_TIF_USER_WORK_MASK), %o0 + andcc %l0, %o0, %g0 + sethi %hi(TSTATE_PEF), %o0 + be,pt %xcc, user_nowork + andcc %l1, %o0, %g0 + andcc %l0, _TIF_NEED_RESCHED, %g0 + bne,pn %xcc, __handle_preemption + andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0 + bne,pn %xcc, __handle_signal +__handle_signal_continue: + ldub [%g6 + TI_WSAVED], %o2 + brnz,pn %o2, __handle_user_windows + nop +__handle_user_windows_continue: + ldx [%g6 + TI_FLAGS], %l5 + andcc %l5, _TIF_PERFCTR, %g0 + sethi %hi(TSTATE_PEF), %o0 + bne,pn %xcc, __handle_perfctrs +__handle_perfctrs_continue: + andcc %l1, %o0, %g0 + + /* This fpdepth clear is necessary for non-syscall rtraps only */ +user_nowork: + bne,pn %xcc, __handle_userfpu + stb %g0, [%g6 + TI_FPDEPTH] +__handle_userfpu_continue: + +rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + ldx [%sp + PTREGS_OFF + PT_V9_G2], %g2 + + ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3 + ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4 + ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 + brz,pt %l3, 1f + mov %g6, %l2 + + /* Must do this before thread reg is clobbered below. */ + LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2) +1: + ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 + ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 + + /* Normal globals are restored, go to trap globals. */ +661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + nop + .section .sun4v_2insn_patch, "ax" + .word 661b + wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate + SET_GL(1) + .previous + + mov %l2, %g6 + + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + ldx [%sp + PTREGS_OFF + PT_V9_I6], %i6 + ldx [%sp + PTREGS_OFF + PT_V9_I7], %i7 + ldx [%sp + PTREGS_OFF + PT_V9_TPC], %l2 + ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %o2 + + ld [%sp + PTREGS_OFF + PT_V9_Y], %o3 + wr %o3, %g0, %y + wrpr %l4, 0x0, %pil + wrpr %g0, 0x1, %tl + andn %l1, TSTATE_SYSCALL, %l1 + wrpr %l1, %g0, %tstate + wrpr %l2, %g0, %tpc + wrpr %o2, %g0, %tnpc + + brnz,pn %l3, kern_rtt + mov PRIMARY_CONTEXT, %l7 + +661: ldxa [%l7 + %l7] ASI_DMMU, %l0 + .section .sun4v_1insn_patch, "ax" + .word 661b + ldxa [%l7 + %l7] ASI_MMU, %l0 + .previous + + sethi %hi(sparc64_kern_pri_nuc_bits), %l1 + ldx [%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1 + or %l0, %l1, %l0 + +661: stxa %l0, [%l7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %l0, [%l7] ASI_MMU + .previous + + sethi %hi(KERNBASE), %l7 + flush %l7 + rdpr %wstate, %l1 + rdpr %otherwin, %l2 + srl %l1, 3, %l1 + + wrpr %l2, %g0, %canrestore + wrpr %l1, %g0, %wstate + brnz,pt %l2, user_rtt_restore + wrpr %g0, %g0, %otherwin + + ldx [%g6 + TI_FLAGS], %g3 + wr %g0, ASI_AIUP, %asi + rdpr %cwp, %g1 + andcc %g3, _TIF_32BIT, %g0 + sub %g1, 1, %g1 + bne,pt %xcc, user_rtt_fill_32bit + wrpr %g1, %cwp + ba,a,pt %xcc, user_rtt_fill_64bit + +user_rtt_fill_fixup: + rdpr %cwp, %g1 + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + + rdpr %wstate, %g2 + sll %g2, 3, %g2 + wrpr %g2, 0x0, %wstate + + /* We know %canrestore and %otherwin are both zero. */ + + sethi %hi(sparc64_kern_pri_context), %g2 + ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + sethi %hi(KERNBASE), %g1 + flush %g1 + + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + + mov %g6, %l1 + wrpr %g0, 0x0, %tl + +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + + wrpr %g0, RTRAP_PSTATE, %pstate + + mov %l1, %g6 + ldx [%g6 + TI_TASK], %g4 + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + +user_rtt_pre_restore: + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + +user_rtt_restore: + restore + rdpr %canrestore, %g1 + wrpr %g1, 0x0, %cleanwin + retry + nop + +kern_rtt: rdpr %canrestore, %g1 + brz,pn %g1, kern_rtt_fill + nop +kern_rtt_restore: + stw %g0, [%sp + PTREGS_OFF + PT_V9_MAGIC] + restore + retry + +to_kernel: +#ifdef CONFIG_PREEMPT + ldsw [%g6 + TI_PRE_COUNT], %l5 + brnz %l5, kern_fpucheck + ldx [%g6 + TI_FLAGS], %l5 + andcc %l5, _TIF_NEED_RESCHED, %g0 + be,pt %xcc, kern_fpucheck + nop + cmp %l4, 0 + bne,pn %xcc, kern_fpucheck + sethi %hi(PREEMPT_ACTIVE), %l6 + stw %l6, [%g6 + TI_PRE_COUNT] + call schedule + nop + ba,pt %xcc, rtrap + stw %g0, [%g6 + TI_PRE_COUNT] +#endif +kern_fpucheck: ldub [%g6 + TI_FPDEPTH], %l5 + brz,pt %l5, rt_continue + srl %l5, 1, %o0 + add %g6, TI_FPSAVED, %l6 + ldub [%l6 + %o0], %l2 + sub %l5, 2, %l5 + + add %g6, TI_GSR, %o1 + andcc %l2, (FPRS_FEF|FPRS_DU), %g0 + be,pt %icc, 2f + and %l2, FPRS_DL, %l6 + andcc %l2, FPRS_FEF, %g0 + be,pn %icc, 5f + sll %o0, 3, %o5 + rd %fprs, %g1 + + wr %g1, FPRS_FEF, %fprs + ldx [%o1 + %o5], %g1 + add %g6, TI_XFSR, %o1 + sll %o0, 8, %o2 + add %g6, TI_FPREGS, %o3 + brz,pn %l6, 1f + add %g6, TI_FPREGS+0x40, %o4 + + membar #Sync + ldda [%o3 + %o2] ASI_BLK_P, %f0 + ldda [%o4 + %o2] ASI_BLK_P, %f16 + membar #Sync +1: andcc %l2, FPRS_DU, %g0 + be,pn %icc, 1f + wr %g1, 0, %gsr + add %o2, 0x80, %o2 + membar #Sync + ldda [%o3 + %o2] ASI_BLK_P, %f32 + ldda [%o4 + %o2] ASI_BLK_P, %f48 +1: membar #Sync + ldx [%o1 + %o5], %fsr +2: stb %l5, [%g6 + TI_FPDEPTH] + ba,pt %xcc, rt_continue + nop +5: wr %g0, FPRS_FEF, %fprs + sll %o0, 8, %o2 + + add %g6, TI_FPREGS+0x80, %o3 + add %g6, TI_FPREGS+0xc0, %o4 + membar #Sync + ldda [%o3 + %o2] ASI_BLK_P, %f32 + ldda [%o4 + %o2] ASI_BLK_P, %f48 + membar #Sync + wr %g0, FPRS_DU, %fprs + ba,pt %xcc, rt_continue + stb %l5, [%g6 + TI_FPDEPTH] diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c new file mode 100644 index 000000000000..2ead310066d1 --- /dev/null +++ b/arch/sparc/kernel/sbus.c @@ -0,0 +1,674 @@ +/* + * sbus.c: UltraSparc SBUS controller support. + * + * Copyright (C) 1999 David S. Miller (davem@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/of_device.h> + +#include <asm/page.h> +#include <asm/io.h> +#include <asm/upa.h> +#include <asm/cache.h> +#include <asm/dma.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/oplib.h> +#include <asm/starfire.h> + +#include "iommu_common.h" + +#define MAP_BASE ((u32)0xc0000000) + +/* Offsets from iommu_regs */ +#define SYSIO_IOMMUREG_BASE 0x2400UL +#define IOMMU_CONTROL (0x2400UL - 0x2400UL) /* IOMMU control register */ +#define IOMMU_TSBBASE (0x2408UL - 0x2400UL) /* TSB base address register */ +#define IOMMU_FLUSH (0x2410UL - 0x2400UL) /* IOMMU flush register */ +#define IOMMU_VADIAG (0x4400UL - 0x2400UL) /* SBUS virtual address diagnostic */ +#define IOMMU_TAGCMP (0x4408UL - 0x2400UL) /* TLB tag compare diagnostics */ +#define IOMMU_LRUDIAG (0x4500UL - 0x2400UL) /* IOMMU LRU queue diagnostics */ +#define IOMMU_TAGDIAG (0x4580UL - 0x2400UL) /* TLB tag diagnostics */ +#define IOMMU_DRAMDIAG (0x4600UL - 0x2400UL) /* TLB data RAM diagnostics */ + +#define IOMMU_DRAM_VALID (1UL << 30UL) + +/* Offsets from strbuf_regs */ +#define SYSIO_STRBUFREG_BASE 0x2800UL +#define STRBUF_CONTROL (0x2800UL - 0x2800UL) /* Control */ +#define STRBUF_PFLUSH (0x2808UL - 0x2800UL) /* Page flush/invalidate */ +#define STRBUF_FSYNC (0x2810UL - 0x2800UL) /* Flush synchronization */ +#define STRBUF_DRAMDIAG (0x5000UL - 0x2800UL) /* data RAM diagnostic */ +#define STRBUF_ERRDIAG (0x5400UL - 0x2800UL) /* error status diagnostics */ +#define STRBUF_PTAGDIAG (0x5800UL - 0x2800UL) /* Page tag diagnostics */ +#define STRBUF_LTAGDIAG (0x5900UL - 0x2800UL) /* Line tag diagnostics */ + +#define STRBUF_TAG_VALID 0x02UL + +/* Enable 64-bit DVMA mode for the given device. */ +void sbus_set_sbus64(struct device *dev, int bursts) +{ + struct iommu *iommu = dev->archdata.iommu; + struct of_device *op = to_of_device(dev); + const struct linux_prom_registers *regs; + unsigned long cfg_reg; + int slot; + u64 val; + + regs = of_get_property(op->node, "reg", NULL); + if (!regs) { + printk(KERN_ERR "sbus_set_sbus64: Cannot find regs for %s\n", + op->node->full_name); + return; + } + slot = regs->which_io; + + cfg_reg = iommu->write_complete_reg; + switch (slot) { + case 0: + cfg_reg += 0x20UL; + break; + case 1: + cfg_reg += 0x28UL; + break; + case 2: + cfg_reg += 0x30UL; + break; + case 3: + cfg_reg += 0x38UL; + break; + case 13: + cfg_reg += 0x40UL; + break; + case 14: + cfg_reg += 0x48UL; + break; + case 15: + cfg_reg += 0x50UL; + break; + + default: + return; + }; + + val = upa_readq(cfg_reg); + if (val & (1UL << 14UL)) { + /* Extended transfer mode already enabled. */ + return; + } + + val |= (1UL << 14UL); + + if (bursts & DMA_BURST8) + val |= (1UL << 1UL); + if (bursts & DMA_BURST16) + val |= (1UL << 2UL); + if (bursts & DMA_BURST32) + val |= (1UL << 3UL); + if (bursts & DMA_BURST64) + val |= (1UL << 4UL); + upa_writeq(val, cfg_reg); +} + +/* INO number to IMAP register offset for SYSIO external IRQ's. + * This should conform to both Sunfire/Wildfire server and Fusion + * desktop designs. + */ +#define SYSIO_IMAP_SLOT0 0x2c00UL +#define SYSIO_IMAP_SLOT1 0x2c08UL +#define SYSIO_IMAP_SLOT2 0x2c10UL +#define SYSIO_IMAP_SLOT3 0x2c18UL +#define SYSIO_IMAP_SCSI 0x3000UL +#define SYSIO_IMAP_ETH 0x3008UL +#define SYSIO_IMAP_BPP 0x3010UL +#define SYSIO_IMAP_AUDIO 0x3018UL +#define SYSIO_IMAP_PFAIL 0x3020UL +#define SYSIO_IMAP_KMS 0x3028UL +#define SYSIO_IMAP_FLPY 0x3030UL +#define SYSIO_IMAP_SHW 0x3038UL +#define SYSIO_IMAP_KBD 0x3040UL +#define SYSIO_IMAP_MS 0x3048UL +#define SYSIO_IMAP_SER 0x3050UL +#define SYSIO_IMAP_TIM0 0x3060UL +#define SYSIO_IMAP_TIM1 0x3068UL +#define SYSIO_IMAP_UE 0x3070UL +#define SYSIO_IMAP_CE 0x3078UL +#define SYSIO_IMAP_SBERR 0x3080UL +#define SYSIO_IMAP_PMGMT 0x3088UL +#define SYSIO_IMAP_GFX 0x3090UL +#define SYSIO_IMAP_EUPA 0x3098UL + +#define bogon ((unsigned long) -1) +static unsigned long sysio_irq_offsets[] = { + /* SBUS Slot 0 --> 3, level 1 --> 7 */ + SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, + SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, + SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, + SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, + SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, + SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, + SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, + SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, + + /* Onboard devices (not relevant/used on SunFire). */ + SYSIO_IMAP_SCSI, + SYSIO_IMAP_ETH, + SYSIO_IMAP_BPP, + bogon, + SYSIO_IMAP_AUDIO, + SYSIO_IMAP_PFAIL, + bogon, + bogon, + SYSIO_IMAP_KMS, + SYSIO_IMAP_FLPY, + SYSIO_IMAP_SHW, + SYSIO_IMAP_KBD, + SYSIO_IMAP_MS, + SYSIO_IMAP_SER, + bogon, + bogon, + SYSIO_IMAP_TIM0, + SYSIO_IMAP_TIM1, + bogon, + bogon, + SYSIO_IMAP_UE, + SYSIO_IMAP_CE, + SYSIO_IMAP_SBERR, + SYSIO_IMAP_PMGMT, +}; + +#undef bogon + +#define NUM_SYSIO_OFFSETS ARRAY_SIZE(sysio_irq_offsets) + +/* Convert Interrupt Mapping register pointer to associated + * Interrupt Clear register pointer, SYSIO specific version. + */ +#define SYSIO_ICLR_UNUSED0 0x3400UL +#define SYSIO_ICLR_SLOT0 0x3408UL +#define SYSIO_ICLR_SLOT1 0x3448UL +#define SYSIO_ICLR_SLOT2 0x3488UL +#define SYSIO_ICLR_SLOT3 0x34c8UL +static unsigned long sysio_imap_to_iclr(unsigned long imap) +{ + unsigned long diff = SYSIO_ICLR_UNUSED0 - SYSIO_IMAP_SLOT0; + return imap + diff; +} + +static unsigned int sbus_build_irq(struct of_device *op, unsigned int ino) +{ + struct iommu *iommu = op->dev.archdata.iommu; + unsigned long reg_base = iommu->write_complete_reg - 0x2000UL; + unsigned long imap, iclr; + int sbus_level = 0; + + imap = sysio_irq_offsets[ino]; + if (imap == ((unsigned long)-1)) { + prom_printf("get_irq_translations: Bad SYSIO INO[%x]\n", + ino); + prom_halt(); + } + imap += reg_base; + + /* SYSIO inconsistency. For external SLOTS, we have to select + * the right ICLR register based upon the lower SBUS irq level + * bits. + */ + if (ino >= 0x20) { + iclr = sysio_imap_to_iclr(imap); + } else { + int sbus_slot = (ino & 0x18)>>3; + + sbus_level = ino & 0x7; + + switch(sbus_slot) { + case 0: + iclr = reg_base + SYSIO_ICLR_SLOT0; + break; + case 1: + iclr = reg_base + SYSIO_ICLR_SLOT1; + break; + case 2: + iclr = reg_base + SYSIO_ICLR_SLOT2; + break; + default: + case 3: + iclr = reg_base + SYSIO_ICLR_SLOT3; + break; + }; + + iclr += ((unsigned long)sbus_level - 1UL) * 8UL; + } + return build_irq(sbus_level, iclr, imap); +} + +/* Error interrupt handling. */ +#define SYSIO_UE_AFSR 0x0030UL +#define SYSIO_UE_AFAR 0x0038UL +#define SYSIO_UEAFSR_PPIO 0x8000000000000000UL /* Primary PIO cause */ +#define SYSIO_UEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read cause */ +#define SYSIO_UEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write cause */ +#define SYSIO_UEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO is cause */ +#define SYSIO_UEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read cause */ +#define SYSIO_UEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write cause*/ +#define SYSIO_UEAFSR_RESV1 0x03ff000000000000UL /* Reserved */ +#define SYSIO_UEAFSR_DOFF 0x0000e00000000000UL /* Doubleword Offset */ +#define SYSIO_UEAFSR_SIZE 0x00001c0000000000UL /* Bad transfer size 2^SIZE */ +#define SYSIO_UEAFSR_MID 0x000003e000000000UL /* UPA MID causing the fault */ +#define SYSIO_UEAFSR_RESV2 0x0000001fffffffffUL /* Reserved */ +static irqreturn_t sysio_ue_handler(int irq, void *dev_id) +{ + struct of_device *op = dev_id; + struct iommu *iommu = op->dev.archdata.iommu; + unsigned long reg_base = iommu->write_complete_reg - 0x2000UL; + unsigned long afsr_reg, afar_reg; + unsigned long afsr, afar, error_bits; + int reported, portid; + + afsr_reg = reg_base + SYSIO_UE_AFSR; + afar_reg = reg_base + SYSIO_UE_AFAR; + + /* Latch error status. */ + afsr = upa_readq(afsr_reg); + afar = upa_readq(afar_reg); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (SYSIO_UEAFSR_PPIO | SYSIO_UEAFSR_PDRD | SYSIO_UEAFSR_PDWR | + SYSIO_UEAFSR_SPIO | SYSIO_UEAFSR_SDRD | SYSIO_UEAFSR_SDWR); + upa_writeq(error_bits, afsr_reg); + + portid = of_getintprop_default(op->node, "portid", -1); + + /* Log the error. */ + printk("SYSIO[%x]: Uncorrectable ECC Error, primary error type[%s]\n", + portid, + (((error_bits & SYSIO_UEAFSR_PPIO) ? + "PIO" : + ((error_bits & SYSIO_UEAFSR_PDRD) ? + "DVMA Read" : + ((error_bits & SYSIO_UEAFSR_PDWR) ? + "DVMA Write" : "???"))))); + printk("SYSIO[%x]: DOFF[%lx] SIZE[%lx] MID[%lx]\n", + portid, + (afsr & SYSIO_UEAFSR_DOFF) >> 45UL, + (afsr & SYSIO_UEAFSR_SIZE) >> 42UL, + (afsr & SYSIO_UEAFSR_MID) >> 37UL); + printk("SYSIO[%x]: AFAR[%016lx]\n", portid, afar); + printk("SYSIO[%x]: Secondary UE errors [", portid); + reported = 0; + if (afsr & SYSIO_UEAFSR_SPIO) { + reported++; + printk("(PIO)"); + } + if (afsr & SYSIO_UEAFSR_SDRD) { + reported++; + printk("(DVMA Read)"); + } + if (afsr & SYSIO_UEAFSR_SDWR) { + reported++; + printk("(DVMA Write)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + return IRQ_HANDLED; +} + +#define SYSIO_CE_AFSR 0x0040UL +#define SYSIO_CE_AFAR 0x0048UL +#define SYSIO_CEAFSR_PPIO 0x8000000000000000UL /* Primary PIO cause */ +#define SYSIO_CEAFSR_PDRD 0x4000000000000000UL /* Primary DVMA read cause */ +#define SYSIO_CEAFSR_PDWR 0x2000000000000000UL /* Primary DVMA write cause */ +#define SYSIO_CEAFSR_SPIO 0x1000000000000000UL /* Secondary PIO cause */ +#define SYSIO_CEAFSR_SDRD 0x0800000000000000UL /* Secondary DVMA read cause */ +#define SYSIO_CEAFSR_SDWR 0x0400000000000000UL /* Secondary DVMA write cause*/ +#define SYSIO_CEAFSR_RESV1 0x0300000000000000UL /* Reserved */ +#define SYSIO_CEAFSR_ESYND 0x00ff000000000000UL /* Syndrome Bits */ +#define SYSIO_CEAFSR_DOFF 0x0000e00000000000UL /* Double Offset */ +#define SYSIO_CEAFSR_SIZE 0x00001c0000000000UL /* Bad transfer size 2^SIZE */ +#define SYSIO_CEAFSR_MID 0x000003e000000000UL /* UPA MID causing the fault */ +#define SYSIO_CEAFSR_RESV2 0x0000001fffffffffUL /* Reserved */ +static irqreturn_t sysio_ce_handler(int irq, void *dev_id) +{ + struct of_device *op = dev_id; + struct iommu *iommu = op->dev.archdata.iommu; + unsigned long reg_base = iommu->write_complete_reg - 0x2000UL; + unsigned long afsr_reg, afar_reg; + unsigned long afsr, afar, error_bits; + int reported, portid; + + afsr_reg = reg_base + SYSIO_CE_AFSR; + afar_reg = reg_base + SYSIO_CE_AFAR; + + /* Latch error status. */ + afsr = upa_readq(afsr_reg); + afar = upa_readq(afar_reg); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (SYSIO_CEAFSR_PPIO | SYSIO_CEAFSR_PDRD | SYSIO_CEAFSR_PDWR | + SYSIO_CEAFSR_SPIO | SYSIO_CEAFSR_SDRD | SYSIO_CEAFSR_SDWR); + upa_writeq(error_bits, afsr_reg); + + portid = of_getintprop_default(op->node, "portid", -1); + + printk("SYSIO[%x]: Correctable ECC Error, primary error type[%s]\n", + portid, + (((error_bits & SYSIO_CEAFSR_PPIO) ? + "PIO" : + ((error_bits & SYSIO_CEAFSR_PDRD) ? + "DVMA Read" : + ((error_bits & SYSIO_CEAFSR_PDWR) ? + "DVMA Write" : "???"))))); + + /* XXX Use syndrome and afar to print out module string just like + * XXX UDB CE trap handler does... -DaveM + */ + printk("SYSIO[%x]: DOFF[%lx] ECC Syndrome[%lx] Size[%lx] MID[%lx]\n", + portid, + (afsr & SYSIO_CEAFSR_DOFF) >> 45UL, + (afsr & SYSIO_CEAFSR_ESYND) >> 48UL, + (afsr & SYSIO_CEAFSR_SIZE) >> 42UL, + (afsr & SYSIO_CEAFSR_MID) >> 37UL); + printk("SYSIO[%x]: AFAR[%016lx]\n", portid, afar); + + printk("SYSIO[%x]: Secondary CE errors [", portid); + reported = 0; + if (afsr & SYSIO_CEAFSR_SPIO) { + reported++; + printk("(PIO)"); + } + if (afsr & SYSIO_CEAFSR_SDRD) { + reported++; + printk("(DVMA Read)"); + } + if (afsr & SYSIO_CEAFSR_SDWR) { + reported++; + printk("(DVMA Write)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + return IRQ_HANDLED; +} + +#define SYSIO_SBUS_AFSR 0x2010UL +#define SYSIO_SBUS_AFAR 0x2018UL +#define SYSIO_SBAFSR_PLE 0x8000000000000000UL /* Primary Late PIO Error */ +#define SYSIO_SBAFSR_PTO 0x4000000000000000UL /* Primary SBUS Timeout */ +#define SYSIO_SBAFSR_PBERR 0x2000000000000000UL /* Primary SBUS Error ACK */ +#define SYSIO_SBAFSR_SLE 0x1000000000000000UL /* Secondary Late PIO Error */ +#define SYSIO_SBAFSR_STO 0x0800000000000000UL /* Secondary SBUS Timeout */ +#define SYSIO_SBAFSR_SBERR 0x0400000000000000UL /* Secondary SBUS Error ACK */ +#define SYSIO_SBAFSR_RESV1 0x03ff000000000000UL /* Reserved */ +#define SYSIO_SBAFSR_RD 0x0000800000000000UL /* Primary was late PIO read */ +#define SYSIO_SBAFSR_RESV2 0x0000600000000000UL /* Reserved */ +#define SYSIO_SBAFSR_SIZE 0x00001c0000000000UL /* Size of transfer */ +#define SYSIO_SBAFSR_MID 0x000003e000000000UL /* MID causing the error */ +#define SYSIO_SBAFSR_RESV3 0x0000001fffffffffUL /* Reserved */ +static irqreturn_t sysio_sbus_error_handler(int irq, void *dev_id) +{ + struct of_device *op = dev_id; + struct iommu *iommu = op->dev.archdata.iommu; + unsigned long afsr_reg, afar_reg, reg_base; + unsigned long afsr, afar, error_bits; + int reported, portid; + + reg_base = iommu->write_complete_reg - 0x2000UL; + afsr_reg = reg_base + SYSIO_SBUS_AFSR; + afar_reg = reg_base + SYSIO_SBUS_AFAR; + + afsr = upa_readq(afsr_reg); + afar = upa_readq(afar_reg); + + /* Clear primary/secondary error status bits. */ + error_bits = afsr & + (SYSIO_SBAFSR_PLE | SYSIO_SBAFSR_PTO | SYSIO_SBAFSR_PBERR | + SYSIO_SBAFSR_SLE | SYSIO_SBAFSR_STO | SYSIO_SBAFSR_SBERR); + upa_writeq(error_bits, afsr_reg); + + portid = of_getintprop_default(op->node, "portid", -1); + + /* Log the error. */ + printk("SYSIO[%x]: SBUS Error, primary error type[%s] read(%d)\n", + portid, + (((error_bits & SYSIO_SBAFSR_PLE) ? + "Late PIO Error" : + ((error_bits & SYSIO_SBAFSR_PTO) ? + "Time Out" : + ((error_bits & SYSIO_SBAFSR_PBERR) ? + "Error Ack" : "???")))), + (afsr & SYSIO_SBAFSR_RD) ? 1 : 0); + printk("SYSIO[%x]: size[%lx] MID[%lx]\n", + portid, + (afsr & SYSIO_SBAFSR_SIZE) >> 42UL, + (afsr & SYSIO_SBAFSR_MID) >> 37UL); + printk("SYSIO[%x]: AFAR[%016lx]\n", portid, afar); + printk("SYSIO[%x]: Secondary SBUS errors [", portid); + reported = 0; + if (afsr & SYSIO_SBAFSR_SLE) { + reported++; + printk("(Late PIO Error)"); + } + if (afsr & SYSIO_SBAFSR_STO) { + reported++; + printk("(Time Out)"); + } + if (afsr & SYSIO_SBAFSR_SBERR) { + reported++; + printk("(Error Ack)"); + } + if (!reported) + printk("(none)"); + printk("]\n"); + + /* XXX check iommu/strbuf for further error status XXX */ + + return IRQ_HANDLED; +} + +#define ECC_CONTROL 0x0020UL +#define SYSIO_ECNTRL_ECCEN 0x8000000000000000UL /* Enable ECC Checking */ +#define SYSIO_ECNTRL_UEEN 0x4000000000000000UL /* Enable UE Interrupts */ +#define SYSIO_ECNTRL_CEEN 0x2000000000000000UL /* Enable CE Interrupts */ + +#define SYSIO_UE_INO 0x34 +#define SYSIO_CE_INO 0x35 +#define SYSIO_SBUSERR_INO 0x36 + +static void __init sysio_register_error_handlers(struct of_device *op) +{ + struct iommu *iommu = op->dev.archdata.iommu; + unsigned long reg_base = iommu->write_complete_reg - 0x2000UL; + unsigned int irq; + u64 control; + int portid; + + portid = of_getintprop_default(op->node, "portid", -1); + + irq = sbus_build_irq(op, SYSIO_UE_INO); + if (request_irq(irq, sysio_ue_handler, 0, + "SYSIO_UE", op) < 0) { + prom_printf("SYSIO[%x]: Cannot register UE interrupt.\n", + portid); + prom_halt(); + } + + irq = sbus_build_irq(op, SYSIO_CE_INO); + if (request_irq(irq, sysio_ce_handler, 0, + "SYSIO_CE", op) < 0) { + prom_printf("SYSIO[%x]: Cannot register CE interrupt.\n", + portid); + prom_halt(); + } + + irq = sbus_build_irq(op, SYSIO_SBUSERR_INO); + if (request_irq(irq, sysio_sbus_error_handler, 0, + "SYSIO_SBERR", op) < 0) { + prom_printf("SYSIO[%x]: Cannot register SBUS Error interrupt.\n", + portid); + prom_halt(); + } + + /* Now turn the error interrupts on and also enable ECC checking. */ + upa_writeq((SYSIO_ECNTRL_ECCEN | + SYSIO_ECNTRL_UEEN | + SYSIO_ECNTRL_CEEN), + reg_base + ECC_CONTROL); + + control = upa_readq(iommu->write_complete_reg); + control |= 0x100UL; /* SBUS Error Interrupt Enable */ + upa_writeq(control, iommu->write_complete_reg); +} + +/* Boot time initialization. */ +static void __init sbus_iommu_init(struct of_device *op) +{ + const struct linux_prom64_registers *pr; + struct device_node *dp = op->node; + struct iommu *iommu; + struct strbuf *strbuf; + unsigned long regs, reg_base; + int i, portid; + u64 control; + + pr = of_get_property(dp, "reg", NULL); + if (!pr) { + prom_printf("sbus_iommu_init: Cannot map SYSIO " + "control registers.\n"); + prom_halt(); + } + regs = pr->phys_addr; + + iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC); + if (!iommu) + goto fatal_memory_error; + strbuf = kzalloc(sizeof(*strbuf), GFP_ATOMIC); + if (!strbuf) + goto fatal_memory_error; + + op->dev.archdata.iommu = iommu; + op->dev.archdata.stc = strbuf; + op->dev.archdata.numa_node = -1; + + reg_base = regs + SYSIO_IOMMUREG_BASE; + iommu->iommu_control = reg_base + IOMMU_CONTROL; + iommu->iommu_tsbbase = reg_base + IOMMU_TSBBASE; + iommu->iommu_flush = reg_base + IOMMU_FLUSH; + iommu->iommu_tags = iommu->iommu_control + + (IOMMU_TAGDIAG - IOMMU_CONTROL); + + reg_base = regs + SYSIO_STRBUFREG_BASE; + strbuf->strbuf_control = reg_base + STRBUF_CONTROL; + strbuf->strbuf_pflush = reg_base + STRBUF_PFLUSH; + strbuf->strbuf_fsync = reg_base + STRBUF_FSYNC; + + strbuf->strbuf_enabled = 1; + + strbuf->strbuf_flushflag = (volatile unsigned long *) + ((((unsigned long)&strbuf->__flushflag_buf[0]) + + 63UL) + & ~63UL); + strbuf->strbuf_flushflag_pa = (unsigned long) + __pa(strbuf->strbuf_flushflag); + + /* The SYSIO SBUS control register is used for dummy reads + * in order to ensure write completion. + */ + iommu->write_complete_reg = regs + 0x2000UL; + + portid = of_getintprop_default(op->node, "portid", -1); + printk(KERN_INFO "SYSIO: UPA portID %x, at %016lx\n", + portid, regs); + + /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ + if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1)) + goto fatal_memory_error; + + control = upa_readq(iommu->iommu_control); + control = ((7UL << 16UL) | + (0UL << 2UL) | + (1UL << 1UL) | + (1UL << 0UL)); + upa_writeq(control, iommu->iommu_control); + + /* Clean out any cruft in the IOMMU using + * diagnostic accesses. + */ + for (i = 0; i < 16; i++) { + unsigned long dram, tag; + + dram = iommu->iommu_control + (IOMMU_DRAMDIAG - IOMMU_CONTROL); + tag = iommu->iommu_control + (IOMMU_TAGDIAG - IOMMU_CONTROL); + + dram += (unsigned long)i * 8UL; + tag += (unsigned long)i * 8UL; + upa_writeq(0, dram); + upa_writeq(0, tag); + } + upa_readq(iommu->write_complete_reg); + + /* Give the TSB to SYSIO. */ + upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase); + + /* Setup streaming buffer, DE=1 SB_EN=1 */ + control = (1UL << 1UL) | (1UL << 0UL); + upa_writeq(control, strbuf->strbuf_control); + + /* Clear out the tags using diagnostics. */ + for (i = 0; i < 16; i++) { + unsigned long ptag, ltag; + + ptag = strbuf->strbuf_control + + (STRBUF_PTAGDIAG - STRBUF_CONTROL); + ltag = strbuf->strbuf_control + + (STRBUF_LTAGDIAG - STRBUF_CONTROL); + ptag += (unsigned long)i * 8UL; + ltag += (unsigned long)i * 8UL; + + upa_writeq(0UL, ptag); + upa_writeq(0UL, ltag); + } + + /* Enable DVMA arbitration for all devices/slots. */ + control = upa_readq(iommu->write_complete_reg); + control |= 0x3fUL; + upa_writeq(control, iommu->write_complete_reg); + + /* Now some Xfire specific grot... */ + if (this_is_starfire) + starfire_hookup(portid); + + sysio_register_error_handlers(op); + return; + +fatal_memory_error: + prom_printf("sbus_iommu_init: Fatal memory allocation error.\n"); +} + +static int __init sbus_init(void) +{ + struct device_node *dp; + + for_each_node_by_name(dp, "sbus") { + struct of_device *op = of_find_device_by_node(dp); + + sbus_iommu_init(op); + of_propagate_archdata(op); + } + + return 0; +} + +subsys_initcall(sbus_init); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c new file mode 100644 index 000000000000..c8b03a4f68bf --- /dev/null +++ b/arch/sparc/kernel/setup_64.c @@ -0,0 +1,428 @@ +/* + * linux/arch/sparc64/kernel/setup.c + * + * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <asm/smp.h> +#include <linux/user.h> +#include <linux/screen_info.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/syscalls.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/inet.h> +#include <linux/console.h> +#include <linux/root_dev.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/initrd.h> + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/oplib.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/idprom.h> +#include <asm/head.h> +#include <asm/starfire.h> +#include <asm/mmu_context.h> +#include <asm/timer.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/mmu.h> +#include <asm/ns87303.h> + +#ifdef CONFIG_IP_PNP +#include <net/ipconfig.h> +#endif + +#include "entry.h" + +/* Used to synchronize accesses to NatSemi SUPER I/O chip configure + * operations in asm/ns87303.h + */ +DEFINE_SPINLOCK(ns87303_lock); + +struct screen_info screen_info = { + 0, 0, /* orig-x, orig-y */ + 0, /* unused */ + 0, /* orig-video-page */ + 0, /* orig-video-mode */ + 128, /* orig-video-cols */ + 0, 0, 0, /* unused, ega_bx, unused */ + 54, /* orig-video-lines */ + 0, /* orig-video-isVGA */ + 16 /* orig-video-points */ +}; + +static void +prom_console_write(struct console *con, const char *s, unsigned n) +{ + prom_write(s, n); +} + +/* Exported for mm/init.c:paging_init. */ +unsigned long cmdline_memory_size = 0; + +static struct console prom_early_console = { + .name = "earlyprom", + .write = prom_console_write, + .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME, + .index = -1, +}; + +/* + * Process kernel command line switches that are specific to the + * SPARC or that require special low-level processing. + */ +static void __init process_switch(char c) +{ + switch (c) { + case 'd': + case 's': + break; + case 'h': + prom_printf("boot_flags_init: Halt!\n"); + prom_halt(); + break; + case 'p': + /* Just ignore, this behavior is now the default. */ + break; + case 'P': + /* Force UltraSPARC-III P-Cache on. */ + if (tlb_type != cheetah) { + printk("BOOT: Ignoring P-Cache force option.\n"); + break; + } + cheetah_pcache_forced_on = 1; + add_taint(TAINT_MACHINE_CHECK); + cheetah_enable_pcache(); + break; + + default: + printk("Unknown boot switch (-%c)\n", c); + break; + } +} + +static void __init boot_flags_init(char *commands) +{ + while (*commands) { + /* Move to the start of the next "argument". */ + while (*commands && *commands == ' ') + commands++; + + /* Process any command switches, otherwise skip it. */ + if (*commands == '\0') + break; + if (*commands == '-') { + commands++; + while (*commands && *commands != ' ') + process_switch(*commands++); + continue; + } + if (!strncmp(commands, "mem=", 4)) { + /* + * "mem=XXX[kKmM]" overrides the PROM-reported + * memory size. + */ + cmdline_memory_size = simple_strtoul(commands + 4, + &commands, 0); + if (*commands == 'K' || *commands == 'k') { + cmdline_memory_size <<= 10; + commands++; + } else if (*commands=='M' || *commands=='m') { + cmdline_memory_size <<= 20; + commands++; + } + } + while (*commands && *commands != ' ') + commands++; + } +} + +extern unsigned short root_flags; +extern unsigned short root_dev; +extern unsigned short ram_flags; +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + +extern int root_mountflags; + +char reboot_command[COMMAND_LINE_SIZE]; + +static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; + +void __init per_cpu_patch(void) +{ + struct cpuid_patch_entry *p; + unsigned long ver; + int is_jbus; + + if (tlb_type == spitfire && !this_is_starfire) + return; + + is_jbus = 0; + if (tlb_type != hypervisor) { + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + is_jbus = ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID); + } + + p = &__cpuid_patch; + while (p < &__cpuid_patch_end) { + unsigned long addr = p->addr; + unsigned int *insns; + + switch (tlb_type) { + case spitfire: + insns = &p->starfire[0]; + break; + case cheetah: + case cheetah_plus: + if (is_jbus) + insns = &p->cheetah_jbus[0]; + else + insns = &p->cheetah_safari[0]; + break; + case hypervisor: + insns = &p->sun4v[0]; + break; + default: + prom_printf("Unknown cpu type, halting.\n"); + prom_halt(); + }; + + *(unsigned int *) (addr + 0) = insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + *(unsigned int *) (addr + 8) = insns[2]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 8)); + + *(unsigned int *) (addr + 12) = insns[3]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 12)); + + p++; + } +} + +void __init sun4v_patch(void) +{ + extern void sun4v_hvapi_init(void); + struct sun4v_1insn_patch_entry *p1; + struct sun4v_2insn_patch_entry *p2; + + if (tlb_type != hypervisor) + return; + + p1 = &__sun4v_1insn_patch; + while (p1 < &__sun4v_1insn_patch_end) { + unsigned long addr = p1->addr; + + *(unsigned int *) (addr + 0) = p1->insn; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + p1++; + } + + p2 = &__sun4v_2insn_patch; + while (p2 < &__sun4v_2insn_patch_end) { + unsigned long addr = p2->addr; + + *(unsigned int *) (addr + 0) = p2->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = p2->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + p2++; + } + + sun4v_hvapi_init(); +} + +#ifdef CONFIG_SMP +void __init boot_cpu_id_too_large(int cpu) +{ + prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", + cpu, NR_CPUS); + prom_halt(); +} +#endif + +void __init setup_arch(char **cmdline_p) +{ + /* Initialize PROM console and command line. */ + *cmdline_p = prom_getbootargs(); + strcpy(boot_command_line, *cmdline_p); + parse_early_param(); + + boot_flags_init(*cmdline_p); + register_console(&prom_early_console); + + if (tlb_type == hypervisor) + printk("ARCH: SUN4V\n"); + else + printk("ARCH: SUN4U\n"); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#elif defined(CONFIG_PROM_CONSOLE) + conswitchp = &prom_con; +#endif + + idprom_init(); + + if (!root_flags) + root_mountflags &= ~MS_RDONLY; + ROOT_DEV = old_decode_dev(root_dev); +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); +#endif + + task_thread_info(&init_task)->kregs = &fake_swapper_regs; + +#ifdef CONFIG_IP_PNP + if (!ic_set_manually) { + int chosen = prom_finddevice ("/chosen"); + u32 cl, sv, gw; + + cl = prom_getintdefault (chosen, "client-ip", 0); + sv = prom_getintdefault (chosen, "server-ip", 0); + gw = prom_getintdefault (chosen, "gateway-ip", 0); + if (cl && sv) { + ic_myaddr = cl; + ic_servaddr = sv; + if (gw) + ic_gateway = gw; +#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP) + ic_proto_enabled = 0; +#endif + } + } +#endif + + /* Get boot processor trap_block[] setup. */ + init_cur_cpu_trap(current_thread_info()); + + paging_init(); +} + +/* BUFFER is PAGE_SIZE bytes long. */ + +extern void smp_info(struct seq_file *); +extern void smp_bogo(struct seq_file *); +extern void mmu_info(struct seq_file *); + +unsigned int dcache_parity_tl1_occurred; +unsigned int icache_parity_tl1_occurred; + +int ncpus_probed; + +static int show_cpuinfo(struct seq_file *m, void *__unused) +{ + seq_printf(m, + "cpu\t\t: %s\n" + "fpu\t\t: %s\n" + "prom\t\t: %s\n" + "type\t\t: %s\n" + "ncpus probed\t: %d\n" + "ncpus active\t: %d\n" + "D$ parity tl1\t: %u\n" + "I$ parity tl1\t: %u\n" +#ifndef CONFIG_SMP + "Cpu0ClkTck\t: %016lx\n" +#endif + , + sparc_cpu_type, + sparc_fpu_type, + prom_version, + ((tlb_type == hypervisor) ? + "sun4v" : + "sun4u"), + ncpus_probed, + num_online_cpus(), + dcache_parity_tl1_occurred, + icache_parity_tl1_occurred +#ifndef CONFIG_SMP + , cpu_data(0).clock_tick +#endif + ); +#ifdef CONFIG_SMP + smp_bogo(m); +#endif + mmu_info(m); +#ifdef CONFIG_SMP + smp_info(m); +#endif + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + /* The pointer we are returning is arbitrary, + * it just has to be non-NULL and not IS_ERR + * in the success case. + */ + return *pos == 0 ? &c_start : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +extern int stop_a_enabled; + +void sun_do_break(void) +{ + if (!stop_a_enabled) + return; + + prom_printf("\n"); + flush_user_windows(); + + prom_cmdline(); +} + +int stop_a_enabled = 1; diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c new file mode 100644 index 000000000000..ba5b09ad6666 --- /dev/null +++ b/arch/sparc/kernel/signal32.c @@ -0,0 +1,899 @@ +/* arch/sparc64/kernel/signal32.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) + * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/mm.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/compat.h> +#include <linux/bitops.h> +#include <linux/tracehook.h> + +#include <asm/uaccess.h> +#include <asm/ptrace.h> +#include <asm/pgtable.h> +#include <asm/psrcompat.h> +#include <asm/fpumacro.h> +#include <asm/visasm.h> +#include <asm/compat_signal.h> + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +/* This magic should be in g_upper[0] for all upper parts + * to be valid. + */ +#define SIGINFO_EXTRA_V8PLUS_MAGIC 0x130e269 +typedef struct { + unsigned int g_upper[8]; + unsigned int o_upper[8]; + unsigned int asi; +} siginfo_extra_v8plus_t; + +struct signal_frame32 { + struct sparc_stackf32 ss; + __siginfo32_t info; + /* __siginfo_fpu32_t * */ u32 fpu_save; + unsigned int insns[2]; + unsigned int extramask[_COMPAT_NSIG_WORDS - 1]; + unsigned int extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */ + /* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */ + siginfo_extra_v8plus_t v8plus; + __siginfo_fpu_t fpu_state; +}; + +typedef struct compat_siginfo{ + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE32]; + + /* kill() */ + struct { + compat_pid_t _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + compat_pid_t _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + compat_pid_t _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ + struct { + u32 _addr; /* faulting insn/memory ref. */ + int _trapno; + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +}compat_siginfo_t; + +struct rt_signal_frame32 { + struct sparc_stackf32 ss; + compat_siginfo_t info; + struct pt_regs32 regs; + compat_sigset_t mask; + /* __siginfo_fpu32_t * */ u32 fpu_save; + unsigned int insns[2]; + stack_t32 stack; + unsigned int extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */ + /* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */ + siginfo_extra_v8plus_t v8plus; + __siginfo_fpu_t fpu_state; +}; + +/* Align macros */ +#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 7) & (~7))) +#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 7) & (~7))) + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +{ + int err; + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. + This routine must convert siginfo from 64bit to 32bit as well + at the same time. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + if (from->si_code < 0) + err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); + else { + switch (from->si_code >> 16) { + case __SI_TIMER >> 16: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_int, &to->si_int); + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_FAULT >> 16: + err |= __put_user(from->si_trapno, &to->si_trapno); + err |= __put_user((unsigned long)from->si_addr, &to->si_addr); + break; + case __SI_POLL >> 16: + err |= __put_user(from->si_band, &to->si_band); + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_int, &to->si_int); + break; + } + } + return err; +} + +/* CAUTION: This is just a very minimalist implementation for the + * sake of compat_sys_rt_sigqueueinfo() + */ +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + if (!access_ok(VERIFY_WRITE, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + if (copy_from_user(to, from, 3*sizeof(int)) || + copy_from_user(to->_sifields._pad, from->_sifields._pad, + SI_PAD_SIZE)) + return -EFAULT; + + return 0; +} + +static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err; + + err = __get_user(fprs, &fpu->si_fprs); + fprs_write(0); + regs->tstate &= ~TSTATE_PEF; + if (fprs & FPRS_DL) + err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32)); + err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); + current_thread_info()->fpsaved[0] |= fprs; + return err; +} + +void do_sigreturn32(struct pt_regs *regs) +{ + struct signal_frame32 __user *sf; + unsigned int psr; + unsigned pc, npc, fpu_save; + sigset_t set; + unsigned seta[_COMPAT_NSIG_WORDS]; + int err, i; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + synchronize_user_stack(); + + regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; + sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP]; + + /* 1. Make sure we are not getting garbage from the user */ + if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || + (((unsigned long) sf) & 3)) + goto segv; + + get_user(pc, &sf->info.si_regs.pc); + __get_user(npc, &sf->info.si_regs.npc); + + if ((pc | npc) & 3) + goto segv; + + if (test_thread_flag(TIF_32BIT)) { + pc &= 0xffffffff; + npc &= 0xffffffff; + } + regs->tpc = pc; + regs->tnpc = npc; + + /* 2. Restore the state */ + err = __get_user(regs->y, &sf->info.si_regs.y); + err |= __get_user(psr, &sf->info.si_regs.psr); + + for (i = UREG_G1; i <= UREG_I7; i++) + err |= __get_user(regs->u_regs[i], &sf->info.si_regs.u_regs[i]); + if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS) { + err |= __get_user(i, &sf->v8plus.g_upper[0]); + if (i == SIGINFO_EXTRA_V8PLUS_MAGIC) { + unsigned long asi; + + for (i = UREG_G1; i <= UREG_I7; i++) + err |= __get_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]); + err |= __get_user(asi, &sf->v8plus.asi); + regs->tstate &= ~TSTATE_ASI; + regs->tstate |= ((asi & 0xffUL) << 24UL); + } + } + + /* User can only change condition codes in %tstate. */ + regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC); + regs->tstate |= psr_to_tstate_icc(psr); + + /* Prevent syscall restart. */ + pt_regs_clear_syscall(regs); + + err |= __get_user(fpu_save, &sf->fpu_save); + if (fpu_save) + err |= restore_fpu_state32(regs, &sf->fpu_state); + err |= __get_user(seta[0], &sf->info.si_mask); + err |= copy_from_user(seta+1, &sf->extramask, + (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); + if (err) + goto segv; + switch (_NSIG_WORDS) { + case 4: set.sig[3] = seta[6] + (((long)seta[7]) << 32); + case 3: set.sig[2] = seta[4] + (((long)seta[5]) << 32); + case 2: set.sig[1] = seta[2] + (((long)seta[3]) << 32); + case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32); + } + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + return; + +segv: + force_sig(SIGSEGV, current); +} + +asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) +{ + struct rt_signal_frame32 __user *sf; + unsigned int psr, pc, npc, fpu_save, u_ss_sp; + mm_segment_t old_fs; + sigset_t set; + compat_sigset_t seta; + stack_t st; + int err, i; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + synchronize_user_stack(); + regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; + sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP]; + + /* 1. Make sure we are not getting garbage from the user */ + if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || + (((unsigned long) sf) & 3)) + goto segv; + + get_user(pc, &sf->regs.pc); + __get_user(npc, &sf->regs.npc); + + if ((pc | npc) & 3) + goto segv; + + if (test_thread_flag(TIF_32BIT)) { + pc &= 0xffffffff; + npc &= 0xffffffff; + } + regs->tpc = pc; + regs->tnpc = npc; + + /* 2. Restore the state */ + err = __get_user(regs->y, &sf->regs.y); + err |= __get_user(psr, &sf->regs.psr); + + for (i = UREG_G1; i <= UREG_I7; i++) + err |= __get_user(regs->u_regs[i], &sf->regs.u_regs[i]); + if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS) { + err |= __get_user(i, &sf->v8plus.g_upper[0]); + if (i == SIGINFO_EXTRA_V8PLUS_MAGIC) { + unsigned long asi; + + for (i = UREG_G1; i <= UREG_I7; i++) + err |= __get_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]); + err |= __get_user(asi, &sf->v8plus.asi); + regs->tstate &= ~TSTATE_ASI; + regs->tstate |= ((asi & 0xffUL) << 24UL); + } + } + + /* User can only change condition codes in %tstate. */ + regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC); + regs->tstate |= psr_to_tstate_icc(psr); + + /* Prevent syscall restart. */ + pt_regs_clear_syscall(regs); + + err |= __get_user(fpu_save, &sf->fpu_save); + if (fpu_save) + err |= restore_fpu_state32(regs, &sf->fpu_state); + err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t)); + err |= __get_user(u_ss_sp, &sf->stack.ss_sp); + st.ss_sp = compat_ptr(u_ss_sp); + err |= __get_user(st.ss_flags, &sf->stack.ss_flags); + err |= __get_user(st.ss_size, &sf->stack.ss_size); + if (err) + goto segv; + + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf); + set_fs(old_fs); + + switch (_NSIG_WORDS) { + case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32); + case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32); + case 2: set.sig[1] = seta.sig[2] + (((long)seta.sig[3]) << 32); + case 1: set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32); + } + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + return; +segv: + force_sig(SIGSEGV, current); +} + +/* Checks if the fp is valid */ +static int invalid_frame_pointer(void __user *fp, int fplen) +{ + if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen) + return 1; + return 0; +} + +static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, unsigned long framesize) +{ + unsigned long sp; + + regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; + sp = regs->u_regs[UREG_FP]; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user *) -1L; + + /* This is the X/Open sanctioned signal stack switching. */ + if (sa->sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + /* Always align the stack frame. This handles two cases. First, + * sigaltstack need not be mindful of platform specific stack + * alignment. Second, if we took this signal because the stack + * is not aligned properly, we'd like to take the signal cleanly + * and report that. + */ + sp &= ~7UL; + + return (void __user *)(sp - framesize); +} + +static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err = 0; + + fprs = current_thread_info()->fpsaved[0]; + if (fprs & FPRS_DL) + err |= copy_to_user(&fpu->si_float_regs[0], fpregs, + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, + (sizeof(unsigned int) * 32)); + err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); + err |= __put_user(fprs, &fpu->si_fprs); + + return err; +} + +static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset) +{ + struct signal_frame32 __user *sf; + int sigframe_size; + u32 psr; + int i, err; + unsigned int seta[_COMPAT_NSIG_WORDS]; + + /* 1. Make sure everything is clean */ + synchronize_user_stack(); + save_and_clear_fpu(); + + sigframe_size = SF_ALIGNEDSZ; + if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) + sigframe_size -= sizeof(__siginfo_fpu_t); + + sf = (struct signal_frame32 __user *) + get_sigframe(&ka->sa, regs, sigframe_size); + + if (invalid_frame_pointer(sf, sigframe_size)) + goto sigill; + + if (get_thread_wsaved() != 0) + goto sigill; + + /* 2. Save the current process state */ + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + err = put_user(regs->tpc, &sf->info.si_regs.pc); + err |= __put_user(regs->tnpc, &sf->info.si_regs.npc); + err |= __put_user(regs->y, &sf->info.si_regs.y); + psr = tstate_to_psr(regs->tstate); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + psr |= PSR_EF; + err |= __put_user(psr, &sf->info.si_regs.psr); + for (i = 0; i < 16; i++) + err |= __put_user(regs->u_regs[i], &sf->info.si_regs.u_regs[i]); + err |= __put_user(sizeof(siginfo_extra_v8plus_t), &sf->extra_size); + err |= __put_user(SIGINFO_EXTRA_V8PLUS_MAGIC, &sf->v8plus.g_upper[0]); + for (i = 1; i < 16; i++) + err |= __put_user(((u32 *)regs->u_regs)[2*i], + &sf->v8plus.g_upper[i]); + err |= __put_user((regs->tstate & TSTATE_ASI) >> 24UL, + &sf->v8plus.asi); + + if (psr & PSR_EF) { + err |= save_fpu_state32(regs, &sf->fpu_state); + err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + } else { + err |= __put_user(0, &sf->fpu_save); + } + + switch (_NSIG_WORDS) { + case 4: seta[7] = (oldset->sig[3] >> 32); + seta[6] = oldset->sig[3]; + case 3: seta[5] = (oldset->sig[2] >> 32); + seta[4] = oldset->sig[2]; + case 2: seta[3] = (oldset->sig[1] >> 32); + seta[2] = oldset->sig[1]; + case 1: seta[1] = (oldset->sig[0] >> 32); + seta[0] = oldset->sig[0]; + } + err |= __put_user(seta[0], &sf->info.si_mask); + err |= __copy_to_user(sf->extramask, seta + 1, + (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); + + err |= copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); + + if (err) + goto sigsegv; + + /* 3. signal handler back-trampoline and parameters */ + regs->u_regs[UREG_FP] = (unsigned long) sf; + regs->u_regs[UREG_I0] = signo; + regs->u_regs[UREG_I1] = (unsigned long) &sf->info; + regs->u_regs[UREG_I2] = (unsigned long) &sf->info; + + /* 4. signal handler */ + regs->tpc = (unsigned long) ka->sa.sa_handler; + regs->tnpc = (regs->tpc + 4); + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + + /* 5. return to kernel instructions */ + if (ka->ka_restorer) { + regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer; + } else { + /* Flush instruction space. */ + unsigned long address = ((unsigned long)&(sf->insns[0])); + pgd_t *pgdp = pgd_offset(current->mm, address); + pud_t *pudp = pud_offset(pgdp, address); + pmd_t *pmdp = pmd_offset(pudp, address); + pte_t *ptep; + pte_t pte; + + regs->u_regs[UREG_I7] = (unsigned long) (&(sf->insns[0]) - 2); + + err = __put_user(0x821020d8, &sf->insns[0]); /*mov __NR_sigreturn, %g1*/ + err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/ + if (err) + goto sigsegv; + + preempt_disable(); + ptep = pte_offset_map(pmdp, address); + pte = *ptep; + if (pte_present(pte)) { + unsigned long page = (unsigned long) + page_address(pte_page(pte)); + + wmb(); + __asm__ __volatile__("flush %0 + %1" + : /* no outputs */ + : "r" (page), + "r" (address & (PAGE_SIZE - 1)) + : "memory"); + } + pte_unmap(ptep); + preempt_enable(); + } + return; + +sigill: + do_exit(SIGILL); +sigsegv: + force_sigsegv(signo, current); +} + +static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, + unsigned long signr, sigset_t *oldset, + siginfo_t *info) +{ + struct rt_signal_frame32 __user *sf; + int sigframe_size; + u32 psr; + int i, err; + compat_sigset_t seta; + + /* 1. Make sure everything is clean */ + synchronize_user_stack(); + save_and_clear_fpu(); + + sigframe_size = RT_ALIGNEDSZ; + if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) + sigframe_size -= sizeof(__siginfo_fpu_t); + + sf = (struct rt_signal_frame32 __user *) + get_sigframe(&ka->sa, regs, sigframe_size); + + if (invalid_frame_pointer(sf, sigframe_size)) + goto sigill; + + if (get_thread_wsaved() != 0) + goto sigill; + + /* 2. Save the current process state */ + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + err = put_user(regs->tpc, &sf->regs.pc); + err |= __put_user(regs->tnpc, &sf->regs.npc); + err |= __put_user(regs->y, &sf->regs.y); + psr = tstate_to_psr(regs->tstate); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + psr |= PSR_EF; + err |= __put_user(psr, &sf->regs.psr); + for (i = 0; i < 16; i++) + err |= __put_user(regs->u_regs[i], &sf->regs.u_regs[i]); + err |= __put_user(sizeof(siginfo_extra_v8plus_t), &sf->extra_size); + err |= __put_user(SIGINFO_EXTRA_V8PLUS_MAGIC, &sf->v8plus.g_upper[0]); + for (i = 1; i < 16; i++) + err |= __put_user(((u32 *)regs->u_regs)[2*i], + &sf->v8plus.g_upper[i]); + err |= __put_user((regs->tstate & TSTATE_ASI) >> 24UL, + &sf->v8plus.asi); + + if (psr & PSR_EF) { + err |= save_fpu_state32(regs, &sf->fpu_state); + err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + } else { + err |= __put_user(0, &sf->fpu_save); + } + + /* Update the siginfo structure. */ + err |= copy_siginfo_to_user32(&sf->info, info); + + /* Setup sigaltstack */ + err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags); + err |= __put_user(current->sas_ss_size, &sf->stack.ss_size); + + switch (_NSIG_WORDS) { + case 4: seta.sig[7] = (oldset->sig[3] >> 32); + seta.sig[6] = oldset->sig[3]; + case 3: seta.sig[5] = (oldset->sig[2] >> 32); + seta.sig[4] = oldset->sig[2]; + case 2: seta.sig[3] = (oldset->sig[1] >> 32); + seta.sig[2] = oldset->sig[1]; + case 1: seta.sig[1] = (oldset->sig[0] >> 32); + seta.sig[0] = oldset->sig[0]; + } + err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t)); + + err |= copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); + if (err) + goto sigsegv; + + /* 3. signal handler back-trampoline and parameters */ + regs->u_regs[UREG_FP] = (unsigned long) sf; + regs->u_regs[UREG_I0] = signr; + regs->u_regs[UREG_I1] = (unsigned long) &sf->info; + regs->u_regs[UREG_I2] = (unsigned long) &sf->regs; + + /* 4. signal handler */ + regs->tpc = (unsigned long) ka->sa.sa_handler; + regs->tnpc = (regs->tpc + 4); + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + + /* 5. return to kernel instructions */ + if (ka->ka_restorer) + regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer; + else { + /* Flush instruction space. */ + unsigned long address = ((unsigned long)&(sf->insns[0])); + pgd_t *pgdp = pgd_offset(current->mm, address); + pud_t *pudp = pud_offset(pgdp, address); + pmd_t *pmdp = pmd_offset(pudp, address); + pte_t *ptep; + + regs->u_regs[UREG_I7] = (unsigned long) (&(sf->insns[0]) - 2); + + /* mov __NR_rt_sigreturn, %g1 */ + err |= __put_user(0x82102065, &sf->insns[0]); + + /* t 0x10 */ + err |= __put_user(0x91d02010, &sf->insns[1]); + if (err) + goto sigsegv; + + preempt_disable(); + ptep = pte_offset_map(pmdp, address); + if (pte_present(*ptep)) { + unsigned long page = (unsigned long) + page_address(pte_page(*ptep)); + + wmb(); + __asm__ __volatile__("flush %0 + %1" + : /* no outputs */ + : "r" (page), + "r" (address & (PAGE_SIZE - 1)) + : "memory"); + } + pte_unmap(ptep); + preempt_enable(); + } + return; + +sigill: + do_exit(SIGILL); +sigsegv: + force_sigsegv(signr, current); +} + +static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka, + siginfo_t *info, + sigset_t *oldset, struct pt_regs *regs) +{ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame32(ka, regs, signr, oldset, info); + else + setup_frame32(ka, regs, signr, oldset); + + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NOMASK)) + sigaddset(¤t->blocked,signr); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + +static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs, + struct sigaction *sa) +{ + switch (regs->u_regs[UREG_I0]) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + no_system_call_restart: + regs->u_regs[UREG_I0] = EINTR; + regs->tstate |= TSTATE_ICARRY; + break; + case ERESTARTSYS: + if (!(sa->sa_flags & SA_RESTART)) + goto no_system_call_restart; + /* fallthrough */ + case ERESTARTNOINTR: + regs->u_regs[UREG_I0] = orig_i0; + regs->tpc -= 4; + regs->tnpc -= 4; + } +} + +/* Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +void do_signal32(sigset_t *oldset, struct pt_regs * regs, + int restart_syscall, unsigned long orig_i0) +{ + struct k_sigaction ka; + siginfo_t info; + int signr; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + /* If the debugger messes with the program counter, it clears + * the "in syscall" bit, directing us to not perform a syscall + * restart. + */ + if (restart_syscall && !pt_regs_is_syscall(regs)) + restart_syscall = 0; + + if (signr > 0) { + if (restart_syscall) + syscall_restart32(orig_i0, regs, &ka.sa); + handle_signal32(signr, &ka, &info, oldset, regs); + + /* A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + + tracehook_signal_handler(signr, &info, &ka, regs, 0); + return; + } + if (restart_syscall && + (regs->u_regs[UREG_I0] == ERESTARTNOHAND || + regs->u_regs[UREG_I0] == ERESTARTSYS || + regs->u_regs[UREG_I0] == ERESTARTNOINTR)) { + /* replay the system call when we are done */ + regs->u_regs[UREG_I0] = orig_i0; + regs->tpc -= 4; + regs->tnpc -= 4; + } + if (restart_syscall && + regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { + regs->u_regs[UREG_G1] = __NR_restart_syscall; + regs->tpc -= 4; + regs->tnpc -= 4; + } + + /* If there's no signal to deliver, we just put the saved sigmask + * back + */ + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } +} + +struct sigstack32 { + u32 the_stack; + int cur_status; +}; + +asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp) +{ + struct sigstack32 __user *ssptr = + (struct sigstack32 __user *)((unsigned long)(u_ssptr)); + struct sigstack32 __user *ossptr = + (struct sigstack32 __user *)((unsigned long)(u_ossptr)); + int ret = -EFAULT; + + /* First see if old state is wanted. */ + if (ossptr) { + if (put_user(current->sas_ss_sp + current->sas_ss_size, + &ossptr->the_stack) || + __put_user(on_sig_stack(sp), &ossptr->cur_status)) + goto out; + } + + /* Now see if we want to update the new state. */ + if (ssptr) { + u32 ss_sp; + + if (get_user(ss_sp, &ssptr->the_stack)) + goto out; + + /* If the current stack was set with sigaltstack, don't + * swap stacks while we are on it. + */ + ret = -EPERM; + if (current->sas_ss_sp && on_sig_stack(sp)) + goto out; + + /* Since we don't know the extent of the stack, and we don't + * track onstack-ness, but rather calculate it, we must + * presume a size. Ho hum this interface is lossy. + */ + current->sas_ss_sp = (unsigned long)ss_sp - SIGSTKSZ; + current->sas_ss_size = SIGSTKSZ; + } + + ret = 0; +out: + return ret; +} + +asmlinkage long do_sys32_sigaltstack(u32 ussa, u32 uossa, unsigned long sp) +{ + stack_t uss, uoss; + u32 u_ss_sp = 0; + int ret; + mm_segment_t old_fs; + stack_t32 __user *uss32 = compat_ptr(ussa); + stack_t32 __user *uoss32 = compat_ptr(uossa); + + if (ussa && (get_user(u_ss_sp, &uss32->ss_sp) || + __get_user(uss.ss_flags, &uss32->ss_flags) || + __get_user(uss.ss_size, &uss32->ss_size))) + return -EFAULT; + uss.ss_sp = compat_ptr(u_ss_sp); + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(ussa ? (stack_t __user *) &uss : NULL, + uossa ? (stack_t __user *) &uoss : NULL, sp); + set_fs(old_fs); + if (!ret && uossa && (put_user(ptr_to_compat(uoss.ss_sp), &uoss32->ss_sp) || + __put_user(uoss.ss_flags, &uoss32->ss_flags) || + __put_user(uoss.ss_size, &uoss32->ss_size))) + return -EFAULT; + return ret; +} diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c new file mode 100644 index 000000000000..ec82d76dc6f2 --- /dev/null +++ b/arch/sparc/kernel/signal_64.c @@ -0,0 +1,617 @@ +/* + * arch/sparc64/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) + * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> /* for compat_old_sigset_t */ +#endif +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/tracehook.h> +#include <linux/unistd.h> +#include <linux/mm.h> +#include <linux/tty.h> +#include <linux/binfmts.h> +#include <linux/bitops.h> + +#include <asm/uaccess.h> +#include <asm/ptrace.h> +#include <asm/pgtable.h> +#include <asm/fpumacro.h> +#include <asm/uctx.h> +#include <asm/siginfo.h> +#include <asm/visasm.h> + +#include "entry.h" +#include "systbls.h" + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +/* {set, get}context() needed for 64-bit SparcLinux userland. */ +asmlinkage void sparc64_set_context(struct pt_regs *regs) +{ + struct ucontext __user *ucp = (struct ucontext __user *) + regs->u_regs[UREG_I0]; + mc_gregset_t __user *grp; + unsigned long pc, npc, tstate; + unsigned long fp, i7; + unsigned char fenab; + int err; + + flush_user_windows(); + if (get_thread_wsaved() || + (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || + (!__access_ok(ucp, sizeof(*ucp)))) + goto do_sigsegv; + grp = &ucp->uc_mcontext.mc_gregs; + err = __get_user(pc, &((*grp)[MC_PC])); + err |= __get_user(npc, &((*grp)[MC_NPC])); + if (err || ((pc | npc) & 3)) + goto do_sigsegv; + if (regs->u_regs[UREG_I1]) { + sigset_t set; + + if (_NSIG_WORDS == 1) { + if (__get_user(set.sig[0], &ucp->uc_sigmask.sig[0])) + goto do_sigsegv; + } else { + if (__copy_from_user(&set, &ucp->uc_sigmask, sizeof(sigset_t))) + goto do_sigsegv; + } + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + } + if (test_thread_flag(TIF_32BIT)) { + pc &= 0xffffffff; + npc &= 0xffffffff; + } + regs->tpc = pc; + regs->tnpc = npc; + err |= __get_user(regs->y, &((*grp)[MC_Y])); + err |= __get_user(tstate, &((*grp)[MC_TSTATE])); + regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC); + regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC)); + err |= __get_user(regs->u_regs[UREG_G1], (&(*grp)[MC_G1])); + err |= __get_user(regs->u_regs[UREG_G2], (&(*grp)[MC_G2])); + err |= __get_user(regs->u_regs[UREG_G3], (&(*grp)[MC_G3])); + err |= __get_user(regs->u_regs[UREG_G4], (&(*grp)[MC_G4])); + err |= __get_user(regs->u_regs[UREG_G5], (&(*grp)[MC_G5])); + err |= __get_user(regs->u_regs[UREG_G6], (&(*grp)[MC_G6])); + + /* Skip %g7 as that's the thread register in userspace. */ + + err |= __get_user(regs->u_regs[UREG_I0], (&(*grp)[MC_O0])); + err |= __get_user(regs->u_regs[UREG_I1], (&(*grp)[MC_O1])); + err |= __get_user(regs->u_regs[UREG_I2], (&(*grp)[MC_O2])); + err |= __get_user(regs->u_regs[UREG_I3], (&(*grp)[MC_O3])); + err |= __get_user(regs->u_regs[UREG_I4], (&(*grp)[MC_O4])); + err |= __get_user(regs->u_regs[UREG_I5], (&(*grp)[MC_O5])); + err |= __get_user(regs->u_regs[UREG_I6], (&(*grp)[MC_O6])); + err |= __get_user(regs->u_regs[UREG_I7], (&(*grp)[MC_O7])); + + err |= __get_user(fp, &(ucp->uc_mcontext.mc_fp)); + err |= __get_user(i7, &(ucp->uc_mcontext.mc_i7)); + err |= __put_user(fp, + (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[6]))); + err |= __put_user(i7, + (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[7]))); + + err |= __get_user(fenab, &(ucp->uc_mcontext.mc_fpregs.mcfpu_enab)); + if (fenab) { + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + + fprs_write(0); + err |= __get_user(fprs, &(ucp->uc_mcontext.mc_fpregs.mcfpu_fprs)); + if (fprs & FPRS_DL) + err |= copy_from_user(fpregs, + &(ucp->uc_mcontext.mc_fpregs.mcfpu_fregs), + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_from_user(fpregs+16, + ((unsigned long __user *)&(ucp->uc_mcontext.mc_fpregs.mcfpu_fregs))+16, + (sizeof(unsigned int) * 32)); + err |= __get_user(current_thread_info()->xfsr[0], + &(ucp->uc_mcontext.mc_fpregs.mcfpu_fsr)); + err |= __get_user(current_thread_info()->gsr[0], + &(ucp->uc_mcontext.mc_fpregs.mcfpu_gsr)); + regs->tstate &= ~TSTATE_PEF; + } + if (err) + goto do_sigsegv; + + return; +do_sigsegv: + force_sig(SIGSEGV, current); +} + +asmlinkage void sparc64_get_context(struct pt_regs *regs) +{ + struct ucontext __user *ucp = (struct ucontext __user *) + regs->u_regs[UREG_I0]; + mc_gregset_t __user *grp; + mcontext_t __user *mcp; + unsigned long fp, i7; + unsigned char fenab; + int err; + + synchronize_user_stack(); + if (get_thread_wsaved() || clear_user(ucp, sizeof(*ucp))) + goto do_sigsegv; + +#if 1 + fenab = 0; /* IMO get_context is like any other system call, thus modifies FPU state -jj */ +#else + fenab = (current_thread_info()->fpsaved[0] & FPRS_FEF); +#endif + + mcp = &ucp->uc_mcontext; + grp = &mcp->mc_gregs; + + /* Skip over the trap instruction, first. */ + if (test_thread_flag(TIF_32BIT)) { + regs->tpc = (regs->tnpc & 0xffffffff); + regs->tnpc = (regs->tnpc + 4) & 0xffffffff; + } else { + regs->tpc = regs->tnpc; + regs->tnpc += 4; + } + err = 0; + if (_NSIG_WORDS == 1) + err |= __put_user(current->blocked.sig[0], + (unsigned long __user *)&ucp->uc_sigmask); + else + err |= __copy_to_user(&ucp->uc_sigmask, ¤t->blocked, + sizeof(sigset_t)); + + err |= __put_user(regs->tstate, &((*grp)[MC_TSTATE])); + err |= __put_user(regs->tpc, &((*grp)[MC_PC])); + err |= __put_user(regs->tnpc, &((*grp)[MC_NPC])); + err |= __put_user(regs->y, &((*grp)[MC_Y])); + err |= __put_user(regs->u_regs[UREG_G1], &((*grp)[MC_G1])); + err |= __put_user(regs->u_regs[UREG_G2], &((*grp)[MC_G2])); + err |= __put_user(regs->u_regs[UREG_G3], &((*grp)[MC_G3])); + err |= __put_user(regs->u_regs[UREG_G4], &((*grp)[MC_G4])); + err |= __put_user(regs->u_regs[UREG_G5], &((*grp)[MC_G5])); + err |= __put_user(regs->u_regs[UREG_G6], &((*grp)[MC_G6])); + err |= __put_user(regs->u_regs[UREG_G7], &((*grp)[MC_G7])); + err |= __put_user(regs->u_regs[UREG_I0], &((*grp)[MC_O0])); + err |= __put_user(regs->u_regs[UREG_I1], &((*grp)[MC_O1])); + err |= __put_user(regs->u_regs[UREG_I2], &((*grp)[MC_O2])); + err |= __put_user(regs->u_regs[UREG_I3], &((*grp)[MC_O3])); + err |= __put_user(regs->u_regs[UREG_I4], &((*grp)[MC_O4])); + err |= __put_user(regs->u_regs[UREG_I5], &((*grp)[MC_O5])); + err |= __put_user(regs->u_regs[UREG_I6], &((*grp)[MC_O6])); + err |= __put_user(regs->u_regs[UREG_I7], &((*grp)[MC_O7])); + + err |= __get_user(fp, + (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[6]))); + err |= __get_user(i7, + (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[7]))); + err |= __put_user(fp, &(mcp->mc_fp)); + err |= __put_user(i7, &(mcp->mc_i7)); + + err |= __put_user(fenab, &(mcp->mc_fpregs.mcfpu_enab)); + if (fenab) { + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + + fprs = current_thread_info()->fpsaved[0]; + if (fprs & FPRS_DL) + err |= copy_to_user(&(mcp->mc_fpregs.mcfpu_fregs), fpregs, + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_to_user( + ((unsigned long __user *)&(mcp->mc_fpregs.mcfpu_fregs))+16, fpregs+16, + (sizeof(unsigned int) * 32)); + err |= __put_user(current_thread_info()->xfsr[0], &(mcp->mc_fpregs.mcfpu_fsr)); + err |= __put_user(current_thread_info()->gsr[0], &(mcp->mc_fpregs.mcfpu_gsr)); + err |= __put_user(fprs, &(mcp->mc_fpregs.mcfpu_fprs)); + } + if (err) + goto do_sigsegv; + + return; +do_sigsegv: + force_sig(SIGSEGV, current); +} + +struct rt_signal_frame { + struct sparc_stackf ss; + siginfo_t info; + struct pt_regs regs; + __siginfo_fpu_t __user *fpu_save; + stack_t stack; + sigset_t mask; + __siginfo_fpu_t fpu_state; +}; + +static long _sigpause_common(old_sigset_t set) +{ + set &= _BLOCKABLE; + spin_lock_irq(¤t->sighand->siglock); + current->saved_sigmask = current->blocked; + siginitset(¤t->blocked, set); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + + current->state = TASK_INTERRUPTIBLE; + schedule(); + + set_restore_sigmask(); + + return -ERESTARTNOHAND; +} + +asmlinkage long sys_sigpause(unsigned int set) +{ + return _sigpause_common(set); +} + +asmlinkage long sys_sigsuspend(old_sigset_t set) +{ + return _sigpause_common(set); +} + +static inline int +restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err; + + err = __get_user(fprs, &fpu->si_fprs); + fprs_write(0); + regs->tstate &= ~TSTATE_PEF; + if (fprs & FPRS_DL) + err |= copy_from_user(fpregs, &fpu->si_float_regs[0], + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], + (sizeof(unsigned int) * 32)); + err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); + current_thread_info()->fpsaved[0] |= fprs; + return err; +} + +void do_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_signal_frame __user *sf; + unsigned long tpc, tnpc, tstate; + __siginfo_fpu_t __user *fpu_save; + sigset_t set; + int err; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + synchronize_user_stack (); + sf = (struct rt_signal_frame __user *) + (regs->u_regs [UREG_FP] + STACK_BIAS); + + /* 1. Make sure we are not getting garbage from the user */ + if (((unsigned long) sf) & 3) + goto segv; + + err = get_user(tpc, &sf->regs.tpc); + err |= __get_user(tnpc, &sf->regs.tnpc); + if (test_thread_flag(TIF_32BIT)) { + tpc &= 0xffffffff; + tnpc &= 0xffffffff; + } + err |= ((tpc | tnpc) & 3); + + /* 2. Restore the state */ + err |= __get_user(regs->y, &sf->regs.y); + err |= __get_user(tstate, &sf->regs.tstate); + err |= copy_from_user(regs->u_regs, sf->regs.u_regs, sizeof(regs->u_regs)); + + /* User can only change condition codes and %asi in %tstate. */ + regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC); + regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC)); + + err |= __get_user(fpu_save, &sf->fpu_save); + if (fpu_save) + err |= restore_fpu_state(regs, &sf->fpu_state); + + err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); + err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf); + + if (err) + goto segv; + + regs->tpc = tpc; + regs->tnpc = tnpc; + + /* Prevent syscall restart. */ + pt_regs_clear_syscall(regs); + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + return; +segv: + force_sig(SIGSEGV, current); +} + +/* Checks if the fp is valid */ +static int invalid_frame_pointer(void __user *fp, int fplen) +{ + if (((unsigned long) fp) & 7) + return 1; + return 0; +} + +static inline int +save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err = 0; + + fprs = current_thread_info()->fpsaved[0]; + if (fprs & FPRS_DL) + err |= copy_to_user(&fpu->si_float_regs[0], fpregs, + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, + (sizeof(unsigned int) * 32)); + err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); + err |= __put_user(fprs, &fpu->si_fprs); + + return err; +} + +static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize) +{ + unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user *) -1L; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + /* Always align the stack frame. This handles two cases. First, + * sigaltstack need not be mindful of platform specific stack + * alignment. Second, if we took this signal because the stack + * is not aligned properly, we'd like to take the signal cleanly + * and report that. + */ + sp &= ~7UL; + + return (void __user *)(sp - framesize); +} + +static inline void +setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset, siginfo_t *info) +{ + struct rt_signal_frame __user *sf; + int sigframe_size, err; + + /* 1. Make sure everything is clean */ + synchronize_user_stack(); + save_and_clear_fpu(); + + sigframe_size = sizeof(struct rt_signal_frame); + if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) + sigframe_size -= sizeof(__siginfo_fpu_t); + + sf = (struct rt_signal_frame __user *) + get_sigframe(ka, regs, sigframe_size); + + if (invalid_frame_pointer (sf, sigframe_size)) + goto sigill; + + if (get_thread_wsaved() != 0) + goto sigill; + + /* 2. Save the current process state */ + err = copy_to_user(&sf->regs, regs, sizeof (*regs)); + + if (current_thread_info()->fpsaved[0] & FPRS_FEF) { + err |= save_fpu_state(regs, &sf->fpu_state); + err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + } else { + err |= __put_user(0, &sf->fpu_save); + } + + /* Setup sigaltstack */ + err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags); + err |= __put_user(current->sas_ss_size, &sf->stack.ss_size); + + err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t)); + + err |= copy_in_user((u64 __user *)sf, + (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS), + sizeof(struct reg_window)); + + if (info) + err |= copy_siginfo_to_user(&sf->info, info); + else { + err |= __put_user(signo, &sf->info.si_signo); + err |= __put_user(SI_NOINFO, &sf->info.si_code); + } + if (err) + goto sigsegv; + + /* 3. signal handler back-trampoline and parameters */ + regs->u_regs[UREG_FP] = ((unsigned long) sf) - STACK_BIAS; + regs->u_regs[UREG_I0] = signo; + regs->u_regs[UREG_I1] = (unsigned long) &sf->info; + + /* The sigcontext is passed in this way because of how it + * is defined in GLIBC's /usr/include/bits/sigcontext.h + * for sparc64. It includes the 128 bytes of siginfo_t. + */ + regs->u_regs[UREG_I2] = (unsigned long) &sf->info; + + /* 5. signal handler */ + regs->tpc = (unsigned long) ka->sa.sa_handler; + regs->tnpc = (regs->tpc + 4); + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + /* 4. return to kernel instructions */ + regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer; + return; + +sigill: + do_exit(SIGILL); +sigsegv: + force_sigsegv(signo, current); +} + +static inline void handle_signal(unsigned long signr, struct k_sigaction *ka, + siginfo_t *info, + sigset_t *oldset, struct pt_regs *regs) +{ + setup_rt_frame(ka, regs, signr, oldset, + (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL); + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NOMASK)) + sigaddset(¤t->blocked,signr); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + +static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, + struct sigaction *sa) +{ + switch (regs->u_regs[UREG_I0]) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + no_system_call_restart: + regs->u_regs[UREG_I0] = EINTR; + regs->tstate |= (TSTATE_ICARRY|TSTATE_XCARRY); + break; + case ERESTARTSYS: + if (!(sa->sa_flags & SA_RESTART)) + goto no_system_call_restart; + /* fallthrough */ + case ERESTARTNOINTR: + regs->u_regs[UREG_I0] = orig_i0; + regs->tpc -= 4; + regs->tnpc -= 4; + } +} + +/* Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +static void do_signal(struct pt_regs *regs, unsigned long orig_i0) +{ + struct k_sigaction ka; + int restart_syscall; + sigset_t *oldset; + siginfo_t info; + int signr; + + if (pt_regs_is_syscall(regs) && + (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) { + restart_syscall = 1; + } else + restart_syscall = 0; + + if (current_thread_info()->status & TS_RESTORE_SIGMASK) + oldset = ¤t->saved_sigmask; + else + oldset = ¤t->blocked; + +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_32BIT)) { + extern void do_signal32(sigset_t *, struct pt_regs *, + int restart_syscall, + unsigned long orig_i0); + do_signal32(oldset, regs, restart_syscall, orig_i0); + return; + } +#endif + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + + /* If the debugger messes with the program counter, it clears + * the software "in syscall" bit, directing us to not perform + * a syscall restart. + */ + if (restart_syscall && !pt_regs_is_syscall(regs)) + restart_syscall = 0; + + if (signr > 0) { + if (restart_syscall) + syscall_restart(orig_i0, regs, &ka.sa); + handle_signal(signr, &ka, &info, oldset, regs); + + /* A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + + tracehook_signal_handler(signr, &info, &ka, regs, 0); + return; + } + if (restart_syscall && + (regs->u_regs[UREG_I0] == ERESTARTNOHAND || + regs->u_regs[UREG_I0] == ERESTARTSYS || + regs->u_regs[UREG_I0] == ERESTARTNOINTR)) { + /* replay the system call when we are done */ + regs->u_regs[UREG_I0] = orig_i0; + regs->tpc -= 4; + regs->tnpc -= 4; + } + if (restart_syscall && + regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { + regs->u_regs[UREG_G1] = __NR_restart_syscall; + regs->tpc -= 4; + regs->tnpc -= 4; + } + + /* If there's no signal to deliver, we just put the saved sigmask + * back + */ + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } +} + +void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags) +{ + if (thread_info_flags & _TIF_SIGPENDING) + do_signal(regs, orig_i0); + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } +} diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c new file mode 100644 index 000000000000..b5225c81556c --- /dev/null +++ b/arch/sparc/kernel/smp_64.c @@ -0,0 +1,1412 @@ +/* smp.c: Sparc64 SMP support. + * + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/cache.h> +#include <linux/jiffies.h> +#include <linux/profile.h> +#include <linux/lmb.h> +#include <linux/cpu.h> + +#include <asm/head.h> +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> +#include <asm/cpudata.h> +#include <asm/hvtramp.h> +#include <asm/io.h> +#include <asm/timer.h> + +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/oplib.h> +#include <asm/uaccess.h> +#include <asm/starfire.h> +#include <asm/tlb.h> +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/mdesc.h> +#include <asm/ldc.h> +#include <asm/hypervisor.h> + +int sparc64_multi_core __read_mostly; + +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; +cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly = + { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + +EXPORT_SYMBOL(cpu_possible_map); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); +EXPORT_SYMBOL(cpu_core_map); + +static cpumask_t smp_commenced_mask; + +void smp_info(struct seq_file *m) +{ + int i; + + seq_printf(m, "State:\n"); + for_each_online_cpu(i) + seq_printf(m, "CPU%d:\t\tonline\n", i); +} + +void smp_bogo(struct seq_file *m) +{ + int i; + + for_each_online_cpu(i) + seq_printf(m, + "Cpu%dClkTck\t: %016lx\n", + i, cpu_data(i).clock_tick); +} + +extern void setup_sparc64_timer(void); + +static volatile unsigned long callin_flag = 0; + +void __cpuinit smp_callin(void) +{ + int cpuid = hard_smp_processor_id(); + + __local_per_cpu_offset = __per_cpu_offset(cpuid); + + if (tlb_type == hypervisor) + sun4v_ktsb_register(); + + __flush_tlb_all(); + + setup_sparc64_timer(); + + if (cheetah_pcache_forced_on) + cheetah_enable_pcache(); + + local_irq_enable(); + + callin_flag = 1; + __asm__ __volatile__("membar #Sync\n\t" + "flush %%g6" : : : "memory"); + + /* Clear this or we will die instantly when we + * schedule back to this idler... + */ + current_thread_info()->new_child = 0; + + /* Attach to the address space of init_task. */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + + while (!cpu_isset(cpuid, smp_commenced_mask)) + rmb(); + + ipi_call_lock(); + cpu_set(cpuid, cpu_online_map); + ipi_call_unlock(); + + /* idle thread is expected to have preempt disabled */ + preempt_disable(); +} + +void cpu_panic(void) +{ + printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id()); + panic("SMP bolixed\n"); +} + +/* This tick register synchronization scheme is taken entirely from + * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. + * + * The only change I've made is to rework it so that the master + * initiates the synchonization instead of the slave. -DaveM + */ + +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long)) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static DEFINE_SPINLOCK(itc_sync_lock); +static unsigned long go[SLAVE + 1]; + +#define DEBUG_TICK_SYNC 0 + +static inline long get_delta (long *rt, long *master) +{ + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm; + unsigned long i; + + for (i = 0; i < NUM_ITERS; i++) { + t0 = tick_ops->get_tick(); + go[MASTER] = 1; + membar_safe("#StoreLoad"); + while (!(tm = go[SLAVE])) + rmb(); + go[SLAVE] = 0; + wmb(); + t1 = tick_ops->get_tick(); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + tcenter++; + return tcenter - best_tm; +} + +void smp_synchronize_tick_client(void) +{ + long i, delta, adj, adjust_latency = 0, done = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_TICK_SYNC + struct { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of itc adjustment latency */ + } t[NUM_ROUNDS]; +#endif + + go[MASTER] = 1; + + while (go[MASTER]) + rmb(); + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS; i++) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... */ + bound = rt; + } + + if (!done) { + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + tick_ops->add_tick(adj); + } +#if DEBUG_TICK_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + local_irq_restore(flags); + +#if DEBUG_TICK_SYNC + for (i = 0; i < NUM_ROUNDS; i++) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk(KERN_INFO "CPU %d: synchronized TICK with master CPU " + "(last diff %ld cycles, maxerr %lu cycles)\n", + smp_processor_id(), delta, rt); +} + +static void smp_start_sync_tick_client(int cpu); + +static void smp_synchronize_one_tick(int cpu) +{ + unsigned long flags, i; + + go[MASTER] = 0; + + smp_start_sync_tick_client(cpu); + + /* wait for client to be ready */ + while (!go[MASTER]) + rmb(); + + /* now let the client proceed into his loop */ + go[MASTER] = 0; + membar_safe("#StoreLoad"); + + spin_lock_irqsave(&itc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { + while (!go[MASTER]) + rmb(); + go[MASTER] = 0; + wmb(); + go[SLAVE] = tick_ops->get_tick(); + membar_safe("#StoreLoad"); + } + } + spin_unlock_irqrestore(&itc_sync_lock, flags); +} + +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) +/* XXX Put this in some common place. XXX */ +static unsigned long kimage_addr_to_ra(void *p) +{ + unsigned long val = (unsigned long) p; + + return kern_base + (val - KERNBASE); +} + +static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) +{ + extern unsigned long sparc64_ttable_tl0; + extern unsigned long kern_locked_tte_data; + struct hvtramp_descr *hdesc; + unsigned long trampoline_ra; + struct trap_per_cpu *tb; + u64 tte_vaddr, tte_data; + unsigned long hv_err; + int i; + + hdesc = kzalloc(sizeof(*hdesc) + + (sizeof(struct hvtramp_mapping) * + num_kernel_image_mappings - 1), + GFP_KERNEL); + if (!hdesc) { + printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " + "hvtramp_descr.\n"); + return; + } + + hdesc->cpu = cpu; + hdesc->num_mappings = num_kernel_image_mappings; + + tb = &trap_block[cpu]; + tb->hdesc = hdesc; + + hdesc->fault_info_va = (unsigned long) &tb->fault_info; + hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); + + hdesc->thread_reg = thread_reg; + + tte_vaddr = (unsigned long) KERNBASE; + tte_data = kern_locked_tte_data; + + for (i = 0; i < hdesc->num_mappings; i++) { + hdesc->maps[i].vaddr = tte_vaddr; + hdesc->maps[i].tte = tte_data; + tte_vaddr += 0x400000; + tte_data += 0x400000; + } + + trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); + + hv_err = sun4v_cpu_start(cpu, trampoline_ra, + kimage_addr_to_ra(&sparc64_ttable_tl0), + __pa(hdesc)); + if (hv_err) + printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() " + "gives error %lu\n", hv_err); +} +#endif + +extern unsigned long sparc64_cpu_startup; + +/* The OBP cpu startup callback truncates the 3rd arg cookie to + * 32-bits (I think) so to be safe we have it read the pointer + * contained here so we work on >4GB machines. -DaveM + */ +static struct thread_info *cpu_new_thread = NULL; + +static int __cpuinit smp_boot_one_cpu(unsigned int cpu) +{ + struct trap_per_cpu *tb = &trap_block[cpu]; + unsigned long entry = + (unsigned long)(&sparc64_cpu_startup); + unsigned long cookie = + (unsigned long)(&cpu_new_thread); + struct task_struct *p; + int timeout, ret; + + p = fork_idle(cpu); + if (IS_ERR(p)) + return PTR_ERR(p); + callin_flag = 0; + cpu_new_thread = task_thread_info(p); + + if (tlb_type == hypervisor) { +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) + if (ldom_domaining_enabled) + ldom_startcpu_cpuid(cpu, + (unsigned long) cpu_new_thread); + else +#endif + prom_startcpu_cpuid(cpu, entry, cookie); + } else { + struct device_node *dp = of_find_node_by_cpuid(cpu); + + prom_startcpu(dp->node, entry, cookie); + } + + for (timeout = 0; timeout < 50000; timeout++) { + if (callin_flag) + break; + udelay(100); + } + + if (callin_flag) { + ret = 0; + } else { + printk("Processor %d is stuck.\n", cpu); + ret = -ENODEV; + } + cpu_new_thread = NULL; + + if (tb->hdesc) { + kfree(tb->hdesc); + tb->hdesc = NULL; + } + + return ret; +} + +static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu) +{ + u64 result, target; + int stuck, tmp; + + if (this_is_starfire) { + /* map to real upaid */ + cpu = (((cpu & 0x3c) << 1) | + ((cpu & 0x40) >> 4) | + (cpu & 0x3)); + } + + target = (cpu << 14) | 0x70; +again: + /* Ok, this is the real Spitfire Errata #54. + * One must read back from a UDB internal register + * after writes to the UDB interrupt dispatch, but + * before the membar Sync for that write. + * So we use the high UDB control register (ASI 0x7f, + * ADDR 0x20) for the dummy read. -DaveM + */ + tmp = 0x40; + __asm__ __volatile__( + "wrpr %1, %2, %%pstate\n\t" + "stxa %4, [%0] %3\n\t" + "stxa %5, [%0+%8] %3\n\t" + "add %0, %8, %0\n\t" + "stxa %6, [%0+%8] %3\n\t" + "membar #Sync\n\t" + "stxa %%g0, [%7] %3\n\t" + "membar #Sync\n\t" + "mov 0x20, %%g1\n\t" + "ldxa [%%g1] 0x7f, %%g0\n\t" + "membar #Sync" + : "=r" (tmp) + : "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W), + "r" (data0), "r" (data1), "r" (data2), "r" (target), + "r" (0x10), "0" (tmp) + : "g1"); + + /* NOTE: PSTATE_IE is still clear. */ + stuck = 100000; + do { + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (result) + : "i" (ASI_INTR_DISPATCH_STAT)); + if (result == 0) { + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + return; + } + stuck -= 1; + if (stuck == 0) + break; + } while (result & 0x1); + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + if (stuck == 0) { + printk("CPU[%d]: mondo stuckage result[%016lx]\n", + smp_processor_id(), result); + } else { + udelay(2); + goto again; + } +} + +static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt) +{ + u64 *mondo, data0, data1, data2; + u16 *cpu_list; + u64 pstate; + int i; + + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + cpu_list = __va(tb->cpu_list_pa); + mondo = __va(tb->cpu_mondo_block_pa); + data0 = mondo[0]; + data1 = mondo[1]; + data2 = mondo[2]; + for (i = 0; i < cnt; i++) + spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]); +} + +/* Cheetah now allows to send the whole 64-bytes of data in the interrupt + * packet, but we have no use for that. However we do take advantage of + * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). + */ +static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt) +{ + int nack_busy_id, is_jbus, need_more; + u64 *mondo, pstate, ver, busy_mask; + u16 *cpu_list; + + cpu_list = __va(tb->cpu_list_pa); + mondo = __va(tb->cpu_mondo_block_pa); + + /* Unfortunately, someone at Sun had the brilliant idea to make the + * busy/nack fields hard-coded by ITID number for this Ultra-III + * derivative processor. + */ + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + is_jbus = ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID); + + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + +retry: + need_more = 0; + __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" + : : "r" (pstate), "i" (PSTATE_IE)); + + /* Setup the dispatch data registers. */ + __asm__ __volatile__("stxa %0, [%3] %6\n\t" + "stxa %1, [%4] %6\n\t" + "stxa %2, [%5] %6\n\t" + "membar #Sync\n\t" + : /* no outputs */ + : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]), + "r" (0x40), "r" (0x50), "r" (0x60), + "i" (ASI_INTR_W)); + + nack_busy_id = 0; + busy_mask = 0; + { + int i; + + for (i = 0; i < cnt; i++) { + u64 target, nr; + + nr = cpu_list[i]; + if (nr == 0xffff) + continue; + + target = (nr << 14) | 0x70; + if (is_jbus) { + busy_mask |= (0x1UL << (nr * 2)); + } else { + target |= (nack_busy_id << 24); + busy_mask |= (0x1UL << + (nack_busy_id * 2)); + } + __asm__ __volatile__( + "stxa %%g0, [%0] %1\n\t" + "membar #Sync\n\t" + : /* no outputs */ + : "r" (target), "i" (ASI_INTR_W)); + nack_busy_id++; + if (nack_busy_id == 32) { + need_more = 1; + break; + } + } + } + + /* Now, poll for completion. */ + { + u64 dispatch_stat, nack_mask; + long stuck; + + stuck = 100000 * nack_busy_id; + nack_mask = busy_mask << 1; + do { + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (dispatch_stat) + : "i" (ASI_INTR_DISPATCH_STAT)); + if (!(dispatch_stat & (busy_mask | nack_mask))) { + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + if (unlikely(need_more)) { + int i, this_cnt = 0; + for (i = 0; i < cnt; i++) { + if (cpu_list[i] == 0xffff) + continue; + cpu_list[i] = 0xffff; + this_cnt++; + if (this_cnt == 32) + break; + } + goto retry; + } + return; + } + if (!--stuck) + break; + } while (dispatch_stat & busy_mask); + + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + + if (dispatch_stat & busy_mask) { + /* Busy bits will not clear, continue instead + * of freezing up on this cpu. + */ + printk("CPU[%d]: mondo stuckage result[%016lx]\n", + smp_processor_id(), dispatch_stat); + } else { + int i, this_busy_nack = 0; + + /* Delay some random time with interrupts enabled + * to prevent deadlock. + */ + udelay(2 * nack_busy_id); + + /* Clear out the mask bits for cpus which did not + * NACK us. + */ + for (i = 0; i < cnt; i++) { + u64 check_mask, nr; + + nr = cpu_list[i]; + if (nr == 0xffff) + continue; + + if (is_jbus) + check_mask = (0x2UL << (2*nr)); + else + check_mask = (0x2UL << + this_busy_nack); + if ((dispatch_stat & check_mask) == 0) + cpu_list[i] = 0xffff; + this_busy_nack += 2; + if (this_busy_nack == 64) + break; + } + + goto retry; + } + } +} + +/* Multi-cpu list version. */ +static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) +{ + int retries, this_cpu, prev_sent, i, saw_cpu_error; + unsigned long status; + u16 *cpu_list; + + this_cpu = smp_processor_id(); + + cpu_list = __va(tb->cpu_list_pa); + + saw_cpu_error = 0; + retries = 0; + prev_sent = 0; + do { + int forward_progress, n_sent; + + status = sun4v_cpu_mondo_send(cnt, + tb->cpu_list_pa, + tb->cpu_mondo_block_pa); + + /* HV_EOK means all cpus received the xcall, we're done. */ + if (likely(status == HV_EOK)) + break; + + /* First, see if we made any forward progress. + * + * The hypervisor indicates successful sends by setting + * cpu list entries to the value 0xffff. + */ + n_sent = 0; + for (i = 0; i < cnt; i++) { + if (likely(cpu_list[i] == 0xffff)) + n_sent++; + } + + forward_progress = 0; + if (n_sent > prev_sent) + forward_progress = 1; + + prev_sent = n_sent; + + /* If we get a HV_ECPUERROR, then one or more of the cpus + * in the list are in error state. Use the cpu_state() + * hypervisor call to find out which cpus are in error state. + */ + if (unlikely(status == HV_ECPUERROR)) { + for (i = 0; i < cnt; i++) { + long err; + u16 cpu; + + cpu = cpu_list[i]; + if (cpu == 0xffff) + continue; + + err = sun4v_cpu_state(cpu); + if (err == HV_CPU_STATE_ERROR) { + saw_cpu_error = (cpu + 1); + cpu_list[i] = 0xffff; + } + } + } else if (unlikely(status != HV_EWOULDBLOCK)) + goto fatal_mondo_error; + + /* Don't bother rewriting the CPU list, just leave the + * 0xffff and non-0xffff entries in there and the + * hypervisor will do the right thing. + * + * Only advance timeout state if we didn't make any + * forward progress. + */ + if (unlikely(!forward_progress)) { + if (unlikely(++retries > 10000)) + goto fatal_mondo_timeout; + + /* Delay a little bit to let other cpus catch up + * on their cpu mondo queue work. + */ + udelay(2 * cnt); + } + } while (1); + + if (unlikely(saw_cpu_error)) + goto fatal_mondo_cpu_error; + + return; + +fatal_mondo_cpu_error: + printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " + "(including %d) were in error state\n", + this_cpu, saw_cpu_error - 1); + return; + +fatal_mondo_timeout: + printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " + " progress after %d retries.\n", + this_cpu, retries); + goto dump_cpu_list_and_out; + +fatal_mondo_error: + printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", + this_cpu, status); + printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " + "mondo_block_pa(%lx)\n", + this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + +dump_cpu_list_and_out: + printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); + for (i = 0; i < cnt; i++) + printk("%u ", cpu_list[i]); + printk("]\n"); +} + +static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); + +static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask) +{ + struct trap_per_cpu *tb; + int this_cpu, i, cnt; + unsigned long flags; + u16 *cpu_list; + u64 *mondo; + + /* We have to do this whole thing with interrupts fully disabled. + * Otherwise if we send an xcall from interrupt context it will + * corrupt both our mondo block and cpu list state. + * + * One consequence of this is that we cannot use timeout mechanisms + * that depend upon interrupts being delivered locally. So, for + * example, we cannot sample jiffies and expect it to advance. + * + * Fortunately, udelay() uses %stick/%tick so we can use that. + */ + local_irq_save(flags); + + this_cpu = smp_processor_id(); + tb = &trap_block[this_cpu]; + + mondo = __va(tb->cpu_mondo_block_pa); + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + cpu_list = __va(tb->cpu_list_pa); + + /* Setup the initial cpu list. */ + cnt = 0; + for_each_cpu_mask_nr(i, *mask) { + if (i == this_cpu || !cpu_online(i)) + continue; + cpu_list[cnt++] = i; + } + + if (cnt) + xcall_deliver_impl(tb, cnt); + + local_irq_restore(flags); +} + +/* Send cross call to all processors mentioned in MASK_P + * except self. Really, there are only two cases currently, + * "&cpu_online_map" and "&mm->cpu_vm_mask". + */ +static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) +{ + u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff)); + + xcall_deliver(data0, data1, data2, mask); +} + +/* Send cross call to all processors except self. */ +static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) +{ + smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); +} + +extern unsigned long xcall_sync_tick; + +static void smp_start_sync_tick_client(int cpu) +{ + xcall_deliver((u64) &xcall_sync_tick, 0, 0, + &cpumask_of_cpu(cpu)); +} + +extern unsigned long xcall_call_function; + +void arch_send_call_function_ipi(cpumask_t mask) +{ + xcall_deliver((u64) &xcall_call_function, 0, 0, &mask); +} + +extern unsigned long xcall_call_function_single; + +void arch_send_call_function_single_ipi(int cpu) +{ + xcall_deliver((u64) &xcall_call_function_single, 0, 0, + &cpumask_of_cpu(cpu)); +} + +void smp_call_function_client(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + generic_smp_call_function_interrupt(); +} + +void smp_call_function_single_client(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + generic_smp_call_function_single_interrupt(); +} + +static void tsb_sync(void *info) +{ + struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()]; + struct mm_struct *mm = info; + + /* It is not valid to test "currrent->active_mm == mm" here. + * + * The value of "current" is not changed atomically with + * switch_mm(). But that's OK, we just need to check the + * current cpu's trap block PGD physical address. + */ + if (tp->pgd_paddr == __pa(mm->pgd)) + tsb_context_switch(mm); +} + +void smp_tsb_sync(struct mm_struct *mm) +{ + smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1); +} + +extern unsigned long xcall_flush_tlb_mm; +extern unsigned long xcall_flush_tlb_pending; +extern unsigned long xcall_flush_tlb_kernel_range; +extern unsigned long xcall_fetch_glob_regs; +extern unsigned long xcall_receive_signal; +extern unsigned long xcall_new_mmu_context_version; +#ifdef CONFIG_KGDB +extern unsigned long xcall_kgdb_capture; +#endif + +#ifdef DCACHE_ALIASING_POSSIBLE +extern unsigned long xcall_flush_dcache_page_cheetah; +#endif +extern unsigned long xcall_flush_dcache_page_spitfire; + +#ifdef CONFIG_DEBUG_DCFLUSH +extern atomic_t dcpage_flushes; +extern atomic_t dcpage_flushes_xcall; +#endif + +static inline void __local_flush_dcache_page(struct page *page) +{ +#ifdef DCACHE_ALIASING_POSSIBLE + __flush_dcache_page(page_address(page), + ((tlb_type == spitfire) && + page_mapping(page) != NULL)); +#else + if (page_mapping(page) != NULL && + tlb_type == spitfire) + __flush_icache_page(__pa(page_address(page))); +#endif +} + +void smp_flush_dcache_page_impl(struct page *page, int cpu) +{ + int this_cpu; + + if (tlb_type == hypervisor) + return; + +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes); +#endif + + this_cpu = get_cpu(); + + if (cpu == this_cpu) { + __local_flush_dcache_page(page); + } else if (cpu_online(cpu)) { + void *pg_addr = page_address(page); + u64 data0 = 0; + + if (tlb_type == spitfire) { + data0 = ((u64)&xcall_flush_dcache_page_spitfire); + if (page_mapping(page) != NULL) + data0 |= ((u64)1 << 32); + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { +#ifdef DCACHE_ALIASING_POSSIBLE + data0 = ((u64)&xcall_flush_dcache_page_cheetah); +#endif + } + if (data0) { + xcall_deliver(data0, __pa(pg_addr), + (u64) pg_addr, &cpumask_of_cpu(cpu)); +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes_xcall); +#endif + } + } + + put_cpu(); +} + +void flush_dcache_page_all(struct mm_struct *mm, struct page *page) +{ + void *pg_addr; + int this_cpu; + u64 data0; + + if (tlb_type == hypervisor) + return; + + this_cpu = get_cpu(); + +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes); +#endif + data0 = 0; + pg_addr = page_address(page); + if (tlb_type == spitfire) { + data0 = ((u64)&xcall_flush_dcache_page_spitfire); + if (page_mapping(page) != NULL) + data0 |= ((u64)1 << 32); + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { +#ifdef DCACHE_ALIASING_POSSIBLE + data0 = ((u64)&xcall_flush_dcache_page_cheetah); +#endif + } + if (data0) { + xcall_deliver(data0, __pa(pg_addr), + (u64) pg_addr, &cpu_online_map); +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes_xcall); +#endif + } + __local_flush_dcache_page(page); + + put_cpu(); +} + +void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) +{ + struct mm_struct *mm; + unsigned long flags; + + clear_softint(1 << irq); + + /* See if we need to allocate a new TLB context because + * the version of the one we are using is now out of date. + */ + mm = current->active_mm; + if (unlikely(!mm || (mm == &init_mm))) + return; + + spin_lock_irqsave(&mm->context.lock, flags); + + if (unlikely(!CTX_VALID(mm->context))) + get_new_mmu_context(mm); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + load_secondary_context(mm); + __flush_tlb_mm(CTX_HWBITS(mm->context), + SECONDARY_CONTEXT); +} + +void smp_new_mmu_context_version(void) +{ + smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); +} + +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(unsigned long flags) +{ + smp_cross_call(&xcall_kgdb_capture, 0, 0, 0); +} +#endif + +void smp_fetch_global_regs(void) +{ + smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0); +} + +/* We know that the window frames of the user have been flushed + * to the stack before we get here because all callers of us + * are flush_tlb_*() routines, and these run after flush_cache_*() + * which performs the flushw. + * + * The SMP TLB coherency scheme we use works as follows: + * + * 1) mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to avoid doing cross calls. + * + * Also, for flushing from kswapd and also for clones, we + * use cpu_vm_mask as the list of cpus to make run the TLB. + * + * 2) TLB context numbers are shared globally across all processors + * in the system, this allows us to play several games to avoid + * cross calls. + * + * One invariant is that when a cpu switches to a process, and + * that processes tsk->active_mm->cpu_vm_mask does not have the + * current cpu's bit set, that tlb context is flushed locally. + * + * If the address space is non-shared (ie. mm->count == 1) we avoid + * cross calls when we want to flush the currently running process's + * tlb state. This is done by clearing all cpu bits except the current + * processor's in current->active_mm->cpu_vm_mask and performing the + * flush locally only. This will force any subsequent cpus which run + * this task to flush the context from the local tlb if the process + * migrates to another cpu (again). + * + * 3) For shared address spaces (threads) and swapping we bite the + * bullet for most cases and perform the cross call (but only to + * the cpus listed in cpu_vm_mask). + * + * The performance gain from "optimizing" away the cross call for threads is + * questionable (in theory the big win for threads is the massive sharing of + * address space state across processors). + */ + +/* This currently is only used by the hugetlb arch pre-fault + * hook on UltraSPARC-III+ and later when changing the pagesize + * bits of the context register for an address space. + */ +void smp_flush_tlb_mm(struct mm_struct *mm) +{ + u32 ctx = CTX_HWBITS(mm->context); + int cpu = get_cpu(); + + if (atomic_read(&mm->mm_users) == 1) { + mm->cpu_vm_mask = cpumask_of_cpu(cpu); + goto local_flush_and_out; + } + + smp_cross_call_masked(&xcall_flush_tlb_mm, + ctx, 0, 0, + &mm->cpu_vm_mask); + +local_flush_and_out: + __flush_tlb_mm(ctx, SECONDARY_CONTEXT); + + put_cpu(); +} + +void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) +{ + u32 ctx = CTX_HWBITS(mm->context); + int cpu = get_cpu(); + + if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) + mm->cpu_vm_mask = cpumask_of_cpu(cpu); + else + smp_cross_call_masked(&xcall_flush_tlb_pending, + ctx, nr, (unsigned long) vaddrs, + &mm->cpu_vm_mask); + + __flush_tlb_pending(ctx, nr, vaddrs); + + put_cpu(); +} + +void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + start &= PAGE_MASK; + end = PAGE_ALIGN(end); + if (start != end) { + smp_cross_call(&xcall_flush_tlb_kernel_range, + 0, start, end); + + __flush_tlb_kernel_range(start, end); + } +} + +/* CPU capture. */ +/* #define CAPTURE_DEBUG */ +extern unsigned long xcall_capture; + +static atomic_t smp_capture_depth = ATOMIC_INIT(0); +static atomic_t smp_capture_registry = ATOMIC_INIT(0); +static unsigned long penguins_are_doing_time; + +void smp_capture(void) +{ + int result = atomic_add_ret(1, &smp_capture_depth); + + if (result == 1) { + int ncpus = num_online_cpus(); + +#ifdef CAPTURE_DEBUG + printk("CPU[%d]: Sending penguins to jail...", + smp_processor_id()); +#endif + penguins_are_doing_time = 1; + atomic_inc(&smp_capture_registry); + smp_cross_call(&xcall_capture, 0, 0, 0); + while (atomic_read(&smp_capture_registry) != ncpus) + rmb(); +#ifdef CAPTURE_DEBUG + printk("done\n"); +#endif + } +} + +void smp_release(void) +{ + if (atomic_dec_and_test(&smp_capture_depth)) { +#ifdef CAPTURE_DEBUG + printk("CPU[%d]: Giving pardon to " + "imprisoned penguins\n", + smp_processor_id()); +#endif + penguins_are_doing_time = 0; + membar_safe("#StoreLoad"); + atomic_dec(&smp_capture_registry); + } +} + +/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE + * set, so they can service tlb flush xcalls... + */ +extern void prom_world(int); + +void smp_penguin_jailcell(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + + preempt_disable(); + + __asm__ __volatile__("flushw"); + prom_world(1); + atomic_inc(&smp_capture_registry); + membar_safe("#StoreLoad"); + while (penguins_are_doing_time) + rmb(); + atomic_dec(&smp_capture_registry); + prom_world(0); + + preempt_enable(); +} + +/* /proc/profile writes can call this, don't __init it please. */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ +} + +void __devinit smp_prepare_boot_cpu(void) +{ +} + +void __init smp_setup_processor_id(void) +{ + if (tlb_type == spitfire) + xcall_deliver_impl = spitfire_xcall_deliver; + else if (tlb_type == cheetah || tlb_type == cheetah_plus) + xcall_deliver_impl = cheetah_xcall_deliver; + else + xcall_deliver_impl = hypervisor_xcall_deliver; +} + +void __devinit smp_fill_in_sib_core_maps(void) +{ + unsigned int i; + + for_each_present_cpu(i) { + unsigned int j; + + cpus_clear(cpu_core_map[i]); + if (cpu_data(i).core_id == 0) { + cpu_set(i, cpu_core_map[i]); + continue; + } + + for_each_present_cpu(j) { + if (cpu_data(i).core_id == + cpu_data(j).core_id) + cpu_set(j, cpu_core_map[i]); + } + } + + for_each_present_cpu(i) { + unsigned int j; + + cpus_clear(per_cpu(cpu_sibling_map, i)); + if (cpu_data(i).proc_id == -1) { + cpu_set(i, per_cpu(cpu_sibling_map, i)); + continue; + } + + for_each_present_cpu(j) { + if (cpu_data(i).proc_id == + cpu_data(j).proc_id) + cpu_set(j, per_cpu(cpu_sibling_map, i)); + } + } +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + int ret = smp_boot_one_cpu(cpu); + + if (!ret) { + cpu_set(cpu, smp_commenced_mask); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + if (!cpu_isset(cpu, cpu_online_map)) { + ret = -ENODEV; + } else { + /* On SUN4V, writes to %tick and %stick are + * not allowed. + */ + if (tlb_type != hypervisor) + smp_synchronize_one_tick(cpu); + } + } + return ret; +} + +#ifdef CONFIG_HOTPLUG_CPU +void cpu_play_dead(void) +{ + int cpu = smp_processor_id(); + unsigned long pstate; + + idle_task_exit(); + + if (tlb_type == hypervisor) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO, + tb->cpu_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO, + tb->dev_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR, + tb->resum_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR, + tb->nonresum_mondo_pa, 0); + } + + cpu_clear(cpu, smp_commenced_mask); + membar_safe("#Sync"); + + local_irq_disable(); + + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + while (1) + barrier(); +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + cpuinfo_sparc *c; + int i; + + for_each_cpu_mask(i, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[i]); + cpus_clear(cpu_core_map[cpu]); + + for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, i)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + + c = &cpu_data(cpu); + + c->core_id = 0; + c->proc_id = -1; + + smp_wmb(); + + /* Make sure no interrupts point to this cpu. */ + fixup_irqs(); + + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + ipi_call_lock(); + cpu_clear(cpu, cpu_online_map); + ipi_call_unlock(); + + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + int i; + + for (i = 0; i < 100; i++) { + smp_rmb(); + if (!cpu_isset(cpu, smp_commenced_mask)) + break; + msleep(100); + } + if (cpu_isset(cpu, smp_commenced_mask)) { + printk(KERN_ERR "CPU %u didn't die...\n", cpu); + } else { +#if defined(CONFIG_SUN_LDOMS) + unsigned long hv_err; + int limit = 100; + + do { + hv_err = sun4v_cpu_stop(cpu); + if (hv_err == HV_EOK) { + cpu_clear(cpu, cpu_present_map); + break; + } + } while (--limit > 0); + if (limit <= 0) { + printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n", + hv_err); + } +#endif + } +} +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +void smp_send_reschedule(int cpu) +{ + xcall_deliver((u64) &xcall_receive_signal, 0, 0, + &cpumask_of_cpu(cpu)); +} + +void smp_receive_signal_client(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); +} + +/* This is a nop because we capture all other cpus + * anyways when making the PROM active. + */ +void smp_send_stop(void) +{ +} + +unsigned long __per_cpu_base __read_mostly; +unsigned long __per_cpu_shift __read_mostly; + +EXPORT_SYMBOL(__per_cpu_base); +EXPORT_SYMBOL(__per_cpu_shift); + +void __init real_setup_per_cpu_areas(void) +{ + unsigned long paddr, goal, size, i; + char *ptr; + + /* Copy section for each CPU (we discard the original) */ + goal = PERCPU_ENOUGH_ROOM; + + __per_cpu_shift = PAGE_SHIFT; + for (size = PAGE_SIZE; size < goal; size <<= 1UL) + __per_cpu_shift++; + + paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); + if (!paddr) { + prom_printf("Cannot allocate per-cpu memory.\n"); + prom_halt(); + } + + ptr = __va(paddr); + __per_cpu_base = ptr - __per_cpu_start; + + for (i = 0; i < NR_CPUS; i++, ptr += size) + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + /* Setup %g5 for the boot cpu. */ + __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); +} diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c new file mode 100644 index 000000000000..c450825b3fe5 --- /dev/null +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -0,0 +1,289 @@ +/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. + * + * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + +/* Tell string.h we don't want memcpy etc. as cpp defines */ +#define EXPORT_SYMTAB_STROPS +#define PROMLIB_INTERNAL + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/in6.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/fs_struct.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/syscalls.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/rwsem.h> +#include <net/compat.h> + +#include <asm/oplib.h> +#include <asm/system.h> +#include <asm/auxio.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/idprom.h> +#include <asm/elf.h> +#include <asm/head.h> +#include <asm/smp.h> +#include <asm/ptrace.h> +#include <asm/uaccess.h> +#include <asm/checksum.h> +#include <asm/fpumacro.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#ifdef CONFIG_SBUS +#include <asm/dma.h> +#endif +#include <asm/ns87303.h> +#include <asm/timer.h> +#include <asm/cpudata.h> +#include <asm/ftrace.h> +#include <asm/hypervisor.h> + +struct poll { + int fd; + short events; + short revents; +}; + +extern void die_if_kernel(char *str, struct pt_regs *regs); +extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); +extern void *__bzero(void *, size_t); +extern void *__memscan_zero(void *, size_t); +extern void *__memscan_generic(void *, int, size_t); +extern int __memcmp(const void *, const void *, __kernel_size_t); +extern __kernel_size_t strlen(const char *); +extern void sys_sigsuspend(void); +extern int compat_sys_ioctl(unsigned int fd, unsigned int cmd, u32 arg); +extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *); +extern long sparc32_open(const char __user * filename, int flags, int mode); +extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot); + +extern int __ashrdi3(int, int); + +extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); + +extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +/* Per-CPU information table */ +EXPORT_PER_CPU_SYMBOL(__cpu_data); + +/* used by various drivers */ +#ifdef CONFIG_SMP +/* Out of line rw-locking implementation. */ +EXPORT_SYMBOL(__read_lock); +EXPORT_SYMBOL(__read_unlock); +EXPORT_SYMBOL(__write_lock); +EXPORT_SYMBOL(__write_unlock); +EXPORT_SYMBOL(__write_trylock); +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_MCOUNT +EXPORT_SYMBOL(_mcount); +#endif + +EXPORT_SYMBOL(sparc64_get_clock_tick); + +/* RW semaphores */ +EXPORT_SYMBOL(__down_read); +EXPORT_SYMBOL(__down_read_trylock); +EXPORT_SYMBOL(__down_write); +EXPORT_SYMBOL(__down_write_trylock); +EXPORT_SYMBOL(__up_read); +EXPORT_SYMBOL(__up_write); +EXPORT_SYMBOL(__downgrade_write); + +/* Atomic counter implementation. */ +EXPORT_SYMBOL(atomic_add); +EXPORT_SYMBOL(atomic_add_ret); +EXPORT_SYMBOL(atomic_sub); +EXPORT_SYMBOL(atomic_sub_ret); +EXPORT_SYMBOL(atomic64_add); +EXPORT_SYMBOL(atomic64_add_ret); +EXPORT_SYMBOL(atomic64_sub); +EXPORT_SYMBOL(atomic64_sub_ret); + +/* Atomic bit operations. */ +EXPORT_SYMBOL(test_and_set_bit); +EXPORT_SYMBOL(test_and_clear_bit); +EXPORT_SYMBOL(test_and_change_bit); +EXPORT_SYMBOL(set_bit); +EXPORT_SYMBOL(clear_bit); +EXPORT_SYMBOL(change_bit); + +EXPORT_SYMBOL(__flushw_user); + +EXPORT_SYMBOL(tlb_type); +EXPORT_SYMBOL(sun4v_chip_type); +EXPORT_SYMBOL(get_fb_unmapped_area); +EXPORT_SYMBOL(flush_icache_range); + +EXPORT_SYMBOL(flush_dcache_page); +#ifdef DCACHE_ALIASING_POSSIBLE +EXPORT_SYMBOL(__flush_dcache_range); +#endif + +EXPORT_SYMBOL(sun4v_niagara_getperf); +EXPORT_SYMBOL(sun4v_niagara_setperf); +EXPORT_SYMBOL(sun4v_niagara2_getperf); +EXPORT_SYMBOL(sun4v_niagara2_setperf); + +#ifdef CONFIG_SUN_AUXIO +EXPORT_SYMBOL(auxio_set_led); +EXPORT_SYMBOL(auxio_set_lte); +#endif +#ifdef CONFIG_SBUS +EXPORT_SYMBOL(sbus_set_sbus64); +#endif +EXPORT_SYMBOL(outsb); +EXPORT_SYMBOL(outsw); +EXPORT_SYMBOL(outsl); +EXPORT_SYMBOL(insb); +EXPORT_SYMBOL(insw); +EXPORT_SYMBOL(insl); +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci_map_single); +EXPORT_SYMBOL(pci_unmap_single); +EXPORT_SYMBOL(pci_map_sg); +EXPORT_SYMBOL(pci_unmap_sg); +EXPORT_SYMBOL(pci_dma_sync_single_for_cpu); +EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu); +EXPORT_SYMBOL(pci_dma_supported); +#endif + +/* I/O device mmaping on Sparc64. */ +EXPORT_SYMBOL(io_remap_pfn_range); + +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(put_fs_struct); + +/* math-emu wants this */ +EXPORT_SYMBOL(die_if_kernel); + +/* Kernel thread creation. */ +EXPORT_SYMBOL(kernel_thread); + +/* prom symbols */ +EXPORT_SYMBOL(idprom); +EXPORT_SYMBOL(prom_root_node); +EXPORT_SYMBOL(prom_getchild); +EXPORT_SYMBOL(prom_getsibling); +EXPORT_SYMBOL(prom_searchsiblings); +EXPORT_SYMBOL(prom_firstprop); +EXPORT_SYMBOL(prom_nextprop); +EXPORT_SYMBOL(prom_getproplen); +EXPORT_SYMBOL(prom_getproperty); +EXPORT_SYMBOL(prom_node_has_property); +EXPORT_SYMBOL(prom_setprop); +EXPORT_SYMBOL(saved_command_line); +EXPORT_SYMBOL(prom_finddevice); +EXPORT_SYMBOL(prom_feval); +EXPORT_SYMBOL(prom_getbool); +EXPORT_SYMBOL(prom_getstring); +EXPORT_SYMBOL(prom_getint); +EXPORT_SYMBOL(prom_getintdefault); +EXPORT_SYMBOL(__prom_getchild); +EXPORT_SYMBOL(__prom_getsibling); + +/* sparc library symbols */ +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(__strlen_user); +EXPORT_SYMBOL(__strnlen_user); + +/* Special internal versions of library functions. */ +EXPORT_SYMBOL(_clear_page); +EXPORT_SYMBOL(clear_user_page); +EXPORT_SYMBOL(copy_user_page); +EXPORT_SYMBOL(__bzero); +EXPORT_SYMBOL(__memscan_zero); +EXPORT_SYMBOL(__memscan_generic); +EXPORT_SYMBOL(__memcmp); +EXPORT_SYMBOL(__memset); + +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_nocheck); +EXPORT_SYMBOL(__csum_partial_copy_from_user); +EXPORT_SYMBOL(__csum_partial_copy_to_user); +EXPORT_SYMBOL(ip_fast_csum); + +/* Moving data to/from/in userspace. */ +EXPORT_SYMBOL(___copy_to_user); +EXPORT_SYMBOL(___copy_from_user); +EXPORT_SYMBOL(___copy_in_user); +EXPORT_SYMBOL(copy_to_user_fixup); +EXPORT_SYMBOL(copy_from_user_fixup); +EXPORT_SYMBOL(copy_in_user_fixup); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__clear_user); + +/* Various address conversion macros use this. */ +EXPORT_SYMBOL(sparc64_valid_addr_bitmap); + +/* No version information on this, heavily used in inline asm, + * and will always be 'void __ret_efault(void)'. + */ +EXPORT_SYMBOL(__ret_efault); + +/* No version information on these, as gcc produces such symbols. */ +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(strncmp); + +void VISenter(void); +/* RAID code needs this */ +EXPORT_SYMBOL(VISenter); + +/* for input/keybdev */ +EXPORT_SYMBOL(sun_do_break); +EXPORT_SYMBOL(stop_a_enabled); + +#ifdef CONFIG_DEBUG_BUGVERBOSE +EXPORT_SYMBOL(do_BUG); +#endif + +/* for ns8703 */ +EXPORT_SYMBOL(ns87303_lock); + +EXPORT_SYMBOL(tick_ops); + +EXPORT_SYMBOL(xor_vis_2); +EXPORT_SYMBOL(xor_vis_3); +EXPORT_SYMBOL(xor_vis_4); +EXPORT_SYMBOL(xor_vis_5); + +EXPORT_SYMBOL(xor_niagara_2); +EXPORT_SYMBOL(xor_niagara_3); +EXPORT_SYMBOL(xor_niagara_4); +EXPORT_SYMBOL(xor_niagara_5); + +EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S new file mode 100644 index 000000000000..c357e40ffd01 --- /dev/null +++ b/arch/sparc/kernel/spiterrs.S @@ -0,0 +1,245 @@ + /* We need to carefully read the error status, ACK the errors, + * prevent recursive traps, and pass the information on to C + * code for logging. + * + * We pass the AFAR in as-is, and we encode the status + * information as described in asm-sparc64/sfafsr.h + */ + .type __spitfire_access_error,#function +__spitfire_access_error: + /* Disable ESTATE error reporting so that we do not take + * recursive traps and RED state the processor. + */ + stxa %g0, [%g0] ASI_ESTATE_ERROR_EN + membar #Sync + + mov UDBE_UE, %g1 + ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR + + /* __spitfire_cee_trap branches here with AFSR in %g4 and + * UDBE_CE in %g1. It only clears ESTATE_ERR_CE in the ESTATE + * Error Enable register. + */ +__spitfire_cee_trap_continue: + ldxa [%g0] ASI_AFAR, %g5 ! Get AFAR + + rdpr %tt, %g3 + and %g3, 0x1ff, %g3 ! Paranoia + sllx %g3, SFSTAT_TRAP_TYPE_SHIFT, %g3 + or %g4, %g3, %g4 + rdpr %tl, %g3 + cmp %g3, 1 + mov 1, %g3 + bleu %xcc, 1f + sllx %g3, SFSTAT_TL_GT_ONE_SHIFT, %g3 + + or %g4, %g3, %g4 + + /* Read in the UDB error register state, clearing the sticky + * error bits as-needed. We only clear them if the UE bit is + * set. Likewise, __spitfire_cee_trap below will only do so + * if the CE bit is set. + * + * NOTE: UltraSparc-I/II have high and low UDB error + * registers, corresponding to the two UDB units + * present on those chips. UltraSparc-IIi only + * has a single UDB, called "SDB" in the manual. + * For IIi the upper UDB register always reads + * as zero so for our purposes things will just + * work with the checks below. + */ +1: ldxa [%g0] ASI_UDBH_ERROR_R, %g3 + and %g3, 0x3ff, %g7 ! Paranoia + sllx %g7, SFSTAT_UDBH_SHIFT, %g7 + or %g4, %g7, %g4 + andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE + be,pn %xcc, 1f + nop + stxa %g3, [%g0] ASI_UDB_ERROR_W + membar #Sync + +1: mov 0x18, %g3 + ldxa [%g3] ASI_UDBL_ERROR_R, %g3 + and %g3, 0x3ff, %g7 ! Paranoia + sllx %g7, SFSTAT_UDBL_SHIFT, %g7 + or %g4, %g7, %g4 + andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE + be,pn %xcc, 1f + nop + mov 0x18, %g7 + stxa %g3, [%g7] ASI_UDB_ERROR_W + membar #Sync + +1: /* Ok, now that we've latched the error state, clear the + * sticky bits in the AFSR. + */ + stxa %g4, [%g0] ASI_AFSR + membar #Sync + + rdpr %tl, %g2 + cmp %g2, 1 + rdpr %pil, %g2 + bleu,pt %xcc, 1f + wrpr %g0, PIL_NORMAL_MAX, %pil + + ba,pt %xcc, etraptl1 + rd %pc, %g7 + + ba,pt %xcc, 2f + nop + +1: ba,pt %xcc, etrap_irq + rd %pc, %g7 + +2: +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + mov %l4, %o1 + mov %l5, %o2 + call spitfire_access_error + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size __spitfire_access_error,.-__spitfire_access_error + + /* This is the trap handler entry point for ECC correctable + * errors. They are corrected, but we listen for the trap so + * that the event can be logged. + * + * Disrupting errors are either: + * 1) single-bit ECC errors during UDB reads to system + * memory + * 2) data parity errors during write-back events + * + * As far as I can make out from the manual, the CEE trap is + * only for correctable errors during memory read accesses by + * the front-end of the processor. + * + * The code below is only for trap level 1 CEE events, as it + * is the only situation where we can safely record and log. + * For trap level >1 we just clear the CE bit in the AFSR and + * return. + * + * This is just like __spiftire_access_error above, but it + * specifically handles correctable errors. If an + * uncorrectable error is indicated in the AFSR we will branch + * directly above to __spitfire_access_error to handle it + * instead. Uncorrectable therefore takes priority over + * correctable, and the error logging C code will notice this + * case by inspecting the trap type. + */ + .type __spitfire_cee_trap,#function +__spitfire_cee_trap: + ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR + mov 1, %g3 + sllx %g3, SFAFSR_UE_SHIFT, %g3 + andcc %g4, %g3, %g0 ! Check for UE + bne,pn %xcc, __spitfire_access_error + nop + + /* Ok, in this case we only have a correctable error. + * Indicate we only wish to capture that state in register + * %g1, and we only disable CE error reporting unlike UE + * handling which disables all errors. + */ + ldxa [%g0] ASI_ESTATE_ERROR_EN, %g3 + andn %g3, ESTATE_ERR_CE, %g3 + stxa %g3, [%g0] ASI_ESTATE_ERROR_EN + membar #Sync + + /* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */ + ba,pt %xcc, __spitfire_cee_trap_continue + mov UDBE_CE, %g1 + .size __spitfire_cee_trap,.-__spitfire_cee_trap + + .type __spitfire_data_access_exception_tl1,#function +__spitfire_data_access_exception_tl1: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + mov DMMU_SFAR, %g5 + ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR + ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR + stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit + membar #Sync + rdpr %tt, %g3 + cmp %g3, 0x80 ! first win spill/fill trap + blu,pn %xcc, 1f + cmp %g3, 0xff ! last win spill/fill trap + bgu,pn %xcc, 1f + nop + ba,pt %xcc, winfix_dax + rdpr %tpc, %g3 +1: sethi %hi(109f), %g7 + ba,pt %xcc, etraptl1 +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_data_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size __spitfire_data_access_exception_tl1,.-__spitfire_data_access_exception_tl1 + + .type __spitfire_data_access_exception,#function +__spitfire_data_access_exception: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + mov DMMU_SFAR, %g5 + ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR + ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR + stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_data_access_exception + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size __spitfire_data_access_exception,.-__spitfire_data_access_exception + + .type __spitfire_insn_access_exception_tl1,#function +__spitfire_insn_access_exception_tl1: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR + rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC + stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etraptl1 +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_insn_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size __spitfire_insn_access_exception_tl1,.-__spitfire_insn_access_exception_tl1 + + .type __spitfire_insn_access_exception,#function +__spitfire_insn_access_exception: + rdpr %pstate, %g4 + wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate + mov TLB_SFSR, %g3 + ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR + rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC + stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit + membar #Sync + sethi %hi(109f), %g7 + ba,pt %xcc, etrap +109: or %g7, %lo(109b), %g7 + mov %l4, %o1 + mov %l5, %o2 + call spitfire_insn_access_exception + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + .size __spitfire_insn_access_exception,.-__spitfire_insn_access_exception diff --git a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c new file mode 100644 index 000000000000..8cdbe5946b43 --- /dev/null +++ b/arch/sparc/kernel/sstate.c @@ -0,0 +1,127 @@ +/* sstate.c: System soft state support. + * + * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/init.h> + +#include <asm/hypervisor.h> +#include <asm/spitfire.h> +#include <asm/oplib.h> +#include <asm/head.h> +#include <asm/io.h> + +static int hv_supports_soft_state; + +static unsigned long kimage_addr_to_ra(const char *p) +{ + unsigned long val = (unsigned long) p; + + return kern_base + (val - KERNBASE); +} + +static void do_set_sstate(unsigned long state, const char *msg) +{ + unsigned long err; + + if (!hv_supports_soft_state) + return; + + err = sun4v_mach_set_soft_state(state, kimage_addr_to_ra(msg)); + if (err) { + printk(KERN_WARNING "SSTATE: Failed to set soft-state to " + "state[%lx] msg[%s], err=%lu\n", + state, msg, err); + } +} + +static const char booting_msg[32] __attribute__((aligned(32))) = + "Linux booting"; +static const char running_msg[32] __attribute__((aligned(32))) = + "Linux running"; +static const char halting_msg[32] __attribute__((aligned(32))) = + "Linux halting"; +static const char poweroff_msg[32] __attribute__((aligned(32))) = + "Linux powering off"; +static const char rebooting_msg[32] __attribute__((aligned(32))) = + "Linux rebooting"; +static const char panicing_msg[32] __attribute__((aligned(32))) = + "Linux panicing"; + +static int sstate_reboot_call(struct notifier_block *np, unsigned long type, void *_unused) +{ + const char *msg; + + switch (type) { + case SYS_DOWN: + default: + msg = rebooting_msg; + break; + + case SYS_HALT: + msg = halting_msg; + break; + + case SYS_POWER_OFF: + msg = poweroff_msg; + break; + } + + do_set_sstate(HV_SOFT_STATE_TRANSITION, msg); + + return NOTIFY_OK; +} + +static struct notifier_block sstate_reboot_notifier = { + .notifier_call = sstate_reboot_call, +}; + +static int sstate_panic_event(struct notifier_block *n, unsigned long event, void *ptr) +{ + do_set_sstate(HV_SOFT_STATE_TRANSITION, panicing_msg); + + return NOTIFY_DONE; +} + +static struct notifier_block sstate_panic_block = { + .notifier_call = sstate_panic_event, + .priority = INT_MAX, +}; + +static int __init sstate_init(void) +{ + unsigned long major, minor; + + if (tlb_type != hypervisor) + return 0; + + major = 1; + minor = 0; + if (sun4v_hvapi_register(HV_GRP_SOFT_STATE, major, &minor)) + return 0; + + hv_supports_soft_state = 1; + + prom_sun4v_guest_soft_state(); + + do_set_sstate(HV_SOFT_STATE_TRANSITION, booting_msg); + + atomic_notifier_chain_register(&panic_notifier_list, + &sstate_panic_block); + register_reboot_notifier(&sstate_reboot_notifier); + + return 0; +} + +core_initcall(sstate_init); + +static int __init sstate_running(void) +{ + do_set_sstate(HV_SOFT_STATE_NORMAL, running_msg); + return 0; +} + +late_initcall(sstate_running); diff --git a/arch/sparc/kernel/stacktrace.c b/arch/sparc/kernel/stacktrace.c new file mode 100644 index 000000000000..acb12f673757 --- /dev/null +++ b/arch/sparc/kernel/stacktrace.c @@ -0,0 +1,64 @@ +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/thread_info.h> +#include <linux/module.h> +#include <asm/ptrace.h> +#include <asm/stacktrace.h> + +#include "kstack.h" + +static void __save_stack_trace(struct thread_info *tp, + struct stack_trace *trace, + bool skip_sched) +{ + unsigned long ksp, fp; + + if (tp == current_thread_info()) { + stack_trace_flush(); + __asm__ __volatile__("mov %%fp, %0" : "=r" (ksp)); + } else { + ksp = tp->ksp; + } + + fp = ksp + STACK_BIAS; + do { + struct sparc_stackf *sf; + struct pt_regs *regs; + unsigned long pc; + + if (!kstack_valid(tp, fp)) + break; + + sf = (struct sparc_stackf *) fp; + regs = (struct pt_regs *) (sf + 1); + + if (kstack_is_trap_frame(tp, regs)) { + if (!(regs->tstate & TSTATE_PRIV)) + break; + pc = regs->tpc; + fp = regs->u_regs[UREG_I6] + STACK_BIAS; + } else { + pc = sf->callers_pc; + fp = (unsigned long)sf->fp + STACK_BIAS; + } + + if (trace->skip > 0) + trace->skip--; + else if (!skip_sched || !in_sched_functions(pc)) + trace->entries[trace->nr_entries++] = pc; + } while (trace->nr_entries < trace->max_entries); +} + +void save_stack_trace(struct stack_trace *trace) +{ + __save_stack_trace(current_thread_info(), trace, false); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + struct thread_info *tp = task_thread_info(tsk); + + __save_stack_trace(tp, trace, true); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/sparc/kernel/starfire.c b/arch/sparc/kernel/starfire.c new file mode 100644 index 000000000000..060d0f3a6151 --- /dev/null +++ b/arch/sparc/kernel/starfire.c @@ -0,0 +1,116 @@ +/* + * starfire.c: Starfire/E10000 support. + * + * Copyright (C) 1998 David S. Miller (davem@redhat.com) + * Copyright (C) 2000 Anton Blanchard (anton@samba.org) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> + +#include <asm/page.h> +#include <asm/oplib.h> +#include <asm/smp.h> +#include <asm/upa.h> +#include <asm/starfire.h> + +/* + * A few places around the kernel check this to see if + * they need to call us to do things in a Starfire specific + * way. + */ +int this_is_starfire = 0; + +void check_if_starfire(void) +{ + int ssnode = prom_finddevice("/ssp-serial"); + if (ssnode != 0 && ssnode != -1) + this_is_starfire = 1; +} + +int starfire_hard_smp_processor_id(void) +{ + return upa_readl(0x1fff40000d0UL); +} + +/* + * Each Starfire board has 32 registers which perform translation + * and delivery of traditional interrupt packets into the extended + * Starfire hardware format. Essentially UPAID's now have 2 more + * bits than in all previous Sun5 systems. + */ +struct starfire_irqinfo { + unsigned long imap_slots[32]; + unsigned long tregs[32]; + struct starfire_irqinfo *next; + int upaid, hwmid; +}; + +static struct starfire_irqinfo *sflist = NULL; + +/* Beam me up Scott(McNeil)y... */ +void starfire_hookup(int upaid) +{ + struct starfire_irqinfo *p; + unsigned long treg_base, hwmid, i; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + prom_printf("starfire_hookup: No memory, this is insane.\n"); + prom_halt(); + } + treg_base = 0x100fc000000UL; + hwmid = ((upaid & 0x3c) << 1) | + ((upaid & 0x40) >> 4) | + (upaid & 0x3); + p->hwmid = hwmid; + treg_base += (hwmid << 33UL); + treg_base += 0x200UL; + for (i = 0; i < 32; i++) { + p->imap_slots[i] = 0UL; + p->tregs[i] = treg_base + (i * 0x10UL); + /* Lets play it safe and not overwrite existing mappings */ + if (upa_readl(p->tregs[i]) != 0) + p->imap_slots[i] = 0xdeadbeaf; + } + p->upaid = upaid; + p->next = sflist; + sflist = p; +} + +unsigned int starfire_translate(unsigned long imap, + unsigned int upaid) +{ + struct starfire_irqinfo *p; + unsigned int bus_hwmid; + unsigned int i; + + bus_hwmid = (((unsigned long)imap) >> 33) & 0x7f; + for (p = sflist; p != NULL; p = p->next) + if (p->hwmid == bus_hwmid) + break; + if (p == NULL) { + prom_printf("XFIRE: Cannot find irqinfo for imap %016lx\n", + ((unsigned long)imap)); + prom_halt(); + } + for (i = 0; i < 32; i++) { + if (p->imap_slots[i] == imap || + p->imap_slots[i] == 0UL) + break; + } + if (i == 32) { + printk("starfire_translate: Are you kidding me?\n"); + panic("Lucy in the sky...."); + } + p->imap_slots[i] = imap; + + /* map to real upaid */ + upaid = (((upaid & 0x3c) << 1) | + ((upaid & 0x40) >> 4) | + (upaid & 0x3)); + + upa_writel(upaid, p->tregs[i]); + + return i; +} diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S new file mode 100644 index 000000000000..559bc5e9c199 --- /dev/null +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -0,0 +1,341 @@ +/* sun4v_ivec.S: Sun4v interrupt vector handling. + * + * Copyright (C) 2006 <davem@davemloft.net> + */ + +#include <asm/cpudata.h> +#include <asm/intr_queue.h> +#include <asm/pil.h> + + .text + .align 32 + +sun4v_cpu_mondo: + /* Head offset in %g2, tail offset in %g4. + * If they are the same, no work. + */ + mov INTRQ_CPU_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_CPU_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_cpu_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g4. */ + ldxa [%g0] ASI_SCRATCHPAD, %g4 + sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + + /* Get CPU mondo queue base phys address into %g7. */ + ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 + + /* Now get the cross-call arguments and handler PC, same + * layout as sun4u: + * + * 1st 64-bit word: low half is 32-bit PC, put into %g3 and jmpl to it + * high half is context arg to MMU flushes, into %g5 + * 2nd 64-bit word: 64-bit arg, load into %g1 + * 3rd 64-bit word: 64-bit arg, load into %g7 + */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x8, %g2 + srlx %g3, 32, %g5 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + add %g2, 0x8, %g2 + srl %g3, 0, %g3 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g7 + add %g2, 0x40 - 0x8 - 0x8, %g2 + + /* Update queue head pointer. */ + lduw [%g4 + TRAP_PER_CPU_CPU_MONDO_QMASK], %g4 + and %g2, %g4, %g2 + + mov INTRQ_CPU_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + jmpl %g3, %g0 + nop + +sun4v_cpu_mondo_queue_empty: + retry + +sun4v_dev_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_DEVICE_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_DEVICE_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_dev_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g4. */ + ldxa [%g0] ASI_SCRATCHPAD, %g4 + sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + + /* Get DEV mondo queue base phys address into %g5. */ + ldx [%g4 + TRAP_PER_CPU_DEV_MONDO_PA], %g5 + + /* Load IVEC into %g3. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x40, %g2 + + /* XXX There can be a full 64-byte block of data here. + * XXX This is how we can get at MSI vector data. + * XXX Current we do not capture this, but when we do we'll + * XXX need to add a 64-byte storage area in the struct ino_bucket + * XXX or the struct irq_desc. + */ + + /* Update queue head pointer, this frees up some registers. */ + lduw [%g4 + TRAP_PER_CPU_DEV_MONDO_QMASK], %g4 + and %g2, %g4, %g2 + + mov INTRQ_DEVICE_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + TRAP_LOAD_IRQ_WORK_PA(%g1, %g4) + + /* For VIRQs, cookie is encoded as ~bucket_phys_addr */ + brlz,pt %g3, 1f + xnor %g3, %g0, %g4 + + /* Get __pa(&ivector_table[IVEC]) into %g4. */ + sethi %hi(ivector_table_pa), %g4 + ldx [%g4 + %lo(ivector_table_pa)], %g4 + sllx %g3, 4, %g3 + add %g4, %g3, %g4 + +1: ldx [%g1], %g2 + stxa %g2, [%g4] ASI_PHYS_USE_EC + stx %g4, [%g1] + + /* Signal the interrupt by setting (1 << pil) in %softint. */ + wr %g0, 1 << PIL_DEVICE_IRQ, %set_softint + +sun4v_dev_mondo_queue_empty: + retry + +sun4v_res_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_RESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_res_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_res_mondo_queue_full + nop + + lduw [%g3 + TRAP_PER_CPU_RESUM_QMASK], %g4 + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + and %g2, %g4, %g2 + + mov INTRQ_RESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_resum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_res_mondo_queue_empty: + retry + +sun4v_res_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + call sun4v_resum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_NONRESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_nonres_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_nonres_mondo_queue_full + nop + + lduw [%g3 + TRAP_PER_CPU_NONRESUM_QMASK], %g4 + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + and %g2, %g4, %g2 + + mov INTRQ_NONRESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_nonresum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo_queue_empty: + retry + +sun4v_nonres_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + call sun4v_nonresum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S new file mode 100644 index 000000000000..e1fbf8c75787 --- /dev/null +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -0,0 +1,428 @@ +/* sun4v_tlb_miss.S: Sun4v TLB miss handlers. + * + * Copyright (C) 2006 <davem@davemloft.net> + */ + + .text + .align 32 + + /* Load ITLB fault information into VADDR and CTX, using BASE. */ +#define LOAD_ITLB_INFO(BASE, VADDR, CTX) \ + ldx [BASE + HV_FAULT_I_ADDR_OFFSET], VADDR; \ + ldx [BASE + HV_FAULT_I_CTX_OFFSET], CTX; + + /* Load DTLB fault information into VADDR and CTX, using BASE. */ +#define LOAD_DTLB_INFO(BASE, VADDR, CTX) \ + ldx [BASE + HV_FAULT_D_ADDR_OFFSET], VADDR; \ + ldx [BASE + HV_FAULT_D_CTX_OFFSET], CTX; + + /* DEST = (VADDR >> 22) + * + * Branch to ZERO_CTX_LABEL if context is zero. + */ +#define COMPUTE_TAG_TARGET(DEST, VADDR, CTX, ZERO_CTX_LABEL) \ + srlx VADDR, 22, DEST; \ + brz,pn CTX, ZERO_CTX_LABEL; \ + nop; + + /* Create TSB pointer. This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + * tsb_index = ((vaddr >> HASH_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ +#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, HASH_SHIFT, TMP1, TMP2) \ + and TSB_PTR, 0x7, TMP1; \ + mov 512, TMP2; \ + andn TSB_PTR, 0x7, TSB_PTR; \ + sllx TMP2, TMP1, TMP2; \ + srlx VADDR, HASH_SHIFT, TMP1; \ + sub TMP2, 1, TMP2; \ + and TMP1, TMP2, TMP1; \ + sllx TMP1, 4, TMP1; \ + add TSB_PTR, TMP1, TSB_PTR; + +sun4v_itlb_miss: + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + + /* Load UTSB reg into %g1. */ + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + + LOAD_ITLB_INFO(%g2, %g4, %g5) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v) + COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7) + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_ITLB, %g3 + andcc %g3, _PAGE_EXEC_4V, %g0 + be,a,pn %xcc, tsb_do_fault + mov FAULT_CODE_ITLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * I-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + */ +sun4v_itlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_IMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + brnz,pn %o0, sun4v_itlb_error + mov %g2, %o1 ! restore %o1 + mov %g1, %o0 ! restore %o0 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_miss: + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + + /* Load UTSB reg into %g1. */ + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + + LOAD_DTLB_INFO(%g2, %g4, %g5) + COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v) + COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7) + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_DTLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * D-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + */ +sun4v_dtlb_load: + ldxa [%g0] ASI_SCRATCHPAD, %g6 + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_DMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + brnz,pn %o0, sun4v_dtlb_error + mov %g2, %o1 ! restore %o1 + mov %g1, %o0 ! restore %o0 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_prot: + SET_GL(1) + + /* Load MMU Miss base into %g5. */ + ldxa [%g0] ASI_SCRATCHPAD, %g5 + + ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5 + rdpr %tl, %g1 + cmp %g1, 1 + bgu,pn %xcc, winfix_trampoline + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + ba,pt %xcc, sparc64_realfault_common + nop + + /* Called from trap table: + * %g4: vaddr + * %g5: context + * %g6: TAG TARGET + */ +sun4v_itsb_miss: + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + brz,pn %g5, kvmap_itlb_4v + mov FAULT_CODE_ITLB, %g3 + ba,a,pt %xcc, sun4v_tsb_miss_common + + /* Called from trap table: + * %g4: vaddr + * %g5: context + * %g6: TAG TARGET + */ +sun4v_dtsb_miss: + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + brz,pn %g5, kvmap_dtlb_4v + mov FAULT_CODE_DTLB, %g3 + + /* fallthrough */ + +sun4v_tsb_miss_common: + COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g5, %g7) + + sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + +#ifdef CONFIG_HUGETLB_PAGE + mov SCRATCHPAD_UTSBREG2, %g5 + ldxa [%g5] ASI_SCRATCHPAD, %g5 + cmp %g5, -1 + be,pt %xcc, 80f + nop + COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7) + + /* That clobbered %g2, reload it. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 + sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + +80: stx %g5, [%g2 + TRAP_PER_CPU_TSB_HUGE_TEMP] +#endif + + ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath + ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 + +sun4v_itlb_error: + sethi %hi(sun4v_err_itlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] + sethi %hi(sun4v_err_itlb_ctx), %g1 + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 + stx %o1, [%g1 + %lo(sun4v_err_itlb_ctx)] + sethi %hi(sun4v_err_itlb_pte), %g1 + stx %g3, [%g1 + %lo(sun4v_err_itlb_pte)] + sethi %hi(sun4v_err_itlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + + rdpr %tl, %g4 + cmp %g4, 1 + ble,pt %icc, 1f + sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 + or %g7, %lo(2f), %g7 + +1: ba,pt %xcc, etrap +2: or %g7, %lo(2b), %g7 + mov %l4, %o1 + call sun4v_itlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + +sun4v_dtlb_error: + sethi %hi(sun4v_err_dtlb_vaddr), %g1 + stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] + sethi %hi(sun4v_err_dtlb_ctx), %g1 + ldxa [%g0] ASI_SCRATCHPAD, %g6 + ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 + stx %o1, [%g1 + %lo(sun4v_err_dtlb_ctx)] + sethi %hi(sun4v_err_dtlb_pte), %g1 + stx %g3, [%g1 + %lo(sun4v_err_dtlb_pte)] + sethi %hi(sun4v_err_dtlb_error), %g1 + stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + + rdpr %tl, %g4 + cmp %g4, 1 + ble,pt %icc, 1f + sethi %hi(2f), %g7 + ba,pt %xcc, etraptl1 + or %g7, %lo(2f), %g7 + +1: ba,pt %xcc, etrap +2: or %g7, %lo(2b), %g7 + mov %l4, %o1 + call sun4v_dtlb_error_report + add %sp, PTREGS_OFF, %o0 + + /* NOTREACHED */ + + /* Instruction Access Exception, tl0. */ +sun4v_iacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_insn_access_exception + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Instruction Access Exception, tl1. */ +sun4v_iacc_tl1: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etraptl1 + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_insn_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Data Access Exception, tl0. */ +sun4v_dacc: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_data_access_exception + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Data Access Exception, tl1. */ +sun4v_dacc_tl1: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etraptl1 + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_data_access_exception_tl1 + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Memory Address Unaligned. */ +sun4v_mna: + /* Window fixup? */ + rdpr %tl, %g2 + cmp %g2, 1 + ble,pt %icc, 1f + nop + + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5 + mov HV_FAULT_TYPE_UNALIGNED, %g3 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g4 + sllx %g3, 16, %g3 + or %g4, %g3, %g4 + ba,pt %xcc, winfix_mna + rdpr %tpc, %g3 + /* not reached */ + +1: ldxa [%g0] ASI_SCRATCHPAD, %g2 + mov HV_FAULT_TYPE_UNALIGNED, %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call sun4v_do_mna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Privileged Action. */ +sun4v_privact: + ba,pt %xcc, etrap + rd %pc, %g7 + call do_privact + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Unaligned ldd float, tl0. */ +sun4v_lddfmna: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_lddfmna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + + /* Unaligned std float, tl0. */ +sun4v_stdfmna: + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 + sllx %g3, 16, %g3 + or %g5, %g3, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + mov %l5, %o2 + call handle_stdfmna + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define SUN4V_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl sun4v_patch_tlb_handlers + .type sun4v_patch_tlb_handlers,#function +sun4v_patch_tlb_handlers: + SUN4V_DO_PATCH(tl0_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl1_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl0_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl1_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl0_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl1_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl0_iax, sun4v_iacc) + SUN4V_DO_PATCH(tl1_iax, sun4v_iacc_tl1) + SUN4V_DO_PATCH(tl0_dax, sun4v_dacc) + SUN4V_DO_PATCH(tl1_dax, sun4v_dacc_tl1) + SUN4V_DO_PATCH(tl0_mna, sun4v_mna) + SUN4V_DO_PATCH(tl1_mna, sun4v_mna) + SUN4V_DO_PATCH(tl0_lddfmna, sun4v_lddfmna) + SUN4V_DO_PATCH(tl0_stdfmna, sun4v_stdfmna) + SUN4V_DO_PATCH(tl0_privact, sun4v_privact) + retl + nop + .size sun4v_patch_tlb_handlers,.-sun4v_patch_tlb_handlers diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S new file mode 100644 index 000000000000..f061c4dda9ef --- /dev/null +++ b/arch/sparc/kernel/sys32.S @@ -0,0 +1,367 @@ +/* + * sys32.S: I-cache tricks for 32-bit compatibility layer simple + * conversions. + * + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include <asm/errno.h> + +/* NOTE: call as jump breaks return stack, we have to avoid that */ + + .text + +#define SIGN1(STUB,SYSCALL,REG1) \ + .align 32; \ + .globl STUB; \ +STUB: sethi %hi(SYSCALL), %g1; \ + jmpl %g1 + %lo(SYSCALL), %g0; \ + sra REG1, 0, REG1 + +#define SIGN2(STUB,SYSCALL,REG1,REG2) \ + .align 32; \ + .globl STUB; \ +STUB: sethi %hi(SYSCALL), %g1; \ + sra REG1, 0, REG1; \ + jmpl %g1 + %lo(SYSCALL), %g0; \ + sra REG2, 0, REG2 + +#define SIGN3(STUB,SYSCALL,REG1,REG2,REG3) \ + .align 32; \ + .globl STUB; \ +STUB: sra REG1, 0, REG1; \ + sethi %hi(SYSCALL), %g1; \ + sra REG2, 0, REG2; \ + jmpl %g1 + %lo(SYSCALL), %g0; \ + sra REG3, 0, REG3 + +#define SIGN4(STUB,SYSCALL,REG1,REG2,REG3,REG4) \ + .align 32; \ + .globl STUB; \ +STUB: sra REG1, 0, REG1; \ + sethi %hi(SYSCALL), %g1; \ + sra REG2, 0, REG2; \ + sra REG3, 0, REG3; \ + jmpl %g1 + %lo(SYSCALL), %g0; \ + sra REG4, 0, REG4 + +SIGN1(sys32_exit, sparc_exit, %o0) +SIGN1(sys32_exit_group, sys_exit_group, %o0) +SIGN1(sys32_wait4, compat_sys_wait4, %o2) +SIGN1(sys32_creat, sys_creat, %o1) +SIGN1(sys32_mknod, sys_mknod, %o1) +SIGN1(sys32_perfctr, sys_perfctr, %o0) +SIGN1(sys32_umount, sys_umount, %o1) +SIGN1(sys32_signal, sys_signal, %o0) +SIGN1(sys32_access, sys_access, %o1) +SIGN1(sys32_msync, sys_msync, %o2) +SIGN2(sys32_reboot, sys_reboot, %o0, %o1) +SIGN1(sys32_setitimer, compat_sys_setitimer, %o0) +SIGN1(sys32_getitimer, compat_sys_getitimer, %o0) +SIGN1(sys32_sethostname, sys_sethostname, %o1) +SIGN1(sys32_swapon, sys_swapon, %o1) +SIGN1(sys32_sigaction, compat_sys_sigaction, %o0) +SIGN1(sys32_rt_sigaction, compat_sys_rt_sigaction, %o0) +SIGN1(sys32_sigprocmask, compat_sys_sigprocmask, %o0) +SIGN1(sys32_rt_sigprocmask, compat_sys_rt_sigprocmask, %o0) +SIGN2(sys32_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo, %o0, %o1) +SIGN1(sys32_getrusage, compat_sys_getrusage, %o0) +SIGN1(sys32_setxattr, sys_setxattr, %o4) +SIGN1(sys32_lsetxattr, sys_lsetxattr, %o4) +SIGN1(sys32_fsetxattr, sys_fsetxattr, %o4) +SIGN1(sys32_fgetxattr, sys_fgetxattr, %o0) +SIGN1(sys32_flistxattr, sys_flistxattr, %o0) +SIGN1(sys32_fremovexattr, sys_fremovexattr, %o0) +SIGN2(sys32_tkill, sys_tkill, %o0, %o1) +SIGN1(sys32_epoll_create, sys_epoll_create, %o0) +SIGN3(sys32_epoll_ctl, sys_epoll_ctl, %o0, %o1, %o2) +SIGN3(sys32_epoll_wait, sys_epoll_wait, %o0, %o2, %o3) +SIGN1(sys32_readahead, compat_sys_readahead, %o0) +SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4) +SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5) +SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1) +SIGN1(sys32_mlockall, sys_mlockall, %o0) +SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0) +SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1) +SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1) +SIGN1(sys32_io_submit, compat_sys_io_submit, %o1) +SIGN1(sys32_mq_open, compat_sys_mq_open, %o1) +SIGN1(sys32_select, compat_sys_select, %o0) +SIGN1(sys32_mkdir, sys_mkdir, %o1) +SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) +SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) +SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) +SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) +SIGN1(sys32_prctl, sys_prctl, %o0) +SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) +SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) +SIGN1(sys32_getgroups, sys_getgroups, %o0) +SIGN1(sys32_getpgid, sys_getpgid, %o0) +SIGN2(sys32_getpriority, sys_getpriority, %o0, %o1) +SIGN1(sys32_getsid, sys_getsid, %o0) +SIGN2(sys32_kill, sys_kill, %o0, %o1) +SIGN1(sys32_nice, sys_nice, %o0) +SIGN1(sys32_lseek, sys_lseek, %o1) +SIGN2(sys32_open, sparc32_open, %o1, %o2) +SIGN1(sys32_readlink, sys_readlink, %o2) +SIGN1(sys32_sched_get_priority_max, sys_sched_get_priority_max, %o0) +SIGN1(sys32_sched_get_priority_min, sys_sched_get_priority_min, %o0) +SIGN1(sys32_sched_getparam, sys_sched_getparam, %o0) +SIGN1(sys32_sched_getscheduler, sys_sched_getscheduler, %o0) +SIGN1(sys32_sched_setparam, sys_sched_setparam, %o0) +SIGN2(sys32_sched_setscheduler, sys_sched_setscheduler, %o0, %o1) +SIGN1(sys32_getdomainname, sys_getdomainname, %o1) +SIGN1(sys32_setdomainname, sys_setdomainname, %o1) +SIGN1(sys32_setgroups, sys_setgroups, %o0) +SIGN2(sys32_setpgid, sys_setpgid, %o0, %o1) +SIGN3(sys32_setpriority, sys_setpriority, %o0, %o1, %o2) +SIGN1(sys32_ssetmask, sys_ssetmask, %o0) +SIGN2(sys32_syslog, sys_syslog, %o0, %o2) +SIGN1(sys32_umask, sys_umask, %o0) +SIGN3(sys32_tgkill, sys_tgkill, %o0, %o1, %o2) +SIGN1(sys32_sendto, sys_sendto, %o0) +SIGN1(sys32_recvfrom, sys_recvfrom, %o0) +SIGN3(sys32_socket, sys_socket, %o0, %o1, %o2) +SIGN2(sys32_connect, sys_connect, %o0, %o2) +SIGN2(sys32_bind, sys_bind, %o0, %o2) +SIGN2(sys32_listen, sys_listen, %o0, %o1) +SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) +SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) +SIGN2(sys32_shutdown, sys_shutdown, %o0, %o1) +SIGN3(sys32_socketpair, sys_socketpair, %o0, %o1, %o2) +SIGN1(sys32_getpeername, sys_getpeername, %o0) +SIGN1(sys32_getsockname, sys_getsockname, %o0) +SIGN2(sys32_ioprio_get, sys_ioprio_get, %o0, %o1) +SIGN3(sys32_ioprio_set, sys_ioprio_set, %o0, %o1, %o2) +SIGN2(sys32_splice, sys_splice, %o0, %o1) +SIGN2(sys32_sync_file_range, compat_sync_file_range, %o0, %o5) +SIGN2(sys32_tee, sys_tee, %o0, %o1) +SIGN1(sys32_vmsplice, compat_sys_vmsplice, %o0) + + .globl sys32_mmap2 +sys32_mmap2: + sethi %hi(sys_mmap), %g1 + jmpl %g1 + %lo(sys_mmap), %g0 + sllx %o5, 12, %o5 + + .align 32 + .globl sys32_socketcall +sys32_socketcall: /* %o0=call, %o1=args */ + cmp %o0, 1 + bl,pn %xcc, do_einval + cmp %o0, 18 + bg,pn %xcc, do_einval + sub %o0, 1, %o0 + sllx %o0, 5, %o0 + sethi %hi(__socketcall_table_begin), %g2 + or %g2, %lo(__socketcall_table_begin), %g2 + jmpl %g2 + %o0, %g0 + nop +do_einval: + retl + mov -EINVAL, %o0 + + .align 32 +__socketcall_table_begin: + + /* Each entry is exactly 32 bytes. */ +do_sys_socket: /* sys_socket(int, int, int) */ +1: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_socket), %g1 +2: ldswa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(sys_socket), %g0 +3: ldswa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */ +4: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_bind), %g1 +5: ldswa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(sys_bind), %g0 +6: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */ +7: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_connect), %g1 +8: ldswa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(sys_connect), %g0 +9: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_listen: /* sys_listen(int, int) */ +10: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_listen), %g1 + jmpl %g1 + %lo(sys_listen), %g0 +11: ldswa [%o1 + 0x4] %asi, %o1 + nop + nop + nop + nop +do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */ +12: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_accept), %g1 +13: lduwa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(sys_accept), %g0 +14: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */ +15: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_getsockname), %g1 +16: lduwa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(sys_getsockname), %g0 +17: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */ +18: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_getpeername), %g1 +19: lduwa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(sys_getpeername), %g0 +20: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */ +21: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_socketpair), %g1 +22: ldswa [%o1 + 0x8] %asi, %o2 +23: lduwa [%o1 + 0xc] %asi, %o3 + jmpl %g1 + %lo(sys_socketpair), %g0 +24: ldswa [%o1 + 0x4] %asi, %o1 + nop + nop +do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */ +25: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_send), %g1 +26: lduwa [%o1 + 0x8] %asi, %o2 +27: lduwa [%o1 + 0xc] %asi, %o3 + jmpl %g1 + %lo(sys_send), %g0 +28: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop +do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */ +29: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_recv), %g1 +30: lduwa [%o1 + 0x8] %asi, %o2 +31: lduwa [%o1 + 0xc] %asi, %o3 + jmpl %g1 + %lo(sys_recv), %g0 +32: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop +do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */ +33: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_sendto), %g1 +34: lduwa [%o1 + 0x8] %asi, %o2 +35: lduwa [%o1 + 0xc] %asi, %o3 +36: lduwa [%o1 + 0x10] %asi, %o4 +37: ldswa [%o1 + 0x14] %asi, %o5 + jmpl %g1 + %lo(sys_sendto), %g0 +38: lduwa [%o1 + 0x4] %asi, %o1 +do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */ +39: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_recvfrom), %g1 +40: lduwa [%o1 + 0x8] %asi, %o2 +41: lduwa [%o1 + 0xc] %asi, %o3 +42: lduwa [%o1 + 0x10] %asi, %o4 +43: lduwa [%o1 + 0x14] %asi, %o5 + jmpl %g1 + %lo(sys_recvfrom), %g0 +44: lduwa [%o1 + 0x4] %asi, %o1 +do_sys_shutdown: /* sys_shutdown(int, int) */ +45: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_shutdown), %g1 + jmpl %g1 + %lo(sys_shutdown), %g0 +46: ldswa [%o1 + 0x4] %asi, %o1 + nop + nop + nop + nop +do_sys_setsockopt: /* compat_sys_setsockopt(int, int, int, char *, int) */ +47: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(compat_sys_setsockopt), %g1 +48: ldswa [%o1 + 0x8] %asi, %o2 +49: lduwa [%o1 + 0xc] %asi, %o3 +50: ldswa [%o1 + 0x10] %asi, %o4 + jmpl %g1 + %lo(compat_sys_setsockopt), %g0 +51: ldswa [%o1 + 0x4] %asi, %o1 + nop +do_sys_getsockopt: /* compat_sys_getsockopt(int, int, int, u32, u32) */ +52: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(compat_sys_getsockopt), %g1 +53: ldswa [%o1 + 0x8] %asi, %o2 +54: lduwa [%o1 + 0xc] %asi, %o3 +55: lduwa [%o1 + 0x10] %asi, %o4 + jmpl %g1 + %lo(compat_sys_getsockopt), %g0 +56: ldswa [%o1 + 0x4] %asi, %o1 + nop +do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ +57: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(compat_sys_sendmsg), %g1 +58: lduwa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(compat_sys_sendmsg), %g0 +59: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */ +60: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(compat_sys_recvmsg), %g1 +61: lduwa [%o1 + 0x8] %asi, %o2 + jmpl %g1 + %lo(compat_sys_recvmsg), %g0 +62: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + nop +do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ +63: ldswa [%o1 + 0x0] %asi, %o0 + sethi %hi(sys_accept4), %g1 +64: lduwa [%o1 + 0x8] %asi, %o2 +65: ldswa [%o1 + 0xc] %asi, %o3 + jmpl %g1 + %lo(sys_accept4), %g0 +66: lduwa [%o1 + 0x4] %asi, %o1 + nop + nop + + .section __ex_table,"a" + .align 4 + .word 1b, __retl_efault, 2b, __retl_efault + .word 3b, __retl_efault, 4b, __retl_efault + .word 5b, __retl_efault, 6b, __retl_efault + .word 7b, __retl_efault, 8b, __retl_efault + .word 9b, __retl_efault, 10b, __retl_efault + .word 11b, __retl_efault, 12b, __retl_efault + .word 13b, __retl_efault, 14b, __retl_efault + .word 15b, __retl_efault, 16b, __retl_efault + .word 17b, __retl_efault, 18b, __retl_efault + .word 19b, __retl_efault, 20b, __retl_efault + .word 21b, __retl_efault, 22b, __retl_efault + .word 23b, __retl_efault, 24b, __retl_efault + .word 25b, __retl_efault, 26b, __retl_efault + .word 27b, __retl_efault, 28b, __retl_efault + .word 29b, __retl_efault, 30b, __retl_efault + .word 31b, __retl_efault, 32b, __retl_efault + .word 33b, __retl_efault, 34b, __retl_efault + .word 35b, __retl_efault, 36b, __retl_efault + .word 37b, __retl_efault, 38b, __retl_efault + .word 39b, __retl_efault, 40b, __retl_efault + .word 41b, __retl_efault, 42b, __retl_efault + .word 43b, __retl_efault, 44b, __retl_efault + .word 45b, __retl_efault, 46b, __retl_efault + .word 47b, __retl_efault, 48b, __retl_efault + .word 49b, __retl_efault, 50b, __retl_efault + .word 51b, __retl_efault, 52b, __retl_efault + .word 53b, __retl_efault, 54b, __retl_efault + .word 55b, __retl_efault, 56b, __retl_efault + .word 57b, __retl_efault, 58b, __retl_efault + .word 59b, __retl_efault, 60b, __retl_efault + .word 61b, __retl_efault, 62b, __retl_efault + .word 63b, __retl_efault, 64b, __retl_efault + .word 65b, __retl_efault, 66b, __retl_efault + .previous diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c new file mode 100644 index 000000000000..e800503879e4 --- /dev/null +++ b/arch/sparc/kernel/sys_sparc32.c @@ -0,0 +1,682 @@ +/* sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. + * + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/slab.h> +#include <linux/uio.h> +#include <linux/nfs_fs.h> +#include <linux/quota.h> +#include <linux/module.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/filter.h> +#include <linux/highmem.h> +#include <linux/highuid.h> +#include <linux/mman.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/icmpv6.h> +#include <linux/syscalls.h> +#include <linux/sysctl.h> +#include <linux/binfmts.h> +#include <linux/dnotify.h> +#include <linux/security.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/ptrace.h> + +#include <asm/types.h> +#include <asm/uaccess.h> +#include <asm/fpumacro.h> +#include <asm/mmu_context.h> +#include <asm/compat_signal.h> + +#ifdef CONFIG_SYSVIPC +asmlinkage long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMTIMEDOP: + if (fifth) + /* sign extend semid */ + return compat_sys_semtimedop((int)first, + compat_ptr(ptr), second, + compat_ptr(fifth)); + /* else fall through for normal semop() */ + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + /* sign extend semid */ + return sys_semtimedop((int)first, compat_ptr(ptr), second, + NULL); + case SEMGET: + /* sign extend key, nsems */ + return sys_semget((int)first, (int)second, third); + case SEMCTL: + /* sign extend semid, semnum */ + return compat_sys_semctl((int)first, (int)second, third, + compat_ptr(ptr)); + + case MSGSND: + /* sign extend msqid */ + return compat_sys_msgsnd((int)first, (int)second, third, + compat_ptr(ptr)); + case MSGRCV: + /* sign extend msqid, msgtyp */ + return compat_sys_msgrcv((int)first, second, (int)fifth, + third, version, compat_ptr(ptr)); + case MSGGET: + /* sign extend key */ + return sys_msgget((int)first, second); + case MSGCTL: + /* sign extend msqid */ + return compat_sys_msgctl((int)first, second, compat_ptr(ptr)); + + case SHMAT: + /* sign extend shmid */ + return compat_sys_shmat((int)first, second, third, version, + compat_ptr(ptr)); + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + /* sign extend key_t */ + return sys_shmget((int)first, second, third); + case SHMCTL: + /* sign extend shmid */ + return compat_sys_shmctl((int)first, second, compat_ptr(ptr)); + + default: + return -ENOSYS; + }; + + return -ENOSYS; +} +#endif + +asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_truncate(path, (high << 32) | low); +} + +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_ftruncate(fd, (high << 32) | low); +} + +static int cp_compat_stat64(struct kstat *stat, + struct compat_stat64 __user *statbuf) +{ + int err; + + err = put_user(huge_encode_dev(stat->dev), &statbuf->st_dev); + err |= put_user(stat->ino, &statbuf->st_ino); + err |= put_user(stat->mode, &statbuf->st_mode); + err |= put_user(stat->nlink, &statbuf->st_nlink); + err |= put_user(stat->uid, &statbuf->st_uid); + err |= put_user(stat->gid, &statbuf->st_gid); + err |= put_user(huge_encode_dev(stat->rdev), &statbuf->st_rdev); + err |= put_user(0, (unsigned long __user *) &statbuf->__pad3[0]); + err |= put_user(stat->size, &statbuf->st_size); + err |= put_user(stat->blksize, &statbuf->st_blksize); + err |= put_user(0, (unsigned int __user *) &statbuf->__pad4[0]); + err |= put_user(0, (unsigned int __user *) &statbuf->__pad4[4]); + err |= put_user(stat->blocks, &statbuf->st_blocks); + err |= put_user(stat->atime.tv_sec, &statbuf->st_atime); + err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); + err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime); + err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); + err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime); + err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); + err |= put_user(0, &statbuf->__unused4); + err |= put_user(0, &statbuf->__unused5); + + return err; +} + +asmlinkage long compat_sys_stat64(char __user * filename, + struct compat_stat64 __user *statbuf) +{ + struct kstat stat; + int error = vfs_stat(filename, &stat); + + if (!error) + error = cp_compat_stat64(&stat, statbuf); + return error; +} + +asmlinkage long compat_sys_lstat64(char __user * filename, + struct compat_stat64 __user *statbuf) +{ + struct kstat stat; + int error = vfs_lstat(filename, &stat); + + if (!error) + error = cp_compat_stat64(&stat, statbuf); + return error; +} + +asmlinkage long compat_sys_fstat64(unsigned int fd, + struct compat_stat64 __user * statbuf) +{ + struct kstat stat; + int error = vfs_fstat(fd, &stat); + + if (!error) + error = cp_compat_stat64(&stat, statbuf); + return error; +} + +asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, + struct compat_stat64 __user * statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_compat_stat64(&stat, statbuf); + +out: + return error; +} + +asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) +{ + return sys_sysfs(option, arg1, arg2); +} + +asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + set_fs (KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t); + set_fs (old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage long compat_sys_rt_sigprocmask(int how, + compat_sigset_t __user *set, + compat_sigset_t __user *oset, + compat_size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + return -EFAULT; + switch (_NSIG_WORDS) { + case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + } + set_fs (KERNEL_DS); + ret = sys_rt_sigprocmask(how, + set ? (sigset_t __user *) &s : NULL, + oset ? (sigset_t __user *) &s : NULL, + sigsetsize); + set_fs (old_fs); + if (ret) return ret; + if (oset) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return 0; +} + +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize); + set_fs (old_fs); + if (!ret) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return ret; +} + +asmlinkage long compat_sys_rt_sigqueueinfo(int pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + + set_fs (KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &info); + set_fs (old_fs); + return ret; +} + +asmlinkage long compat_sys_sigaction(int sig, struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + WARN_ON_ONCE(sig >= 0); + sig = -sig; + + if (act) { + compat_old_sigset_t mask; + u32 u_handler, u_restorer; + + ret = get_user(u_handler, &act->sa_handler); + new_ka.sa.sa_handler = compat_ptr(u_handler); + ret |= __get_user(u_restorer, &act->sa_restorer); + new_ka.sa.sa_restorer = compat_ptr(u_restorer); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(mask, &act->sa_mask); + if (ret) + return ret; + new_ka.ka_restorer = NULL; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); + ret |= __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage long compat_sys_rt_sigaction(int sig, + struct sigaction32 __user *act, + struct sigaction32 __user *oact, + void __user *restorer, + compat_size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + compat_sigset_t set32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + u32 u_handler, u_restorer; + + new_ka.ka_restorer = restorer; + ret = get_user(u_handler, &act->sa_handler); + new_ka.sa.sa_handler = compat_ptr(u_handler); + ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); + switch (_NSIG_WORDS) { + case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] | (((long)set32.sig[7]) << 32); + case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] | (((long)set32.sig[5]) << 32); + case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] | (((long)set32.sig[3]) << 32); + case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] | (((long)set32.sig[1]) << 32); + } + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(u_restorer, &act->sa_restorer); + new_ka.sa.sa_restorer = compat_ptr(u_restorer); + if (ret) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + switch (_NSIG_WORDS) { + case 4: set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); set32.sig[6] = old_ka.sa.sa_mask.sig[3]; + case 3: set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); set32.sig[4] = old_ka.sa.sa_mask.sig[2]; + case 2: set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); set32.sig[2] = old_ka.sa.sa_mask.sig[1]; + case 1: set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); set32.sig[0] = old_ka.sa.sa_mask.sig[0]; + } + ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); + ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); + if (ret) + ret = -EFAULT; + } + + return ret; +} + +/* + * sparc32_execve() executes a new program after the asm stub has set + * things up for us. This should basically do what I want it to. + */ +asmlinkage long sparc32_execve(struct pt_regs *regs) +{ + int error, base = 0; + char *filename; + + /* User register window flush is done by entry.S */ + + /* Check for indirect call. */ + if ((u32)regs->u_regs[UREG_G1] == 0) + base = 1; + + filename = getname(compat_ptr(regs->u_regs[base + UREG_I0])); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + + error = compat_do_execve(filename, + compat_ptr(regs->u_regs[base + UREG_I1]), + compat_ptr(regs->u_regs[base + UREG_I2]), regs); + + putname(filename); + + if (!error) { + fprs_write(0); + current_thread_info()->xfsr[0] = 0; + current_thread_info()->fpsaved[0] = 0; + regs->tstate &= ~TSTATE_PEF; + } +out: + return error; +} + +#ifdef CONFIG_MODULES + +asmlinkage long sys32_init_module(void __user *umod, u32 len, + const char __user *uargs) +{ + return sys_init_module(umod, len, uargs); +} + +asmlinkage long sys32_delete_module(const char __user *name_user, + unsigned int flags) +{ + return sys_delete_module(name_user, flags); +} + +#else /* CONFIG_MODULES */ + +asmlinkage long sys32_init_module(const char __user *name_user, + struct module __user *mod_user) +{ + return -ENOSYS; +} + +asmlinkage long sys32_delete_module(const char __user *name_user) +{ + return -ENOSYS; +} + +#endif /* CONFIG_MODULES */ + +asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, + char __user *ubuf, + compat_size_t count, + unsigned long poshi, + unsigned long poslo) +{ + return sys_pread64(fd, ubuf, count, (poshi << 32) | poslo); +} + +asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd, + char __user *ubuf, + compat_size_t count, + unsigned long poshi, + unsigned long poslo) +{ + return sys_pwrite64(fd, ubuf, count, (poshi << 32) | poslo); +} + +asmlinkage long compat_sys_readahead(int fd, + unsigned long offhi, + unsigned long offlo, + compat_size_t count) +{ + return sys_readahead(fd, (offhi << 32) | offlo, count); +} + +long compat_sys_fadvise64(int fd, + unsigned long offhi, + unsigned long offlo, + compat_size_t len, int advice) +{ + return sys_fadvise64_64(fd, (offhi << 32) | offlo, len, advice); +} + +long compat_sys_fadvise64_64(int fd, + unsigned long offhi, unsigned long offlo, + unsigned long lenhi, unsigned long lenlo, + int advice) +{ + return sys_fadvise64_64(fd, + (offhi << 32) | offlo, + (lenhi << 32) | lenlo, + advice); +} + +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, + compat_size_t count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, + offset ? (off_t __user *) &of : NULL, + count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, + compat_loff_t __user *offset, + compat_size_t count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + loff_t lof; + + if (offset && get_user(lof, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile64(out_fd, in_fd, + offset ? (loff_t __user *) &lof : NULL, + count); + set_fs(old_fs); + + if (offset && put_user(lof, offset)) + return -EFAULT; + + return ret; +} + +/* This is just a version for 32-bit applications which does + * not force O_LARGEFILE on. + */ + +asmlinkage long sparc32_open(const char __user *filename, + int flags, int mode) +{ + return do_sys_open(AT_FDCWD, filename, flags, mode); +} + +extern unsigned long do_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); + +asmlinkage unsigned long sys32_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, u32 __new_addr) +{ + unsigned long ret = -EINVAL; + unsigned long new_addr = __new_addr; + + if (unlikely(sparc_mmap_check(addr, old_len))) + goto out; + if (unlikely(sparc_mmap_check(new_addr, new_len))) + goto out; + down_write(¤t->mm->mmap_sem); + ret = do_mremap(addr, old_len, new_len, flags, new_addr); + up_write(¤t->mm->mmap_sem); +out: + return ret; +} + +struct __sysctl_args32 { + u32 name; + int nlen; + u32 oldval; + u32 oldlenp; + u32 newval; + u32 newlen; + u32 __unused[4]; +}; + +asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) +{ +#ifndef CONFIG_SYSCTL_SYSCALL + return -ENOSYS; +#else + struct __sysctl_args32 tmp; + int error; + size_t oldlen, __user *oldlenp = NULL; + unsigned long addr = (((unsigned long)&args->__unused[0]) + 7UL) & ~7UL; + + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + if (tmp.oldval && tmp.oldlenp) { + /* Duh, this is ugly and might not work if sysctl_args + is in read-only memory, but do_sysctl does indirectly + a lot of uaccess in both directions and we'd have to + basically copy the whole sysctl.c here, and + glibc's __sysctl uses rw memory for the structure + anyway. */ + if (get_user(oldlen, (u32 __user *)(unsigned long)tmp.oldlenp) || + put_user(oldlen, (size_t __user *)addr)) + return -EFAULT; + oldlenp = (size_t __user *)addr; + } + + lock_kernel(); + error = do_sysctl((int __user *)(unsigned long) tmp.name, + tmp.nlen, + (void __user *)(unsigned long) tmp.oldval, + oldlenp, + (void __user *)(unsigned long) tmp.newval, + tmp.newlen); + unlock_kernel(); + if (oldlenp) { + if (!error) { + if (get_user(oldlen, (size_t __user *)addr) || + put_user(oldlen, (u32 __user *)(unsigned long) tmp.oldlenp)) + error = -EFAULT; + } + if (copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused))) + error = -EFAULT; + } + return error; +#endif +} + +long sys32_lookup_dcookie(unsigned long cookie_high, + unsigned long cookie_low, + char __user *buf, size_t len) +{ + return sys_lookup_dcookie((cookie_high << 32) | cookie_low, + buf, len); +} + +long compat_sync_file_range(int fd, unsigned long off_high, unsigned long off_low, unsigned long nb_high, unsigned long nb_low, int flags) +{ + return sys_sync_file_range(fd, + (off_high << 32) | off_low, + (nb_high << 32) | nb_low, + flags); +} + +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, + u32 lenhi, u32 lenlo) +{ + return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + ((loff_t)lenhi << 32) | lenlo); +} diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c new file mode 100644 index 000000000000..39749e32dc7e --- /dev/null +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -0,0 +1,914 @@ +/* linux/arch/sparc64/kernel/sys_sparc.c + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/sparc + * platform. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mm.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/stat.h> +#include <linux/mman.h> +#include <linux/utsname.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/ipc.h> +#include <linux/personality.h> +#include <linux/random.h> + +#include <asm/uaccess.h> +#include <asm/utrap.h> +#include <asm/perfctr.h> +#include <asm/unistd.h> + +#include "entry.h" +#include "systbls.h" + +/* #define DEBUG_UNIMP_SYSCALL */ + +asmlinkage unsigned long sys_getpagesize(void) +{ + return PAGE_SIZE; +} + +#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) +#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) + +/* Does addr --> addr+len fall within 4GB of the VA-space hole or + * overflow past the end of the 64-bit address space? + */ +static inline int invalid_64bit_range(unsigned long addr, unsigned long len) +{ + unsigned long va_exclude_start, va_exclude_end; + + va_exclude_start = VA_EXCLUDE_START; + va_exclude_end = VA_EXCLUDE_END; + + if (unlikely(len >= va_exclude_start)) + return 1; + + if (unlikely((addr + len) < addr)) + return 1; + + if (unlikely((addr >= va_exclude_start && addr < va_exclude_end) || + ((addr + len) >= va_exclude_start && + (addr + len) < va_exclude_end))) + return 1; + + return 0; +} + +/* Does start,end straddle the VA-space hole? */ +static inline int straddles_64bit_va_hole(unsigned long start, unsigned long end) +{ + unsigned long va_exclude_start, va_exclude_end; + + va_exclude_start = VA_EXCLUDE_START; + va_exclude_end = VA_EXCLUDE_END; + + if (likely(start < va_exclude_start && end < va_exclude_start)) + return 0; + + if (likely(start >= va_exclude_end && end >= va_exclude_end)) + return 0; + + return 1; +} + +/* These functions differ from the default implementations in + * mm/mmap.c in two ways: + * + * 1) For file backed MAP_SHARED mmap()'s we D-cache color align, + * for fixed such mappings we just validate what the user gave us. + * 2) For 64-bit tasks we avoid mapping anything within 4GB of + * the spitfire/niagara VA-hole. + */ + +static inline unsigned long COLOUR_ALIGN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1); + unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1); + + return base + off; +} + +static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = addr & ~(SHMLBA-1); + unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1); + + if (base + off <= addr) + return base + off; + return base - off; +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; + unsigned long task_size = TASK_SIZE; + unsigned long start_addr; + int do_color_align; + + if (flags & MAP_FIXED) { + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; + } + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + if (unlikely(len > task_size || len >= VA_EXCLUDE_START)) + return -ENOMEM; + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + + task_size -= len; + +full_search: + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (addr < VA_EXCLUDE_START && + (addr + len) >= VA_EXCLUDE_START) { + addr = VA_EXCLUDE_END; + vma = find_vma(mm, VA_EXCLUDE_END); + } + if (unlikely(task_size < addr)) { + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (likely(!vma || addr + len <= vma->vm_start)) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + addr = vma->vm_end; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long task_size = STACK_TOP32; + unsigned long addr = addr0; + int do_color_align; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + if (flags & MAP_FIXED) { + /* We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) + return -EINVAL; + return addr; + } + + if (unlikely(len > task_size)) + return -ENOMEM; + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + if (do_color_align) { + unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff); + + addr = base + len; + } + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = mm->mmap_base-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +/* Try to align mapping such that we align it as much as possible. */ +unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) +{ + unsigned long align_goal, addr = -ENOMEM; + + if (flags & MAP_FIXED) { + /* Ok, don't mess with it. */ + return get_unmapped_area(NULL, orig_addr, len, pgoff, flags); + } + flags &= ~MAP_SHARED; + + align_goal = PAGE_SIZE; + if (len >= (4UL * 1024 * 1024)) + align_goal = (4UL * 1024 * 1024); + else if (len >= (512UL * 1024)) + align_goal = (512UL * 1024); + else if (len >= (64UL * 1024)) + align_goal = (64UL * 1024); + + do { + addr = get_unmapped_area(NULL, orig_addr, len + (align_goal - PAGE_SIZE), pgoff, flags); + if (!(addr & ~PAGE_MASK)) { + addr = (addr + (align_goal - 1UL)) & ~(align_goal - 1UL); + break; + } + + if (align_goal == (4UL * 1024 * 1024)) + align_goal = (512UL * 1024); + else if (align_goal == (512UL * 1024)) + align_goal = (64UL * 1024); + else + align_goal = PAGE_SIZE; + } while ((addr & ~PAGE_MASK) && align_goal > PAGE_SIZE); + + /* Mapping is smaller than 64K or larger areas could not + * be obtained. + */ + if (addr & ~PAGE_MASK) + addr = get_unmapped_area(NULL, orig_addr, len, pgoff, flags); + + return addr; +} + +/* Essentially the same as PowerPC... */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) { + random_factor = get_random_int(); + if (test_thread_flag(TIF_32BIT)) + random_factor &= ((1 * 1024 * 1024) - 1); + else + random_factor = ((random_factor << PAGE_SHIFT) & + 0xffffffffUL); + } + + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (!test_thread_flag(TIF_32BIT) || + (current->personality & ADDR_COMPAT_LAYOUT) || + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || + sysctl_legacy_va_layout) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + /* We know it's 32-bit */ + unsigned long task_size = STACK_TOP32; + unsigned long gap; + + gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + if (gap < 128 * 1024 * 1024) + gap = 128 * 1024 * 1024; + if (gap > (task_size / 6 * 5)) + gap = (task_size / 6 * 5); + + mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} + +asmlinkage unsigned long sparc_brk(unsigned long brk) +{ + /* People could try to be nasty and use ta 0x6d in 32bit programs */ + if (test_thread_flag(TIF_32BIT) && brk >= STACK_TOP32) + return current->mm->brk; + + if (unlikely(straddles_64bit_va_hole(current->mm->brk, brk))) + return current->mm->brk; + + return sys_brk(brk); +} + +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way unix traditionally does this, though. + */ +asmlinkage long sparc_pipe(struct pt_regs *regs) +{ + int fd[2]; + int error; + + error = do_pipe_flags(fd, 0); + if (error) + goto out; + regs->u_regs[UREG_I1] = fd[1]; + error = fd[0]; +out: + return error; +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. + */ + +asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user *ptr, long fifth) +{ + long err; + + /* No need for backward compatibility. We can start fresh... */ + if (call <= SEMCTL) { + switch (call) { + case SEMOP: + err = sys_semtimedop(first, ptr, + (unsigned)second, NULL); + goto out; + case SEMTIMEDOP: + err = sys_semtimedop(first, ptr, (unsigned)second, + (const struct timespec __user *) + (unsigned long) fifth); + goto out; + case SEMGET: + err = sys_semget(first, (int)second, (int)third); + goto out; + case SEMCTL: { + err = sys_semctl(first, second, + (int)third | IPC_64, + (union semun) ptr); + goto out; + } + default: + err = -ENOSYS; + goto out; + }; + } + if (call <= MSGCTL) { + switch (call) { + case MSGSND: + err = sys_msgsnd(first, ptr, (size_t)second, + (int)third); + goto out; + case MSGRCV: + err = sys_msgrcv(first, ptr, (size_t)second, fifth, + (int)third); + goto out; + case MSGGET: + err = sys_msgget((key_t)first, (int)second); + goto out; + case MSGCTL: + err = sys_msgctl(first, (int)second | IPC_64, ptr); + goto out; + default: + err = -ENOSYS; + goto out; + }; + } + if (call <= SHMCTL) { + switch (call) { + case SHMAT: { + ulong raddr; + err = do_shmat(first, ptr, (int)second, &raddr); + if (!err) { + if (put_user(raddr, + (ulong __user *) third)) + err = -EFAULT; + } + goto out; + } + case SHMDT: + err = sys_shmdt(ptr); + goto out; + case SHMGET: + err = sys_shmget(first, (size_t)second, (int)third); + goto out; + case SHMCTL: + err = sys_shmctl(first, (int)second | IPC_64, ptr); + goto out; + default: + err = -ENOSYS; + goto out; + }; + } else { + err = -ENOSYS; + } +out: + return err; +} + +asmlinkage long sparc64_newuname(struct new_utsname __user *name) +{ + int ret = sys_newuname(name); + + if (current->personality == PER_LINUX32 && !ret) { + ret = (copy_to_user(name->machine, "sparc\0\0", 8) + ? -EFAULT : 0); + } + return ret; +} + +asmlinkage long sparc64_personality(unsigned long personality) +{ + int ret; + + if (current->personality == PER_LINUX32 && + personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + + return ret; +} + +int sparc_mmap_check(unsigned long addr, unsigned long len) +{ + if (test_thread_flag(TIF_32BIT)) { + if (len >= STACK_TOP32) + return -EINVAL; + + if (addr > STACK_TOP32 - len) + return -EINVAL; + } else { + if (len >= VA_EXCLUDE_START) + return -EINVAL; + + if (invalid_64bit_range(addr, len)) + return -EINVAL; + } + + return 0; +} + +/* Linux version of mmap */ +asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long fd, + unsigned long off) +{ + struct file * file = NULL; + unsigned long retval = -EBADF; + + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + len = PAGE_ALIGN(len); + + down_write(¤t->mm->mmap_sem); + retval = do_mmap(file, addr, len, prot, flags, off); + up_write(¤t->mm->mmap_sem); + + if (file) + fput(file); +out: + return retval; +} + +asmlinkage long sys64_munmap(unsigned long addr, size_t len) +{ + long ret; + + if (invalid_64bit_range(addr, len)) + return -EINVAL; + + down_write(¤t->mm->mmap_sem); + ret = do_munmap(current->mm, addr, len); + up_write(¤t->mm->mmap_sem); + return ret; +} + +extern unsigned long do_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); + +asmlinkage unsigned long sys64_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr) +{ + unsigned long ret = -EINVAL; + + if (test_thread_flag(TIF_32BIT)) + goto out; + if (unlikely(new_len >= VA_EXCLUDE_START)) + goto out; + if (unlikely(sparc_mmap_check(addr, old_len))) + goto out; + if (unlikely(sparc_mmap_check(new_addr, new_len))) + goto out; + + down_write(¤t->mm->mmap_sem); + ret = do_mremap(addr, old_len, new_len, flags, new_addr); + up_write(¤t->mm->mmap_sem); +out: + return ret; +} + +/* we come to here via sys_nis_syscall so it can setup the regs argument */ +asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs) +{ + static int count; + + /* Don't make the system unusable, if someone goes stuck */ + if (count++ > 5) + return -ENOSYS; + + printk ("Unimplemented SPARC system call %ld\n",regs->u_regs[1]); +#ifdef DEBUG_UNIMP_SYSCALL + show_regs (regs); +#endif + + return -ENOSYS; +} + +/* #define DEBUG_SPARC_BREAKPOINT */ + +asmlinkage void sparc_breakpoint(struct pt_regs *regs) +{ + siginfo_t info; + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } +#ifdef DEBUG_SPARC_BREAKPOINT + printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc); +#endif + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = 0; + force_sig_info(SIGTRAP, &info, current); +#ifdef DEBUG_SPARC_BREAKPOINT + printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc); +#endif +} + +extern void check_pending(int signum); + +asmlinkage long sys_getdomainname(char __user *name, int len) +{ + int nlen, err; + + if (len < 0) + return -EINVAL; + + down_read(&uts_sem); + + nlen = strlen(utsname()->domainname) + 1; + err = -EINVAL; + if (nlen > len) + goto out; + + err = -EFAULT; + if (!copy_to_user(name, utsname()->domainname, nlen)) + err = 0; + +out: + up_read(&uts_sem); + return err; +} + +asmlinkage long sys_utrap_install(utrap_entry_t type, + utrap_handler_t new_p, + utrap_handler_t new_d, + utrap_handler_t __user *old_p, + utrap_handler_t __user *old_d) +{ + if (type < UT_INSTRUCTION_EXCEPTION || type > UT_TRAP_INSTRUCTION_31) + return -EINVAL; + if (new_p == (utrap_handler_t)(long)UTH_NOCHANGE) { + if (old_p) { + if (!current_thread_info()->utraps) { + if (put_user(NULL, old_p)) + return -EFAULT; + } else { + if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p)) + return -EFAULT; + } + } + if (old_d) { + if (put_user(NULL, old_d)) + return -EFAULT; + } + return 0; + } + if (!current_thread_info()->utraps) { + current_thread_info()->utraps = + kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL); + if (!current_thread_info()->utraps) + return -ENOMEM; + current_thread_info()->utraps[0] = 1; + } else { + if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p && + current_thread_info()->utraps[0] > 1) { + unsigned long *p = current_thread_info()->utraps; + + current_thread_info()->utraps = + kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), + GFP_KERNEL); + if (!current_thread_info()->utraps) { + current_thread_info()->utraps = p; + return -ENOMEM; + } + p[0]--; + current_thread_info()->utraps[0] = 1; + memcpy(current_thread_info()->utraps+1, p+1, + UT_TRAP_INSTRUCTION_31*sizeof(long)); + } + } + if (old_p) { + if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p)) + return -EFAULT; + } + if (old_d) { + if (put_user(NULL, old_d)) + return -EFAULT; + } + current_thread_info()->utraps[type] = (long)new_p; + + return 0; +} + +asmlinkage long sparc_memory_ordering(unsigned long model, + struct pt_regs *regs) +{ + if (model >= 3) + return -EINVAL; + regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14); + return 0; +} + +asmlinkage long sys_rt_sigaction(int sig, + const struct sigaction __user *act, + struct sigaction __user *oact, + void __user *restorer, + size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (act) { + new_ka.ka_restorer = restorer; + if (copy_from_user(&new_ka.sa, act, sizeof(*act))) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (copy_to_user(oact, &old_ka.sa, sizeof(*oact))) + return -EFAULT; + } + + return ret; +} + +/* Invoked by rtrap code to update performance counters in + * user space. + */ +asmlinkage void update_perfctrs(void) +{ + unsigned long pic, tmp; + + read_pic(pic); + tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic); + __put_user(tmp, current_thread_info()->user_cntd0); + tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32)); + __put_user(tmp, current_thread_info()->user_cntd1); + reset_pic(); +} + +asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1, unsigned long arg2) +{ + int err = 0; + + switch(opcode) { + case PERFCTR_ON: + current_thread_info()->pcr_reg = arg2; + current_thread_info()->user_cntd0 = (u64 __user *) arg0; + current_thread_info()->user_cntd1 = (u64 __user *) arg1; + current_thread_info()->kernel_cntd0 = + current_thread_info()->kernel_cntd1 = 0; + write_pcr(arg2); + reset_pic(); + set_thread_flag(TIF_PERFCTR); + break; + + case PERFCTR_OFF: + err = -EINVAL; + if (test_thread_flag(TIF_PERFCTR)) { + current_thread_info()->user_cntd0 = + current_thread_info()->user_cntd1 = NULL; + current_thread_info()->pcr_reg = 0; + write_pcr(0); + clear_thread_flag(TIF_PERFCTR); + err = 0; + } + break; + + case PERFCTR_READ: { + unsigned long pic, tmp; + + if (!test_thread_flag(TIF_PERFCTR)) { + err = -EINVAL; + break; + } + read_pic(pic); + tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic); + err |= __put_user(tmp, current_thread_info()->user_cntd0); + tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32)); + err |= __put_user(tmp, current_thread_info()->user_cntd1); + reset_pic(); + break; + } + + case PERFCTR_CLRPIC: + if (!test_thread_flag(TIF_PERFCTR)) { + err = -EINVAL; + break; + } + current_thread_info()->kernel_cntd0 = + current_thread_info()->kernel_cntd1 = 0; + reset_pic(); + break; + + case PERFCTR_SETPCR: { + u64 __user *user_pcr = (u64 __user *)arg0; + + if (!test_thread_flag(TIF_PERFCTR)) { + err = -EINVAL; + break; + } + err |= __get_user(current_thread_info()->pcr_reg, user_pcr); + write_pcr(current_thread_info()->pcr_reg); + current_thread_info()->kernel_cntd0 = + current_thread_info()->kernel_cntd1 = 0; + reset_pic(); + break; + } + + case PERFCTR_GETPCR: { + u64 __user *user_pcr = (u64 __user *)arg0; + + if (!test_thread_flag(TIF_PERFCTR)) { + err = -EINVAL; + break; + } + err |= __put_user(current_thread_info()->pcr_reg, user_pcr); + break; + } + + default: + err = -EINVAL; + break; + }; + return err; +} + +/* + * Do a system call from kernel instead of calling sys_execve so we + * end up with proper pt_regs. + */ +int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +{ + long __res; + register long __g1 __asm__ ("g1") = __NR_execve; + register long __o0 __asm__ ("o0") = (long)(filename); + register long __o1 __asm__ ("o1") = (long)(argv); + register long __o2 __asm__ ("o2") = (long)(envp); + asm volatile ("t 0x6d\n\t" + "sub %%g0, %%o0, %0\n\t" + "movcc %%xcc, %%o0, %0\n\t" + : "=r" (__res), "=&r" (__o0) + : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1) + : "cc"); + return __res; +} diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S new file mode 100644 index 000000000000..7a6786a71363 --- /dev/null +++ b/arch/sparc/kernel/syscalls.S @@ -0,0 +1,279 @@ + /* SunOS's execv() call only specifies the argv argument, the + * environment settings are the same as the calling processes. + */ +sys_execve: + sethi %hi(sparc_execve), %g1 + ba,pt %xcc, execve_merge + or %g1, %lo(sparc_execve), %g1 + +#ifdef CONFIG_COMPAT +sunos_execv: + stx %g0, [%sp + PTREGS_OFF + PT_V9_I2] +sys32_execve: + sethi %hi(sparc32_execve), %g1 + or %g1, %lo(sparc32_execve), %g1 +#endif + +execve_merge: + flushw + jmpl %g1, %g0 + add %sp, PTREGS_OFF, %o0 + + .align 32 +sys_pipe: + ba,pt %xcc, sparc_pipe + add %sp, PTREGS_OFF, %o0 +sys_nis_syscall: + ba,pt %xcc, c_sys_nis_syscall + add %sp, PTREGS_OFF, %o0 +sys_memory_ordering: + ba,pt %xcc, sparc_memory_ordering + add %sp, PTREGS_OFF, %o1 +sys_sigaltstack: + ba,pt %xcc, do_sigaltstack + add %i6, STACK_BIAS, %o2 +#ifdef CONFIG_COMPAT +sys32_sigstack: + ba,pt %xcc, do_sys32_sigstack + mov %i6, %o2 +sys32_sigaltstack: + ba,pt %xcc, do_sys32_sigaltstack + mov %i6, %o2 +#endif + .align 32 +#ifdef CONFIG_COMPAT +sys32_sigreturn: + add %sp, PTREGS_OFF, %o0 + call do_sigreturn32 + add %o7, 1f-.-4, %o7 + nop +#endif +sys_rt_sigreturn: + add %sp, PTREGS_OFF, %o0 + call do_rt_sigreturn + add %o7, 1f-.-4, %o7 + nop +#ifdef CONFIG_COMPAT +sys32_rt_sigreturn: + add %sp, PTREGS_OFF, %o0 + call do_rt_sigreturn32 + add %o7, 1f-.-4, %o7 + nop +#endif + .align 32 +1: ldx [%g6 + TI_FLAGS], %l5 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 + be,pt %icc, rtrap + nop + call syscall_trace_leave + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + + /* This is how fork() was meant to be done, 8 instruction entry. + * + * I questioned the following code briefly, let me clear things + * up so you must not reason on it like I did. + * + * Know the fork_kpsr etc. we use in the sparc32 port? We don't + * need it here because the only piece of window state we copy to + * the child is the CWP register. Even if the parent sleeps, + * we are safe because we stuck it into pt_regs of the parent + * so it will not change. + * + * XXX This raises the question, whether we can do the same on + * XXX sparc32 to get rid of fork_kpsr _and_ fork_kwim. The + * XXX answer is yes. We stick fork_kpsr in UREG_G0 and + * XXX fork_kwim in UREG_G1 (global registers are considered + * XXX volatile across a system call in the sparc ABI I think + * XXX if it isn't we can use regs->y instead, anyone who depends + * XXX upon the Y register being preserved across a fork deserves + * XXX to lose). + * + * In fact we should take advantage of that fact for other things + * during system calls... + */ + .align 32 +sys_vfork: /* Under Linux, vfork and fork are just special cases of clone. */ + sethi %hi(0x4000 | 0x0100 | SIGCHLD), %o0 + or %o0, %lo(0x4000 | 0x0100 | SIGCHLD), %o0 + ba,pt %xcc, sys_clone +sys_fork: + clr %o1 + mov SIGCHLD, %o0 +sys_clone: + flushw + movrz %o1, %fp, %o1 + mov 0, %o3 + ba,pt %xcc, sparc_do_fork + add %sp, PTREGS_OFF, %o2 + + .globl ret_from_syscall +ret_from_syscall: + /* Clear current_thread_info()->new_child, and + * check performance counter stuff too. + */ + stb %g0, [%g6 + TI_NEW_CHILD] + ldx [%g6 + TI_FLAGS], %l0 + call schedule_tail + mov %g7, %o0 + andcc %l0, _TIF_PERFCTR, %g0 + be,pt %icc, 1f + nop + ldx [%g6 + TI_PCR], %o7 + wr %g0, %o7, %pcr + + /* Blackbird errata workaround. See commentary in + * smp.c:smp_percpu_timer_interrupt() for more + * information. + */ + ba,pt %xcc, 99f + nop + + .align 64 +99: wr %g0, %g0, %pic + rd %pic, %g0 + +1: ba,pt %xcc, ret_sys_call + ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 + + .globl sparc_exit + .type sparc_exit,#function +sparc_exit: + rdpr %pstate, %g2 + wrpr %g2, PSTATE_IE, %pstate + rdpr %otherwin, %g1 + rdpr %cansave, %g3 + add %g3, %g1, %g3 + wrpr %g3, 0x0, %cansave + wrpr %g0, 0x0, %otherwin + wrpr %g2, 0x0, %pstate + ba,pt %xcc, sys_exit + stb %g0, [%g6 + TI_WSAVED] + .size sparc_exit,.-sparc_exit + +linux_sparc_ni_syscall: + sethi %hi(sys_ni_syscall), %l7 + ba,pt %xcc, 4f + or %l7, %lo(sys_ni_syscall), %l7 + +linux_syscall_trace32: + call syscall_trace_enter + add %sp, PTREGS_OFF, %o0 + brnz,pn %o0, 3f + mov -ENOSYS, %o0 + srl %i0, 0, %o0 + srl %i4, 0, %o4 + srl %i1, 0, %o1 + srl %i2, 0, %o2 + ba,pt %xcc, 2f + srl %i3, 0, %o3 + +linux_syscall_trace: + call syscall_trace_enter + add %sp, PTREGS_OFF, %o0 + brnz,pn %o0, 3f + mov -ENOSYS, %o0 + mov %i0, %o0 + mov %i1, %o1 + mov %i2, %o2 + mov %i3, %o3 + b,pt %xcc, 2f + mov %i4, %o4 + + + /* Linux 32-bit system calls enter here... */ + .align 32 + .globl linux_sparc_syscall32 +linux_sparc_syscall32: + /* Direct access to user regs, much faster. */ + cmp %g1, NR_SYSCALLS ! IEU1 Group + bgeu,pn %xcc, linux_sparc_ni_syscall ! CTI + srl %i0, 0, %o0 ! IEU0 + sll %g1, 2, %l4 ! IEU0 Group + srl %i4, 0, %o4 ! IEU1 + lduw [%l7 + %l4], %l7 ! Load + srl %i1, 0, %o1 ! IEU0 Group + ldx [%g6 + TI_FLAGS], %l0 ! Load + + srl %i5, 0, %o5 ! IEU1 + srl %i2, 0, %o2 ! IEU0 Group + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 + bne,pn %icc, linux_syscall_trace32 ! CTI + mov %i0, %l5 ! IEU1 + call %l7 ! CTI Group brk forced + srl %i3, 0, %o3 ! IEU0 + ba,a,pt %xcc, 3f + + /* Linux native system calls enter here... */ + .align 32 + .globl linux_sparc_syscall +linux_sparc_syscall: + /* Direct access to user regs, much faster. */ + cmp %g1, NR_SYSCALLS ! IEU1 Group + bgeu,pn %xcc, linux_sparc_ni_syscall ! CTI + mov %i0, %o0 ! IEU0 + sll %g1, 2, %l4 ! IEU0 Group + mov %i1, %o1 ! IEU1 + lduw [%l7 + %l4], %l7 ! Load +4: mov %i2, %o2 ! IEU0 Group + ldx [%g6 + TI_FLAGS], %l0 ! Load + + mov %i3, %o3 ! IEU1 + mov %i4, %o4 ! IEU0 Group + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 + bne,pn %icc, linux_syscall_trace ! CTI Group + mov %i0, %l5 ! IEU0 +2: call %l7 ! CTI Group brk forced + mov %i5, %o5 ! IEU0 + nop + +3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] +ret_sys_call: + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3 + ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc + sra %o0, 0, %o0 + mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2 + sllx %g2, 32, %g2 + + /* Check if force_successful_syscall_return() + * was invoked. + */ + ldub [%g6 + TI_SYS_NOERROR], %l2 + brnz,a,pn %l2, 80f + stb %g0, [%g6 + TI_SYS_NOERROR] + + cmp %o0, -ERESTART_RESTARTBLOCK + bgeu,pn %xcc, 1f + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6 +80: + /* System call success, clear Carry condition code. */ + andn %g3, %g2, %g3 + stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] + bne,pn %icc, linux_syscall_trace2 + add %l1, 0x4, %l2 ! npc = npc+4 + stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] + ba,pt %xcc, rtrap + stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] + +1: + /* System call failure, set Carry condition code. + * Also, get abs(errno) to return to the process. + */ + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %l6 + sub %g0, %o0, %o0 + or %g3, %g2, %g3 + stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] + stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] + bne,pn %icc, linux_syscall_trace2 + add %l1, 0x4, %l2 ! npc = npc+4 + stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] + + b,pt %xcc, rtrap + stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] +linux_syscall_trace2: + call syscall_trace_leave + add %sp, PTREGS_OFF, %o0 + stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] + ba,pt %xcc, rtrap + stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c new file mode 100644 index 000000000000..84e5ce146713 --- /dev/null +++ b/arch/sparc/kernel/sysfs.c @@ -0,0 +1,313 @@ +/* sysfs.c: Toplogy sysfs support code for sparc64. + * + * Copyright (C) 2007 David S. Miller <davem@davemloft.net> + */ +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/percpu.h> +#include <linux/init.h> + +#include <asm/hypervisor.h> +#include <asm/spitfire.h> + +static DEFINE_PER_CPU(struct hv_mmu_statistics, mmu_stats) __attribute__((aligned(64))); + +#define SHOW_MMUSTAT_ULONG(NAME) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ +{ \ + struct hv_mmu_statistics *p = &per_cpu(mmu_stats, dev->id); \ + return sprintf(buf, "%lu\n", p->NAME); \ +} \ +static SYSDEV_ATTR(NAME, 0444, show_##NAME, NULL) + +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_8k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_8k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_64k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_64k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_4mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_4mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_256mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_256mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_8k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_8k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_64k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_64k_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_4mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_4mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_256mb_tte); +SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_256mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_8k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_8k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_64k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_64k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_4mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_4mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_256mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_256mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_8k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_8k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_64k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_64k_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_4mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_4mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_256mb_tte); +SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_256mb_tte); + +static struct attribute *mmu_stat_attrs[] = { + &attr_immu_tsb_hits_ctx0_8k_tte.attr, + &attr_immu_tsb_ticks_ctx0_8k_tte.attr, + &attr_immu_tsb_hits_ctx0_64k_tte.attr, + &attr_immu_tsb_ticks_ctx0_64k_tte.attr, + &attr_immu_tsb_hits_ctx0_4mb_tte.attr, + &attr_immu_tsb_ticks_ctx0_4mb_tte.attr, + &attr_immu_tsb_hits_ctx0_256mb_tte.attr, + &attr_immu_tsb_ticks_ctx0_256mb_tte.attr, + &attr_immu_tsb_hits_ctxnon0_8k_tte.attr, + &attr_immu_tsb_ticks_ctxnon0_8k_tte.attr, + &attr_immu_tsb_hits_ctxnon0_64k_tte.attr, + &attr_immu_tsb_ticks_ctxnon0_64k_tte.attr, + &attr_immu_tsb_hits_ctxnon0_4mb_tte.attr, + &attr_immu_tsb_ticks_ctxnon0_4mb_tte.attr, + &attr_immu_tsb_hits_ctxnon0_256mb_tte.attr, + &attr_immu_tsb_ticks_ctxnon0_256mb_tte.attr, + &attr_dmmu_tsb_hits_ctx0_8k_tte.attr, + &attr_dmmu_tsb_ticks_ctx0_8k_tte.attr, + &attr_dmmu_tsb_hits_ctx0_64k_tte.attr, + &attr_dmmu_tsb_ticks_ctx0_64k_tte.attr, + &attr_dmmu_tsb_hits_ctx0_4mb_tte.attr, + &attr_dmmu_tsb_ticks_ctx0_4mb_tte.attr, + &attr_dmmu_tsb_hits_ctx0_256mb_tte.attr, + &attr_dmmu_tsb_ticks_ctx0_256mb_tte.attr, + &attr_dmmu_tsb_hits_ctxnon0_8k_tte.attr, + &attr_dmmu_tsb_ticks_ctxnon0_8k_tte.attr, + &attr_dmmu_tsb_hits_ctxnon0_64k_tte.attr, + &attr_dmmu_tsb_ticks_ctxnon0_64k_tte.attr, + &attr_dmmu_tsb_hits_ctxnon0_4mb_tte.attr, + &attr_dmmu_tsb_ticks_ctxnon0_4mb_tte.attr, + &attr_dmmu_tsb_hits_ctxnon0_256mb_tte.attr, + &attr_dmmu_tsb_ticks_ctxnon0_256mb_tte.attr, + NULL, +}; + +static struct attribute_group mmu_stat_group = { + .attrs = mmu_stat_attrs, + .name = "mmu_stats", +}; + +/* XXX convert to rusty's on_one_cpu */ +static unsigned long run_on_cpu(unsigned long cpu, + unsigned long (*func)(unsigned long), + unsigned long arg) +{ + cpumask_t old_affinity = current->cpus_allowed; + unsigned long ret; + + /* should return -EINVAL to userspace */ + if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) + return 0; + + ret = func(arg); + + set_cpus_allowed(current, old_affinity); + + return ret; +} + +static unsigned long read_mmustat_enable(unsigned long junk) +{ + unsigned long ra = 0; + + sun4v_mmustat_info(&ra); + + return ra != 0; +} + +static unsigned long write_mmustat_enable(unsigned long val) +{ + unsigned long ra, orig_ra; + + if (val) + ra = __pa(&per_cpu(mmu_stats, smp_processor_id())); + else + ra = 0UL; + + return sun4v_mmustat_conf(ra, &orig_ra); +} + +static ssize_t show_mmustat_enable(struct sys_device *s, + struct sysdev_attribute *attr, char *buf) +{ + unsigned long val = run_on_cpu(s->id, read_mmustat_enable, 0); + return sprintf(buf, "%lx\n", val); +} + +static ssize_t store_mmustat_enable(struct sys_device *s, + struct sysdev_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val, err; + int ret = sscanf(buf, "%ld", &val); + + if (ret != 1) + return -EINVAL; + + err = run_on_cpu(s->id, write_mmustat_enable, val); + if (err) + return -EIO; + + return count; +} + +static SYSDEV_ATTR(mmustat_enable, 0644, show_mmustat_enable, store_mmustat_enable); + +static int mmu_stats_supported; + +static int register_mmu_stats(struct sys_device *s) +{ + if (!mmu_stats_supported) + return 0; + sysdev_create_file(s, &attr_mmustat_enable); + return sysfs_create_group(&s->kobj, &mmu_stat_group); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unregister_mmu_stats(struct sys_device *s) +{ + if (!mmu_stats_supported) + return; + sysfs_remove_group(&s->kobj, &mmu_stat_group); + sysdev_remove_file(s, &attr_mmustat_enable); +} +#endif + +#define SHOW_CPUDATA_ULONG_NAME(NAME, MEMBER) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ +{ \ + cpuinfo_sparc *c = &cpu_data(dev->id); \ + return sprintf(buf, "%lu\n", c->MEMBER); \ +} + +#define SHOW_CPUDATA_UINT_NAME(NAME, MEMBER) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ +{ \ + cpuinfo_sparc *c = &cpu_data(dev->id); \ + return sprintf(buf, "%u\n", c->MEMBER); \ +} + +SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick); +SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size); +SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size); +SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size); +SHOW_CPUDATA_UINT_NAME(l1_icache_line_size, icache_line_size); +SHOW_CPUDATA_UINT_NAME(l2_cache_size, ecache_size); +SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size); + +static struct sysdev_attribute cpu_core_attrs[] = { + _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL), + _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), + _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), + _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), + _SYSDEV_ATTR(l1_icache_line_size, 0444, show_l1_icache_line_size, NULL), + _SYSDEV_ATTR(l2_cache_size, 0444, show_l2_cache_size, NULL), + _SYSDEV_ATTR(l2_cache_line_size, 0444, show_l2_cache_line_size, NULL), +}; + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +static void register_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; + int i; + + for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++) + sysdev_create_file(s, &cpu_core_attrs[i]); + + register_mmu_stats(s); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unregister_cpu_online(unsigned int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; + int i; + + unregister_mmu_stats(s); + for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++) + sysdev_remove_file(s, &cpu_core_attrs[i]); +} +#endif + +static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + register_cpu_online(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_DEAD_FROZEN: + unregister_cpu_online(cpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata sysfs_cpu_nb = { + .notifier_call = sysfs_cpu_notify, +}; + +static void __init check_mmu_stats(void) +{ + unsigned long dummy1, err; + + if (tlb_type != hypervisor) + return; + + err = sun4v_mmustat_info(&dummy1); + if (!err) + mmu_stats_supported = 1; +} + +static void register_nodes(void) +{ +#ifdef CONFIG_NUMA + int i; + + for (i = 0; i < MAX_NUMNODES; i++) + register_one_node(i); +#endif +} + +static int __init topology_init(void) +{ + int cpu; + + register_nodes(); + + check_mmu_stats(); + + register_cpu_notifier(&sysfs_cpu_nb); + + for_each_possible_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + + register_cpu(c, cpu); + if (cpu_online(cpu)) + register_cpu_online(cpu); + } + + return 0; +} + +subsys_initcall(topology_init); diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h new file mode 100644 index 000000000000..bc9f5dac4069 --- /dev/null +++ b/arch/sparc/kernel/systbls.h @@ -0,0 +1,51 @@ +#ifndef _SYSTBLS_H +#define _SYSTBLS_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/utsname.h> +#include <asm/utrap.h> +#include <asm/signal.h> + +extern asmlinkage unsigned long sys_getpagesize(void); +extern asmlinkage unsigned long sparc_brk(unsigned long brk); +extern asmlinkage long sparc_pipe(struct pt_regs *regs); +extern asmlinkage long sys_ipc(unsigned int call, int first, + unsigned long second, + unsigned long third, + void __user *ptr, long fifth); +extern asmlinkage long sparc64_newuname(struct new_utsname __user *name); +extern asmlinkage long sparc64_personality(unsigned long personality); +extern asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off); +extern asmlinkage long sys64_munmap(unsigned long addr, size_t len); +extern asmlinkage unsigned long sys64_mremap(unsigned long addr, + unsigned long old_len, + unsigned long new_len, + unsigned long flags, + unsigned long new_addr); +extern asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs); +extern asmlinkage long sys_getdomainname(char __user *name, int len); +extern asmlinkage long sys_utrap_install(utrap_entry_t type, + utrap_handler_t new_p, + utrap_handler_t new_d, + utrap_handler_t __user *old_p, + utrap_handler_t __user *old_d); +extern asmlinkage long sparc_memory_ordering(unsigned long model, + struct pt_regs *regs); +extern asmlinkage long sys_rt_sigaction(int sig, + const struct sigaction __user *act, + struct sigaction __user *oact, + void __user *restorer, + size_t sigsetsize); +extern asmlinkage long sys_perfctr(int opcode, unsigned long arg0, + unsigned long arg1, unsigned long arg2); + +extern asmlinkage void sparc64_set_context(struct pt_regs *regs); +extern asmlinkage void sparc64_get_context(struct pt_regs *regs); +extern asmlinkage long sys_sigpause(unsigned int set); +extern asmlinkage long sys_sigsuspend(old_sigset_t set); +extern void do_rt_sigreturn(struct pt_regs *regs); + +#endif /* _SYSTBLS_H */ diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S new file mode 100644 index 000000000000..9fc78cf354bd --- /dev/null +++ b/arch/sparc/kernel/systbls_64.S @@ -0,0 +1,159 @@ +/* systbls.S: System call entry point tables for OS compatibility. + * The native Linux system call table lives here also. + * + * Copyright (C) 1995, 1996, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * + * Based upon preliminary work which is: + * + * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu) + */ + + + .text + .align 4 + +#ifdef CONFIG_COMPAT + /* First, the 32-bit Linux native syscall table. */ + + .globl sys_call_table32 +sys_call_table32: +/*0*/ .word sys_restart_syscall, sys32_exit, sys_fork, sys_read, sys_write +/*5*/ .word sys32_open, sys_close, sys32_wait4, sys32_creat, sys_link +/*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys32_mknod +/*15*/ .word sys_chmod, sys_lchown16, sparc_brk, sys32_perfctr, sys32_lseek +/*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16 +/*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, sys32_sigaltstack, sys_pause +/*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice + .word sys_chown, sys_sync, sys32_kill, compat_sys_newstat, sys32_sendfile +/*40*/ .word compat_sys_newlstat, sys_dup, sys_pipe, compat_sys_times, sys_getuid + .word sys32_umount, sys_setgid16, sys_getgid16, sys32_signal, sys_geteuid16 +/*50*/ .word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl + .word sys32_reboot, sys32_mmap2, sys_symlink, sys32_readlink, sys32_execve +/*60*/ .word sys32_umask, sys_chroot, compat_sys_newfstat, compat_sys_fstat64, sys_getpagesize + .word sys32_msync, sys_vfork, sys32_pread64, sys32_pwrite64, sys_geteuid +/*70*/ .word sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect + .word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys_getgroups16 +/*80*/ .word sys_setgroups16, sys_getpgrp, sys32_setgroups, sys32_setitimer, sys32_ftruncate64 + .word sys32_swapon, sys32_getitimer, sys_setuid, sys32_sethostname, sys_setgid +/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid + .word sys_fsync, sys32_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*100*/ .word sys32_getpriority, sys32_rt_sigreturn, sys32_rt_sigaction, sys32_rt_sigprocmask, sys32_rt_sigpending + .word compat_sys_rt_sigtimedwait, sys32_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid +/*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall + .word sys32_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd +/*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod + .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate +/*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall + .word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 +/*140*/ .word sys32_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit + .word compat_sys_setrlimit, sys_pivot_root, sys32_prctl, sys_pciconfig_read, sys_pciconfig_write +/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 + .word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount +/*160*/ .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys32_getdomainname, sys32_setdomainname, sys_nis_syscall + .word sys_quotactl, sys_set_tid_address, compat_sys_mount, sys_ustat, sys32_setxattr +/*170*/ .word sys32_lsetxattr, sys32_fsetxattr, sys_getxattr, sys_lgetxattr, compat_sys_getdents + .word sys_setsid, sys_fchdir, sys32_fgetxattr, sys_listxattr, sys_llistxattr +/*180*/ .word sys32_flistxattr, sys_removexattr, sys_lremovexattr, compat_sys_sigpending, sys_ni_syscall + .word sys32_setpgid, sys32_fremovexattr, sys32_tkill, sys32_exit_group, sparc64_newuname +/*190*/ .word sys32_init_module, sparc64_personality, sys_remap_file_pages, sys32_epoll_create, sys32_epoll_ctl + .word sys32_epoll_wait, sys32_ioprio_set, sys_getppid, sys32_sigaction, sys_sgetmask +/*200*/ .word sys32_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir + .word sys32_readahead, sys32_socketcall, sys32_syslog, sys32_lookup_dcookie, sys32_fadvise64 +/*210*/ .word sys32_fadvise64_64, sys32_tgkill, sys32_waitpid, sys_swapoff, compat_sys_sysinfo + .word compat_sys_ipc, sys32_sigreturn, sys_clone, sys32_ioprio_get, compat_sys_adjtimex +/*220*/ .word sys32_sigprocmask, sys_ni_syscall, sys32_delete_module, sys_ni_syscall, sys32_getpgid + .word sys32_bdflush, sys32_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16 +/*230*/ .word sys32_select, compat_sys_time, sys32_splice, compat_sys_stime, compat_sys_statfs64 + .word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall +/*240*/ .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler + .word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep +/*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl + .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep +/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun + .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy +/*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink + .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid +/*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat + .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 +/*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat + .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare +/*300*/ .word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy + .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait +/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate + .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4 + +#endif /* CONFIG_COMPAT */ + + /* Now the 64-bit native Linux syscall table. */ + + .align 4 + .globl sys_call_table64, sys_call_table +sys_call_table64: +sys_call_table: +/*0*/ .word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write +/*5*/ .word sys_open, sys_close, sys_wait4, sys_creat, sys_link +/*10*/ .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod +/*15*/ .word sys_chmod, sys_lchown, sparc_brk, sys_perfctr, sys_lseek +/*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid +/*25*/ .word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall +/*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice + .word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile64 +/*40*/ .word sys_newlstat, sys_dup, sys_pipe, sys_times, sys_nis_syscall + .word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid +/*50*/ .word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl + .word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys_execve +/*60*/ .word sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize + .word sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_nis_syscall +/*70*/ .word sys_nis_syscall, sys_mmap, sys_nis_syscall, sys64_munmap, sys_mprotect + .word sys_madvise, sys_vhangup, sys_nis_syscall, sys_mincore, sys_getgroups +/*80*/ .word sys_setgroups, sys_getpgrp, sys_nis_syscall, sys_setitimer, sys_nis_syscall + .word sys_swapon, sys_getitimer, sys_nis_syscall, sys_sethostname, sys_nis_syscall +/*90*/ .word sys_dup2, sys_nis_syscall, sys_fcntl, sys_select, sys_nis_syscall + .word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept +/*100*/ .word sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending + .word sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid +/*110*/ .word sys_setresgid, sys_getresgid, sys_nis_syscall, sys_recvmsg, sys_sendmsg + .word sys_nis_syscall, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd +/*120*/ .word sys_readv, sys_writev, sys_settimeofday, sys_fchown, sys_fchmod + .word sys_recvfrom, sys_setreuid, sys_setregid, sys_rename, sys_truncate +/*130*/ .word sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown + .word sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64 +/*140*/ .word sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit + .word sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write +/*150*/ .word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 + .word sys_nis_syscall, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount +/*160*/ .word sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_utrap_install + .word sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr +/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents + .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr +/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_ni_syscall + .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sparc64_newuname +/*190*/ .word sys_init_module, sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl + .word sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_nis_syscall, sys_sgetmask +/*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall + .word sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64 +/*210*/ .word sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo + .word sys_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex +/*220*/ .word sys_nis_syscall, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid + .word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid +/*230*/ .word sys_select, sys_nis_syscall, sys_splice, sys_stime, sys_statfs64 + .word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall +/*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler + .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep +/*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl + .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep +/*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun + .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy +/*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink + .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid +/*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat + .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 +/*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat + .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare +/*300*/ .word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy + .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait +/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate + .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4 diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c new file mode 100644 index 000000000000..141da3759091 --- /dev/null +++ b/arch/sparc/kernel/time_64.c @@ -0,0 +1,862 @@ +/* time.c: UltraSparc timer and TOD clock support. + * + * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * + * Based largely on code which is: + * + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu) + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/mc146818rtc.h> +#include <linux/delay.h> +#include <linux/profile.h> +#include <linux/bcd.h> +#include <linux/jiffies.h> +#include <linux/cpufreq.h> +#include <linux/percpu.h> +#include <linux/miscdevice.h> +#include <linux/rtc.h> +#include <linux/rtc/m48t59.h> +#include <linux/kernel_stat.h> +#include <linux/clockchips.h> +#include <linux/clocksource.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <asm/oplib.h> +#include <asm/timer.h> +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/starfire.h> +#include <asm/smp.h> +#include <asm/sections.h> +#include <asm/cpudata.h> +#include <asm/uaccess.h> +#include <asm/irq_regs.h> + +#include "entry.h" + +DEFINE_SPINLOCK(rtc_lock); + +#define TICK_PRIV_BIT (1UL << 63) +#define TICKCMP_IRQ_BIT (1UL << 63) + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ + unsigned long pc = instruction_pointer(regs); + + if (in_lock_functions(pc)) + return regs->u_regs[UREG_RETPC]; + return pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +static void tick_disable_protection(void) +{ + /* Set things up so user can access tick register for profiling + * purposes. Also workaround BB_ERRATA_1 by doing a dummy + * read back of %tick after writing it. + */ + __asm__ __volatile__( + " ba,pt %%xcc, 1f\n" + " nop\n" + " .align 64\n" + "1: rd %%tick, %%g2\n" + " add %%g2, 6, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wrpr %%g2, 0, %%tick\n" + " rdpr %%tick, %%g0" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g2"); +} + +static void tick_disable_irq(void) +{ + __asm__ __volatile__( + " ba,pt %%xcc, 1f\n" + " nop\n" + " .align 64\n" + "1: wr %0, 0x0, %%tick_cmpr\n" + " rd %%tick_cmpr, %%g0" + : /* no outputs */ + : "r" (TICKCMP_IRQ_BIT)); +} + +static void tick_init_tick(void) +{ + tick_disable_protection(); + tick_disable_irq(); +} + +static unsigned long tick_get_tick(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%tick, %0\n\t" + "mov %0, %0" + : "=r" (ret)); + + return ret & ~TICK_PRIV_BIT; +} + +static int tick_add_compare(unsigned long adj) +{ + unsigned long orig_tick, new_tick, new_compare; + + __asm__ __volatile__("rd %%tick, %0" + : "=r" (orig_tick)); + + orig_tick &= ~TICKCMP_IRQ_BIT; + + /* Workaround for Spitfire Errata (#54 I think??), I discovered + * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch + * number 103640. + * + * On Blackbird writes to %tick_cmpr can fail, the + * workaround seems to be to execute the wr instruction + * at the start of an I-cache line, and perform a dummy + * read back from %tick_cmpr right after writing to it. -DaveM + */ + __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" + " add %1, %2, %0\n\t" + ".align 64\n" + "1:\n\t" + "wr %0, 0, %%tick_cmpr\n\t" + "rd %%tick_cmpr, %%g0\n\t" + : "=r" (new_compare) + : "r" (orig_tick), "r" (adj)); + + __asm__ __volatile__("rd %%tick, %0" + : "=r" (new_tick)); + new_tick &= ~TICKCMP_IRQ_BIT; + + return ((long)(new_tick - (orig_tick+adj))) > 0L; +} + +static unsigned long tick_add_tick(unsigned long adj) +{ + unsigned long new_tick; + + /* Also need to handle Blackbird bug here too. */ + __asm__ __volatile__("rd %%tick, %0\n\t" + "add %0, %1, %0\n\t" + "wrpr %0, 0, %%tick\n\t" + : "=&r" (new_tick) + : "r" (adj)); + + return new_tick; +} + +static struct sparc64_tick_ops tick_operations __read_mostly = { + .name = "tick", + .init_tick = tick_init_tick, + .disable_irq = tick_disable_irq, + .get_tick = tick_get_tick, + .add_tick = tick_add_tick, + .add_compare = tick_add_compare, + .softint_mask = 1UL << 0, +}; + +struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations; + +static void stick_disable_irq(void) +{ + __asm__ __volatile__( + "wr %0, 0x0, %%asr25" + : /* no outputs */ + : "r" (TICKCMP_IRQ_BIT)); +} + +static void stick_init_tick(void) +{ + /* Writes to the %tick and %stick register are not + * allowed on sun4v. The Hypervisor controls that + * bit, per-strand. + */ + if (tlb_type != hypervisor) { + tick_disable_protection(); + tick_disable_irq(); + + /* Let the user get at STICK too. */ + __asm__ __volatile__( + " rd %%asr24, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wr %%g2, 0, %%asr24" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g1", "g2"); + } + + stick_disable_irq(); +} + +static unsigned long stick_get_tick(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (ret)); + + return ret & ~TICK_PRIV_BIT; +} + +static unsigned long stick_add_tick(unsigned long adj) +{ + unsigned long new_tick; + + __asm__ __volatile__("rd %%asr24, %0\n\t" + "add %0, %1, %0\n\t" + "wr %0, 0, %%asr24\n\t" + : "=&r" (new_tick) + : "r" (adj)); + + return new_tick; +} + +static int stick_add_compare(unsigned long adj) +{ + unsigned long orig_tick, new_tick; + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (orig_tick)); + orig_tick &= ~TICKCMP_IRQ_BIT; + + __asm__ __volatile__("wr %0, 0, %%asr25" + : /* no outputs */ + : "r" (orig_tick + adj)); + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (new_tick)); + new_tick &= ~TICKCMP_IRQ_BIT; + + return ((long)(new_tick - (orig_tick+adj))) > 0L; +} + +static struct sparc64_tick_ops stick_operations __read_mostly = { + .name = "stick", + .init_tick = stick_init_tick, + .disable_irq = stick_disable_irq, + .get_tick = stick_get_tick, + .add_tick = stick_add_tick, + .add_compare = stick_add_compare, + .softint_mask = 1UL << 16, +}; + +/* On Hummingbird the STICK/STICK_CMPR register is implemented + * in I/O space. There are two 64-bit registers each, the + * first holds the low 32-bits of the value and the second holds + * the high 32-bits. + * + * Since STICK is constantly updating, we have to access it carefully. + * + * The sequence we use to read is: + * 1) read high + * 2) read low + * 3) read high again, if it rolled re-read both low and high again. + * + * Writing STICK safely is also tricky: + * 1) write low to zero + * 2) write high + * 3) write low + */ +#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL +#define HBIRD_STICK_ADDR 0x1fe0000f070UL + +static unsigned long __hbird_read_stick(void) +{ + unsigned long ret, tmp1, tmp2, tmp3; + unsigned long addr = HBIRD_STICK_ADDR+8; + + __asm__ __volatile__("ldxa [%1] %5, %2\n" + "1:\n\t" + "sub %1, 0x8, %1\n\t" + "ldxa [%1] %5, %3\n\t" + "add %1, 0x8, %1\n\t" + "ldxa [%1] %5, %4\n\t" + "cmp %4, %2\n\t" + "bne,a,pn %%xcc, 1b\n\t" + " mov %4, %2\n\t" + "sllx %4, 32, %4\n\t" + "or %3, %4, %0\n\t" + : "=&r" (ret), "=&r" (addr), + "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3) + : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr)); + + return ret; +} + +static void __hbird_write_stick(unsigned long val) +{ + unsigned long low = (val & 0xffffffffUL); + unsigned long high = (val >> 32UL); + unsigned long addr = HBIRD_STICK_ADDR; + + __asm__ __volatile__("stxa %%g0, [%0] %4\n\t" + "add %0, 0x8, %0\n\t" + "stxa %3, [%0] %4\n\t" + "sub %0, 0x8, %0\n\t" + "stxa %2, [%0] %4" + : "=&r" (addr) + : "0" (addr), "r" (low), "r" (high), + "i" (ASI_PHYS_BYPASS_EC_E)); +} + +static void __hbird_write_compare(unsigned long val) +{ + unsigned long low = (val & 0xffffffffUL); + unsigned long high = (val >> 32UL); + unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL; + + __asm__ __volatile__("stxa %3, [%0] %4\n\t" + "sub %0, 0x8, %0\n\t" + "stxa %2, [%0] %4" + : "=&r" (addr) + : "0" (addr), "r" (low), "r" (high), + "i" (ASI_PHYS_BYPASS_EC_E)); +} + +static void hbtick_disable_irq(void) +{ + __hbird_write_compare(TICKCMP_IRQ_BIT); +} + +static void hbtick_init_tick(void) +{ + tick_disable_protection(); + + /* XXX This seems to be necessary to 'jumpstart' Hummingbird + * XXX into actually sending STICK interrupts. I think because + * XXX of how we store %tick_cmpr in head.S this somehow resets the + * XXX {TICK + STICK} interrupt mux. -DaveM + */ + __hbird_write_stick(__hbird_read_stick()); + + hbtick_disable_irq(); +} + +static unsigned long hbtick_get_tick(void) +{ + return __hbird_read_stick() & ~TICK_PRIV_BIT; +} + +static unsigned long hbtick_add_tick(unsigned long adj) +{ + unsigned long val; + + val = __hbird_read_stick() + adj; + __hbird_write_stick(val); + + return val; +} + +static int hbtick_add_compare(unsigned long adj) +{ + unsigned long val = __hbird_read_stick(); + unsigned long val2; + + val &= ~TICKCMP_IRQ_BIT; + val += adj; + __hbird_write_compare(val); + + val2 = __hbird_read_stick() & ~TICKCMP_IRQ_BIT; + + return ((long)(val2 - val)) > 0L; +} + +static struct sparc64_tick_ops hbtick_operations __read_mostly = { + .name = "hbtick", + .init_tick = hbtick_init_tick, + .disable_irq = hbtick_disable_irq, + .get_tick = hbtick_get_tick, + .add_tick = hbtick_add_tick, + .add_compare = hbtick_add_compare, + .softint_mask = 1UL << 0, +}; + +static unsigned long timer_ticks_per_nsec_quotient __read_mostly; + +int update_persistent_clock(struct timespec now) +{ + struct rtc_device *rtc = rtc_class_open("rtc0"); + int err = -1; + + if (rtc) { + err = rtc_set_mmss(rtc, now.tv_sec); + rtc_class_close(rtc); + } + + return err; +} + +unsigned long cmos_regs; +EXPORT_SYMBOL(cmos_regs); + +static struct resource rtc_cmos_resource; + +static struct platform_device rtc_cmos_device = { + .name = "rtc_cmos", + .id = -1, + .resource = &rtc_cmos_resource, + .num_resources = 1, +}; + +static int __devinit rtc_probe(struct of_device *op, const struct of_device_id *match) +{ + struct resource *r; + + printk(KERN_INFO "%s: RTC regs at 0x%lx\n", + op->node->full_name, op->resource[0].start); + + /* The CMOS RTC driver only accepts IORESOURCE_IO, so cons + * up a fake resource so that the probe works for all cases. + * When the RTC is behind an ISA bus it will have IORESOURCE_IO + * already, whereas when it's behind EBUS is will be IORESOURCE_MEM. + */ + + r = &rtc_cmos_resource; + r->flags = IORESOURCE_IO; + r->name = op->resource[0].name; + r->start = op->resource[0].start; + r->end = op->resource[0].end; + + cmos_regs = op->resource[0].start; + return platform_device_register(&rtc_cmos_device); +} + +static struct of_device_id __initdata rtc_match[] = { + { + .name = "rtc", + .compatible = "m5819", + }, + { + .name = "rtc", + .compatible = "isa-m5819p", + }, + { + .name = "rtc", + .compatible = "isa-m5823p", + }, + { + .name = "rtc", + .compatible = "ds1287", + }, + {}, +}; + +static struct of_platform_driver rtc_driver = { + .match_table = rtc_match, + .probe = rtc_probe, + .driver = { + .name = "rtc", + }, +}; + +static struct platform_device rtc_bq4802_device = { + .name = "rtc-bq4802", + .id = -1, + .num_resources = 1, +}; + +static int __devinit bq4802_probe(struct of_device *op, const struct of_device_id *match) +{ + + printk(KERN_INFO "%s: BQ4802 regs at 0x%lx\n", + op->node->full_name, op->resource[0].start); + + rtc_bq4802_device.resource = &op->resource[0]; + return platform_device_register(&rtc_bq4802_device); +} + +static struct of_device_id __initdata bq4802_match[] = { + { + .name = "rtc", + .compatible = "bq4802", + }, + {}, +}; + +static struct of_platform_driver bq4802_driver = { + .match_table = bq4802_match, + .probe = bq4802_probe, + .driver = { + .name = "bq4802", + }, +}; + +static unsigned char mostek_read_byte(struct device *dev, u32 ofs) +{ + struct platform_device *pdev = to_platform_device(dev); + void __iomem *regs = (void __iomem *) pdev->resource[0].start; + + return readb(regs + ofs); +} + +static void mostek_write_byte(struct device *dev, u32 ofs, u8 val) +{ + struct platform_device *pdev = to_platform_device(dev); + void __iomem *regs = (void __iomem *) pdev->resource[0].start; + + writeb(val, regs + ofs); +} + +static struct m48t59_plat_data m48t59_data = { + .read_byte = mostek_read_byte, + .write_byte = mostek_write_byte, +}; + +static struct platform_device m48t59_rtc = { + .name = "rtc-m48t59", + .id = 0, + .num_resources = 1, + .dev = { + .platform_data = &m48t59_data, + }, +}; + +static int __devinit mostek_probe(struct of_device *op, const struct of_device_id *match) +{ + struct device_node *dp = op->node; + + /* On an Enterprise system there can be multiple mostek clocks. + * We should only match the one that is on the central FHC bus. + */ + if (!strcmp(dp->parent->name, "fhc") && + strcmp(dp->parent->parent->name, "central") != 0) + return -ENODEV; + + printk(KERN_INFO "%s: Mostek regs at 0x%lx\n", + dp->full_name, op->resource[0].start); + + m48t59_rtc.resource = &op->resource[0]; + return platform_device_register(&m48t59_rtc); +} + +static struct of_device_id __initdata mostek_match[] = { + { + .name = "eeprom", + }, + {}, +}; + +static struct of_platform_driver mostek_driver = { + .match_table = mostek_match, + .probe = mostek_probe, + .driver = { + .name = "mostek", + }, +}; + +static struct platform_device rtc_sun4v_device = { + .name = "rtc-sun4v", + .id = -1, +}; + +static struct platform_device rtc_starfire_device = { + .name = "rtc-starfire", + .id = -1, +}; + +static int __init clock_init(void) +{ + if (this_is_starfire) + return platform_device_register(&rtc_starfire_device); + + if (tlb_type == hypervisor) + return platform_device_register(&rtc_sun4v_device); + + (void) of_register_driver(&rtc_driver, &of_platform_bus_type); + (void) of_register_driver(&mostek_driver, &of_platform_bus_type); + (void) of_register_driver(&bq4802_driver, &of_platform_bus_type); + + return 0; +} + +/* Must be after subsys_initcall() so that busses are probed. Must + * be before device_initcall() because things like the RTC driver + * need to see the clock registers. + */ +fs_initcall(clock_init); + +/* This is gets the master TICK_INT timer going. */ +static unsigned long sparc64_init_timers(void) +{ + struct device_node *dp; + unsigned long freq; + + dp = of_find_node_by_path("/"); + if (tlb_type == spitfire) { + unsigned long ver, manuf, impl; + + __asm__ __volatile__ ("rdpr %%ver, %0" + : "=&r" (ver)); + manuf = ((ver >> 48) & 0xffff); + impl = ((ver >> 32) & 0xffff); + if (manuf == 0x17 && impl == 0x13) { + /* Hummingbird, aka Ultra-IIe */ + tick_ops = &hbtick_operations; + freq = of_getintprop_default(dp, "stick-frequency", 0); + } else { + tick_ops = &tick_operations; + freq = local_cpu_data().clock_tick; + } + } else { + tick_ops = &stick_operations; + freq = of_getintprop_default(dp, "stick-frequency", 0); + } + + return freq; +} + +struct freq_table { + unsigned long clock_tick_ref; + unsigned int ref_freq; +}; +static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 }; + +unsigned long sparc64_get_clock_tick(unsigned int cpu) +{ + struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + + if (ft->clock_tick_ref) + return ft->clock_tick_ref; + return cpu_data(cpu).clock_tick; +} + +#ifdef CONFIG_CPU_FREQ + +static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + unsigned int cpu = freq->cpu; + struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + + if (!ft->ref_freq) { + ft->ref_freq = freq->old; + ft->clock_tick_ref = cpu_data(cpu).clock_tick; + } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + cpu_data(cpu).clock_tick = + cpufreq_scale(ft->clock_tick_ref, + ft->ref_freq, + freq->new); + } + + return 0; +} + +static struct notifier_block sparc64_cpufreq_notifier_block = { + .notifier_call = sparc64_cpufreq_notifier +}; + +static int __init register_sparc64_cpufreq_notifier(void) +{ + + cpufreq_register_notifier(&sparc64_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} + +core_initcall(register_sparc64_cpufreq_notifier); + +#endif /* CONFIG_CPU_FREQ */ + +static int sparc64_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return tick_ops->add_compare(delta) ? -ETIME : 0; +} + +static void sparc64_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + tick_ops->disable_irq(); + break; + + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_UNUSED: + WARN_ON(1); + break; + }; +} + +static struct clock_event_device sparc64_clockevent = { + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_mode = sparc64_timer_setup, + .set_next_event = sparc64_next_event, + .rating = 100, + .shift = 30, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, sparc64_events); + +void timer_interrupt(int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned long tick_mask = tick_ops->softint_mask; + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(sparc64_events, cpu); + + clear_softint(tick_mask); + + irq_enter(); + + kstat_this_cpu.irqs[0]++; + + if (unlikely(!evt->event_handler)) { + printk(KERN_WARNING + "Spurious SPARC64 timer interrupt on cpu %d\n", cpu); + } else + evt->event_handler(evt); + + irq_exit(); + + set_irq_regs(old_regs); +} + +void __devinit setup_sparc64_timer(void) +{ + struct clock_event_device *sevt; + unsigned long pstate; + + /* Guarantee that the following sequences execute + * uninterrupted. + */ + __asm__ __volatile__("rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + tick_ops->init_tick(); + + /* Restore PSTATE_IE. */ + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : /* no outputs */ + : "r" (pstate)); + + sevt = &__get_cpu_var(sparc64_events); + + memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); + sevt->cpumask = cpumask_of_cpu(smp_processor_id()); + + clockevents_register_device(sevt); +} + +#define SPARC64_NSEC_PER_CYC_SHIFT 10UL + +static struct clocksource clocksource_tick = { + .rating = 100, + .mask = CLOCKSOURCE_MASK(64), + .shift = 16, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static void __init setup_clockevent_multiplier(unsigned long hz) +{ + unsigned long mult, shift = 32; + + while (1) { + mult = div_sc(hz, NSEC_PER_SEC, shift); + if (mult && (mult >> 32UL) == 0UL) + break; + + shift--; + } + + sparc64_clockevent.shift = shift; + sparc64_clockevent.mult = mult; +} + +static unsigned long tb_ticks_per_usec __read_mostly; + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + bclock = tick_ops->get_tick(); + do { + now = tick_ops->get_tick(); + } while ((now-bclock) < loops); +} +EXPORT_SYMBOL(__delay); + +void udelay(unsigned long usecs) +{ + __delay(tb_ticks_per_usec * usecs); +} +EXPORT_SYMBOL(udelay); + +void __init time_init(void) +{ + unsigned long freq = sparc64_init_timers(); + + tb_ticks_per_usec = freq / USEC_PER_SEC; + + timer_ticks_per_nsec_quotient = + clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT); + + clocksource_tick.name = tick_ops->name; + clocksource_tick.mult = + clocksource_hz2mult(freq, + clocksource_tick.shift); + clocksource_tick.read = tick_ops->get_tick; + + printk("clocksource: mult[%x] shift[%d]\n", + clocksource_tick.mult, clocksource_tick.shift); + + clocksource_register(&clocksource_tick); + + sparc64_clockevent.name = tick_ops->name; + + setup_clockevent_multiplier(freq); + + sparc64_clockevent.max_delta_ns = + clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent); + sparc64_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &sparc64_clockevent); + + printk("clockevent: mult[%lx] shift[%d]\n", + sparc64_clockevent.mult, sparc64_clockevent.shift); + + setup_sparc64_timer(); +} + +unsigned long long sched_clock(void) +{ + unsigned long ticks = tick_ops->get_tick(); + + return (ticks * timer_ticks_per_nsec_quotient) + >> SPARC64_NSEC_PER_CYC_SHIFT; +} + +int __devinit read_current_timer(unsigned long *timer_val) +{ + *timer_val = tick_ops->get_tick(); + return 0; +} diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S new file mode 100644 index 000000000000..da1b781b5e65 --- /dev/null +++ b/arch/sparc/kernel/trampoline_64.S @@ -0,0 +1,417 @@ +/* + * trampoline.S: Jump start slave processors on sparc64. + * + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + */ + +#include <linux/init.h> + +#include <asm/head.h> +#include <asm/asi.h> +#include <asm/lsu.h> +#include <asm/dcr.h> +#include <asm/dcu.h> +#include <asm/pstate.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/spitfire.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/mmu.h> +#include <asm/hypervisor.h> +#include <asm/cpudata.h> + + .data + .align 8 +call_method: + .asciz "call-method" + .align 8 +itlb_load: + .asciz "SUNW,itlb-load" + .align 8 +dtlb_load: + .asciz "SUNW,dtlb-load" + + /* XXX __cpuinit this thing XXX */ +#define TRAMP_STACK_SIZE 1024 + .align 16 +tramp_stack: + .skip TRAMP_STACK_SIZE + + __CPUINIT + .align 8 + .globl sparc64_cpu_startup, sparc64_cpu_startup_end +sparc64_cpu_startup: + BRANCH_IF_SUN4V(g1, niagara_startup) + BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) + BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) + + ba,pt %xcc, spitfire_startup + nop + +cheetah_plus_startup: + /* Preserve OBP chosen DCU and DCR register settings. */ + ba,pt %xcc, cheetah_generic_startup + nop + +cheetah_startup: + mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1 + wr %g1, %asr18 + + sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 + or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 + sllx %g5, 32, %g5 + or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 + stxa %g5, [%g0] ASI_DCU_CONTROL_REG + membar #Sync + /* fallthru */ + +cheetah_generic_startup: + mov TSB_EXTENSION_P, %g3 + stxa %g0, [%g3] ASI_DMMU + stxa %g0, [%g3] ASI_IMMU + membar #Sync + + mov TSB_EXTENSION_S, %g3 + stxa %g0, [%g3] ASI_DMMU + membar #Sync + + mov TSB_EXTENSION_N, %g3 + stxa %g0, [%g3] ASI_DMMU + stxa %g0, [%g3] ASI_IMMU + membar #Sync + /* fallthru */ + +niagara_startup: + /* Disable STICK_INT interrupts. */ + sethi %hi(0x80000000), %g5 + sllx %g5, 32, %g5 + wr %g5, %asr25 + + ba,pt %xcc, startup_continue + nop + +spitfire_startup: + mov (LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1 + stxa %g1, [%g0] ASI_LSU_CONTROL + membar #Sync + +startup_continue: + mov %o0, %l0 + BRANCH_IF_SUN4V(g1, niagara_lock_tlb) + + sethi %hi(0x80000000), %g2 + sllx %g2, 32, %g2 + wr %g2, 0, %tick_cmpr + + /* Call OBP by hand to lock KERNBASE into i/d tlbs. + * We lock 'num_kernel_image_mappings' consequetive entries. + */ + sethi %hi(prom_entry_lock), %g2 +1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 + brnz,pn %g1, 1b + nop + + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x10], %l2 + add %l2, -(192 + 128), %sp + flushw + + /* Setup the loop variables: + * %l3: VADDR base + * %l4: TTE base + * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings' + * %l6: Number of TTE entries to map + * %l7: Highest TTE entry number, we count down + */ + sethi %hi(KERNBASE), %l3 + sethi %hi(kern_locked_tte_data), %l4 + ldx [%l4 + %lo(kern_locked_tte_data)], %l4 + clr %l5 + sethi %hi(num_kernel_image_mappings), %l6 + lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 + add %l6, 1, %l6 + + mov 15, %l7 + BRANCH_IF_ANY_CHEETAH(g1,g5,2f) + + mov 63, %l7 +2: + +3: + /* Lock into I-MMU */ + sethi %hi(call_method), %g2 + or %g2, %lo(call_method), %g2 + stx %g2, [%sp + 2047 + 128 + 0x00] + mov 5, %g2 + stx %g2, [%sp + 2047 + 128 + 0x08] + mov 1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x10] + sethi %hi(itlb_load), %g2 + or %g2, %lo(itlb_load), %g2 + stx %g2, [%sp + 2047 + 128 + 0x18] + sethi %hi(prom_mmu_ihandle_cache), %g2 + lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 + stx %g2, [%sp + 2047 + 128 + 0x20] + + /* Each TTE maps 4MB, convert index to offset. */ + sllx %l5, 22, %g1 + + add %l3, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR + add %l4, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE + + /* TTE index is highest minus loop index. */ + sub %l7, %l5, %g2 + stx %g2, [%sp + 2047 + 128 + 0x38] + + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x08], %o1 + call %o1 + add %sp, (2047 + 128), %o0 + + /* Lock into D-MMU */ + sethi %hi(call_method), %g2 + or %g2, %lo(call_method), %g2 + stx %g2, [%sp + 2047 + 128 + 0x00] + mov 5, %g2 + stx %g2, [%sp + 2047 + 128 + 0x08] + mov 1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x10] + sethi %hi(dtlb_load), %g2 + or %g2, %lo(dtlb_load), %g2 + stx %g2, [%sp + 2047 + 128 + 0x18] + sethi %hi(prom_mmu_ihandle_cache), %g2 + lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 + stx %g2, [%sp + 2047 + 128 + 0x20] + + /* Each TTE maps 4MB, convert index to offset. */ + sllx %l5, 22, %g1 + + add %l3, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR + add %l4, %g1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE + + /* TTE index is highest minus loop index. */ + sub %l7, %l5, %g2 + stx %g2, [%sp + 2047 + 128 + 0x38] + + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x08], %o1 + call %o1 + add %sp, (2047 + 128), %o0 + + add %l5, 1, %l5 + cmp %l5, %l6 + bne,pt %xcc, 3b + nop + + sethi %hi(prom_entry_lock), %g2 + stb %g0, [%g2 + %lo(prom_entry_lock)] + + ba,pt %xcc, after_lock_tlb + nop + +niagara_lock_tlb: + sethi %hi(KERNBASE), %l3 + sethi %hi(kern_locked_tte_data), %l4 + ldx [%l4 + %lo(kern_locked_tte_data)], %l4 + clr %l5 + sethi %hi(num_kernel_image_mappings), %l6 + lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 + add %l6, 1, %l6 + +1: + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sllx %l5, 22, %g2 + add %l3, %g2, %o0 + clr %o1 + add %l4, %g2, %o2 + mov HV_MMU_IMMU, %o3 + ta HV_FAST_TRAP + + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + sllx %l5, 22, %g2 + add %l3, %g2, %o0 + clr %o1 + add %l4, %g2, %o2 + mov HV_MMU_DMMU, %o3 + ta HV_FAST_TRAP + + add %l5, 1, %l5 + cmp %l5, %l6 + bne,pt %xcc, 1b + nop + +after_lock_tlb: + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate + wr %g0, 0, %fprs + + wr %g0, ASI_P, %asi + + mov PRIMARY_CONTEXT, %g7 + +661: stxa %g0, [%g7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g0, [%g7] ASI_MMU + .previous + + membar #Sync + mov SECONDARY_CONTEXT, %g7 + +661: stxa %g0, [%g7] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g0, [%g7] ASI_MMU + .previous + + membar #Sync + + /* Everything we do here, until we properly take over the + * trap table, must be done with extreme care. We cannot + * make any references to %g6 (current thread pointer), + * %g4 (current task pointer), or %g5 (base of current cpu's + * per-cpu area) until we properly take over the trap table + * from the firmware and hypervisor. + * + * Get onto temporary stack which is in the locked kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp + mov 0, %fp + + /* Put garbage in these registers to trap any access to them. */ + set 0xdeadbeef, %g4 + set 0xdeadbeef, %g5 + set 0xdeadbeef, %g6 + + call init_irqwork_curcpu + nop + + sethi %hi(tlb_type), %g3 + lduw [%g3 + %lo(tlb_type)], %g2 + cmp %g2, 3 + bne,pt %icc, 1f + nop + + call hard_smp_processor_id + nop + + call sun4v_register_mondo_queues + nop + +1: call init_cur_cpu_trap + ldx [%l0], %o0 + + /* Start using proper page size encodings in ctx register. */ + sethi %hi(sparc64_kern_pri_context), %g3 + ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + membar #Sync + + wrpr %g0, 0, %wstate + + sethi %hi(prom_entry_lock), %g2 +1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 + brnz,pn %g1, 1b + nop + + /* As a hack, put &init_thread_union into %g6. + * prom_world() loads from here to restore the %asi + * register. + */ + sethi %hi(init_thread_union), %g6 + or %g6, %lo(init_thread_union), %g6 + + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 2f + nop + + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + /* Compute physical address: + * + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) + */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 + sethi %hi(sparc64_ttable_tl0), %o0 + + set prom_set_trap_table_name, %g2 + stx %g2, [%sp + 2047 + 128 + 0x00] + mov 2, %g2 + stx %g2, [%sp + 2047 + 128 + 0x08] + mov 0, %g2 + stx %g2, [%sp + 2047 + 128 + 0x10] + stx %o0, [%sp + 2047 + 128 + 0x18] + stx %o1, [%sp + 2047 + 128 + 0x20] + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x08], %o1 + call %o1 + add %sp, (2047 + 128), %o0 + + ba,pt %xcc, 3f + nop + +2: sethi %hi(sparc64_ttable_tl0), %o0 + set prom_set_trap_table_name, %g2 + stx %g2, [%sp + 2047 + 128 + 0x00] + mov 1, %g2 + stx %g2, [%sp + 2047 + 128 + 0x08] + mov 0, %g2 + stx %g2, [%sp + 2047 + 128 + 0x10] + stx %o0, [%sp + 2047 + 128 + 0x18] + sethi %hi(p1275buf), %g2 + or %g2, %lo(p1275buf), %g2 + ldx [%g2 + 0x08], %o1 + call %o1 + add %sp, (2047 + 128), %o0 + +3: sethi %hi(prom_entry_lock), %g2 + stb %g0, [%g2 + %lo(prom_entry_lock)] + + ldx [%l0], %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp + + rdpr %pstate, %o1 + or %o1, PSTATE_IE, %o1 + wrpr %o1, 0, %pstate + + call smp_callin + nop + call cpu_idle + mov 0, %o0 + call cpu_panic + nop +1: b,a,pt %xcc, 1b + + .align 8 +sparc64_cpu_startup_end: diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c new file mode 100644 index 000000000000..4638af2f55a0 --- /dev/null +++ b/arch/sparc/kernel/traps_64.c @@ -0,0 +1,2600 @@ +/* arch/sparc64/kernel/traps.c + * + * Copyright (C) 1995,1997,2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997,1999,2000 Jakub Jelinek (jakub@redhat.com) + */ + +/* + * I like traps on v9, :)))) + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/linkage.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/kdebug.h> + +#include <asm/smp.h> +#include <asm/delay.h> +#include <asm/system.h> +#include <asm/ptrace.h> +#include <asm/oplib.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/unistd.h> +#include <asm/uaccess.h> +#include <asm/fpumacro.h> +#include <asm/lsu.h> +#include <asm/dcu.h> +#include <asm/estate.h> +#include <asm/chafsr.h> +#include <asm/sfafsr.h> +#include <asm/psrcompat.h> +#include <asm/processor.h> +#include <asm/timer.h> +#include <asm/head.h> +#include <asm/prom.h> +#include <asm/memctrl.h> + +#include "entry.h" +#include "kstack.h" + +/* When an irrecoverable trap occurs at tl > 0, the trap entry + * code logs the trap state registers at every level in the trap + * stack. It is found at (pt_regs + sizeof(pt_regs)) and the layout + * is as follows: + */ +struct tl1_traplog { + struct { + unsigned long tstate; + unsigned long tpc; + unsigned long tnpc; + unsigned long tt; + } trapstack[4]; + unsigned long tl; +}; + +static void dump_tl1_traplog(struct tl1_traplog *p) +{ + int i, limit; + + printk(KERN_EMERG "TRAPLOG: Error at trap level 0x%lx, " + "dumping track stack.\n", p->tl); + + limit = (tlb_type == hypervisor) ? 2 : 4; + for (i = 0; i < limit; i++) { + printk(KERN_EMERG + "TRAPLOG: Trap level %d TSTATE[%016lx] TPC[%016lx] " + "TNPC[%016lx] TT[%lx]\n", + i + 1, + p->trapstack[i].tstate, p->trapstack[i].tpc, + p->trapstack[i].tnpc, p->trapstack[i].tt); + printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc); + } +} + +void bad_trap(struct pt_regs *regs, long lvl) +{ + char buffer[32]; + siginfo_t info; + + if (notify_die(DIE_TRAP, "bad trap", regs, + 0, lvl, SIGTRAP) == NOTIFY_STOP) + return; + + if (lvl < 0x100) { + sprintf(buffer, "Bad hw trap %lx at tl0\n", lvl); + die_if_kernel(buffer, regs); + } + + lvl -= 0x100; + if (regs->tstate & TSTATE_PRIV) { + sprintf(buffer, "Kernel bad sw trap %lx", lvl); + die_if_kernel(buffer, regs); + } + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLTRP; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = lvl; + force_sig_info(SIGILL, &info, current); +} + +void bad_trap_tl1(struct pt_regs *regs, long lvl) +{ + char buffer[32]; + + if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs, + 0, lvl, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + sprintf (buffer, "Bad trap %lx at tl>0", lvl); + die_if_kernel (buffer, regs); +} + +#ifdef CONFIG_DEBUG_BUGVERBOSE +void do_BUG(const char *file, int line) +{ + bust_spinlocks(1); + printk("kernel BUG at %s:%d!\n", file, line); +} +#endif + +static DEFINE_SPINLOCK(dimm_handler_lock); +static dimm_printer_t dimm_handler; + +static int sprintf_dimm(int synd_code, unsigned long paddr, char *buf, int buflen) +{ + unsigned long flags; + int ret = -ENODEV; + + spin_lock_irqsave(&dimm_handler_lock, flags); + if (dimm_handler) { + ret = dimm_handler(synd_code, paddr, buf, buflen); + } else if (tlb_type == spitfire) { + if (prom_getunumber(synd_code, paddr, buf, buflen) == -1) + ret = -EINVAL; + else + ret = 0; + } else + ret = -ENODEV; + spin_unlock_irqrestore(&dimm_handler_lock, flags); + + return ret; +} + +int register_dimm_printer(dimm_printer_t func) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&dimm_handler_lock, flags); + if (!dimm_handler) + dimm_handler = func; + else + ret = -EEXIST; + spin_unlock_irqrestore(&dimm_handler_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(register_dimm_printer); + +void unregister_dimm_printer(dimm_printer_t func) +{ + unsigned long flags; + + spin_lock_irqsave(&dimm_handler_lock, flags); + if (dimm_handler == func) + dimm_handler = NULL; + spin_unlock_irqrestore(&dimm_handler_lock, flags); +} +EXPORT_SYMBOL_GPL(unregister_dimm_printer); + +void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "instruction access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + printk("spitfire_insn_access_exception: SFSR[%016lx] " + "SFAR[%016lx], going.\n", sfsr, sfar); + die_if_kernel("Iax", regs); + } + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) +{ + if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + spitfire_insn_access_exception(regs, sfsr, sfar); +} + +void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + unsigned short type = (type_ctx >> 16); + unsigned short ctx = (type_ctx & 0xffff); + siginfo_t info; + + if (notify_die(DIE_TRAP, "instruction access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + printk("sun4v_insn_access_exception: ADDR[%016lx] " + "CTX[%04x] TYPE[%04x], going.\n", + addr, ctx, type); + die_if_kernel("Iax", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + sun4v_insn_access_exception(regs, addr, type_ctx); +} + +void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "data access exception", regs, + 0, 0x30, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + /* Test if this comes from uaccess places. */ + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + /* Ouch, somebody is trying VM hole tricks on us... */ +#ifdef DEBUG_EXCEPTIONS + printk("Exception: PC<%016lx> faddr<UNKNOWN>\n", regs->tpc); + printk("EX_TABLE: insn<%016lx> fixup<%016lx>\n", + regs->tpc, entry->fixup); +#endif + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + /* Shit... */ + printk("spitfire_data_access_exception: SFSR[%016lx] " + "SFAR[%016lx], going.\n", sfsr, sfar); + die_if_kernel("Dax", regs); + } + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *)sfar; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) +{ + if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, + 0, 0x30, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + spitfire_data_access_exception(regs, sfsr, sfar); +} + +void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + unsigned short type = (type_ctx >> 16); + unsigned short ctx = (type_ctx & 0xffff); + siginfo_t info; + + if (notify_die(DIE_TRAP, "data access exception", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + printk("sun4v_data_access_exception: ADDR[%016lx] " + "CTX[%04x] TYPE[%04x], going.\n", + addr, ctx, type); + die_if_kernel("Dax", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGSEGV, &info, current); +} + +void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, + 0, 0x8, SIGTRAP) == NOTIFY_STOP) + return; + + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + sun4v_data_access_exception(regs, addr, type_ctx); +} + +#ifdef CONFIG_PCI +#include "pci_impl.h" +#endif + +/* When access exceptions happen, we must do this. */ +static void spitfire_clean_and_reenable_l1_caches(void) +{ + unsigned long va; + + if (tlb_type != spitfire) + BUG(); + + /* Clean 'em. */ + for (va = 0; va < (PAGE_SIZE << 1); va += 32) { + spitfire_put_icache_tag(va, 0x0); + spitfire_put_dcache_tag(va, 0x0); + } + + /* Re-enable in LSU. */ + __asm__ __volatile__("flush %%g6\n\t" + "membar #Sync\n\t" + "stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (LSU_CONTROL_IC | LSU_CONTROL_DC | + LSU_CONTROL_IM | LSU_CONTROL_DM), + "i" (ASI_LSU_CONTROL) + : "memory"); +} + +static void spitfire_enable_estate_errors(void) +{ + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (ESTATE_ERR_ALL), + "i" (ASI_ESTATE_ERROR_EN)); +} + +static char ecc_syndrome_table[] = { + 0x4c, 0x40, 0x41, 0x48, 0x42, 0x48, 0x48, 0x49, + 0x43, 0x48, 0x48, 0x49, 0x48, 0x49, 0x49, 0x4a, + 0x44, 0x48, 0x48, 0x20, 0x48, 0x39, 0x4b, 0x48, + 0x48, 0x25, 0x31, 0x48, 0x28, 0x48, 0x48, 0x2c, + 0x45, 0x48, 0x48, 0x21, 0x48, 0x3d, 0x04, 0x48, + 0x48, 0x4b, 0x35, 0x48, 0x2d, 0x48, 0x48, 0x29, + 0x48, 0x00, 0x01, 0x48, 0x0a, 0x48, 0x48, 0x4b, + 0x0f, 0x48, 0x48, 0x4b, 0x48, 0x49, 0x49, 0x48, + 0x46, 0x48, 0x48, 0x2a, 0x48, 0x3b, 0x27, 0x48, + 0x48, 0x4b, 0x33, 0x48, 0x22, 0x48, 0x48, 0x2e, + 0x48, 0x19, 0x1d, 0x48, 0x1b, 0x4a, 0x48, 0x4b, + 0x1f, 0x48, 0x4a, 0x4b, 0x48, 0x4b, 0x4b, 0x48, + 0x48, 0x4b, 0x24, 0x48, 0x07, 0x48, 0x48, 0x36, + 0x4b, 0x48, 0x48, 0x3e, 0x48, 0x30, 0x38, 0x48, + 0x49, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x16, 0x48, + 0x48, 0x12, 0x4b, 0x48, 0x49, 0x48, 0x48, 0x4b, + 0x47, 0x48, 0x48, 0x2f, 0x48, 0x3f, 0x4b, 0x48, + 0x48, 0x06, 0x37, 0x48, 0x23, 0x48, 0x48, 0x2b, + 0x48, 0x05, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x32, + 0x26, 0x48, 0x48, 0x3a, 0x48, 0x34, 0x3c, 0x48, + 0x48, 0x11, 0x15, 0x48, 0x13, 0x4a, 0x48, 0x4b, + 0x17, 0x48, 0x4a, 0x4b, 0x48, 0x4b, 0x4b, 0x48, + 0x49, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x1e, 0x48, + 0x48, 0x1a, 0x4b, 0x48, 0x49, 0x48, 0x48, 0x4b, + 0x48, 0x08, 0x0d, 0x48, 0x02, 0x48, 0x48, 0x49, + 0x03, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x4b, 0x48, + 0x49, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x10, 0x48, + 0x48, 0x14, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x4b, + 0x49, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x18, 0x48, + 0x48, 0x1c, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x4b, + 0x4a, 0x0c, 0x09, 0x48, 0x0e, 0x48, 0x48, 0x4b, + 0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a +}; + +static char *syndrome_unknown = "<Unknown>"; + +static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit) +{ + unsigned short scode; + char memmod_str[64], *p; + + if (udbl & bit) { + scode = ecc_syndrome_table[udbl & 0xff]; + if (sprintf_dimm(scode, afar, memmod_str, sizeof(memmod_str)) < 0) + p = syndrome_unknown; + else + p = memmod_str; + printk(KERN_WARNING "CPU[%d]: UDBL Syndrome[%x] " + "Memory Module \"%s\"\n", + smp_processor_id(), scode, p); + } + + if (udbh & bit) { + scode = ecc_syndrome_table[udbh & 0xff]; + if (sprintf_dimm(scode, afar, memmod_str, sizeof(memmod_str)) < 0) + p = syndrome_unknown; + else + p = memmod_str; + printk(KERN_WARNING "CPU[%d]: UDBH Syndrome[%x] " + "Memory Module \"%s\"\n", + smp_processor_id(), scode, p); + } + +} + +static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs) +{ + + printk(KERN_WARNING "CPU[%d]: Correctable ECC Error " + "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n", + smp_processor_id(), afsr, afar, udbl, udbh, tl1); + + spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE); + + /* We always log it, even if someone is listening for this + * trap. + */ + notify_die(DIE_TRAP, "Correctable ECC Error", regs, + 0, TRAP_TYPE_CEE, SIGTRAP); + + /* The Correctable ECC Error trap does not disable I/D caches. So + * we only have to restore the ESTATE Error Enable register. + */ + spitfire_enable_estate_errors(); +} + +static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs) +{ + siginfo_t info; + + printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] " + "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n", + smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1); + + /* XXX add more human friendly logging of the error status + * XXX as is implemented for cheetah + */ + + spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE); + + /* We always log it, even if someone is listening for this + * trap. + */ + notify_die(DIE_TRAP, "Uncorrectable Error", regs, + 0, tt, SIGTRAP); + + if (regs->tstate & TSTATE_PRIV) { + if (tl1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("UE", regs); + } + + /* XXX need more intelligent processing here, such as is implemented + * XXX for cheetah errors, in fact if the E-cache still holds the + * XXX line with bad parity this will loop + */ + + spitfire_clean_and_reenable_l1_caches(); + spitfire_enable_estate_errors(); + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_OBJERR; + info.si_addr = (void *)0; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + +void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar) +{ + unsigned long afsr, tt, udbh, udbl; + int tl1; + + afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT; + tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT; + tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0; + udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT; + udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT; + +#ifdef CONFIG_PCI + if (tt == TRAP_TYPE_DAE && + pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) { + spitfire_clean_and_reenable_l1_caches(); + spitfire_enable_estate_errors(); + + pci_poke_faulted = 1; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + if (afsr & SFAFSR_UE) + spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs); + + if (tt == TRAP_TYPE_CEE) { + /* Handle the case where we took a CEE trap, but ACK'd + * only the UE state in the UDB error registers. + */ + if (afsr & SFAFSR_UE) { + if (udbh & UDBE_CE) { + __asm__ __volatile__( + "stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (udbh & UDBE_CE), + "r" (0x0), "i" (ASI_UDB_ERROR_W)); + } + if (udbl & UDBE_CE) { + __asm__ __volatile__( + "stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (udbl & UDBE_CE), + "r" (0x18), "i" (ASI_UDB_ERROR_W)); + } + } + + spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs); + } +} + +int cheetah_pcache_forced_on; + +void cheetah_enable_pcache(void) +{ + unsigned long dcr; + + printk("CHEETAH: Enabling P-Cache on cpu %d.\n", + smp_processor_id()); + + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (dcr) + : "i" (ASI_DCU_CONTROL_REG)); + dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL); + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (dcr), "i" (ASI_DCU_CONTROL_REG)); +} + +/* Cheetah error trap handling. */ +static unsigned long ecache_flush_physbase; +static unsigned long ecache_flush_linesize; +static unsigned long ecache_flush_size; + +/* This table is ordered in priority of errors and matches the + * AFAR overwrite policy as well. + */ + +struct afsr_error_table { + unsigned long mask; + const char *name; +}; + +static const char CHAFSR_PERR_msg[] = + "System interface protocol error"; +static const char CHAFSR_IERR_msg[] = + "Internal processor error"; +static const char CHAFSR_ISAP_msg[] = + "System request parity error on incoming addresss"; +static const char CHAFSR_UCU_msg[] = + "Uncorrectable E-cache ECC error for ifetch/data"; +static const char CHAFSR_UCC_msg[] = + "SW Correctable E-cache ECC error for ifetch/data"; +static const char CHAFSR_UE_msg[] = + "Uncorrectable system bus data ECC error for read"; +static const char CHAFSR_EDU_msg[] = + "Uncorrectable E-cache ECC error for stmerge/blkld"; +static const char CHAFSR_EMU_msg[] = + "Uncorrectable system bus MTAG error"; +static const char CHAFSR_WDU_msg[] = + "Uncorrectable E-cache ECC error for writeback"; +static const char CHAFSR_CPU_msg[] = + "Uncorrectable ECC error for copyout"; +static const char CHAFSR_CE_msg[] = + "HW corrected system bus data ECC error for read"; +static const char CHAFSR_EDC_msg[] = + "HW corrected E-cache ECC error for stmerge/blkld"; +static const char CHAFSR_EMC_msg[] = + "HW corrected system bus MTAG ECC error"; +static const char CHAFSR_WDC_msg[] = + "HW corrected E-cache ECC error for writeback"; +static const char CHAFSR_CPC_msg[] = + "HW corrected ECC error for copyout"; +static const char CHAFSR_TO_msg[] = + "Unmapped error from system bus"; +static const char CHAFSR_BERR_msg[] = + "Bus error response from system bus"; +static const char CHAFSR_IVC_msg[] = + "HW corrected system bus data ECC error for ivec read"; +static const char CHAFSR_IVU_msg[] = + "Uncorrectable system bus data ECC error for ivec read"; +static struct afsr_error_table __cheetah_error_table[] = { + { CHAFSR_PERR, CHAFSR_PERR_msg }, + { CHAFSR_IERR, CHAFSR_IERR_msg }, + { CHAFSR_ISAP, CHAFSR_ISAP_msg }, + { CHAFSR_UCU, CHAFSR_UCU_msg }, + { CHAFSR_UCC, CHAFSR_UCC_msg }, + { CHAFSR_UE, CHAFSR_UE_msg }, + { CHAFSR_EDU, CHAFSR_EDU_msg }, + { CHAFSR_EMU, CHAFSR_EMU_msg }, + { CHAFSR_WDU, CHAFSR_WDU_msg }, + { CHAFSR_CPU, CHAFSR_CPU_msg }, + { CHAFSR_CE, CHAFSR_CE_msg }, + { CHAFSR_EDC, CHAFSR_EDC_msg }, + { CHAFSR_EMC, CHAFSR_EMC_msg }, + { CHAFSR_WDC, CHAFSR_WDC_msg }, + { CHAFSR_CPC, CHAFSR_CPC_msg }, + { CHAFSR_TO, CHAFSR_TO_msg }, + { CHAFSR_BERR, CHAFSR_BERR_msg }, + /* These two do not update the AFAR. */ + { CHAFSR_IVC, CHAFSR_IVC_msg }, + { CHAFSR_IVU, CHAFSR_IVU_msg }, + { 0, NULL }, +}; +static const char CHPAFSR_DTO_msg[] = + "System bus unmapped error for prefetch/storequeue-read"; +static const char CHPAFSR_DBERR_msg[] = + "System bus error for prefetch/storequeue-read"; +static const char CHPAFSR_THCE_msg[] = + "Hardware corrected E-cache Tag ECC error"; +static const char CHPAFSR_TSCE_msg[] = + "SW handled correctable E-cache Tag ECC error"; +static const char CHPAFSR_TUE_msg[] = + "Uncorrectable E-cache Tag ECC error"; +static const char CHPAFSR_DUE_msg[] = + "System bus uncorrectable data ECC error due to prefetch/store-fill"; +static struct afsr_error_table __cheetah_plus_error_table[] = { + { CHAFSR_PERR, CHAFSR_PERR_msg }, + { CHAFSR_IERR, CHAFSR_IERR_msg }, + { CHAFSR_ISAP, CHAFSR_ISAP_msg }, + { CHAFSR_UCU, CHAFSR_UCU_msg }, + { CHAFSR_UCC, CHAFSR_UCC_msg }, + { CHAFSR_UE, CHAFSR_UE_msg }, + { CHAFSR_EDU, CHAFSR_EDU_msg }, + { CHAFSR_EMU, CHAFSR_EMU_msg }, + { CHAFSR_WDU, CHAFSR_WDU_msg }, + { CHAFSR_CPU, CHAFSR_CPU_msg }, + { CHAFSR_CE, CHAFSR_CE_msg }, + { CHAFSR_EDC, CHAFSR_EDC_msg }, + { CHAFSR_EMC, CHAFSR_EMC_msg }, + { CHAFSR_WDC, CHAFSR_WDC_msg }, + { CHAFSR_CPC, CHAFSR_CPC_msg }, + { CHAFSR_TO, CHAFSR_TO_msg }, + { CHAFSR_BERR, CHAFSR_BERR_msg }, + { CHPAFSR_DTO, CHPAFSR_DTO_msg }, + { CHPAFSR_DBERR, CHPAFSR_DBERR_msg }, + { CHPAFSR_THCE, CHPAFSR_THCE_msg }, + { CHPAFSR_TSCE, CHPAFSR_TSCE_msg }, + { CHPAFSR_TUE, CHPAFSR_TUE_msg }, + { CHPAFSR_DUE, CHPAFSR_DUE_msg }, + /* These two do not update the AFAR. */ + { CHAFSR_IVC, CHAFSR_IVC_msg }, + { CHAFSR_IVU, CHAFSR_IVU_msg }, + { 0, NULL }, +}; +static const char JPAFSR_JETO_msg[] = + "System interface protocol error, hw timeout caused"; +static const char JPAFSR_SCE_msg[] = + "Parity error on system snoop results"; +static const char JPAFSR_JEIC_msg[] = + "System interface protocol error, illegal command detected"; +static const char JPAFSR_JEIT_msg[] = + "System interface protocol error, illegal ADTYPE detected"; +static const char JPAFSR_OM_msg[] = + "Out of range memory error has occurred"; +static const char JPAFSR_ETP_msg[] = + "Parity error on L2 cache tag SRAM"; +static const char JPAFSR_UMS_msg[] = + "Error due to unsupported store"; +static const char JPAFSR_RUE_msg[] = + "Uncorrectable ECC error from remote cache/memory"; +static const char JPAFSR_RCE_msg[] = + "Correctable ECC error from remote cache/memory"; +static const char JPAFSR_BP_msg[] = + "JBUS parity error on returned read data"; +static const char JPAFSR_WBP_msg[] = + "JBUS parity error on data for writeback or block store"; +static const char JPAFSR_FRC_msg[] = + "Foreign read to DRAM incurring correctable ECC error"; +static const char JPAFSR_FRU_msg[] = + "Foreign read to DRAM incurring uncorrectable ECC error"; +static struct afsr_error_table __jalapeno_error_table[] = { + { JPAFSR_JETO, JPAFSR_JETO_msg }, + { JPAFSR_SCE, JPAFSR_SCE_msg }, + { JPAFSR_JEIC, JPAFSR_JEIC_msg }, + { JPAFSR_JEIT, JPAFSR_JEIT_msg }, + { CHAFSR_PERR, CHAFSR_PERR_msg }, + { CHAFSR_IERR, CHAFSR_IERR_msg }, + { CHAFSR_ISAP, CHAFSR_ISAP_msg }, + { CHAFSR_UCU, CHAFSR_UCU_msg }, + { CHAFSR_UCC, CHAFSR_UCC_msg }, + { CHAFSR_UE, CHAFSR_UE_msg }, + { CHAFSR_EDU, CHAFSR_EDU_msg }, + { JPAFSR_OM, JPAFSR_OM_msg }, + { CHAFSR_WDU, CHAFSR_WDU_msg }, + { CHAFSR_CPU, CHAFSR_CPU_msg }, + { CHAFSR_CE, CHAFSR_CE_msg }, + { CHAFSR_EDC, CHAFSR_EDC_msg }, + { JPAFSR_ETP, JPAFSR_ETP_msg }, + { CHAFSR_WDC, CHAFSR_WDC_msg }, + { CHAFSR_CPC, CHAFSR_CPC_msg }, + { CHAFSR_TO, CHAFSR_TO_msg }, + { CHAFSR_BERR, CHAFSR_BERR_msg }, + { JPAFSR_UMS, JPAFSR_UMS_msg }, + { JPAFSR_RUE, JPAFSR_RUE_msg }, + { JPAFSR_RCE, JPAFSR_RCE_msg }, + { JPAFSR_BP, JPAFSR_BP_msg }, + { JPAFSR_WBP, JPAFSR_WBP_msg }, + { JPAFSR_FRC, JPAFSR_FRC_msg }, + { JPAFSR_FRU, JPAFSR_FRU_msg }, + /* These two do not update the AFAR. */ + { CHAFSR_IVU, CHAFSR_IVU_msg }, + { 0, NULL }, +}; +static struct afsr_error_table *cheetah_error_table; +static unsigned long cheetah_afsr_errors; + +struct cheetah_err_info *cheetah_error_log; + +static inline struct cheetah_err_info *cheetah_get_error_log(unsigned long afsr) +{ + struct cheetah_err_info *p; + int cpu = smp_processor_id(); + + if (!cheetah_error_log) + return NULL; + + p = cheetah_error_log + (cpu * 2); + if ((afsr & CHAFSR_TL1) != 0UL) + p++; + + return p; +} + +extern unsigned int tl0_icpe[], tl1_icpe[]; +extern unsigned int tl0_dcpe[], tl1_dcpe[]; +extern unsigned int tl0_fecc[], tl1_fecc[]; +extern unsigned int tl0_cee[], tl1_cee[]; +extern unsigned int tl0_iae[], tl1_iae[]; +extern unsigned int tl0_dae[], tl1_dae[]; +extern unsigned int cheetah_plus_icpe_trap_vector[], cheetah_plus_icpe_trap_vector_tl1[]; +extern unsigned int cheetah_plus_dcpe_trap_vector[], cheetah_plus_dcpe_trap_vector_tl1[]; +extern unsigned int cheetah_fecc_trap_vector[], cheetah_fecc_trap_vector_tl1[]; +extern unsigned int cheetah_cee_trap_vector[], cheetah_cee_trap_vector_tl1[]; +extern unsigned int cheetah_deferred_trap_vector[], cheetah_deferred_trap_vector_tl1[]; + +void __init cheetah_ecache_flush_init(void) +{ + unsigned long largest_size, smallest_linesize, order, ver; + int i, sz; + + /* Scan all cpu device tree nodes, note two values: + * 1) largest E-cache size + * 2) smallest E-cache line size + */ + largest_size = 0UL; + smallest_linesize = ~0UL; + + for (i = 0; i < NR_CPUS; i++) { + unsigned long val; + + val = cpu_data(i).ecache_size; + if (!val) + continue; + + if (val > largest_size) + largest_size = val; + + val = cpu_data(i).ecache_line_size; + if (val < smallest_linesize) + smallest_linesize = val; + + } + + if (largest_size == 0UL || smallest_linesize == ~0UL) { + prom_printf("cheetah_ecache_flush_init: Cannot probe cpu E-cache " + "parameters.\n"); + prom_halt(); + } + + ecache_flush_size = (2 * largest_size); + ecache_flush_linesize = smallest_linesize; + + ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size); + + if (ecache_flush_physbase == ~0UL) { + prom_printf("cheetah_ecache_flush_init: Cannot find %d byte " + "contiguous physical memory.\n", + ecache_flush_size); + prom_halt(); + } + + /* Now allocate error trap reporting scoreboard. */ + sz = NR_CPUS * (2 * sizeof(struct cheetah_err_info)); + for (order = 0; order < MAX_ORDER; order++) { + if ((PAGE_SIZE << order) >= sz) + break; + } + cheetah_error_log = (struct cheetah_err_info *) + __get_free_pages(GFP_KERNEL, order); + if (!cheetah_error_log) { + prom_printf("cheetah_ecache_flush_init: Failed to allocate " + "error logging scoreboard (%d bytes).\n", sz); + prom_halt(); + } + memset(cheetah_error_log, 0, PAGE_SIZE << order); + + /* Mark all AFSRs as invalid so that the trap handler will + * log new new information there. + */ + for (i = 0; i < 2 * NR_CPUS; i++) + cheetah_error_log[i].afsr = CHAFSR_INVALID; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID) { + cheetah_error_table = &__jalapeno_error_table[0]; + cheetah_afsr_errors = JPAFSR_ERRORS; + } else if ((ver >> 32) == 0x003e0015) { + cheetah_error_table = &__cheetah_plus_error_table[0]; + cheetah_afsr_errors = CHPAFSR_ERRORS; + } else { + cheetah_error_table = &__cheetah_error_table[0]; + cheetah_afsr_errors = CHAFSR_ERRORS; + } + + /* Now patch trap tables. */ + memcpy(tl0_fecc, cheetah_fecc_trap_vector, (8 * 4)); + memcpy(tl1_fecc, cheetah_fecc_trap_vector_tl1, (8 * 4)); + memcpy(tl0_cee, cheetah_cee_trap_vector, (8 * 4)); + memcpy(tl1_cee, cheetah_cee_trap_vector_tl1, (8 * 4)); + memcpy(tl0_iae, cheetah_deferred_trap_vector, (8 * 4)); + memcpy(tl1_iae, cheetah_deferred_trap_vector_tl1, (8 * 4)); + memcpy(tl0_dae, cheetah_deferred_trap_vector, (8 * 4)); + memcpy(tl1_dae, cheetah_deferred_trap_vector_tl1, (8 * 4)); + if (tlb_type == cheetah_plus) { + memcpy(tl0_dcpe, cheetah_plus_dcpe_trap_vector, (8 * 4)); + memcpy(tl1_dcpe, cheetah_plus_dcpe_trap_vector_tl1, (8 * 4)); + memcpy(tl0_icpe, cheetah_plus_icpe_trap_vector, (8 * 4)); + memcpy(tl1_icpe, cheetah_plus_icpe_trap_vector_tl1, (8 * 4)); + } + flushi(PAGE_OFFSET); +} + +static void cheetah_flush_ecache(void) +{ + unsigned long flush_base = ecache_flush_physbase; + unsigned long flush_linesize = ecache_flush_linesize; + unsigned long flush_size = ecache_flush_size; + + __asm__ __volatile__("1: subcc %0, %4, %0\n\t" + " bne,pt %%xcc, 1b\n\t" + " ldxa [%2 + %0] %3, %%g0\n\t" + : "=&r" (flush_size) + : "0" (flush_size), "r" (flush_base), + "i" (ASI_PHYS_USE_EC), "r" (flush_linesize)); +} + +static void cheetah_flush_ecache_line(unsigned long physaddr) +{ + unsigned long alias; + + physaddr &= ~(8UL - 1UL); + physaddr = (ecache_flush_physbase + + (physaddr & ((ecache_flush_size>>1UL) - 1UL))); + alias = physaddr + (ecache_flush_size >> 1UL); + __asm__ __volatile__("ldxa [%0] %2, %%g0\n\t" + "ldxa [%1] %2, %%g0\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (physaddr), "r" (alias), + "i" (ASI_PHYS_USE_EC)); +} + +/* Unfortunately, the diagnostic access to the I-cache tags we need to + * use to clear the thing interferes with I-cache coherency transactions. + * + * So we must only flush the I-cache when it is disabled. + */ +static void __cheetah_flush_icache(void) +{ + unsigned int icache_size, icache_line_size; + unsigned long addr; + + icache_size = local_cpu_data().icache_size; + icache_line_size = local_cpu_data().icache_line_size; + + /* Clear the valid bits in all the tags. */ + for (addr = 0; addr < icache_size; addr += icache_line_size) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (addr | (2 << 3)), + "i" (ASI_IC_TAG)); + } +} + +static void cheetah_flush_icache(void) +{ + unsigned long dcu_save; + + /* Save current DCU, disable I-cache. */ + __asm__ __volatile__("ldxa [%%g0] %1, %0\n\t" + "or %0, %2, %%g1\n\t" + "stxa %%g1, [%%g0] %1\n\t" + "membar #Sync" + : "=r" (dcu_save) + : "i" (ASI_DCU_CONTROL_REG), "i" (DCU_IC) + : "g1"); + + __cheetah_flush_icache(); + + /* Restore DCU register */ + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (dcu_save), "i" (ASI_DCU_CONTROL_REG)); +} + +static void cheetah_flush_dcache(void) +{ + unsigned int dcache_size, dcache_line_size; + unsigned long addr; + + dcache_size = local_cpu_data().dcache_size; + dcache_line_size = local_cpu_data().dcache_line_size; + + for (addr = 0; addr < dcache_size; addr += dcache_line_size) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (addr), "i" (ASI_DCACHE_TAG)); + } +} + +/* In order to make the even parity correct we must do two things. + * First, we clear DC_data_parity and set DC_utag to an appropriate value. + * Next, we clear out all 32-bytes of data for that line. Data of + * all-zero + tag parity value of zero == correct parity. + */ +static void cheetah_plus_zap_dcache_parity(void) +{ + unsigned int dcache_size, dcache_line_size; + unsigned long addr; + + dcache_size = local_cpu_data().dcache_size; + dcache_line_size = local_cpu_data().dcache_line_size; + + for (addr = 0; addr < dcache_size; addr += dcache_line_size) { + unsigned long tag = (addr >> 14); + unsigned long line; + + __asm__ __volatile__("membar #Sync\n\t" + "stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (tag), "r" (addr), + "i" (ASI_DCACHE_UTAG)); + for (line = addr; line < addr + dcache_line_size; line += 8) + __asm__ __volatile__("membar #Sync\n\t" + "stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (line), + "i" (ASI_DCACHE_DATA)); + } +} + +/* Conversion tables used to frob Cheetah AFSR syndrome values into + * something palatable to the memory controller driver get_unumber + * routine. + */ +#define MT0 137 +#define MT1 138 +#define MT2 139 +#define NONE 254 +#define MTC0 140 +#define MTC1 141 +#define MTC2 142 +#define MTC3 143 +#define C0 128 +#define C1 129 +#define C2 130 +#define C3 131 +#define C4 132 +#define C5 133 +#define C6 134 +#define C7 135 +#define C8 136 +#define M2 144 +#define M3 145 +#define M4 146 +#define M 147 +static unsigned char cheetah_ecc_syntab[] = { +/*00*/NONE, C0, C1, M2, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, +/*01*/C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, +/*02*/C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, +/*03*/M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, +/*04*/C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, +/*05*/M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, +/*06*/M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, +/*07*/116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, +/*08*/C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, +/*09*/M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, +/*0a*/M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, +/*0b*/103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, +/*0c*/M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, +/*0d*/102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, +/*0e*/98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, +/*0f*/M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, +/*10*/C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, +/*11*/M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, +/*12*/M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, +/*13*/94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, +/*14*/M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, +/*15*/89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, +/*16*/86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, +/*17*/M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, +/*18*/M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4, +/*19*/77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, +/*1a*/74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, +/*1b*/M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, +/*1c*/80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, +/*1d*/M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, +/*1e*/M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, +/*1f*/111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M +}; +static unsigned char cheetah_mtag_syntab[] = { + NONE, MTC0, + MTC1, NONE, + MTC2, NONE, + NONE, MT0, + MTC3, NONE, + NONE, MT1, + NONE, MT2, + NONE, NONE +}; + +/* Return the highest priority error conditon mentioned. */ +static inline unsigned long cheetah_get_hipri(unsigned long afsr) +{ + unsigned long tmp = 0; + int i; + + for (i = 0; cheetah_error_table[i].mask; i++) { + if ((tmp = (afsr & cheetah_error_table[i].mask)) != 0UL) + return tmp; + } + return tmp; +} + +static const char *cheetah_get_string(unsigned long bit) +{ + int i; + + for (i = 0; cheetah_error_table[i].mask; i++) { + if ((bit & cheetah_error_table[i].mask) != 0UL) + return cheetah_error_table[i].name; + } + return "???"; +} + +static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *info, + unsigned long afsr, unsigned long afar, int recoverable) +{ + unsigned long hipri; + char unum[256]; + + printk("%s" "ERROR(%d): Cheetah error trap taken afsr[%016lx] afar[%016lx] TL1(%d)\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + afsr, afar, + (afsr & CHAFSR_TL1) ? 1 : 0); + printk("%s" "ERROR(%d): TPC[%lx] TNPC[%lx] O7[%lx] TSTATE[%lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate); + printk("%s" "ERROR(%d): ", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id()); + printk("TPC<%pS>\n", (void *) regs->tpc); + printk("%s" "ERROR(%d): M_SYND(%lx), E_SYND(%lx)%s%s\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT, + (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT, + (afsr & CHAFSR_ME) ? ", Multiple Errors" : "", + (afsr & CHAFSR_PRIV) ? ", Privileged" : ""); + hipri = cheetah_get_hipri(afsr); + printk("%s" "ERROR(%d): Highest priority error (%016lx) \"%s\"\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + hipri, cheetah_get_string(hipri)); + + /* Try to get unumber if relevant. */ +#define ESYND_ERRORS (CHAFSR_IVC | CHAFSR_IVU | \ + CHAFSR_CPC | CHAFSR_CPU | \ + CHAFSR_UE | CHAFSR_CE | \ + CHAFSR_EDC | CHAFSR_EDU | \ + CHAFSR_UCC | CHAFSR_UCU | \ + CHAFSR_WDU | CHAFSR_WDC) +#define MSYND_ERRORS (CHAFSR_EMC | CHAFSR_EMU) + if (afsr & ESYND_ERRORS) { + int syndrome; + int ret; + + syndrome = (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT; + syndrome = cheetah_ecc_syntab[syndrome]; + ret = sprintf_dimm(syndrome, afar, unum, sizeof(unum)); + if (ret != -1) + printk("%s" "ERROR(%d): AFAR E-syndrome [%s]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), + smp_processor_id(), unum); + } else if (afsr & MSYND_ERRORS) { + int syndrome; + int ret; + + syndrome = (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT; + syndrome = cheetah_mtag_syntab[syndrome]; + ret = sprintf_dimm(syndrome, afar, unum, sizeof(unum)); + if (ret != -1) + printk("%s" "ERROR(%d): AFAR M-syndrome [%s]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), + smp_processor_id(), unum); + } + + /* Now dump the cache snapshots. */ + printk("%s" "ERROR(%d): D-cache idx[%x] tag[%016lx] utag[%016lx] stag[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + (int) info->dcache_index, + info->dcache_tag, + info->dcache_utag, + info->dcache_stag); + printk("%s" "ERROR(%d): D-cache data0[%016lx] data1[%016lx] data2[%016lx] data3[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + info->dcache_data[0], + info->dcache_data[1], + info->dcache_data[2], + info->dcache_data[3]); + printk("%s" "ERROR(%d): I-cache idx[%x] tag[%016lx] utag[%016lx] stag[%016lx] " + "u[%016lx] l[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + (int) info->icache_index, + info->icache_tag, + info->icache_utag, + info->icache_stag, + info->icache_upper, + info->icache_lower); + printk("%s" "ERROR(%d): I-cache INSN0[%016lx] INSN1[%016lx] INSN2[%016lx] INSN3[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + info->icache_data[0], + info->icache_data[1], + info->icache_data[2], + info->icache_data[3]); + printk("%s" "ERROR(%d): I-cache INSN4[%016lx] INSN5[%016lx] INSN6[%016lx] INSN7[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + info->icache_data[4], + info->icache_data[5], + info->icache_data[6], + info->icache_data[7]); + printk("%s" "ERROR(%d): E-cache idx[%x] tag[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + (int) info->ecache_index, info->ecache_tag); + printk("%s" "ERROR(%d): E-cache data0[%016lx] data1[%016lx] data2[%016lx] data3[%016lx]\n", + (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), + info->ecache_data[0], + info->ecache_data[1], + info->ecache_data[2], + info->ecache_data[3]); + + afsr = (afsr & ~hipri) & cheetah_afsr_errors; + while (afsr != 0UL) { + unsigned long bit = cheetah_get_hipri(afsr); + + printk("%s" "ERROR: Multiple-error (%016lx) \"%s\"\n", + (recoverable ? KERN_WARNING : KERN_CRIT), + bit, cheetah_get_string(bit)); + + afsr &= ~bit; + } + + if (!recoverable) + printk(KERN_CRIT "ERROR: This condition is not recoverable.\n"); +} + +static int cheetah_recheck_errors(struct cheetah_err_info *logp) +{ + unsigned long afsr, afar; + int ret = 0; + + __asm__ __volatile__("ldxa [%%g0] %1, %0\n\t" + : "=r" (afsr) + : "i" (ASI_AFSR)); + if ((afsr & cheetah_afsr_errors) != 0) { + if (logp != NULL) { + __asm__ __volatile__("ldxa [%%g0] %1, %0\n\t" + : "=r" (afar) + : "i" (ASI_AFAR)); + logp->afsr = afsr; + logp->afar = afar; + } + ret = 1; + } + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync\n\t" + : : "r" (afsr), "i" (ASI_AFSR)); + + return ret; +} + +void cheetah_fecc_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar) +{ + struct cheetah_err_info local_snapshot, *p; + int recoverable; + + /* Flush E-cache */ + cheetah_flush_ecache(); + + p = cheetah_get_error_log(afsr); + if (!p) { + prom_printf("ERROR: Early Fast-ECC error afsr[%016lx] afar[%016lx]\n", + afsr, afar); + prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n", + smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate); + prom_halt(); + } + + /* Grab snapshot of logged error. */ + memcpy(&local_snapshot, p, sizeof(local_snapshot)); + + /* If the current trap snapshot does not match what the + * trap handler passed along into our args, big trouble. + * In such a case, mark the local copy as invalid. + * + * Else, it matches and we mark the afsr in the non-local + * copy as invalid so we may log new error traps there. + */ + if (p->afsr != afsr || p->afar != afar) + local_snapshot.afsr = CHAFSR_INVALID; + else + p->afsr = CHAFSR_INVALID; + + cheetah_flush_icache(); + cheetah_flush_dcache(); + + /* Re-enable I-cache/D-cache */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_DCU_CONTROL_REG), + "i" (DCU_DC | DCU_IC) + : "g1"); + + /* Re-enable error reporting */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_ESTATE_ERROR_EN), + "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN) + : "g1"); + + /* Decide if we can continue after handling this trap and + * logging the error. + */ + recoverable = 1; + if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP)) + recoverable = 0; + + /* Re-check AFSR/AFAR. What we are looking for here is whether a new + * error was logged while we had error reporting traps disabled. + */ + if (cheetah_recheck_errors(&local_snapshot)) { + unsigned long new_afsr = local_snapshot.afsr; + + /* If we got a new asynchronous error, die... */ + if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU | + CHAFSR_WDU | CHAFSR_CPU | + CHAFSR_IVU | CHAFSR_UE | + CHAFSR_BERR | CHAFSR_TO)) + recoverable = 0; + } + + /* Log errors. */ + cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable); + + if (!recoverable) + panic("Irrecoverable Fast-ECC error trap.\n"); + + /* Flush E-cache to kick the error trap handlers out. */ + cheetah_flush_ecache(); +} + +/* Try to fix a correctable error by pushing the line out from + * the E-cache. Recheck error reporting registers to see if the + * problem is intermittent. + */ +static int cheetah_fix_ce(unsigned long physaddr) +{ + unsigned long orig_estate; + unsigned long alias1, alias2; + int ret; + + /* Make sure correctable error traps are disabled. */ + __asm__ __volatile__("ldxa [%%g0] %2, %0\n\t" + "andn %0, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %2\n\t" + "membar #Sync" + : "=&r" (orig_estate) + : "i" (ESTATE_ERROR_CEEN), + "i" (ASI_ESTATE_ERROR_EN) + : "g1"); + + /* We calculate alias addresses that will force the + * cache line in question out of the E-cache. Then + * we bring it back in with an atomic instruction so + * that we get it in some modified/exclusive state, + * then we displace it again to try and get proper ECC + * pushed back into the system. + */ + physaddr &= ~(8UL - 1UL); + alias1 = (ecache_flush_physbase + + (physaddr & ((ecache_flush_size >> 1) - 1))); + alias2 = alias1 + (ecache_flush_size >> 1); + __asm__ __volatile__("ldxa [%0] %3, %%g0\n\t" + "ldxa [%1] %3, %%g0\n\t" + "casxa [%2] %3, %%g0, %%g0\n\t" + "ldxa [%0] %3, %%g0\n\t" + "ldxa [%1] %3, %%g0\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (alias1), "r" (alias2), + "r" (physaddr), "i" (ASI_PHYS_USE_EC)); + + /* Did that trigger another error? */ + if (cheetah_recheck_errors(NULL)) { + /* Try one more time. */ + __asm__ __volatile__("ldxa [%0] %1, %%g0\n\t" + "membar #Sync" + : : "r" (physaddr), "i" (ASI_PHYS_USE_EC)); + if (cheetah_recheck_errors(NULL)) + ret = 2; + else + ret = 1; + } else { + /* No new error, intermittent problem. */ + ret = 0; + } + + /* Restore error enables. */ + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : : "r" (orig_estate), "i" (ASI_ESTATE_ERROR_EN)); + + return ret; +} + +/* Return non-zero if PADDR is a valid physical memory address. */ +static int cheetah_check_main_memory(unsigned long paddr) +{ + unsigned long vaddr = PAGE_OFFSET + paddr; + + if (vaddr > (unsigned long) high_memory) + return 0; + + return kern_addr_valid(vaddr); +} + +void cheetah_cee_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar) +{ + struct cheetah_err_info local_snapshot, *p; + int recoverable, is_memory; + + p = cheetah_get_error_log(afsr); + if (!p) { + prom_printf("ERROR: Early CEE error afsr[%016lx] afar[%016lx]\n", + afsr, afar); + prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n", + smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate); + prom_halt(); + } + + /* Grab snapshot of logged error. */ + memcpy(&local_snapshot, p, sizeof(local_snapshot)); + + /* If the current trap snapshot does not match what the + * trap handler passed along into our args, big trouble. + * In such a case, mark the local copy as invalid. + * + * Else, it matches and we mark the afsr in the non-local + * copy as invalid so we may log new error traps there. + */ + if (p->afsr != afsr || p->afar != afar) + local_snapshot.afsr = CHAFSR_INVALID; + else + p->afsr = CHAFSR_INVALID; + + is_memory = cheetah_check_main_memory(afar); + + if (is_memory && (afsr & CHAFSR_CE) != 0UL) { + /* XXX Might want to log the results of this operation + * XXX somewhere... -DaveM + */ + cheetah_fix_ce(afar); + } + + { + int flush_all, flush_line; + + flush_all = flush_line = 0; + if ((afsr & CHAFSR_EDC) != 0UL) { + if ((afsr & cheetah_afsr_errors) == CHAFSR_EDC) + flush_line = 1; + else + flush_all = 1; + } else if ((afsr & CHAFSR_CPC) != 0UL) { + if ((afsr & cheetah_afsr_errors) == CHAFSR_CPC) + flush_line = 1; + else + flush_all = 1; + } + + /* Trap handler only disabled I-cache, flush it. */ + cheetah_flush_icache(); + + /* Re-enable I-cache */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_DCU_CONTROL_REG), + "i" (DCU_IC) + : "g1"); + + if (flush_all) + cheetah_flush_ecache(); + else if (flush_line) + cheetah_flush_ecache_line(afar); + } + + /* Re-enable error reporting */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_ESTATE_ERROR_EN), + "i" (ESTATE_ERROR_CEEN) + : "g1"); + + /* Decide if we can continue after handling this trap and + * logging the error. + */ + recoverable = 1; + if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP)) + recoverable = 0; + + /* Re-check AFSR/AFAR */ + (void) cheetah_recheck_errors(&local_snapshot); + + /* Log errors. */ + cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable); + + if (!recoverable) + panic("Irrecoverable Correctable-ECC error trap.\n"); +} + +void cheetah_deferred_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar) +{ + struct cheetah_err_info local_snapshot, *p; + int recoverable, is_memory; + +#ifdef CONFIG_PCI + /* Check for the special PCI poke sequence. */ + if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) { + cheetah_flush_icache(); + cheetah_flush_dcache(); + + /* Re-enable I-cache/D-cache */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_DCU_CONTROL_REG), + "i" (DCU_DC | DCU_IC) + : "g1"); + + /* Re-enable error reporting */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_ESTATE_ERROR_EN), + "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN) + : "g1"); + + (void) cheetah_recheck_errors(NULL); + + pci_poke_faulted = 1; + regs->tpc += 4; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + p = cheetah_get_error_log(afsr); + if (!p) { + prom_printf("ERROR: Early deferred error afsr[%016lx] afar[%016lx]\n", + afsr, afar); + prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n", + smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate); + prom_halt(); + } + + /* Grab snapshot of logged error. */ + memcpy(&local_snapshot, p, sizeof(local_snapshot)); + + /* If the current trap snapshot does not match what the + * trap handler passed along into our args, big trouble. + * In such a case, mark the local copy as invalid. + * + * Else, it matches and we mark the afsr in the non-local + * copy as invalid so we may log new error traps there. + */ + if (p->afsr != afsr || p->afar != afar) + local_snapshot.afsr = CHAFSR_INVALID; + else + p->afsr = CHAFSR_INVALID; + + is_memory = cheetah_check_main_memory(afar); + + { + int flush_all, flush_line; + + flush_all = flush_line = 0; + if ((afsr & CHAFSR_EDU) != 0UL) { + if ((afsr & cheetah_afsr_errors) == CHAFSR_EDU) + flush_line = 1; + else + flush_all = 1; + } else if ((afsr & CHAFSR_BERR) != 0UL) { + if ((afsr & cheetah_afsr_errors) == CHAFSR_BERR) + flush_line = 1; + else + flush_all = 1; + } + + cheetah_flush_icache(); + cheetah_flush_dcache(); + + /* Re-enable I/D caches */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_DCU_CONTROL_REG), + "i" (DCU_IC | DCU_DC) + : "g1"); + + if (flush_all) + cheetah_flush_ecache(); + else if (flush_line) + cheetah_flush_ecache_line(afar); + } + + /* Re-enable error reporting */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_ESTATE_ERROR_EN), + "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN) + : "g1"); + + /* Decide if we can continue after handling this trap and + * logging the error. + */ + recoverable = 1; + if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP)) + recoverable = 0; + + /* Re-check AFSR/AFAR. What we are looking for here is whether a new + * error was logged while we had error reporting traps disabled. + */ + if (cheetah_recheck_errors(&local_snapshot)) { + unsigned long new_afsr = local_snapshot.afsr; + + /* If we got a new asynchronous error, die... */ + if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU | + CHAFSR_WDU | CHAFSR_CPU | + CHAFSR_IVU | CHAFSR_UE | + CHAFSR_BERR | CHAFSR_TO)) + recoverable = 0; + } + + /* Log errors. */ + cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable); + + /* "Recoverable" here means we try to yank the page from ever + * being newly used again. This depends upon a few things: + * 1) Must be main memory, and AFAR must be valid. + * 2) If we trapped from user, OK. + * 3) Else, if we trapped from kernel we must find exception + * table entry (ie. we have to have been accessing user + * space). + * + * If AFAR is not in main memory, or we trapped from kernel + * and cannot find an exception table entry, it is unacceptable + * to try and continue. + */ + if (recoverable && is_memory) { + if ((regs->tstate & TSTATE_PRIV) == 0UL) { + /* OK, usermode access. */ + recoverable = 1; + } else { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + /* OK, kernel access to userspace. */ + recoverable = 1; + + } else { + /* BAD, privileged state is corrupted. */ + recoverable = 0; + } + + if (recoverable) { + if (pfn_valid(afar >> PAGE_SHIFT)) + get_page(pfn_to_page(afar >> PAGE_SHIFT)); + else + recoverable = 0; + + /* Only perform fixup if we still have a + * recoverable condition. + */ + if (recoverable) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + } + } + } + } else { + recoverable = 0; + } + + if (!recoverable) + panic("Irrecoverable deferred error trap.\n"); +} + +/* Handle a D/I cache parity error trap. TYPE is encoded as: + * + * Bit0: 0=dcache,1=icache + * Bit1: 0=recoverable,1=unrecoverable + * + * The hardware has disabled both the I-cache and D-cache in + * the %dcr register. + */ +void cheetah_plus_parity_error(int type, struct pt_regs *regs) +{ + if (type & 0x1) + __cheetah_flush_icache(); + else + cheetah_plus_zap_dcache_parity(); + cheetah_flush_dcache(); + + /* Re-enable I-cache/D-cache */ + __asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t" + "or %%g1, %1, %%g1\n\t" + "stxa %%g1, [%%g0] %0\n\t" + "membar #Sync" + : /* no outputs */ + : "i" (ASI_DCU_CONTROL_REG), + "i" (DCU_DC | DCU_IC) + : "g1"); + + if (type & 0x2) { + printk(KERN_EMERG "CPU[%d]: Cheetah+ %c-cache parity error at TPC[%016lx]\n", + smp_processor_id(), + (type & 0x1) ? 'I' : 'D', + regs->tpc); + printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc); + panic("Irrecoverable Cheetah+ parity error."); + } + + printk(KERN_WARNING "CPU[%d]: Cheetah+ %c-cache parity error at TPC[%016lx]\n", + smp_processor_id(), + (type & 0x1) ? 'I' : 'D', + regs->tpc); + printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc); +} + +struct sun4v_error_entry { + u64 err_handle; + u64 err_stick; + + u32 err_type; +#define SUN4V_ERR_TYPE_UNDEFINED 0 +#define SUN4V_ERR_TYPE_UNCORRECTED_RES 1 +#define SUN4V_ERR_TYPE_PRECISE_NONRES 2 +#define SUN4V_ERR_TYPE_DEFERRED_NONRES 3 +#define SUN4V_ERR_TYPE_WARNING_RES 4 + + u32 err_attrs; +#define SUN4V_ERR_ATTRS_PROCESSOR 0x00000001 +#define SUN4V_ERR_ATTRS_MEMORY 0x00000002 +#define SUN4V_ERR_ATTRS_PIO 0x00000004 +#define SUN4V_ERR_ATTRS_INT_REGISTERS 0x00000008 +#define SUN4V_ERR_ATTRS_FPU_REGISTERS 0x00000010 +#define SUN4V_ERR_ATTRS_USER_MODE 0x01000000 +#define SUN4V_ERR_ATTRS_PRIV_MODE 0x02000000 +#define SUN4V_ERR_ATTRS_RES_QUEUE_FULL 0x80000000 + + u64 err_raddr; + u32 err_size; + u16 err_cpu; + u16 err_pad; +}; + +static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0); +static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0); + +static const char *sun4v_err_type_to_str(u32 type) +{ + switch (type) { + case SUN4V_ERR_TYPE_UNDEFINED: + return "undefined"; + case SUN4V_ERR_TYPE_UNCORRECTED_RES: + return "uncorrected resumable"; + case SUN4V_ERR_TYPE_PRECISE_NONRES: + return "precise nonresumable"; + case SUN4V_ERR_TYPE_DEFERRED_NONRES: + return "deferred nonresumable"; + case SUN4V_ERR_TYPE_WARNING_RES: + return "warning resumable"; + default: + return "unknown"; + }; +} + +static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt) +{ + int cnt; + + printk("%s: Reporting on cpu %d\n", pfx, cpu); + printk("%s: err_handle[%lx] err_stick[%lx] err_type[%08x:%s]\n", + pfx, + ent->err_handle, ent->err_stick, + ent->err_type, + sun4v_err_type_to_str(ent->err_type)); + printk("%s: err_attrs[%08x:%s %s %s %s %s %s %s %s]\n", + pfx, + ent->err_attrs, + ((ent->err_attrs & SUN4V_ERR_ATTRS_PROCESSOR) ? + "processor" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_MEMORY) ? + "memory" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PIO) ? + "pio" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_INT_REGISTERS) ? + "integer-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_FPU_REGISTERS) ? + "fpu-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_USER_MODE) ? + "user" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PRIV_MODE) ? + "privileged" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL) ? + "queue-full" : "")); + printk("%s: err_raddr[%016lx] err_size[%u] err_cpu[%u]\n", + pfx, + ent->err_raddr, ent->err_size, ent->err_cpu); + + show_regs(regs); + + if ((cnt = atomic_read(ocnt)) != 0) { + atomic_set(ocnt, 0); + wmb(); + printk("%s: Queue overflowed %d times.\n", + pfx, cnt); + } +} + +/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. + * Log the event and clear the first word of the entry. + */ +void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->resum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + + if (ent->err_type == SUN4V_ERR_TYPE_WARNING_RES) { + /* If err_type is 0x4, it's a powerdown request. Do + * not do the usual resumable error log because that + * makes it look like some abnormal error. + */ + printk(KERN_INFO "Power down request...\n"); + kill_cad_pid(SIGINT, 1); + return; + } + + sun4v_log_error(regs, &local_copy, cpu, + KERN_ERR "RESUMABLE ERROR", + &sun4v_resum_oflow_cnt); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_resum_overflow(struct pt_regs *regs) +{ + atomic_inc(&sun4v_resum_oflow_cnt); +} + +/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. + * Log the event, clear the first word of the entry, and die. + */ +void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->nonresum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + +#ifdef CONFIG_PCI + /* Check for the special PCI poke sequence. */ + if (pci_poke_in_progress && pci_poke_cpu == cpu) { + pci_poke_faulted = 1; + regs->tpc += 4; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + sun4v_log_error(regs, &local_copy, cpu, + KERN_EMERG "NON-RESUMABLE ERROR", + &sun4v_nonresum_oflow_cnt); + + panic("Non-resumable error."); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_nonresum_overflow(struct pt_regs *regs) +{ + /* XXX Actually even this can make not that much sense. Perhaps + * XXX we should just pull the plug and panic directly from here? + */ + atomic_inc(&sun4v_nonresum_oflow_cnt); +} + +unsigned long sun4v_err_itlb_vaddr; +unsigned long sun4v_err_itlb_ctx; +unsigned long sun4v_err_itlb_pte; +unsigned long sun4v_err_itlb_error; + +void sun4v_itlb_error_report(struct pt_regs *regs, int tl) +{ + if (tl > 1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); + printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]); + printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n", + (void *) regs->u_regs[UREG_I7]); + printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] " + "pte[%lx] error[%lx]\n", + sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, + sun4v_err_itlb_pte, sun4v_err_itlb_error); + + prom_halt(); +} + +unsigned long sun4v_err_dtlb_vaddr; +unsigned long sun4v_err_dtlb_ctx; +unsigned long sun4v_err_dtlb_pte; +unsigned long sun4v_err_dtlb_error; + +void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) +{ + if (tl > 1) + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + + printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", + regs->tpc, tl); + printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]); + printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n", + (void *) regs->u_regs[UREG_I7]); + printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] " + "pte[%lx] error[%lx]\n", + sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, + sun4v_err_dtlb_pte, sun4v_err_dtlb_error); + + prom_halt(); +} + +void hypervisor_tlbop_error(unsigned long err, unsigned long op) +{ + printk(KERN_CRIT "SUN4V: TLB hv call error %lu for op %lu\n", + err, op); +} + +void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op) +{ + printk(KERN_CRIT "SUN4V: XCALL TLB hv call error %lu for op %lu\n", + err, op); +} + +void do_fpe_common(struct pt_regs *regs) +{ + if (regs->tstate & TSTATE_PRIV) { + regs->tpc = regs->tnpc; + regs->tnpc += 4; + } else { + unsigned long fsr = current_thread_info()->xfsr[0]; + siginfo_t info; + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = 0; + info.si_code = __SI_FAULT; + if ((fsr & 0x1c000) == (1 << 14)) { + if (fsr & 0x10) + info.si_code = FPE_FLTINV; + else if (fsr & 0x08) + info.si_code = FPE_FLTOVF; + else if (fsr & 0x04) + info.si_code = FPE_FLTUND; + else if (fsr & 0x02) + info.si_code = FPE_FLTDIV; + else if (fsr & 0x01) + info.si_code = FPE_FLTRES; + } + force_sig_info(SIGFPE, &info, current); + } +} + +void do_fpieee(struct pt_regs *regs) +{ + if (notify_die(DIE_TRAP, "fpu exception ieee", regs, + 0, 0x24, SIGFPE) == NOTIFY_STOP) + return; + + do_fpe_common(regs); +} + +extern int do_mathemu(struct pt_regs *, struct fpustate *); + +void do_fpother(struct pt_regs *regs) +{ + struct fpustate *f = FPUSTATE; + int ret = 0; + + if (notify_die(DIE_TRAP, "fpu exception other", regs, + 0, 0x25, SIGFPE) == NOTIFY_STOP) + return; + + switch ((current_thread_info()->xfsr[0] & 0x1c000)) { + case (2 << 14): /* unfinished_FPop */ + case (3 << 14): /* unimplemented_FPop */ + ret = do_mathemu(regs, f); + break; + } + if (ret) + return; + do_fpe_common(regs); +} + +void do_tof(struct pt_regs *regs) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs, + 0, 0x26, SIGEMT) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) + die_if_kernel("Penguin overflow trap from kernel mode", regs); + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGEMT; + info.si_errno = 0; + info.si_code = EMT_TAGOVF; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = 0; + force_sig_info(SIGEMT, &info, current); +} + +void do_div0(struct pt_regs *regs) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "integer division by zero", regs, + 0, 0x28, SIGFPE) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) + die_if_kernel("TL0: Kernel divide by zero.", regs); + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = FPE_INTDIV; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = 0; + force_sig_info(SIGFPE, &info, current); +} + +static void instruction_dump(unsigned int *pc) +{ + int i; + + if ((((unsigned long) pc) & 3)) + return; + + printk("Instruction DUMP:"); + for (i = -3; i < 6; i++) + printk("%c%08x%c",i?' ':'<',pc[i],i?' ':'>'); + printk("\n"); +} + +static void user_instruction_dump(unsigned int __user *pc) +{ + int i; + unsigned int buf[9]; + + if ((((unsigned long) pc) & 3)) + return; + + if (copy_from_user(buf, pc - 3, sizeof(buf))) + return; + + printk("Instruction DUMP:"); + for (i = 0; i < 9; i++) + printk("%c%08x%c",i==3?' ':'<',buf[i],i==3?' ':'>'); + printk("\n"); +} + +void show_stack(struct task_struct *tsk, unsigned long *_ksp) +{ + unsigned long fp, thread_base, ksp; + struct thread_info *tp; + int count = 0; + + ksp = (unsigned long) _ksp; + if (!tsk) + tsk = current; + tp = task_thread_info(tsk); + if (ksp == 0UL) { + if (tsk == current) + asm("mov %%fp, %0" : "=r" (ksp)); + else + ksp = tp->ksp; + } + if (tp == current_thread_info()) + flushw_all(); + + fp = ksp + STACK_BIAS; + thread_base = (unsigned long) tp; + + printk("Call Trace:\n"); + do { + struct sparc_stackf *sf; + struct pt_regs *regs; + unsigned long pc; + + if (!kstack_valid(tp, fp)) + break; + sf = (struct sparc_stackf *) fp; + regs = (struct pt_regs *) (sf + 1); + + if (kstack_is_trap_frame(tp, regs)) { + if (!(regs->tstate & TSTATE_PRIV)) + break; + pc = regs->tpc; + fp = regs->u_regs[UREG_I6] + STACK_BIAS; + } else { + pc = sf->callers_pc; + fp = (unsigned long)sf->fp + STACK_BIAS; + } + + printk(" [%016lx] %pS\n", pc, (void *) pc); + } while (++count < 16); +} + +void dump_stack(void) +{ + show_stack(current, NULL); +} + +EXPORT_SYMBOL(dump_stack); + +static inline int is_kernel_stack(struct task_struct *task, + struct reg_window *rw) +{ + unsigned long rw_addr = (unsigned long) rw; + unsigned long thread_base, thread_end; + + if (rw_addr < PAGE_OFFSET) { + if (task != &init_task) + return 0; + } + + thread_base = (unsigned long) task_stack_page(task); + thread_end = thread_base + sizeof(union thread_union); + if (rw_addr >= thread_base && + rw_addr < thread_end && + !(rw_addr & 0x7UL)) + return 1; + + return 0; +} + +static inline struct reg_window *kernel_stack_up(struct reg_window *rw) +{ + unsigned long fp = rw->ins[6]; + + if (!fp) + return NULL; + + return (struct reg_window *) (fp + STACK_BIAS); +} + +void die_if_kernel(char *str, struct pt_regs *regs) +{ + static int die_counter; + int count = 0; + + /* Amuse the user. */ + printk( +" \\|/ ____ \\|/\n" +" \"@'/ .. \\`@\"\n" +" /_| \\__/ |_\\\n" +" \\__U_/\n"); + + printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter); + notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); + __asm__ __volatile__("flushw"); + show_regs(regs); + add_taint(TAINT_DIE); + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *rw = (struct reg_window *) + (regs->u_regs[UREG_FP] + STACK_BIAS); + + /* Stop the back trace when we hit userland or we + * find some badly aligned kernel stack. + */ + while (rw && + count++ < 30&& + is_kernel_stack(current, rw)) { + printk("Caller[%016lx]: %pS\n", rw->ins[7], + (void *) rw->ins[7]); + + rw = kernel_stack_up(rw); + } + instruction_dump ((unsigned int *) regs->tpc); + } else { + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + user_instruction_dump ((unsigned int __user *) regs->tpc); + } + if (regs->tstate & TSTATE_PRIV) + do_exit(SIGKILL); + do_exit(SIGSEGV); +} + +#define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19)) +#define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19)) + +extern int handle_popc(u32 insn, struct pt_regs *regs); +extern int handle_ldf_stq(u32 insn, struct pt_regs *regs); + +void do_illegal_instruction(struct pt_regs *regs) +{ + unsigned long pc = regs->tpc; + unsigned long tstate = regs->tstate; + u32 insn; + siginfo_t info; + + if (notify_die(DIE_TRAP, "illegal instruction", regs, + 0, 0x10, SIGILL) == NOTIFY_STOP) + return; + + if (tstate & TSTATE_PRIV) + die_if_kernel("Kernel illegal instruction", regs); + if (test_thread_flag(TIF_32BIT)) + pc = (u32)pc; + if (get_user(insn, (u32 __user *) pc) != -EFAULT) { + if ((insn & 0xc1ffc000) == 0x81700000) /* POPC */ { + if (handle_popc(insn, regs)) + return; + } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { + if (handle_ldf_stq(insn, regs)) + return; + } else if (tlb_type == hypervisor) { + if ((insn & VIS_OPCODE_MASK) == VIS_OPCODE_VAL) { + if (!vis_emul(regs, insn)) + return; + } else { + struct fpustate *f = FPUSTATE; + + /* XXX maybe verify XFSR bits like + * XXX do_fpother() does? + */ + if (do_mathemu(regs, f)) + return; + } + } + } + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = (void __user *)pc; + info.si_trapno = 0; + force_sig_info(SIGILL, &info, current); +} + +extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn); + +void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "memory address unaligned", regs, + 0, 0x34, SIGSEGV) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); + return; + } + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)sfar; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + +void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "memory address unaligned", regs, + 0, 0x34, SIGSEGV) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); + return; + } + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + force_sig_info(SIGBUS, &info, current); +} + +void do_privop(struct pt_regs *regs) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, "privileged operation", regs, + 0, 0x11, SIGILL) == NOTIFY_STOP) + return; + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_PRVOPC; + info.si_addr = (void __user *)regs->tpc; + info.si_trapno = 0; + force_sig_info(SIGILL, &info, current); +} + +void do_privact(struct pt_regs *regs) +{ + do_privop(regs); +} + +/* Trap level 1 stuff or other traps we should never see... */ +void do_cee(struct pt_regs *regs) +{ + die_if_kernel("TL0: Cache Error Exception", regs); +} + +void do_cee_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Cache Error Exception", regs); +} + +void do_dae_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Data Access Exception", regs); +} + +void do_iae_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Instruction Access Exception", regs); +} + +void do_div0_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: DIV0 Exception", regs); +} + +void do_fpdis_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: FPU Disabled", regs); +} + +void do_fpieee_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: FPU IEEE Exception", regs); +} + +void do_fpother_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: FPU Other Exception", regs); +} + +void do_ill_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Illegal Instruction Exception", regs); +} + +void do_irq_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: IRQ Exception", regs); +} + +void do_lddfmna_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: LDDF Exception", regs); +} + +void do_stdfmna_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: STDF Exception", regs); +} + +void do_paw(struct pt_regs *regs) +{ + die_if_kernel("TL0: Phys Watchpoint Exception", regs); +} + +void do_paw_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Phys Watchpoint Exception", regs); +} + +void do_vaw(struct pt_regs *regs) +{ + die_if_kernel("TL0: Virt Watchpoint Exception", regs); +} + +void do_vaw_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Virt Watchpoint Exception", regs); +} + +void do_tof_tl1(struct pt_regs *regs) +{ + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + die_if_kernel("TL1: Tag Overflow Exception", regs); +} + +void do_getpsr(struct pt_regs *regs) +{ + regs->u_regs[UREG_I0] = tstate_to_psr(regs->tstate); + regs->tpc = regs->tnpc; + regs->tnpc += 4; + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } +} + +struct trap_per_cpu trap_block[NR_CPUS]; + +/* This can get invoked before sched_init() so play it super safe + * and use hard_smp_processor_id(). + */ +void notrace init_cur_cpu_trap(struct thread_info *t) +{ + int cpu = hard_smp_processor_id(); + struct trap_per_cpu *p = &trap_block[cpu]; + + p->thread = t; + p->pgd_paddr = 0; +} + +extern void thread_info_offsets_are_bolixed_dave(void); +extern void trap_per_cpu_offsets_are_bolixed_dave(void); +extern void tsb_config_offsets_are_bolixed_dave(void); + +/* Only invoked on boot processor. */ +void __init trap_init(void) +{ + /* Compile time sanity check. */ + if (TI_TASK != offsetof(struct thread_info, task) || + TI_FLAGS != offsetof(struct thread_info, flags) || + TI_CPU != offsetof(struct thread_info, cpu) || + TI_FPSAVED != offsetof(struct thread_info, fpsaved) || + TI_KSP != offsetof(struct thread_info, ksp) || + TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) || + TI_KREGS != offsetof(struct thread_info, kregs) || + TI_UTRAPS != offsetof(struct thread_info, utraps) || + TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) || + TI_REG_WINDOW != offsetof(struct thread_info, reg_window) || + TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) || + TI_GSR != offsetof(struct thread_info, gsr) || + TI_XFSR != offsetof(struct thread_info, xfsr) || + TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) || + TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) || + TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) || + TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) || + TI_PCR != offsetof(struct thread_info, pcr_reg) || + TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) || + TI_NEW_CHILD != offsetof(struct thread_info, new_child) || + TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) || + TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) || + TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) || + TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) || + TI_FPREGS != offsetof(struct thread_info, fpregs) || + (TI_FPREGS & (64 - 1))) + thread_info_offsets_are_bolixed_dave(); + + if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || + (TRAP_PER_CPU_PGD_PADDR != + offsetof(struct trap_per_cpu, pgd_paddr)) || + (TRAP_PER_CPU_CPU_MONDO_PA != + offsetof(struct trap_per_cpu, cpu_mondo_pa)) || + (TRAP_PER_CPU_DEV_MONDO_PA != + offsetof(struct trap_per_cpu, dev_mondo_pa)) || + (TRAP_PER_CPU_RESUM_MONDO_PA != + offsetof(struct trap_per_cpu, resum_mondo_pa)) || + (TRAP_PER_CPU_RESUM_KBUF_PA != + offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || + (TRAP_PER_CPU_NONRESUM_MONDO_PA != + offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || + (TRAP_PER_CPU_NONRESUM_KBUF_PA != + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || + (TRAP_PER_CPU_FAULT_INFO != + offsetof(struct trap_per_cpu, fault_info)) || + (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || + (TRAP_PER_CPU_CPU_LIST_PA != + offsetof(struct trap_per_cpu, cpu_list_pa)) || + (TRAP_PER_CPU_TSB_HUGE != + offsetof(struct trap_per_cpu, tsb_huge)) || + (TRAP_PER_CPU_TSB_HUGE_TEMP != + offsetof(struct trap_per_cpu, tsb_huge_temp)) || + (TRAP_PER_CPU_IRQ_WORKLIST_PA != + offsetof(struct trap_per_cpu, irq_worklist_pa)) || + (TRAP_PER_CPU_CPU_MONDO_QMASK != + offsetof(struct trap_per_cpu, cpu_mondo_qmask)) || + (TRAP_PER_CPU_DEV_MONDO_QMASK != + offsetof(struct trap_per_cpu, dev_mondo_qmask)) || + (TRAP_PER_CPU_RESUM_QMASK != + offsetof(struct trap_per_cpu, resum_qmask)) || + (TRAP_PER_CPU_NONRESUM_QMASK != + offsetof(struct trap_per_cpu, nonresum_qmask))) + trap_per_cpu_offsets_are_bolixed_dave(); + + if ((TSB_CONFIG_TSB != + offsetof(struct tsb_config, tsb)) || + (TSB_CONFIG_RSS_LIMIT != + offsetof(struct tsb_config, tsb_rss_limit)) || + (TSB_CONFIG_NENTRIES != + offsetof(struct tsb_config, tsb_nentries)) || + (TSB_CONFIG_REG_VAL != + offsetof(struct tsb_config, tsb_reg_val)) || + (TSB_CONFIG_MAP_VADDR != + offsetof(struct tsb_config, tsb_map_vaddr)) || + (TSB_CONFIG_MAP_PTE != + offsetof(struct tsb_config, tsb_map_pte))) + tsb_config_offsets_are_bolixed_dave(); + + /* Attach to the address space of init_task. On SMP we + * do this in smp.c:smp_callin for other cpus. + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; +} diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S new file mode 100644 index 000000000000..8c91d9b29a2f --- /dev/null +++ b/arch/sparc/kernel/tsb.S @@ -0,0 +1,552 @@ +/* tsb.S: Sparc64 TSB table handling. + * + * Copyright (C) 2006 David S. Miller <davem@davemloft.net> + */ + + +#include <asm/tsb.h> +#include <asm/hypervisor.h> +#include <asm/page.h> +#include <asm/cpudata.h> +#include <asm/mmu.h> + + .text + .align 32 + + /* Invoked from TLB miss handler, we are in the + * MMU global registers and they are setup like + * this: + * + * %g1: TSB entry pointer + * %g2: available temporary + * %g3: FAULT_CODE_{D,I}TLB + * %g4: available temporary + * %g5: available temporary + * %g6: TAG TARGET + * %g7: available temporary, will be loaded by us with + * the physical address base of the linux page + * tables for the current address space + */ +tsb_miss_dtlb: + mov TLB_TAG_ACCESS, %g4 + ba,pt %xcc, tsb_miss_page_table_walk + ldxa [%g4] ASI_DMMU, %g4 + +tsb_miss_itlb: + mov TLB_TAG_ACCESS, %g4 + ba,pt %xcc, tsb_miss_page_table_walk + ldxa [%g4] ASI_IMMU, %g4 + + /* At this point we have: + * %g1 -- PAGE_SIZE TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g4 -- missing virtual address + * %g6 -- TAG TARGET (vaddr >> 22) + */ +tsb_miss_page_table_walk: + TRAP_LOAD_TRAP_BLOCK(%g7, %g5) + + /* Before committing to a full page table walk, + * check the huge page TSB. + */ +#ifdef CONFIG_HUGETLB_PAGE + +661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5 + nop + .section .sun4v_2insn_patch, "ax" + .word 661b + mov SCRATCHPAD_UTSBREG2, %g5 + ldxa [%g5] ASI_SCRATCHPAD, %g5 + .previous + + cmp %g5, -1 + be,pt %xcc, 80f + nop + + /* We need an aligned pair of registers containing 2 values + * which can be easily rematerialized. %g6 and %g7 foot the + * bill just nicely. We'll save %g6 away into %g2 for the + * huge page TSB TAG comparison. + * + * Perform a huge page TSB lookup. + */ + mov %g6, %g2 + and %g5, 0x7, %g6 + mov 512, %g7 + andn %g5, 0x7, %g5 + sllx %g7, %g6, %g7 + srlx %g4, HPAGE_SHIFT, %g6 + sub %g7, 1, %g7 + and %g6, %g7, %g6 + sllx %g6, 4, %g6 + add %g5, %g6, %g5 + + TSB_LOAD_QUAD(%g5, %g6) + cmp %g6, %g2 + be,a,pt %xcc, tsb_tlb_reload + mov %g7, %g5 + + /* No match, remember the huge page TSB entry address, + * and restore %g6 and %g7. + */ + TRAP_LOAD_TRAP_BLOCK(%g7, %g6) + srlx %g4, 22, %g6 +80: stx %g5, [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP] + +#endif + + ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7 + + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g4 -- missing virtual address + * %g6 -- TAG TARGET (vaddr >> 22) + * %g7 -- page table physical address + * + * We know that both the base PAGE_SIZE TSB and the HPAGE_SIZE + * TSB both lack a matching entry. + */ +tsb_miss_page_table_walk_sun4v_fastpath: + USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) + + /* Load and check PTE. */ + ldxa [%g5] ASI_PHYS_USE_EC, %g5 + brgez,pn %g5, tsb_do_fault + nop + +#ifdef CONFIG_HUGETLB_PAGE +661: sethi %uhi(_PAGE_SZALL_4U), %g7 + sllx %g7, 32, %g7 + .section .sun4v_2insn_patch, "ax" + .word 661b + mov _PAGE_SZALL_4V, %g7 + nop + .previous + + and %g5, %g7, %g2 + +661: sethi %uhi(_PAGE_SZHUGE_4U), %g7 + sllx %g7, 32, %g7 + .section .sun4v_2insn_patch, "ax" + .word 661b + mov _PAGE_SZHUGE_4V, %g7 + nop + .previous + + cmp %g2, %g7 + bne,pt %xcc, 60f + nop + + /* It is a huge page, use huge page TSB entry address we + * calculated above. + */ + TRAP_LOAD_TRAP_BLOCK(%g7, %g2) + ldx [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g2 + cmp %g2, -1 + movne %xcc, %g2, %g1 +60: +#endif + + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g5 -- valid PTE + * %g6 -- TAG TARGET (vaddr >> 22) + */ +tsb_reload: + TSB_LOCK_TAG(%g1, %g2, %g7) + TSB_WRITE(%g1, %g5, %g6) + + /* Finally, load TLB and return from trap. */ +tsb_tlb_reload: + cmp %g3, FAULT_CODE_DTLB + bne,pn %xcc, tsb_itlb_load + nop + +tsb_dtlb_load: + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 + +tsb_itlb_load: + /* Executable bit must be set. */ +661: andcc %g5, _PAGE_EXEC_4U, %g0 + .section .sun4v_1insn_patch, "ax" + .word 661b + andcc %g5, _PAGE_EXEC_4V, %g0 + .previous + + be,pn %xcc, tsb_do_fault + nop + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN + retry + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 + + /* No valid entry in the page tables, do full fault + * processing. + */ + + .globl tsb_do_fault +tsb_do_fault: + cmp %g3, FAULT_CODE_DTLB + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + ldxa [%g0] ASI_SCRATCHPAD, %g4 + .previous + + bne,pn %xcc, tsb_do_itlb_fault + nop + +tsb_do_dtlb_fault: + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g5 + .section .sun4v_2insn_patch, "ax" + .word 661b + ldx [%g4 + HV_FAULT_D_ADDR_OFFSET], %g5 + nop + .previous + + be,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB, %g4 + ba,pt %xcc, winfix_trampoline + nop + +tsb_do_itlb_fault: + rdpr %tpc, %g5 + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_ITLB, %g4 + + .globl sparc64_realfault_common +sparc64_realfault_common: + /* fault code in %g4, fault address in %g5, etrap will + * preserve these two values in %l4 and %l5 respectively + */ + ba,pt %xcc, etrap ! Save trap state +1: rd %pc, %g7 ! ... + stb %l4, [%g6 + TI_FAULT_CODE] ! Save fault code + stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address + call do_sparc64_fault ! Call fault handler + add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg + ba,pt %xcc, rtrap ! Restore cpu state + nop ! Delay slot (fill me) + +winfix_trampoline: + rdpr %tpc, %g3 ! Prepare winfixup TNPC + or %g3, 0x7c, %g3 ! Compute branch offset + wrpr %g3, %tnpc ! Write it into TNPC + done ! Trap return + + /* Insert an entry into the TSB. + * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + * %o2: pte + */ + .align 32 + .globl __tsb_insert +__tsb_insert: + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate + TSB_LOCK_TAG(%o0, %g2, %g3) + TSB_WRITE(%o0, %o2, %o1) + wrpr %o5, %pstate + retl + nop + .size __tsb_insert, .-__tsb_insert + + /* Flush the given TSB entry if it has the matching + * tag. + * + * %o0: TSB entry pointer (virt or phys address) + * %o1: tag + */ + .align 32 + .globl tsb_flush + .type tsb_flush,#function +tsb_flush: + sethi %hi(TSB_TAG_LOCK_HIGH), %g2 +1: TSB_LOAD_TAG(%o0, %g1) + srlx %g1, 32, %o3 + andcc %o3, %g2, %g0 + bne,pn %icc, 1b + nop + cmp %g1, %o1 + mov 1, %o3 + bne,pt %xcc, 2f + sllx %o3, TSB_TAG_INVALID_BIT, %o3 + TSB_CAS_TAG(%o0, %g1, %o3) + cmp %g1, %o3 + bne,pn %xcc, 1b + nop +2: retl + nop + .size tsb_flush, .-tsb_flush + + /* Reload MMU related context switch state at + * schedule() time. + * + * %o0: page table physical address + * %o1: TSB base config pointer + * %o2: TSB huge config pointer, or NULL if none + * %o3: Hypervisor TSB descriptor physical address + * + * We have to run this whole thing with interrupts + * disabled so that the current cpu doesn't change + * due to preemption. + */ + .align 32 + .globl __tsb_context_switch + .type __tsb_context_switch,#function +__tsb_context_switch: + rdpr %pstate, %g1 + wrpr %g1, PSTATE_IE, %pstate + + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + + stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] + + ldx [%o1 + TSB_CONFIG_REG_VAL], %o0 + brz,pt %o2, 1f + mov -1, %g3 + + ldx [%o2 + TSB_CONFIG_REG_VAL], %g3 + +1: stx %g3, [%g2 + TRAP_PER_CPU_TSB_HUGE] + + sethi %hi(tlb_type), %g2 + lduw [%g2 + %lo(tlb_type)], %g2 + cmp %g2, 3 + bne,pt %icc, 50f + nop + + /* Hypervisor TSB switch. */ + mov SCRATCHPAD_UTSBREG1, %o5 + stxa %o0, [%o5] ASI_SCRATCHPAD + mov SCRATCHPAD_UTSBREG2, %o5 + stxa %g3, [%o5] ASI_SCRATCHPAD + + mov 2, %o0 + cmp %g3, -1 + move %xcc, 1, %o0 + + mov HV_FAST_MMU_TSB_CTXNON0, %o5 + mov %o3, %o1 + ta HV_FAST_TRAP + + /* Finish up. */ + ba,pt %xcc, 9f + nop + + /* SUN4U TSB switch. */ +50: mov TSB_REG, %o5 + stxa %o0, [%o5] ASI_DMMU + membar #Sync + stxa %o0, [%o5] ASI_IMMU + membar #Sync + +2: ldx [%o1 + TSB_CONFIG_MAP_VADDR], %o4 + brz %o4, 9f + ldx [%o1 + TSB_CONFIG_MAP_PTE], %o5 + + sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2 + mov TLB_TAG_ACCESS, %g3 + lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2 + stxa %o4, [%g3] ASI_DMMU + membar #Sync + sllx %g2, 3, %g2 + stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS + membar #Sync + + brz,pt %o2, 9f + nop + + ldx [%o2 + TSB_CONFIG_MAP_VADDR], %o4 + ldx [%o2 + TSB_CONFIG_MAP_PTE], %o5 + mov TLB_TAG_ACCESS, %g3 + stxa %o4, [%g3] ASI_DMMU + membar #Sync + sub %g2, (1 << 3), %g2 + stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS + membar #Sync + +9: + wrpr %g1, %pstate + + retl + nop + .size __tsb_context_switch, .-__tsb_context_switch + +#define TSB_PASS_BITS ((1 << TSB_TAG_LOCK_BIT) | \ + (1 << TSB_TAG_INVALID_BIT)) + + .align 32 + .globl copy_tsb + .type copy_tsb,#function +copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size + * %o2=new_tsb_base, %o3=new_tsb_size + */ + sethi %uhi(TSB_PASS_BITS), %g7 + srlx %o3, 4, %o3 + add %o0, %o1, %g1 /* end of old tsb */ + sllx %g7, 32, %g7 + sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + +661: prefetcha [%o0] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o0] ASI_PHYS_USE_EC, #one_read + .previous + +90: andcc %o0, (64 - 1), %g0 + bne 1f + add %o0, 64, %o5 + +661: prefetcha [%o5] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o5] ASI_PHYS_USE_EC, #one_read + .previous + +1: TSB_LOAD_QUAD(%o0, %g2) /* %g2/%g3 == TSB entry */ + andcc %g2, %g7, %g0 /* LOCK or INVALID set? */ + bne,pn %xcc, 80f /* Skip it */ + sllx %g2, 22, %o4 /* TAG --> VADDR */ + + /* This can definitely be computed faster... */ + srlx %o0, 4, %o5 /* Build index */ + and %o5, 511, %o5 /* Mask index */ + sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + or %o4, %o5, %o4 /* Full VADDR. */ + srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ + sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ + TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ + add %o4, 0x8, %o4 /* Advance to TTE */ + TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ + +80: add %o0, 16, %o0 + cmp %o0, %g1 + bne,pt %xcc, 90b + nop + + retl + nop + .size copy_tsb, .-copy_tsb + + /* Set the invalid bit in all TSB entries. */ + .align 32 + .globl tsb_init + .type tsb_init,#function +tsb_init: /* %o0 = TSB vaddr, %o1 = size in bytes */ + prefetch [%o0 + 0x000], #n_writes + mov 1, %g1 + prefetch [%o0 + 0x040], #n_writes + sllx %g1, TSB_TAG_INVALID_BIT, %g1 + prefetch [%o0 + 0x080], #n_writes +1: prefetch [%o0 + 0x0c0], #n_writes + stx %g1, [%o0 + 0x00] + stx %g1, [%o0 + 0x10] + stx %g1, [%o0 + 0x20] + stx %g1, [%o0 + 0x30] + prefetch [%o0 + 0x100], #n_writes + stx %g1, [%o0 + 0x40] + stx %g1, [%o0 + 0x50] + stx %g1, [%o0 + 0x60] + stx %g1, [%o0 + 0x70] + prefetch [%o0 + 0x140], #n_writes + stx %g1, [%o0 + 0x80] + stx %g1, [%o0 + 0x90] + stx %g1, [%o0 + 0xa0] + stx %g1, [%o0 + 0xb0] + prefetch [%o0 + 0x180], #n_writes + stx %g1, [%o0 + 0xc0] + stx %g1, [%o0 + 0xd0] + stx %g1, [%o0 + 0xe0] + stx %g1, [%o0 + 0xf0] + subcc %o1, 0x100, %o1 + bne,pt %xcc, 1b + add %o0, 0x100, %o0 + retl + nop + nop + nop + .size tsb_init, .-tsb_init + + .globl NGtsb_init + .type NGtsb_init,#function +NGtsb_init: + rd %asi, %g2 + mov 1, %g1 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + sllx %g1, TSB_TAG_INVALID_BIT, %g1 +1: stxa %g1, [%o0 + 0x00] %asi + stxa %g1, [%o0 + 0x10] %asi + stxa %g1, [%o0 + 0x20] %asi + stxa %g1, [%o0 + 0x30] %asi + stxa %g1, [%o0 + 0x40] %asi + stxa %g1, [%o0 + 0x50] %asi + stxa %g1, [%o0 + 0x60] %asi + stxa %g1, [%o0 + 0x70] %asi + stxa %g1, [%o0 + 0x80] %asi + stxa %g1, [%o0 + 0x90] %asi + stxa %g1, [%o0 + 0xa0] %asi + stxa %g1, [%o0 + 0xb0] %asi + stxa %g1, [%o0 + 0xc0] %asi + stxa %g1, [%o0 + 0xd0] %asi + stxa %g1, [%o0 + 0xe0] %asi + stxa %g1, [%o0 + 0xf0] %asi + subcc %o1, 0x100, %o1 + bne,pt %xcc, 1b + add %o0, 0x100, %o0 + membar #Sync + retl + wr %g2, 0x0, %asi + .size NGtsb_init, .-NGtsb_init diff --git a/arch/sparc/kernel/ttable.S b/arch/sparc/kernel/ttable.S new file mode 100644 index 000000000000..ea925503b42e --- /dev/null +++ b/arch/sparc/kernel/ttable.S @@ -0,0 +1,266 @@ +/* ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah/SUN4V extensions. + * + * Copyright (C) 1996, 2001, 2006 David S. Miller (davem@davemloft.net) + */ + + + .globl sparc64_ttable_tl0, sparc64_ttable_tl1 + .globl tl0_icpe, tl1_icpe + .globl tl0_dcpe, tl1_dcpe + .globl tl0_fecc, tl1_fecc + .globl tl0_cee, tl1_cee + .globl tl0_iae, tl1_iae + .globl tl0_dae, tl1_dae + +sparc64_ttable_tl0: +tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3) +tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7) +tl0_iax: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception) +tl0_itsb_4v: SUN4V_ITSB_MISS +tl0_iae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) +tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf) +tl0_ill: membar #Sync + TRAP_7INSNS(do_illegal_instruction) +tl0_privop: TRAP(do_privop) +tl0_resv012: BTRAP(0x12) BTRAP(0x13) BTRAP(0x14) BTRAP(0x15) BTRAP(0x16) BTRAP(0x17) +tl0_resv018: BTRAP(0x18) BTRAP(0x19) BTRAP(0x1a) BTRAP(0x1b) BTRAP(0x1c) BTRAP(0x1d) +tl0_resv01e: BTRAP(0x1e) BTRAP(0x1f) +tl0_fpdis: TRAP_NOSAVE(do_fpdis) +tl0_fpieee: TRAP_SAVEFPU(do_fpieee) +tl0_fpother: TRAP_NOSAVE(do_fpother_check_fitos) +tl0_tof: TRAP(do_tof) +tl0_cwin: CLEAN_WINDOW +tl0_div0: TRAP(do_div0) +tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e) +tl0_resv02f: BTRAP(0x2f) +tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception) +tl0_dtsb_4v: SUN4V_DTSB_MISS +tl0_dae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) +tl0_resv033: BTRAP(0x33) +tl0_mna: TRAP_NOSAVE(do_mna) +tl0_lddfmna: TRAP_NOSAVE(do_lddfmna) +tl0_stdfmna: TRAP_NOSAVE(do_stdfmna) +tl0_privact: TRAP_NOSAVE(__do_privact) +tl0_resv038: BTRAP(0x38) BTRAP(0x39) BTRAP(0x3a) BTRAP(0x3b) BTRAP(0x3c) BTRAP(0x3d) +tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) +#ifdef CONFIG_SMP +tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) +tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) +tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) +tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) +#else +tl0_irq1: BTRAP(0x41) +tl0_irq2: BTRAP(0x42) +tl0_irq3: BTRAP(0x43) +tl0_irq4: BTRAP(0x44) +#endif +tl0_irq5: TRAP_IRQ(handler_irq, 5) +#ifdef CONFIG_SMP +tl0_irq6: TRAP_IRQ(smp_call_function_single_client, 6) +#else +tl0_irq6: BTRAP(0x46) +#endif +tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) +tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d) +tl0_irq14: TRAP_IRQ(timer_interrupt, 14) +tl0_irq15: TRAP_NMI_IRQ(perfctr_irq, 15) +tl0_resv050: BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55) +tl0_resv056: BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b) +tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f) +tl0_ivec: TRAP_IVEC +tl0_paw: TRAP(do_paw) +tl0_vaw: TRAP(do_vaw) +tl0_cee: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_cee_trap) +tl0_iamiss: +#include "itlb_miss.S" +tl0_damiss: +#include "dtlb_miss.S" +tl0_daprot: +#include "dtlb_prot.S" +tl0_fecc: BTRAP(0x70) /* Fast-ECC on Cheetah */ +tl0_dcpe: BTRAP(0x71) /* D-cache Parity Error on Cheetah+ */ +tl0_icpe: BTRAP(0x72) /* I-cache Parity Error on Cheetah+ */ +tl0_resv073: BTRAP(0x73) BTRAP(0x74) BTRAP(0x75) +tl0_resv076: BTRAP(0x76) BTRAP(0x77) BTRAP(0x78) BTRAP(0x79) BTRAP(0x7a) BTRAP(0x7b) +tl0_cpu_mondo: TRAP_NOSAVE(sun4v_cpu_mondo) +tl0_dev_mondo: TRAP_NOSAVE(sun4v_dev_mondo) +tl0_res_mondo: TRAP_NOSAVE(sun4v_res_mondo) +tl0_nres_mondo: TRAP_NOSAVE(sun4v_nonres_mondo) +tl0_s0n: SPILL_0_NORMAL +tl0_s1n: SPILL_1_NORMAL +tl0_s2n: SPILL_2_NORMAL +tl0_s3n: SPILL_0_NORMAL_ETRAP +tl0_s4n: SPILL_1_GENERIC_ETRAP +tl0_s5n: SPILL_1_GENERIC_ETRAP_FIXUP +tl0_s6n: SPILL_2_GENERIC_ETRAP +tl0_s7n: SPILL_2_GENERIC_ETRAP_FIXUP +tl0_s0o: SPILL_0_OTHER +tl0_s1o: SPILL_1_OTHER +tl0_s2o: SPILL_2_OTHER +tl0_s3o: SPILL_3_OTHER +tl0_s4o: SPILL_4_OTHER +tl0_s5o: SPILL_5_OTHER +tl0_s6o: SPILL_6_OTHER +tl0_s7o: SPILL_7_OTHER +tl0_f0n: FILL_0_NORMAL +tl0_f1n: FILL_1_NORMAL +tl0_f2n: FILL_2_NORMAL +tl0_f3n: FILL_3_NORMAL +tl0_f4n: FILL_4_NORMAL +tl0_f5n: FILL_0_NORMAL_RTRAP +tl0_f6n: FILL_1_GENERIC_RTRAP +tl0_f7n: FILL_2_GENERIC_RTRAP +tl0_f0o: FILL_0_OTHER +tl0_f1o: FILL_1_OTHER +tl0_f2o: FILL_2_OTHER +tl0_f3o: FILL_3_OTHER +tl0_f4o: FILL_4_OTHER +tl0_f5o: FILL_5_OTHER +tl0_f6o: FILL_6_OTHER +tl0_f7o: FILL_7_OTHER +tl0_resv100: BTRAP(0x100) +tl0_bkpt: BREAKPOINT_TRAP +tl0_divz: TRAP(do_div0) +tl0_flushw: FLUSH_WINDOW_TRAP +tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107) BTRAP(0x108) +tl0_resv109: BTRAP(0x109) BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) +tl0_resv10e: BTRAP(0x10e) BTRAP(0x10f) +tl0_linux32: LINUX_32BIT_SYSCALL_TRAP +tl0_oldlinux64: LINUX_64BIT_SYSCALL_TRAP +tl0_resv112: TRAP_UTRAP(UT_TRAP_INSTRUCTION_18,0x112) TRAP_UTRAP(UT_TRAP_INSTRUCTION_19,0x113) +tl0_resv114: TRAP_UTRAP(UT_TRAP_INSTRUCTION_20,0x114) TRAP_UTRAP(UT_TRAP_INSTRUCTION_21,0x115) +tl0_resv116: TRAP_UTRAP(UT_TRAP_INSTRUCTION_22,0x116) TRAP_UTRAP(UT_TRAP_INSTRUCTION_23,0x117) +tl0_resv118: TRAP_UTRAP(UT_TRAP_INSTRUCTION_24,0x118) TRAP_UTRAP(UT_TRAP_INSTRUCTION_25,0x119) +tl0_resv11a: TRAP_UTRAP(UT_TRAP_INSTRUCTION_26,0x11a) TRAP_UTRAP(UT_TRAP_INSTRUCTION_27,0x11b) +tl0_resv11c: TRAP_UTRAP(UT_TRAP_INSTRUCTION_28,0x11c) TRAP_UTRAP(UT_TRAP_INSTRUCTION_29,0x11d) +tl0_resv11e: TRAP_UTRAP(UT_TRAP_INSTRUCTION_30,0x11e) TRAP_UTRAP(UT_TRAP_INSTRUCTION_31,0x11f) +tl0_getcc: GETCC_TRAP +tl0_setcc: SETCC_TRAP +tl0_getpsr: TRAP(do_getpsr) +tl0_resv123: BTRAP(0x123) BTRAP(0x124) BTRAP(0x125) BTRAP(0x126) BTRAP(0x127) +tl0_resv128: BTRAP(0x128) BTRAP(0x129) BTRAP(0x12a) BTRAP(0x12b) BTRAP(0x12c) +tl0_resv12d: BTRAP(0x12d) BTRAP(0x12e) BTRAP(0x12f) BTRAP(0x130) BTRAP(0x131) +tl0_resv132: BTRAP(0x132) BTRAP(0x133) BTRAP(0x134) BTRAP(0x135) BTRAP(0x136) +tl0_resv137: BTRAP(0x137) BTRAP(0x138) BTRAP(0x139) BTRAP(0x13a) BTRAP(0x13b) +tl0_resv13c: BTRAP(0x13c) BTRAP(0x13d) BTRAP(0x13e) BTRAP(0x13f) BTRAP(0x140) +tl0_resv141: BTRAP(0x141) BTRAP(0x142) BTRAP(0x143) BTRAP(0x144) BTRAP(0x145) +tl0_resv146: BTRAP(0x146) BTRAP(0x147) BTRAP(0x148) BTRAP(0x149) BTRAP(0x14a) +tl0_resv14b: BTRAP(0x14b) BTRAP(0x14c) BTRAP(0x14d) BTRAP(0x14e) BTRAP(0x14f) +tl0_resv150: BTRAP(0x150) BTRAP(0x151) BTRAP(0x152) BTRAP(0x153) BTRAP(0x154) +tl0_resv155: BTRAP(0x155) BTRAP(0x156) BTRAP(0x157) BTRAP(0x158) BTRAP(0x159) +tl0_resv15a: BTRAP(0x15a) BTRAP(0x15b) BTRAP(0x15c) BTRAP(0x15d) BTRAP(0x15e) +tl0_resv15f: BTRAP(0x15f) BTRAP(0x160) BTRAP(0x161) BTRAP(0x162) BTRAP(0x163) +tl0_resv164: BTRAP(0x164) BTRAP(0x165) BTRAP(0x166) BTRAP(0x167) BTRAP(0x168) +tl0_resv169: BTRAP(0x169) BTRAP(0x16a) BTRAP(0x16b) BTRAP(0x16c) +tl0_linux64: LINUX_64BIT_SYSCALL_TRAP +tl0_gsctx: TRAP(sparc64_get_context) TRAP(sparc64_set_context) +tl0_resv170: KPROBES_TRAP(0x170) KPROBES_TRAP(0x171) KGDB_TRAP(0x172) +tl0_resv173: BTRAP(0x173) BTRAP(0x174) BTRAP(0x175) BTRAP(0x176) BTRAP(0x177) +tl0_resv178: BTRAP(0x178) BTRAP(0x179) BTRAP(0x17a) BTRAP(0x17b) BTRAP(0x17c) +tl0_resv17d: BTRAP(0x17d) BTRAP(0x17e) BTRAP(0x17f) +#define BTRAPS(x) BTRAP(x) BTRAP(x+1) BTRAP(x+2) BTRAP(x+3) BTRAP(x+4) BTRAP(x+5) BTRAP(x+6) BTRAP(x+7) +tl0_resv180: BTRAPS(0x180) BTRAPS(0x188) +tl0_resv190: BTRAPS(0x190) BTRAPS(0x198) +tl0_resv1a0: BTRAPS(0x1a0) BTRAPS(0x1a8) +tl0_resv1b0: BTRAPS(0x1b0) BTRAPS(0x1b8) +tl0_resv1c0: BTRAPS(0x1c0) BTRAPS(0x1c8) +tl0_resv1d0: BTRAPS(0x1d0) BTRAPS(0x1d8) +tl0_resv1e0: BTRAPS(0x1e0) BTRAPS(0x1e8) +tl0_resv1f0: BTRAPS(0x1f0) BTRAPS(0x1f8) + +sparc64_ttable_tl1: +tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3) +tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7) +tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1) +tl1_itsb_4v: SUN4V_ITSB_MISS +tl1_iae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) +tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf) +tl1_ill: TRAPTL1(do_ill_tl1) +tl1_privop: BTRAPTL1(0x11) +tl1_resv012: BTRAPTL1(0x12) BTRAPTL1(0x13) BTRAPTL1(0x14) BTRAPTL1(0x15) +tl1_resv016: BTRAPTL1(0x16) BTRAPTL1(0x17) BTRAPTL1(0x18) BTRAPTL1(0x19) +tl1_resv01a: BTRAPTL1(0x1a) BTRAPTL1(0x1b) BTRAPTL1(0x1c) BTRAPTL1(0x1d) +tl1_resv01e: BTRAPTL1(0x1e) BTRAPTL1(0x1f) +tl1_fpdis: TRAP_NOSAVE(do_fpdis) +tl1_fpieee: TRAPTL1(do_fpieee_tl1) +tl1_fpother: TRAPTL1(do_fpother_tl1) +tl1_tof: TRAPTL1(do_tof_tl1) +tl1_cwin: CLEAN_WINDOW +tl1_div0: TRAPTL1(do_div0_tl1) +tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c) +tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f) +tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1) +tl1_dtsb_4v: SUN4V_DTSB_MISS +tl1_dae: membar #Sync + TRAP_NOSAVE_7INSNS(__spitfire_access_error) +tl1_resv033: BTRAPTL1(0x33) +tl1_mna: TRAP_NOSAVE(do_mna) +tl1_lddfmna: TRAPTL1(do_lddfmna_tl1) +tl1_stdfmna: TRAPTL1(do_stdfmna_tl1) +tl1_privact: BTRAPTL1(0x37) +tl1_resv038: BTRAPTL1(0x38) BTRAPTL1(0x39) BTRAPTL1(0x3a) BTRAPTL1(0x3b) +tl1_resv03c: BTRAPTL1(0x3c) BTRAPTL1(0x3d) BTRAPTL1(0x3e) BTRAPTL1(0x3f) +tl1_resv040: BTRAPTL1(0x40) +tl1_irq1: TRAP_IRQ(do_irq_tl1, 1) TRAP_IRQ(do_irq_tl1, 2) TRAP_IRQ(do_irq_tl1, 3) +tl1_irq4: TRAP_IRQ(do_irq_tl1, 4) TRAP_IRQ(do_irq_tl1, 5) TRAP_IRQ(do_irq_tl1, 6) +tl1_irq7: TRAP_IRQ(do_irq_tl1, 7) TRAP_IRQ(do_irq_tl1, 8) TRAP_IRQ(do_irq_tl1, 9) +tl1_irq10: TRAP_IRQ(do_irq_tl1, 10) TRAP_IRQ(do_irq_tl1, 11) +tl1_irq12: TRAP_IRQ(do_irq_tl1, 12) TRAP_IRQ(do_irq_tl1, 13) +tl1_irq14: TRAP_IRQ(do_irq_tl1, 14) TRAP_IRQ(do_irq_tl1, 15) +tl1_resv050: BTRAPTL1(0x50) BTRAPTL1(0x51) BTRAPTL1(0x52) BTRAPTL1(0x53) +tl1_resv054: BTRAPTL1(0x54) BTRAPTL1(0x55) BTRAPTL1(0x56) BTRAPTL1(0x57) +tl1_resv058: BTRAPTL1(0x58) BTRAPTL1(0x59) BTRAPTL1(0x5a) BTRAPTL1(0x5b) +tl1_resv05c: BTRAPTL1(0x5c) BTRAPTL1(0x5d) BTRAPTL1(0x5e) BTRAPTL1(0x5f) +tl1_ivec: TRAP_IVEC +tl1_paw: TRAPTL1(do_paw_tl1) +tl1_vaw: TRAPTL1(do_vaw_tl1) +tl1_cee: BTRAPTL1(0x63) +tl1_iamiss: BTRAPTL1(0x64) BTRAPTL1(0x65) BTRAPTL1(0x66) BTRAPTL1(0x67) +tl1_damiss: +#include "dtlb_miss.S" +tl1_daprot: +#include "dtlb_prot.S" +tl1_fecc: BTRAPTL1(0x70) /* Fast-ECC on Cheetah */ +tl1_dcpe: BTRAPTL1(0x71) /* D-cache Parity Error on Cheetah+ */ +tl1_icpe: BTRAPTL1(0x72) /* I-cache Parity Error on Cheetah+ */ +tl1_resv073: BTRAPTL1(0x73) +tl1_resv074: BTRAPTL1(0x74) BTRAPTL1(0x75) BTRAPTL1(0x76) BTRAPTL1(0x77) +tl1_resv078: BTRAPTL1(0x78) BTRAPTL1(0x79) BTRAPTL1(0x7a) BTRAPTL1(0x7b) +tl1_resv07c: BTRAPTL1(0x7c) BTRAPTL1(0x7d) BTRAPTL1(0x7e) BTRAPTL1(0x7f) +tl1_s0n: SPILL_0_NORMAL +tl1_s1n: SPILL_1_NORMAL +tl1_s2n: SPILL_2_NORMAL +tl1_s3n: SPILL_3_NORMAL +tl1_s4n: SPILL_4_NORMAL +tl1_s5n: SPILL_5_NORMAL +tl1_s6n: SPILL_6_NORMAL +tl1_s7n: SPILL_7_NORMAL +tl1_s0o: SPILL_0_OTHER +tl1_s1o: SPILL_1_OTHER +tl1_s2o: SPILL_2_OTHER +tl1_s3o: SPILL_3_OTHER +tl1_s4o: SPILL_4_OTHER +tl1_s5o: SPILL_5_OTHER +tl1_s6o: SPILL_6_OTHER +tl1_s7o: SPILL_7_OTHER +tl1_f0n: FILL_0_NORMAL +tl1_f1n: FILL_1_NORMAL +tl1_f2n: FILL_2_NORMAL +tl1_f3n: FILL_3_NORMAL +tl1_f4n: FILL_4_NORMAL +tl1_f5n: FILL_5_NORMAL +tl1_f6n: FILL_6_NORMAL +tl1_f7n: FILL_7_NORMAL +tl1_f0o: FILL_0_OTHER +tl1_f1o: FILL_1_OTHER +tl1_f2o: FILL_2_OTHER +tl1_f3o: FILL_3_OTHER +tl1_f4o: FILL_4_OTHER +tl1_f5o: FILL_5_OTHER +tl1_f6o: FILL_6_OTHER +tl1_f7o: FILL_7_OTHER diff --git a/arch/sparc/kernel/una_asm_64.S b/arch/sparc/kernel/una_asm_64.S new file mode 100644 index 000000000000..be183fe41443 --- /dev/null +++ b/arch/sparc/kernel/una_asm_64.S @@ -0,0 +1,146 @@ +/* una_asm.S: Kernel unaligned trap assembler helpers. + * + * Copyright (C) 1996,2005 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + + .text + + .globl __do_int_store +__do_int_store: + rd %asi, %o4 + wr %o3, 0, %asi + mov %o2, %g3 + cmp %o1, 2 + be,pn %icc, 2f + cmp %o1, 4 + be,pt %icc, 1f + srlx %g3, 24, %g2 + srlx %g3, 56, %g1 + srlx %g3, 48, %g7 +4: stba %g1, [%o0] %asi + srlx %g3, 40, %g1 +5: stba %g7, [%o0 + 1] %asi + srlx %g3, 32, %g7 +6: stba %g1, [%o0 + 2] %asi +7: stba %g7, [%o0 + 3] %asi + srlx %g3, 16, %g1 +8: stba %g2, [%o0 + 4] %asi + srlx %g3, 8, %g7 +9: stba %g1, [%o0 + 5] %asi +10: stba %g7, [%o0 + 6] %asi + ba,pt %xcc, 0f +11: stba %g3, [%o0 + 7] %asi +1: srl %g3, 16, %g7 +12: stba %g2, [%o0] %asi + srl %g3, 8, %g2 +13: stba %g7, [%o0 + 1] %asi +14: stba %g2, [%o0 + 2] %asi + ba,pt %xcc, 0f +15: stba %g3, [%o0 + 3] %asi +2: srl %g3, 8, %g2 +16: stba %g2, [%o0] %asi +17: stba %g3, [%o0 + 1] %asi +0: + wr %o4, 0x0, %asi + retl + mov 0, %o0 + .size __do_int_store, .-__do_int_store + + .section __ex_table,"a" + .word 4b, __retl_efault + .word 5b, __retl_efault + .word 6b, __retl_efault + .word 7b, __retl_efault + .word 8b, __retl_efault + .word 9b, __retl_efault + .word 10b, __retl_efault + .word 11b, __retl_efault + .word 12b, __retl_efault + .word 13b, __retl_efault + .word 14b, __retl_efault + .word 15b, __retl_efault + .word 16b, __retl_efault + .word 17b, __retl_efault + .previous + + .globl do_int_load +do_int_load: + rd %asi, %o5 + wr %o4, 0, %asi + cmp %o1, 8 + bge,pn %icc, 9f + cmp %o1, 4 + be,pt %icc, 6f +4: lduba [%o2] %asi, %g2 +5: lduba [%o2 + 1] %asi, %g3 + sll %g2, 8, %g2 + brz,pt %o3, 3f + add %g2, %g3, %g2 + sllx %g2, 48, %g2 + srax %g2, 48, %g2 +3: ba,pt %xcc, 0f + stx %g2, [%o0] +6: lduba [%o2 + 1] %asi, %g3 + sll %g2, 24, %g2 +7: lduba [%o2 + 2] %asi, %g7 + sll %g3, 16, %g3 +8: lduba [%o2 + 3] %asi, %g1 + sll %g7, 8, %g7 + or %g2, %g3, %g2 + or %g7, %g1, %g7 + or %g2, %g7, %g2 + brnz,a,pt %o3, 3f + sra %g2, 0, %g2 +3: ba,pt %xcc, 0f + stx %g2, [%o0] +9: lduba [%o2] %asi, %g2 +10: lduba [%o2 + 1] %asi, %g3 + sllx %g2, 56, %g2 +11: lduba [%o2 + 2] %asi, %g7 + sllx %g3, 48, %g3 +12: lduba [%o2 + 3] %asi, %g1 + sllx %g7, 40, %g7 + sllx %g1, 32, %g1 + or %g2, %g3, %g2 + or %g7, %g1, %g7 +13: lduba [%o2 + 4] %asi, %g3 + or %g2, %g7, %g7 +14: lduba [%o2 + 5] %asi, %g1 + sllx %g3, 24, %g3 +15: lduba [%o2 + 6] %asi, %g2 + sllx %g1, 16, %g1 + or %g7, %g3, %g7 +16: lduba [%o2 + 7] %asi, %g3 + sllx %g2, 8, %g2 + or %g7, %g1, %g7 + or %g2, %g3, %g2 + or %g7, %g2, %g7 + cmp %o1, 8 + be,a,pt %icc, 0f + stx %g7, [%o0] + srlx %g7, 32, %g2 + sra %g7, 0, %g7 + stx %g2, [%o0] + stx %g7, [%o0 + 8] +0: + wr %o5, 0x0, %asi + retl + mov 0, %o0 + .size __do_int_load, .-__do_int_load + + .section __ex_table,"a" + .word 4b, __retl_efault + .word 5b, __retl_efault + .word 6b, __retl_efault + .word 7b, __retl_efault + .word 8b, __retl_efault + .word 9b, __retl_efault + .word 10b, __retl_efault + .word 11b, __retl_efault + .word 12b, __retl_efault + .word 13b, __retl_efault + .word 14b, __retl_efault + .word 15b, __retl_efault + .word 16b, __retl_efault + .previous diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c new file mode 100644 index 000000000000..203ddfad9f27 --- /dev/null +++ b/arch/sparc/kernel/unaligned_64.c @@ -0,0 +1,690 @@ +/* + * unaligned.c: Unaligned load/store trap handling with special + * cases for the kernel to do them more quickly. + * + * Copyright (C) 1996,2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + + +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <asm/asi.h> +#include <asm/ptrace.h> +#include <asm/pstate.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/smp.h> +#include <linux/bitops.h> +#include <asm/fpumacro.h> + +/* #define DEBUG_MNA */ + +enum direction { + load, /* ld, ldd, ldh, ldsh */ + store, /* st, std, sth, stsh */ + both, /* Swap, ldstub, cas, ... */ + fpld, + fpst, + invalid, +}; + +#ifdef DEBUG_MNA +static char *dirstrings[] = { + "load", "store", "both", "fpload", "fpstore", "invalid" +}; +#endif + +static inline enum direction decode_direction(unsigned int insn) +{ + unsigned long tmp = (insn >> 21) & 1; + + if (!tmp) + return load; + else { + switch ((insn>>19)&0xf) { + case 15: /* swap* */ + return both; + default: + return store; + } + } +} + +/* 16 = double-word, 8 = extra-word, 4 = word, 2 = half-word */ +static inline int decode_access_size(unsigned int insn) +{ + unsigned int tmp; + + tmp = ((insn >> 19) & 0xf); + if (tmp == 11 || tmp == 14) /* ldx/stx */ + return 8; + tmp &= 3; + if (!tmp) + return 4; + else if (tmp == 3) + return 16; /* ldd/std - Although it is actually 8 */ + else if (tmp == 2) + return 2; + else { + printk("Impossible unaligned trap. insn=%08x\n", insn); + die_if_kernel("Byte sized unaligned access?!?!", current_thread_info()->kregs); + + /* GCC should never warn that control reaches the end + * of this function without returning a value because + * die_if_kernel() is marked with attribute 'noreturn'. + * Alas, some versions do... + */ + + return 0; + } +} + +static inline int decode_asi(unsigned int insn, struct pt_regs *regs) +{ + if (insn & 0x800000) { + if (insn & 0x2000) + return (unsigned char)(regs->tstate >> 24); /* %asi */ + else + return (unsigned char)(insn >> 5); /* imm_asi */ + } else + return ASI_P; +} + +/* 0x400000 = signed, 0 = unsigned */ +static inline int decode_signedness(unsigned int insn) +{ + return (insn & 0x400000); +} + +static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, + unsigned int rd, int from_kernel) +{ + if (rs2 >= 16 || rs1 >= 16 || rd >= 16) { + if (from_kernel != 0) + __asm__ __volatile__("flushw"); + else + flushw_user(); + } +} + +static inline long sign_extend_imm13(long imm) +{ + return imm << 51 >> 51; +} + +static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) +{ + unsigned long value; + + if (reg < 16) + return (!reg ? 0 : regs->u_regs[reg]); + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *win; + win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); + value = win->locals[reg - 16]; + } else if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + get_user(value, &win32->locals[reg - 16]); + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + get_user(value, &win->locals[reg - 16]); + } + return value; +} + +static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs) +{ + if (reg < 16) + return ®s->u_regs[reg]; + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *win; + win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); + return &win->locals[reg - 16]; + } else if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 *win32; + win32 = (struct reg_window32 *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + return (unsigned long *)&win32->locals[reg - 16]; + } else { + struct reg_window *win; + win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); + return &win->locals[reg - 16]; + } +} + +unsigned long compute_effective_address(struct pt_regs *regs, + unsigned int insn, unsigned int rd) +{ + unsigned int rs1 = (insn >> 14) & 0x1f; + unsigned int rs2 = insn & 0x1f; + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + + if (insn & 0x2000) { + maybe_flush_windows(rs1, 0, rd, from_kernel); + return (fetch_reg(rs1, regs) + sign_extend_imm13(insn)); + } else { + maybe_flush_windows(rs1, rs2, rd, from_kernel); + return (fetch_reg(rs1, regs) + fetch_reg(rs2, regs)); + } +} + +/* This is just to make gcc think die_if_kernel does return... */ +static void __used unaligned_panic(char *str, struct pt_regs *regs) +{ + die_if_kernel(str, regs); +} + +extern int do_int_load(unsigned long *dest_reg, int size, + unsigned long *saddr, int is_signed, int asi); + +extern int __do_int_store(unsigned long *dst_addr, int size, + unsigned long src_val, int asi); + +static inline int do_int_store(int reg_num, int size, unsigned long *dst_addr, + struct pt_regs *regs, int asi, int orig_asi) +{ + unsigned long zero = 0; + unsigned long *src_val_p = &zero; + unsigned long src_val; + + if (size == 16) { + size = 8; + zero = (((long)(reg_num ? + (unsigned)fetch_reg(reg_num, regs) : 0)) << 32) | + (unsigned)fetch_reg(reg_num + 1, regs); + } else if (reg_num) { + src_val_p = fetch_reg_addr(reg_num, regs); + } + src_val = *src_val_p; + if (unlikely(asi != orig_asi)) { + switch (size) { + case 2: + src_val = swab16(src_val); + break; + case 4: + src_val = swab32(src_val); + break; + case 8: + src_val = swab64(src_val); + break; + case 16: + default: + BUG(); + break; + }; + } + return __do_int_store(dst_addr, size, src_val, asi); +} + +static inline void advance(struct pt_regs *regs) +{ + regs->tpc = regs->tnpc; + regs->tnpc += 4; + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } +} + +static inline int floating_point_load_or_store_p(unsigned int insn) +{ + return (insn >> 24) & 1; +} + +static inline int ok_for_kernel(unsigned int insn) +{ + return !floating_point_load_or_store_p(insn); +} + +static void kernel_mna_trap_fault(int fixup_tstate_asi) +{ + struct pt_regs *regs = current_thread_info()->kern_una_regs; + unsigned int insn = current_thread_info()->kern_una_insn; + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (!entry) { + unsigned long address; + + address = compute_effective_address(regs, insn, + ((insn >> 25) & 0x1f)); + if (address < PAGE_SIZE) { + printk(KERN_ALERT "Unable to handle kernel NULL " + "pointer dereference in mna handler"); + } else + printk(KERN_ALERT "Unable to handle kernel paging " + "request in mna handler"); + printk(KERN_ALERT " at virtual address %016lx\n",address); + printk(KERN_ALERT "current->{active_,}mm->context = %016lx\n", + (current->mm ? CTX_HWBITS(current->mm->context) : + CTX_HWBITS(current->active_mm->context))); + printk(KERN_ALERT "current->{active_,}mm->pgd = %016lx\n", + (current->mm ? (unsigned long) current->mm->pgd : + (unsigned long) current->active_mm->pgd)); + die_if_kernel("Oops", regs); + /* Not reached */ + } + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + + if (fixup_tstate_asi) { + regs->tstate &= ~TSTATE_ASI; + regs->tstate |= (ASI_AIUS << 24UL); + } +} + +static void log_unaligned(struct pt_regs *regs) +{ + static unsigned long count, last_time; + + if (time_after(jiffies, last_time + 5 * HZ)) + count = 0; + if (count < 5) { + last_time = jiffies; + count++; + printk("Kernel unaligned access at TPC[%lx] %pS\n", + regs->tpc, (void *) regs->tpc); + } +} + +asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) +{ + enum direction dir = decode_direction(insn); + int size = decode_access_size(insn); + int orig_asi, asi; + + current_thread_info()->kern_una_regs = regs; + current_thread_info()->kern_una_insn = insn; + + orig_asi = asi = decode_asi(insn, regs); + + /* If this is a {get,put}_user() on an unaligned userspace pointer, + * just signal a fault and do not log the event. + */ + if (asi == ASI_AIUS) { + kernel_mna_trap_fault(0); + return; + } + + log_unaligned(regs); + + if (!ok_for_kernel(insn) || dir == both) { + printk("Unsupported unaligned load/store trap for kernel " + "at <%016lx>.\n", regs->tpc); + unaligned_panic("Kernel does fpu/atomic " + "unaligned load/store.", regs); + + kernel_mna_trap_fault(0); + } else { + unsigned long addr, *reg_addr; + int err; + + addr = compute_effective_address(regs, insn, + ((insn >> 25) & 0x1f)); +#ifdef DEBUG_MNA + printk("KMNA: pc=%016lx [dir=%s addr=%016lx size=%d] " + "retpc[%016lx]\n", + regs->tpc, dirstrings[dir], addr, size, + regs->u_regs[UREG_RETPC]); +#endif + switch (asi) { + case ASI_NL: + case ASI_AIUPL: + case ASI_AIUSL: + case ASI_PL: + case ASI_SL: + case ASI_PNFL: + case ASI_SNFL: + asi &= ~0x08; + break; + }; + switch (dir) { + case load: + reg_addr = fetch_reg_addr(((insn>>25)&0x1f), regs); + err = do_int_load(reg_addr, size, + (unsigned long *) addr, + decode_signedness(insn), asi); + if (likely(!err) && unlikely(asi != orig_asi)) { + unsigned long val_in = *reg_addr; + switch (size) { + case 2: + val_in = swab16(val_in); + break; + case 4: + val_in = swab32(val_in); + break; + case 8: + val_in = swab64(val_in); + break; + case 16: + default: + BUG(); + break; + }; + *reg_addr = val_in; + } + break; + + case store: + err = do_int_store(((insn>>25)&0x1f), size, + (unsigned long *) addr, regs, + asi, orig_asi); + break; + + default: + panic("Impossible kernel unaligned trap."); + /* Not reached... */ + } + if (unlikely(err)) + kernel_mna_trap_fault(1); + else + advance(regs); + } +} + +static char popc_helper[] = { +0, 1, 1, 2, 1, 2, 2, 3, +1, 2, 2, 3, 2, 3, 3, 4, +}; + +int handle_popc(u32 insn, struct pt_regs *regs) +{ + u64 value; + int ret, i, rd = ((insn >> 25) & 0x1f); + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + + if (insn & 0x2000) { + maybe_flush_windows(0, 0, rd, from_kernel); + value = sign_extend_imm13(insn); + } else { + maybe_flush_windows(0, insn & 0x1f, rd, from_kernel); + value = fetch_reg(insn & 0x1f, regs); + } + for (ret = 0, i = 0; i < 16; i++) { + ret += popc_helper[value & 0xf]; + value >>= 4; + } + if (rd < 16) { + if (rd) + regs->u_regs[rd] = ret; + } else { + if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + put_user(ret, &win32->locals[rd - 16]); + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + put_user(ret, &win->locals[rd - 16]); + } + } + advance(regs); + return 1; +} + +extern void do_fpother(struct pt_regs *regs); +extern void do_privact(struct pt_regs *regs); +extern void spitfire_data_access_exception(struct pt_regs *regs, + unsigned long sfsr, + unsigned long sfar); +extern void sun4v_data_access_exception(struct pt_regs *regs, + unsigned long addr, + unsigned long type_ctx); + +int handle_ldf_stq(u32 insn, struct pt_regs *regs) +{ + unsigned long addr = compute_effective_address(regs, insn, 0); + int freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + struct fpustate *f = FPUSTATE; + int asi = decode_asi(insn, regs); + int flag = (freg < 32) ? FPRS_DL : FPRS_DU; + + save_and_clear_fpu(); + current_thread_info()->xfsr[0] &= ~0x1c000; + if (freg & 3) { + current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */; + do_fpother(regs); + return 0; + } + if (insn & 0x200000) { + /* STQ */ + u64 first = 0, second = 0; + + if (current_thread_info()->fpsaved[0] & flag) { + first = *(u64 *)&f->regs[freg]; + second = *(u64 *)&f->regs[freg+2]; + } + if (asi < 0x80) { + do_privact(regs); + return 1; + } + switch (asi) { + case ASI_P: + case ASI_S: break; + case ASI_PL: + case ASI_SL: + { + /* Need to convert endians */ + u64 tmp = __swab64p(&first); + + first = __swab64p(&second); + second = tmp; + break; + } + default: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); + return 1; + } + if (put_user (first >> 32, (u32 __user *)addr) || + __put_user ((u32)first, (u32 __user *)(addr + 4)) || + __put_user (second >> 32, (u32 __user *)(addr + 8)) || + __put_user ((u32)second, (u32 __user *)(addr + 12))) { + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); + return 1; + } + } else { + /* LDF, LDDF, LDQF */ + u32 data[4] __attribute__ ((aligned(8))); + int size, i; + int err; + + if (asi < 0x80) { + do_privact(regs); + return 1; + } else if (asi > ASI_SNFL) { + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); + return 1; + } + switch (insn & 0x180000) { + case 0x000000: size = 1; break; + case 0x100000: size = 4; break; + default: size = 2; break; + } + for (i = 0; i < size; i++) + data[i] = 0; + + err = get_user (data[0], (u32 __user *) addr); + if (!err) { + for (i = 1; i < size; i++) + err |= __get_user (data[i], (u32 __user *)(addr + 4*i)); + } + if (err && !(asi & 0x2 /* NF */)) { + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, addr, 0); + else + spitfire_data_access_exception(regs, 0, addr); + return 1; + } + if (asi & 0x8) /* Little */ { + u64 tmp; + + switch (size) { + case 1: data[0] = le32_to_cpup(data + 0); break; + default:*(u64 *)(data + 0) = le64_to_cpup((u64 *)(data + 0)); + break; + case 4: tmp = le64_to_cpup((u64 *)(data + 0)); + *(u64 *)(data + 0) = le64_to_cpup((u64 *)(data + 2)); + *(u64 *)(data + 2) = tmp; + break; + } + } + if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) { + current_thread_info()->fpsaved[0] = FPRS_FEF; + current_thread_info()->gsr[0] = 0; + } + if (!(current_thread_info()->fpsaved[0] & flag)) { + if (freg < 32) + memset(f->regs, 0, 32*sizeof(u32)); + else + memset(f->regs+32, 0, 32*sizeof(u32)); + } + memcpy(f->regs + freg, data, size * 4); + current_thread_info()->fpsaved[0] |= flag; + } + advance(regs); + return 1; +} + +void handle_ld_nf(u32 insn, struct pt_regs *regs) +{ + int rd = ((insn >> 25) & 0x1f); + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + unsigned long *reg; + + maybe_flush_windows(0, 0, rd, from_kernel); + reg = fetch_reg_addr(rd, regs); + if (from_kernel || rd < 16) { + reg[0] = 0; + if ((insn & 0x780000) == 0x180000) + reg[1] = 0; + } else if (test_thread_flag(TIF_32BIT)) { + put_user(0, (int __user *) reg); + if ((insn & 0x780000) == 0x180000) + put_user(0, ((int __user *) reg) + 1); + } else { + put_user(0, (unsigned long __user *) reg); + if ((insn & 0x780000) == 0x180000) + put_user(0, (unsigned long __user *) reg + 1); + } + advance(regs); +} + +void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) +{ + unsigned long pc = regs->tpc; + unsigned long tstate = regs->tstate; + u32 insn; + u32 first, second; + u64 value; + u8 freg; + int flag; + struct fpustate *f = FPUSTATE; + + if (tstate & TSTATE_PRIV) + die_if_kernel("lddfmna from kernel", regs); + if (test_thread_flag(TIF_32BIT)) + pc = (u32)pc; + if (get_user(insn, (u32 __user *) pc) != -EFAULT) { + int asi = decode_asi(insn, regs); + if ((asi > ASI_SNFL) || + (asi < ASI_P)) + goto daex; + if (get_user(first, (u32 __user *)sfar) || + get_user(second, (u32 __user *)(sfar + 4))) { + if (asi & 0x2) /* NF */ { + first = 0; second = 0; + } else + goto daex; + } + save_and_clear_fpu(); + freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + value = (((u64)first) << 32) | second; + if (asi & 0x8) /* Little */ + value = __swab64p(&value); + flag = (freg < 32) ? FPRS_DL : FPRS_DU; + if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) { + current_thread_info()->fpsaved[0] = FPRS_FEF; + current_thread_info()->gsr[0] = 0; + } + if (!(current_thread_info()->fpsaved[0] & flag)) { + if (freg < 32) + memset(f->regs, 0, 32*sizeof(u32)); + else + memset(f->regs+32, 0, 32*sizeof(u32)); + } + *(u64 *)(f->regs + freg) = value; + current_thread_info()->fpsaved[0] |= flag; + } else { +daex: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, sfar, sfsr); + else + spitfire_data_access_exception(regs, sfsr, sfar); + return; + } + advance(regs); + return; +} + +void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) +{ + unsigned long pc = regs->tpc; + unsigned long tstate = regs->tstate; + u32 insn; + u64 value; + u8 freg; + int flag; + struct fpustate *f = FPUSTATE; + + if (tstate & TSTATE_PRIV) + die_if_kernel("stdfmna from kernel", regs); + if (test_thread_flag(TIF_32BIT)) + pc = (u32)pc; + if (get_user(insn, (u32 __user *) pc) != -EFAULT) { + int asi = decode_asi(insn, regs); + freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + value = 0; + flag = (freg < 32) ? FPRS_DL : FPRS_DU; + if ((asi > ASI_SNFL) || + (asi < ASI_P)) + goto daex; + save_and_clear_fpu(); + if (current_thread_info()->fpsaved[0] & flag) + value = *(u64 *)&f->regs[freg]; + switch (asi) { + case ASI_P: + case ASI_S: break; + case ASI_PL: + case ASI_SL: + value = __swab64p(&value); break; + default: goto daex; + } + if (put_user (value >> 32, (u32 __user *) sfar) || + __put_user ((u32)value, (u32 __user *)(sfar + 4))) + goto daex; + } else { +daex: + if (tlb_type == hypervisor) + sun4v_data_access_exception(regs, sfar, sfsr); + else + spitfire_data_access_exception(regs, sfsr, sfar); + return; + } + advance(regs); + return; +} diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c new file mode 100644 index 000000000000..791c15138f3a --- /dev/null +++ b/arch/sparc/kernel/us2e_cpufreq.c @@ -0,0 +1,413 @@ +/* us2e_cpufreq.c: UltraSPARC-IIe cpu frequency support + * + * Copyright (C) 2003 David S. Miller (davem@redhat.com) + * + * Many thanks to Dominik Brodowski for fixing up the cpufreq + * infrastructure in order to make this driver easier to implement. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/cpufreq.h> +#include <linux/threads.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/init.h> + +#include <asm/asi.h> +#include <asm/timer.h> + +static struct cpufreq_driver *cpufreq_us2e_driver; + +struct us2e_freq_percpu_info { + struct cpufreq_frequency_table table[6]; +}; + +/* Indexed by cpu number. */ +static struct us2e_freq_percpu_info *us2e_freq_table; + +#define HBIRD_MEM_CNTL0_ADDR 0x1fe0000f010UL +#define HBIRD_ESTAR_MODE_ADDR 0x1fe0000f080UL + +/* UltraSPARC-IIe has five dividers: 1, 2, 4, 6, and 8. These are controlled + * in the ESTAR mode control register. + */ +#define ESTAR_MODE_DIV_1 0x0000000000000000UL +#define ESTAR_MODE_DIV_2 0x0000000000000001UL +#define ESTAR_MODE_DIV_4 0x0000000000000003UL +#define ESTAR_MODE_DIV_6 0x0000000000000002UL +#define ESTAR_MODE_DIV_8 0x0000000000000004UL +#define ESTAR_MODE_DIV_MASK 0x0000000000000007UL + +#define MCTRL0_SREFRESH_ENAB 0x0000000000010000UL +#define MCTRL0_REFR_COUNT_MASK 0x0000000000007f00UL +#define MCTRL0_REFR_COUNT_SHIFT 8 +#define MCTRL0_REFR_INTERVAL 7800 +#define MCTRL0_REFR_CLKS_P_CNT 64 + +static unsigned long read_hbreg(unsigned long addr) +{ + unsigned long ret; + + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=&r" (ret) + : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E)); + return ret; +} + +static void write_hbreg(unsigned long addr, unsigned long val) +{ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E) + : "memory"); + if (addr == HBIRD_ESTAR_MODE_ADDR) { + /* Need to wait 16 clock cycles for the PLL to lock. */ + udelay(1); + } +} + +static void self_refresh_ctl(int enable) +{ + unsigned long mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR); + + if (enable) + mctrl |= MCTRL0_SREFRESH_ENAB; + else + mctrl &= ~MCTRL0_SREFRESH_ENAB; + write_hbreg(HBIRD_MEM_CNTL0_ADDR, mctrl); + (void) read_hbreg(HBIRD_MEM_CNTL0_ADDR); +} + +static void frob_mem_refresh(int cpu_slowing_down, + unsigned long clock_tick, + unsigned long old_divisor, unsigned long divisor) +{ + unsigned long old_refr_count, refr_count, mctrl; + + refr_count = (clock_tick * MCTRL0_REFR_INTERVAL); + refr_count /= (MCTRL0_REFR_CLKS_P_CNT * divisor * 1000000000UL); + + mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR); + old_refr_count = (mctrl & MCTRL0_REFR_COUNT_MASK) + >> MCTRL0_REFR_COUNT_SHIFT; + + mctrl &= ~MCTRL0_REFR_COUNT_MASK; + mctrl |= refr_count << MCTRL0_REFR_COUNT_SHIFT; + write_hbreg(HBIRD_MEM_CNTL0_ADDR, mctrl); + mctrl = read_hbreg(HBIRD_MEM_CNTL0_ADDR); + + if (cpu_slowing_down && !(mctrl & MCTRL0_SREFRESH_ENAB)) { + unsigned long usecs; + + /* We have to wait for both refresh counts (old + * and new) to go to zero. + */ + usecs = (MCTRL0_REFR_CLKS_P_CNT * + (refr_count + old_refr_count) * + 1000000UL * + old_divisor) / clock_tick; + udelay(usecs + 1UL); + } +} + +static void us2e_transition(unsigned long estar, unsigned long new_bits, + unsigned long clock_tick, + unsigned long old_divisor, unsigned long divisor) +{ + unsigned long flags; + + local_irq_save(flags); + + estar &= ~ESTAR_MODE_DIV_MASK; + + /* This is based upon the state transition diagram in the IIe manual. */ + if (old_divisor == 2 && divisor == 1) { + self_refresh_ctl(0); + write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits); + frob_mem_refresh(0, clock_tick, old_divisor, divisor); + } else if (old_divisor == 1 && divisor == 2) { + frob_mem_refresh(1, clock_tick, old_divisor, divisor); + write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits); + self_refresh_ctl(1); + } else if (old_divisor == 1 && divisor > 2) { + us2e_transition(estar, ESTAR_MODE_DIV_2, clock_tick, + 1, 2); + us2e_transition(estar, new_bits, clock_tick, + 2, divisor); + } else if (old_divisor > 2 && divisor == 1) { + us2e_transition(estar, ESTAR_MODE_DIV_2, clock_tick, + old_divisor, 2); + us2e_transition(estar, new_bits, clock_tick, + 2, divisor); + } else if (old_divisor < divisor) { + frob_mem_refresh(0, clock_tick, old_divisor, divisor); + write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits); + } else if (old_divisor > divisor) { + write_hbreg(HBIRD_ESTAR_MODE_ADDR, estar | new_bits); + frob_mem_refresh(1, clock_tick, old_divisor, divisor); + } else { + BUG(); + } + + local_irq_restore(flags); +} + +static unsigned long index_to_estar_mode(unsigned int index) +{ + switch (index) { + case 0: + return ESTAR_MODE_DIV_1; + + case 1: + return ESTAR_MODE_DIV_2; + + case 2: + return ESTAR_MODE_DIV_4; + + case 3: + return ESTAR_MODE_DIV_6; + + case 4: + return ESTAR_MODE_DIV_8; + + default: + BUG(); + }; +} + +static unsigned long index_to_divisor(unsigned int index) +{ + switch (index) { + case 0: + return 1; + + case 1: + return 2; + + case 2: + return 4; + + case 3: + return 6; + + case 4: + return 8; + + default: + BUG(); + }; +} + +static unsigned long estar_to_divisor(unsigned long estar) +{ + unsigned long ret; + + switch (estar & ESTAR_MODE_DIV_MASK) { + case ESTAR_MODE_DIV_1: + ret = 1; + break; + case ESTAR_MODE_DIV_2: + ret = 2; + break; + case ESTAR_MODE_DIV_4: + ret = 4; + break; + case ESTAR_MODE_DIV_6: + ret = 6; + break; + case ESTAR_MODE_DIV_8: + ret = 8; + break; + default: + BUG(); + }; + + return ret; +} + +static unsigned int us2e_freq_get(unsigned int cpu) +{ + cpumask_t cpus_allowed; + unsigned long clock_tick, estar; + + if (!cpu_online(cpu)) + return 0; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + clock_tick = sparc64_get_clock_tick(cpu) / 1000; + estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); + + set_cpus_allowed(current, cpus_allowed); + + return clock_tick / estar_to_divisor(estar); +} + +static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) +{ + unsigned long new_bits, new_freq; + unsigned long clock_tick, divisor, old_divisor, estar; + cpumask_t cpus_allowed; + struct cpufreq_freqs freqs; + + if (!cpu_online(cpu)) + return; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; + new_bits = index_to_estar_mode(index); + divisor = index_to_divisor(index); + new_freq /= divisor; + + estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); + + old_divisor = estar_to_divisor(estar); + + freqs.old = clock_tick / old_divisor; + freqs.new = new_freq; + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + if (old_divisor != divisor) + us2e_transition(estar, new_bits, clock_tick * 1000, + old_divisor, divisor); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + set_cpus_allowed(current, cpus_allowed); +} + +static int us2e_freq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int new_index = 0; + + if (cpufreq_frequency_table_target(policy, + &us2e_freq_table[policy->cpu].table[0], + target_freq, relation, &new_index)) + return -EINVAL; + + us2e_set_cpu_divider_index(policy->cpu, new_index); + + return 0; +} + +static int us2e_freq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, + &us2e_freq_table[policy->cpu].table[0]); +} + +static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000; + struct cpufreq_frequency_table *table = + &us2e_freq_table[cpu].table[0]; + + table[0].index = 0; + table[0].frequency = clock_tick / 1; + table[1].index = 1; + table[1].frequency = clock_tick / 2; + table[2].index = 2; + table[2].frequency = clock_tick / 4; + table[2].index = 3; + table[2].frequency = clock_tick / 6; + table[2].index = 4; + table[2].frequency = clock_tick / 8; + table[2].index = 5; + table[3].frequency = CPUFREQ_TABLE_END; + + policy->cpuinfo.transition_latency = 0; + policy->cur = clock_tick; + + return cpufreq_frequency_table_cpuinfo(policy, table); +} + +static int us2e_freq_cpu_exit(struct cpufreq_policy *policy) +{ + if (cpufreq_us2e_driver) + us2e_set_cpu_divider_index(policy->cpu, 0); + + return 0; +} + +static int __init us2e_freq_init(void) +{ + unsigned long manuf, impl, ver; + int ret; + + if (tlb_type != spitfire) + return -ENODEV; + + __asm__("rdpr %%ver, %0" : "=r" (ver)); + manuf = ((ver >> 48) & 0xffff); + impl = ((ver >> 32) & 0xffff); + + if (manuf == 0x17 && impl == 0x13) { + struct cpufreq_driver *driver; + + ret = -ENOMEM; + driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); + if (!driver) + goto err_out; + + us2e_freq_table = kzalloc( + (NR_CPUS * sizeof(struct us2e_freq_percpu_info)), + GFP_KERNEL); + if (!us2e_freq_table) + goto err_out; + + driver->init = us2e_freq_cpu_init; + driver->verify = us2e_freq_verify; + driver->target = us2e_freq_target; + driver->get = us2e_freq_get; + driver->exit = us2e_freq_cpu_exit; + driver->owner = THIS_MODULE, + strcpy(driver->name, "UltraSPARC-IIe"); + + cpufreq_us2e_driver = driver; + ret = cpufreq_register_driver(driver); + if (ret) + goto err_out; + + return 0; + +err_out: + if (driver) { + kfree(driver); + cpufreq_us2e_driver = NULL; + } + kfree(us2e_freq_table); + us2e_freq_table = NULL; + return ret; + } + + return -ENODEV; +} + +static void __exit us2e_freq_exit(void) +{ + if (cpufreq_us2e_driver) { + cpufreq_unregister_driver(cpufreq_us2e_driver); + kfree(cpufreq_us2e_driver); + cpufreq_us2e_driver = NULL; + kfree(us2e_freq_table); + us2e_freq_table = NULL; + } +} + +MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); +MODULE_DESCRIPTION("cpufreq driver for UltraSPARC-IIe"); +MODULE_LICENSE("GPL"); + +module_init(us2e_freq_init); +module_exit(us2e_freq_exit); diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c new file mode 100644 index 000000000000..365b6464e2ce --- /dev/null +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -0,0 +1,274 @@ +/* us3_cpufreq.c: UltraSPARC-III cpu frequency support + * + * Copyright (C) 2003 David S. Miller (davem@redhat.com) + * + * Many thanks to Dominik Brodowski for fixing up the cpufreq + * infrastructure in order to make this driver easier to implement. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/cpufreq.h> +#include <linux/threads.h> +#include <linux/slab.h> +#include <linux/init.h> + +#include <asm/head.h> +#include <asm/timer.h> + +static struct cpufreq_driver *cpufreq_us3_driver; + +struct us3_freq_percpu_info { + struct cpufreq_frequency_table table[4]; +}; + +/* Indexed by cpu number. */ +static struct us3_freq_percpu_info *us3_freq_table; + +/* UltraSPARC-III has three dividers: 1, 2, and 32. These are controlled + * in the Safari config register. + */ +#define SAFARI_CFG_DIV_1 0x0000000000000000UL +#define SAFARI_CFG_DIV_2 0x0000000040000000UL +#define SAFARI_CFG_DIV_32 0x0000000080000000UL +#define SAFARI_CFG_DIV_MASK 0x00000000C0000000UL + +static unsigned long read_safari_cfg(void) +{ + unsigned long ret; + + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=&r" (ret) + : "i" (ASI_SAFARI_CONFIG)); + return ret; +} + +static void write_safari_cfg(unsigned long val) +{ + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (val), "i" (ASI_SAFARI_CONFIG) + : "memory"); +} + +static unsigned long get_current_freq(unsigned int cpu, unsigned long safari_cfg) +{ + unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000; + unsigned long ret; + + switch (safari_cfg & SAFARI_CFG_DIV_MASK) { + case SAFARI_CFG_DIV_1: + ret = clock_tick / 1; + break; + case SAFARI_CFG_DIV_2: + ret = clock_tick / 2; + break; + case SAFARI_CFG_DIV_32: + ret = clock_tick / 32; + break; + default: + BUG(); + }; + + return ret; +} + +static unsigned int us3_freq_get(unsigned int cpu) +{ + cpumask_t cpus_allowed; + unsigned long reg; + unsigned int ret; + + if (!cpu_online(cpu)) + return 0; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + reg = read_safari_cfg(); + ret = get_current_freq(cpu, reg); + + set_cpus_allowed(current, cpus_allowed); + + return ret; +} + +static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) +{ + unsigned long new_bits, new_freq, reg; + cpumask_t cpus_allowed; + struct cpufreq_freqs freqs; + + if (!cpu_online(cpu)) + return; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + new_freq = sparc64_get_clock_tick(cpu) / 1000; + switch (index) { + case 0: + new_bits = SAFARI_CFG_DIV_1; + new_freq /= 1; + break; + case 1: + new_bits = SAFARI_CFG_DIV_2; + new_freq /= 2; + break; + case 2: + new_bits = SAFARI_CFG_DIV_32; + new_freq /= 32; + break; + + default: + BUG(); + }; + + reg = read_safari_cfg(); + + freqs.old = get_current_freq(cpu, reg); + freqs.new = new_freq; + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + reg &= ~SAFARI_CFG_DIV_MASK; + reg |= new_bits; + write_safari_cfg(reg); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + set_cpus_allowed(current, cpus_allowed); +} + +static int us3_freq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int new_index = 0; + + if (cpufreq_frequency_table_target(policy, + &us3_freq_table[policy->cpu].table[0], + target_freq, + relation, + &new_index)) + return -EINVAL; + + us3_set_cpu_divider_index(policy->cpu, new_index); + + return 0; +} + +static int us3_freq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, + &us3_freq_table[policy->cpu].table[0]); +} + +static int __init us3_freq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000; + struct cpufreq_frequency_table *table = + &us3_freq_table[cpu].table[0]; + + table[0].index = 0; + table[0].frequency = clock_tick / 1; + table[1].index = 1; + table[1].frequency = clock_tick / 2; + table[2].index = 2; + table[2].frequency = clock_tick / 32; + table[3].index = 0; + table[3].frequency = CPUFREQ_TABLE_END; + + policy->cpuinfo.transition_latency = 0; + policy->cur = clock_tick; + + return cpufreq_frequency_table_cpuinfo(policy, table); +} + +static int us3_freq_cpu_exit(struct cpufreq_policy *policy) +{ + if (cpufreq_us3_driver) + us3_set_cpu_divider_index(policy->cpu, 0); + + return 0; +} + +static int __init us3_freq_init(void) +{ + unsigned long manuf, impl, ver; + int ret; + + if (tlb_type != cheetah && tlb_type != cheetah_plus) + return -ENODEV; + + __asm__("rdpr %%ver, %0" : "=r" (ver)); + manuf = ((ver >> 48) & 0xffff); + impl = ((ver >> 32) & 0xffff); + + if (manuf == CHEETAH_MANUF && + (impl == CHEETAH_IMPL || + impl == CHEETAH_PLUS_IMPL || + impl == JAGUAR_IMPL || + impl == PANTHER_IMPL)) { + struct cpufreq_driver *driver; + + ret = -ENOMEM; + driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); + if (!driver) + goto err_out; + + us3_freq_table = kzalloc( + (NR_CPUS * sizeof(struct us3_freq_percpu_info)), + GFP_KERNEL); + if (!us3_freq_table) + goto err_out; + + driver->init = us3_freq_cpu_init; + driver->verify = us3_freq_verify; + driver->target = us3_freq_target; + driver->get = us3_freq_get; + driver->exit = us3_freq_cpu_exit; + driver->owner = THIS_MODULE, + strcpy(driver->name, "UltraSPARC-III"); + + cpufreq_us3_driver = driver; + ret = cpufreq_register_driver(driver); + if (ret) + goto err_out; + + return 0; + +err_out: + if (driver) { + kfree(driver); + cpufreq_us3_driver = NULL; + } + kfree(us3_freq_table); + us3_freq_table = NULL; + return ret; + } + + return -ENODEV; +} + +static void __exit us3_freq_exit(void) +{ + if (cpufreq_us3_driver) { + cpufreq_unregister_driver(cpufreq_us3_driver); + kfree(cpufreq_us3_driver); + cpufreq_us3_driver = NULL; + kfree(us3_freq_table); + us3_freq_table = NULL; + } +} + +MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); +MODULE_DESCRIPTION("cpufreq driver for UltraSPARC-III"); +MODULE_LICENSE("GPL"); + +module_init(us3_freq_init); +module_exit(us3_freq_exit); diff --git a/arch/sparc/kernel/utrap.S b/arch/sparc/kernel/utrap.S new file mode 100644 index 000000000000..b7f0f3f3a909 --- /dev/null +++ b/arch/sparc/kernel/utrap.S @@ -0,0 +1,29 @@ + .globl utrap_trap + .type utrap_trap,#function +utrap_trap: /* %g3=handler,%g4=level */ + TRAP_LOAD_THREAD_REG(%g6, %g1) + ldx [%g6 + TI_UTRAPS], %g1 + brnz,pt %g1, invoke_utrap + nop + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + call bad_trap + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + +invoke_utrap: + sllx %g3, 3, %g3 + ldx [%g1 + %g3], %g1 + save %sp, -128, %sp + rdpr %tstate, %l6 + rdpr %cwp, %l7 + andn %l6, TSTATE_CWP, %l6 + wrpr %l6, %l7, %tstate + rdpr %tpc, %l6 + rdpr %tnpc, %l7 + wrpr %g1, 0, %tnpc + done + .size utrap_trap,.-utrap_trap diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c new file mode 100644 index 000000000000..92b1f8ec01de --- /dev/null +++ b/arch/sparc/kernel/vio.c @@ -0,0 +1,451 @@ +/* vio.c: Virtual I/O channel devices probing infrastructure. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard <hollisb@us.ibm.com> + * Stephen Rothwell + * + * Adapted to sparc64 by David S. Miller davem@davemloft.net + */ + +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/init.h> + +#include <asm/mdesc.h> +#include <asm/vio.h> + +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *matches, + const struct vio_dev *dev) +{ + const char *type, *compat; + int len; + + type = dev->type; + compat = dev->compat; + len = dev->compat_len; + + while (matches->type[0] || matches->compat[0]) { + int match = 1; + if (matches->type[0]) + match &= !strcmp(matches->type, type); + + if (matches->compat[0]) { + match &= len && + of_find_in_proplist(compat, matches->compat, len); + } + if (match) + return matches; + matches++; + } + return NULL; +} + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *matches = vio_drv->id_table; + + if (!matches) + return 0; + + return vio_match_device(matches, vio_dev) != NULL; +} + +static int vio_device_probe(struct device *dev) +{ + struct vio_dev *vdev = to_vio_dev(dev); + struct vio_driver *drv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + if (drv->probe) { + id = vio_match_device(drv->id_table, vdev); + if (id) + error = drv->probe(vdev, id); + } + + return error; +} + +static int vio_device_remove(struct device *dev) +{ + struct vio_dev *vdev = to_vio_dev(dev); + struct vio_driver *drv = to_vio_driver(dev->driver); + + if (drv->remove) + return drv->remove(vdev); + + return 1; +} + +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *vdev = to_vio_dev(dev); + const char *str = "none"; + + if (!strcmp(vdev->type, "vnet-port")) + str = "vnet"; + else if (!strcmp(vdev->type, "vdc-port")) + str = "vdisk"; + + return sprintf(buf, "%s\n", str); +} + +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *vdev = to_vio_dev(dev); + return sprintf(buf, "%s\n", vdev->type); +} + +static struct device_attribute vio_dev_attrs[] = { + __ATTR_RO(devspec), + __ATTR_RO(type), + __ATTR_NULL +}; + +static struct bus_type vio_bus_type = { + .name = "vio", + .dev_attrs = vio_dev_attrs, + .match = vio_bus_match, + .probe = vio_device_probe, + .remove = vio_device_remove, +}; + +int vio_register_driver(struct vio_driver *viodrv) +{ + viodrv->driver.bus = &vio_bus_type; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(vio_register_driver); + +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +static void vio_dev_release(struct device *dev) +{ + kfree(to_vio_dev(dev)); +} + +static ssize_t +show_pciobppath_attr(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct vio_dev *vdev; + struct device_node *dp; + + vdev = to_vio_dev(dev); + dp = vdev->dp; + + return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name); +} + +static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, + show_pciobppath_attr, NULL); + +static struct device_node *cdev_node; + +static struct vio_dev *root_vdev; +static u64 cdev_cfg_handle; + +static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, + struct vio_dev *vdev) +{ + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + const u64 *chan_id; + const u64 *irq; + u64 target; + + target = mdesc_arc_target(hp, a); + + irq = mdesc_get_property(hp, target, "tx-ino", NULL); + if (irq) + vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + + irq = mdesc_get_property(hp, target, "rx-ino", NULL); + if (irq) + vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + + chan_id = mdesc_get_property(hp, target, "id", NULL); + if (chan_id) + vdev->channel_id = *chan_id; + } +} + +static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, + struct device *parent) +{ + const char *type, *compat, *bus_id_name; + struct device_node *dp; + struct vio_dev *vdev; + int err, tlen, clen; + const u64 *id, *cfg_handle; + u64 a; + + type = mdesc_get_property(hp, mp, "device-type", &tlen); + if (!type) { + type = mdesc_get_property(hp, mp, "name", &tlen); + if (!type) { + type = mdesc_node_name(hp, mp); + tlen = strlen(type) + 1; + } + } + if (tlen > VIO_MAX_TYPE_LEN) { + printk(KERN_ERR "VIO: Type string [%s] is too long.\n", + type); + return NULL; + } + + id = mdesc_get_property(hp, mp, "id", NULL); + + cfg_handle = NULL; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + cfg_handle = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (cfg_handle) + break; + } + + bus_id_name = type; + if (!strcmp(type, "domain-services-port")) + bus_id_name = "ds"; + + if (strlen(bus_id_name) >= BUS_ID_SIZE - 4) { + printk(KERN_ERR "VIO: bus_id_name [%s] is too long.\n", + bus_id_name); + return NULL; + } + + compat = mdesc_get_property(hp, mp, "device-type", &clen); + if (!compat) { + clen = 0; + } else if (clen > VIO_MAX_COMPAT_LEN) { + printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n", + clen, type); + return NULL; + } + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) { + printk(KERN_ERR "VIO: Could not allocate vio_dev\n"); + return NULL; + } + + vdev->mp = mp; + memcpy(vdev->type, type, tlen); + if (compat) + memcpy(vdev->compat, compat, clen); + else + memset(vdev->compat, 0, sizeof(vdev->compat)); + vdev->compat_len = clen; + + vdev->channel_id = ~0UL; + vdev->tx_irq = ~0; + vdev->rx_irq = ~0; + + vio_fill_channel_info(hp, mp, vdev); + + if (!id) { + dev_set_name(&vdev->dev, "%s", bus_id_name); + vdev->dev_no = ~(u64)0; + } else if (!cfg_handle) { + dev_set_name(&vdev->dev, "%s-%lu", bus_id_name, *id); + vdev->dev_no = *id; + } else { + dev_set_name(&vdev->dev, "%s-%lu-%lu", bus_id_name, + *cfg_handle, *id); + vdev->dev_no = *cfg_handle; + } + + vdev->dev.parent = parent; + vdev->dev.bus = &vio_bus_type; + vdev->dev.release = vio_dev_release; + + if (parent == NULL) { + dp = cdev_node; + } else if (to_vio_dev(parent) == root_vdev) { + dp = of_get_next_child(cdev_node, NULL); + while (dp) { + if (!strcmp(dp->type, type)) + break; + + dp = of_get_next_child(cdev_node, dp); + } + } else { + dp = to_vio_dev(parent)->dp; + } + vdev->dp = dp; + + printk(KERN_INFO "VIO: Adding device %s\n", dev_name(&vdev->dev)); + + err = device_register(&vdev->dev); + if (err) { + printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", + dev_name(&vdev->dev), err); + kfree(vdev); + return NULL; + } + if (vdev->dp) + err = sysfs_create_file(&vdev->dev.kobj, + &dev_attr_obppath.attr); + + return vdev; +} + +static void vio_add(struct mdesc_handle *hp, u64 node) +{ + (void) vio_create_one(hp, node, &root_vdev->dev); +} + +static int vio_md_node_match(struct device *dev, void *arg) +{ + struct vio_dev *vdev = to_vio_dev(dev); + + if (vdev->mp == (u64) arg) + return 1; + + return 0; +} + +static void vio_remove(struct mdesc_handle *hp, u64 node) +{ + struct device *dev; + + dev = device_find_child(&root_vdev->dev, (void *) node, + vio_md_node_match); + if (dev) { + printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); + + device_unregister(dev); + } +} + +static struct mdesc_notifier_client vio_device_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "virtual-device-port", +}; + +/* We are only interested in domain service ports under the + * "domain-services" node. On control nodes there is another port + * under "openboot" that we should not mess with as aparently that is + * reserved exclusively for OBP use. + */ +static void vio_add_ds(struct mdesc_handle *hp, u64 node) +{ + int found; + u64 a; + + found = 0; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target = mdesc_arc_target(hp, a); + const char *name = mdesc_node_name(hp, target); + + if (!strcmp(name, "domain-services")) { + found = 1; + break; + } + } + + if (found) + (void) vio_create_one(hp, node, &root_vdev->dev); +} + +static struct mdesc_notifier_client vio_ds_notifier = { + .add = vio_add_ds, + .remove = vio_remove, + .node_name = "domain-services-port", +}; + +static const char *channel_devices_node = "channel-devices"; +static const char *channel_devices_compat = "SUNW,sun4v-channel-devices"; +static const char *cfg_handle_prop = "cfg-handle"; + +static int __init vio_init(void) +{ + struct mdesc_handle *hp; + const char *compat; + const u64 *cfg_handle; + int err, len; + u64 root; + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "VIO: Could not register bus type err=%d\n", + err); + return err; + } + + hp = mdesc_grab(); + if (!hp) + return 0; + + root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node); + if (root == MDESC_NODE_NULL) { + printk(KERN_INFO "VIO: No channel-devices MDESC node.\n"); + mdesc_release(hp); + return 0; + } + + cdev_node = of_find_node_by_name(NULL, "channel-devices"); + err = -ENODEV; + if (!cdev_node) { + printk(KERN_INFO "VIO: No channel-devices OBP node.\n"); + goto out_release; + } + + compat = mdesc_get_property(hp, root, "compatible", &len); + if (!compat) { + printk(KERN_ERR "VIO: Channel devices lacks compatible " + "property\n"); + goto out_release; + } + if (!of_find_in_proplist(compat, channel_devices_compat, len)) { + printk(KERN_ERR "VIO: Channel devices node lacks (%s) " + "compat entry.\n", channel_devices_compat); + goto out_release; + } + + cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL); + if (!cfg_handle) { + printk(KERN_ERR "VIO: Channel devices lacks %s property\n", + cfg_handle_prop); + goto out_release; + } + + cdev_cfg_handle = *cfg_handle; + + root_vdev = vio_create_one(hp, root, NULL); + err = -ENODEV; + if (!root_vdev) { + printk(KERN_ERR "VIO: Coult not create root device.\n"); + goto out_release; + } + + mdesc_register_notifier(&vio_device_notifier); + mdesc_register_notifier(&vio_ds_notifier); + + mdesc_release(hp); + + return err; + +out_release: + mdesc_release(hp); + return err; +} + +postcore_initcall(vio_init); diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c new file mode 100644 index 000000000000..708fa1705fbe --- /dev/null +++ b/arch/sparc/kernel/viohs.c @@ -0,0 +1,822 @@ +/* viohs.c: LDOM Virtual I/O handshake helper layer. + * + * Copyright (C) 2007 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/ldc.h> +#include <asm/vio.h> + +int vio_ldc_send(struct vio_driver_state *vio, void *data, int len) +{ + int err, limit = 1000; + + err = -EINVAL; + while (limit-- > 0) { + err = ldc_write(vio->lp, data, len); + if (!err || (err != -EAGAIN)) + break; + udelay(1); + } + + return err; +} +EXPORT_SYMBOL(vio_ldc_send); + +static int send_ctrl(struct vio_driver_state *vio, + struct vio_msg_tag *tag, int len) +{ + tag->sid = vio_send_sid(vio); + return vio_ldc_send(vio, tag, len); +} + +static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env) +{ + tag->type = type; + tag->stype = stype; + tag->stype_env = stype_env; +} + +static int send_version(struct vio_driver_state *vio, u16 major, u16 minor) +{ + struct vio_ver_info pkt; + + vio->_local_sid = (u32) sched_clock(); + + memset(&pkt, 0, sizeof(pkt)); + init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO); + pkt.major = major; + pkt.minor = minor; + pkt.dev_class = vio->dev_class; + + viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n", + major, minor, vio->dev_class); + + return send_ctrl(vio, &pkt.tag, sizeof(pkt)); +} + +static int start_handshake(struct vio_driver_state *vio) +{ + int err; + + viodbg(HS, "START HANDSHAKE\n"); + + vio->hs_state = VIO_HS_INVALID; + + err = send_version(vio, + vio->ver_table[0].major, + vio->ver_table[0].minor); + if (err < 0) + return err; + + return 0; +} + +static void flush_rx_dring(struct vio_driver_state *vio) +{ + struct vio_dring_state *dr; + u64 ident; + + BUG_ON(!(vio->dr_state & VIO_DR_STATE_RXREG)); + + dr = &vio->drings[VIO_DRIVER_RX_RING]; + ident = dr->ident; + + BUG_ON(!vio->desc_buf); + kfree(vio->desc_buf); + vio->desc_buf = NULL; + + memset(dr, 0, sizeof(*dr)); + dr->ident = ident; +} + +void vio_link_state_change(struct vio_driver_state *vio, int event) +{ + if (event == LDC_EVENT_UP) { + vio->hs_state = VIO_HS_INVALID; + + switch (vio->dev_class) { + case VDEV_NETWORK: + case VDEV_NETWORK_SWITCH: + vio->dr_state = (VIO_DR_STATE_TXREQ | + VIO_DR_STATE_RXREQ); + break; + + case VDEV_DISK: + vio->dr_state = VIO_DR_STATE_TXREQ; + break; + case VDEV_DISK_SERVER: + vio->dr_state = VIO_DR_STATE_RXREQ; + break; + } + start_handshake(vio); + } else if (event == LDC_EVENT_RESET) { + vio->hs_state = VIO_HS_INVALID; + + if (vio->dr_state & VIO_DR_STATE_RXREG) + flush_rx_dring(vio); + + vio->dr_state = 0x00; + memset(&vio->ver, 0, sizeof(vio->ver)); + + ldc_disconnect(vio->lp); + } +} +EXPORT_SYMBOL(vio_link_state_change); + +static int handshake_failure(struct vio_driver_state *vio) +{ + struct vio_dring_state *dr; + + /* XXX Put policy here... Perhaps start a timer to fire + * XXX in 100 ms, which will bring the link up and retry + * XXX the handshake. + */ + + viodbg(HS, "HANDSHAKE FAILURE\n"); + + vio->dr_state &= ~(VIO_DR_STATE_TXREG | + VIO_DR_STATE_RXREG); + + dr = &vio->drings[VIO_DRIVER_RX_RING]; + memset(dr, 0, sizeof(*dr)); + + kfree(vio->desc_buf); + vio->desc_buf = NULL; + vio->desc_buf_len = 0; + + vio->hs_state = VIO_HS_INVALID; + + return -ECONNRESET; +} + +static int process_unknown(struct vio_driver_state *vio, void *arg) +{ + struct vio_msg_tag *pkt = arg; + + viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n", + pkt->type, pkt->stype, pkt->stype_env, pkt->sid); + + printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n", + vio->vdev->channel_id); + + ldc_disconnect(vio->lp); + + return -ECONNRESET; +} + +static int send_dreg(struct vio_driver_state *vio) +{ + struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING]; + union { + struct vio_dring_register pkt; + char all[sizeof(struct vio_dring_register) + + (sizeof(struct ldc_trans_cookie) * + dr->ncookies)]; + } u; + int i; + + memset(&u, 0, sizeof(u)); + init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG); + u.pkt.dring_ident = 0; + u.pkt.num_descr = dr->num_entries; + u.pkt.descr_size = dr->entry_size; + u.pkt.options = VIO_TX_DRING; + u.pkt.num_cookies = dr->ncookies; + + viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] " + "ncookies[%u]\n", + u.pkt.num_descr, u.pkt.descr_size, u.pkt.options, + u.pkt.num_cookies); + + for (i = 0; i < dr->ncookies; i++) { + u.pkt.cookies[i] = dr->cookies[i]; + + viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n", + i, + (unsigned long long) u.pkt.cookies[i].cookie_addr, + (unsigned long long) u.pkt.cookies[i].cookie_size); + } + + return send_ctrl(vio, &u.pkt.tag, sizeof(u)); +} + +static int send_rdx(struct vio_driver_state *vio) +{ + struct vio_rdx pkt; + + memset(&pkt, 0, sizeof(pkt)); + + init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX); + + viodbg(HS, "SEND RDX INFO\n"); + + return send_ctrl(vio, &pkt.tag, sizeof(pkt)); +} + +static int send_attr(struct vio_driver_state *vio) +{ + return vio->ops->send_attr(vio); +} + +static struct vio_version *find_by_major(struct vio_driver_state *vio, + u16 major) +{ + struct vio_version *ret = NULL; + int i; + + for (i = 0; i < vio->ver_table_entries; i++) { + struct vio_version *v = &vio->ver_table[i]; + if (v->major <= major) { + ret = v; + break; + } + } + return ret; +} + +static int process_ver_info(struct vio_driver_state *vio, + struct vio_ver_info *pkt) +{ + struct vio_version *vap; + int err; + + viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n", + pkt->major, pkt->minor, pkt->dev_class); + + if (vio->hs_state != VIO_HS_INVALID) { + /* XXX Perhaps invoke start_handshake? XXX */ + memset(&vio->ver, 0, sizeof(vio->ver)); + vio->hs_state = VIO_HS_INVALID; + } + + vap = find_by_major(vio, pkt->major); + + vio->_peer_sid = pkt->tag.sid; + + if (!vap) { + pkt->tag.stype = VIO_SUBTYPE_NACK; + pkt->major = 0; + pkt->minor = 0; + viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n"); + err = send_ctrl(vio, &pkt->tag, sizeof(*pkt)); + } else if (vap->major != pkt->major) { + pkt->tag.stype = VIO_SUBTYPE_NACK; + pkt->major = vap->major; + pkt->minor = vap->minor; + viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n", + pkt->major, pkt->minor); + err = send_ctrl(vio, &pkt->tag, sizeof(*pkt)); + } else { + struct vio_version ver = { + .major = pkt->major, + .minor = pkt->minor, + }; + if (ver.minor > vap->minor) + ver.minor = vap->minor; + pkt->minor = ver.minor; + pkt->tag.stype = VIO_SUBTYPE_ACK; + viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n", + pkt->major, pkt->minor); + err = send_ctrl(vio, &pkt->tag, sizeof(*pkt)); + if (err > 0) { + vio->ver = ver; + vio->hs_state = VIO_HS_GOTVERS; + } + } + if (err < 0) + return handshake_failure(vio); + + return 0; +} + +static int process_ver_ack(struct vio_driver_state *vio, + struct vio_ver_info *pkt) +{ + viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n", + pkt->major, pkt->minor, pkt->dev_class); + + if (vio->hs_state & VIO_HS_GOTVERS) { + if (vio->ver.major != pkt->major || + vio->ver.minor != pkt->minor) { + pkt->tag.stype = VIO_SUBTYPE_NACK; + (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt)); + return handshake_failure(vio); + } + } else { + vio->ver.major = pkt->major; + vio->ver.minor = pkt->minor; + vio->hs_state = VIO_HS_GOTVERS; + } + + switch (vio->dev_class) { + case VDEV_NETWORK: + case VDEV_DISK: + if (send_attr(vio) < 0) + return handshake_failure(vio); + break; + + default: + break; + } + + return 0; +} + +static int process_ver_nack(struct vio_driver_state *vio, + struct vio_ver_info *pkt) +{ + struct vio_version *nver; + + viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n", + pkt->major, pkt->minor, pkt->dev_class); + + if ((pkt->major == 0 && pkt->minor == 0) || + !(nver = find_by_major(vio, pkt->major))) + return handshake_failure(vio); + + if (send_version(vio, nver->major, nver->minor) < 0) + return handshake_failure(vio); + + return 0; +} + +static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt) +{ + switch (pkt->tag.stype) { + case VIO_SUBTYPE_INFO: + return process_ver_info(vio, pkt); + + case VIO_SUBTYPE_ACK: + return process_ver_ack(vio, pkt); + + case VIO_SUBTYPE_NACK: + return process_ver_nack(vio, pkt); + + default: + return handshake_failure(vio); + }; +} + +static int process_attr(struct vio_driver_state *vio, void *pkt) +{ + int err; + + if (!(vio->hs_state & VIO_HS_GOTVERS)) + return handshake_failure(vio); + + err = vio->ops->handle_attr(vio, pkt); + if (err < 0) { + return handshake_failure(vio); + } else { + vio->hs_state |= VIO_HS_GOT_ATTR; + + if ((vio->dr_state & VIO_DR_STATE_TXREQ) && + !(vio->hs_state & VIO_HS_SENT_DREG)) { + if (send_dreg(vio) < 0) + return handshake_failure(vio); + + vio->hs_state |= VIO_HS_SENT_DREG; + } + } + return 0; +} + +static int all_drings_registered(struct vio_driver_state *vio) +{ + int need_rx, need_tx; + + need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ); + need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ); + + if (need_rx && + !(vio->dr_state & VIO_DR_STATE_RXREG)) + return 0; + + if (need_tx && + !(vio->dr_state & VIO_DR_STATE_TXREG)) + return 0; + + return 1; +} + +static int process_dreg_info(struct vio_driver_state *vio, + struct vio_dring_register *pkt) +{ + struct vio_dring_state *dr; + int i, len; + + viodbg(HS, "GOT DRING_REG INFO ident[%llx] " + "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n", + (unsigned long long) pkt->dring_ident, + pkt->num_descr, pkt->descr_size, pkt->options, + pkt->num_cookies); + + if (!(vio->dr_state & VIO_DR_STATE_RXREQ)) + goto send_nack; + + if (vio->dr_state & VIO_DR_STATE_RXREG) + goto send_nack; + + BUG_ON(vio->desc_buf); + + vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC); + if (!vio->desc_buf) + goto send_nack; + + vio->desc_buf_len = pkt->descr_size; + + dr = &vio->drings[VIO_DRIVER_RX_RING]; + + dr->num_entries = pkt->num_descr; + dr->entry_size = pkt->descr_size; + dr->ncookies = pkt->num_cookies; + for (i = 0; i < dr->ncookies; i++) { + dr->cookies[i] = pkt->cookies[i]; + + viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n", + i, + (unsigned long long) + pkt->cookies[i].cookie_addr, + (unsigned long long) + pkt->cookies[i].cookie_size); + } + + pkt->tag.stype = VIO_SUBTYPE_ACK; + pkt->dring_ident = ++dr->ident; + + viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n", + (unsigned long long) pkt->dring_ident); + + len = (sizeof(*pkt) + + (dr->ncookies * sizeof(struct ldc_trans_cookie))); + if (send_ctrl(vio, &pkt->tag, len) < 0) + goto send_nack; + + vio->dr_state |= VIO_DR_STATE_RXREG; + + return 0; + +send_nack: + pkt->tag.stype = VIO_SUBTYPE_NACK; + viodbg(HS, "SEND DRING_REG NACK\n"); + (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt)); + + return handshake_failure(vio); +} + +static int process_dreg_ack(struct vio_driver_state *vio, + struct vio_dring_register *pkt) +{ + struct vio_dring_state *dr; + + viodbg(HS, "GOT DRING_REG ACK ident[%llx] " + "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n", + (unsigned long long) pkt->dring_ident, + pkt->num_descr, pkt->descr_size, pkt->options, + pkt->num_cookies); + + dr = &vio->drings[VIO_DRIVER_TX_RING]; + + if (!(vio->dr_state & VIO_DR_STATE_TXREQ)) + return handshake_failure(vio); + + dr->ident = pkt->dring_ident; + vio->dr_state |= VIO_DR_STATE_TXREG; + + if (all_drings_registered(vio)) { + if (send_rdx(vio) < 0) + return handshake_failure(vio); + vio->hs_state = VIO_HS_SENT_RDX; + } + return 0; +} + +static int process_dreg_nack(struct vio_driver_state *vio, + struct vio_dring_register *pkt) +{ + viodbg(HS, "GOT DRING_REG NACK ident[%llx] " + "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n", + (unsigned long long) pkt->dring_ident, + pkt->num_descr, pkt->descr_size, pkt->options, + pkt->num_cookies); + + return handshake_failure(vio); +} + +static int process_dreg(struct vio_driver_state *vio, + struct vio_dring_register *pkt) +{ + if (!(vio->hs_state & VIO_HS_GOTVERS)) + return handshake_failure(vio); + + switch (pkt->tag.stype) { + case VIO_SUBTYPE_INFO: + return process_dreg_info(vio, pkt); + + case VIO_SUBTYPE_ACK: + return process_dreg_ack(vio, pkt); + + case VIO_SUBTYPE_NACK: + return process_dreg_nack(vio, pkt); + + default: + return handshake_failure(vio); + } +} + +static int process_dunreg(struct vio_driver_state *vio, + struct vio_dring_unregister *pkt) +{ + struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING]; + + viodbg(HS, "GOT DRING_UNREG\n"); + + if (pkt->dring_ident != dr->ident) + return 0; + + vio->dr_state &= ~VIO_DR_STATE_RXREG; + + memset(dr, 0, sizeof(*dr)); + + kfree(vio->desc_buf); + vio->desc_buf = NULL; + vio->desc_buf_len = 0; + + return 0; +} + +static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt) +{ + viodbg(HS, "GOT RDX INFO\n"); + + pkt->tag.stype = VIO_SUBTYPE_ACK; + viodbg(HS, "SEND RDX ACK\n"); + if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0) + return handshake_failure(vio); + + vio->hs_state |= VIO_HS_SENT_RDX_ACK; + return 0; +} + +static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt) +{ + viodbg(HS, "GOT RDX ACK\n"); + + if (!(vio->hs_state & VIO_HS_SENT_RDX)) + return handshake_failure(vio); + + vio->hs_state |= VIO_HS_GOT_RDX_ACK; + return 0; +} + +static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt) +{ + viodbg(HS, "GOT RDX NACK\n"); + + return handshake_failure(vio); +} + +static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt) +{ + if (!all_drings_registered(vio)) + handshake_failure(vio); + + switch (pkt->tag.stype) { + case VIO_SUBTYPE_INFO: + return process_rdx_info(vio, pkt); + + case VIO_SUBTYPE_ACK: + return process_rdx_ack(vio, pkt); + + case VIO_SUBTYPE_NACK: + return process_rdx_nack(vio, pkt); + + default: + return handshake_failure(vio); + } +} + +int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt) +{ + struct vio_msg_tag *tag = pkt; + u8 prev_state = vio->hs_state; + int err; + + switch (tag->stype_env) { + case VIO_VER_INFO: + err = process_ver(vio, pkt); + break; + + case VIO_ATTR_INFO: + err = process_attr(vio, pkt); + break; + + case VIO_DRING_REG: + err = process_dreg(vio, pkt); + break; + + case VIO_DRING_UNREG: + err = process_dunreg(vio, pkt); + break; + + case VIO_RDX: + err = process_rdx(vio, pkt); + break; + + default: + err = process_unknown(vio, pkt); + break; + } + if (!err && + vio->hs_state != prev_state && + (vio->hs_state & VIO_HS_COMPLETE)) + vio->ops->handshake_complete(vio); + + return err; +} +EXPORT_SYMBOL(vio_control_pkt_engine); + +void vio_conn_reset(struct vio_driver_state *vio) +{ +} +EXPORT_SYMBOL(vio_conn_reset); + +/* The issue is that the Solaris virtual disk server just mirrors the + * SID values it gets from the client peer. So we work around that + * here in vio_{validate,send}_sid() so that the drivers don't need + * to be aware of this crap. + */ +int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp) +{ + u32 sid; + + /* Always let VERSION+INFO packets through unchecked, they + * define the new SID. + */ + if (tp->type == VIO_TYPE_CTRL && + tp->stype == VIO_SUBTYPE_INFO && + tp->stype_env == VIO_VER_INFO) + return 0; + + /* Ok, now figure out which SID to use. */ + switch (vio->dev_class) { + case VDEV_NETWORK: + case VDEV_NETWORK_SWITCH: + case VDEV_DISK_SERVER: + default: + sid = vio->_peer_sid; + break; + + case VDEV_DISK: + sid = vio->_local_sid; + break; + } + + if (sid == tp->sid) + return 0; + viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n", + tp->sid, vio->_peer_sid, vio->_local_sid); + return -EINVAL; +} +EXPORT_SYMBOL(vio_validate_sid); + +u32 vio_send_sid(struct vio_driver_state *vio) +{ + switch (vio->dev_class) { + case VDEV_NETWORK: + case VDEV_NETWORK_SWITCH: + case VDEV_DISK: + default: + return vio->_local_sid; + + case VDEV_DISK_SERVER: + return vio->_peer_sid; + } +} +EXPORT_SYMBOL(vio_send_sid); + +int vio_ldc_alloc(struct vio_driver_state *vio, + struct ldc_channel_config *base_cfg, + void *event_arg) +{ + struct ldc_channel_config cfg = *base_cfg; + struct ldc_channel *lp; + + cfg.tx_irq = vio->vdev->tx_irq; + cfg.rx_irq = vio->vdev->rx_irq; + + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); + if (IS_ERR(lp)) + return PTR_ERR(lp); + + vio->lp = lp; + + return 0; +} +EXPORT_SYMBOL(vio_ldc_alloc); + +void vio_ldc_free(struct vio_driver_state *vio) +{ + ldc_free(vio->lp); + vio->lp = NULL; + + kfree(vio->desc_buf); + vio->desc_buf = NULL; + vio->desc_buf_len = 0; +} +EXPORT_SYMBOL(vio_ldc_free); + +void vio_port_up(struct vio_driver_state *vio) +{ + unsigned long flags; + int err, state; + + spin_lock_irqsave(&vio->lock, flags); + + state = ldc_state(vio->lp); + + err = 0; + if (state == LDC_STATE_INIT) { + err = ldc_bind(vio->lp, vio->name); + if (err) + printk(KERN_WARNING "%s: Port %lu bind failed, " + "err=%d\n", + vio->name, vio->vdev->channel_id, err); + } + + if (!err) { + err = ldc_connect(vio->lp); + if (err) + printk(KERN_WARNING "%s: Port %lu connect failed, " + "err=%d\n", + vio->name, vio->vdev->channel_id, err); + } + if (err) { + unsigned long expires = jiffies + HZ; + + expires = round_jiffies(expires); + mod_timer(&vio->timer, expires); + } + + spin_unlock_irqrestore(&vio->lock, flags); +} +EXPORT_SYMBOL(vio_port_up); + +static void vio_port_timer(unsigned long _arg) +{ + struct vio_driver_state *vio = (struct vio_driver_state *) _arg; + + vio_port_up(vio); +} + +int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, + u8 dev_class, struct vio_version *ver_table, + int ver_table_size, struct vio_driver_ops *ops, + char *name) +{ + switch (dev_class) { + case VDEV_NETWORK: + case VDEV_NETWORK_SWITCH: + case VDEV_DISK: + case VDEV_DISK_SERVER: + break; + + default: + return -EINVAL; + } + + if (!ops->send_attr || + !ops->handle_attr || + !ops->handshake_complete) + return -EINVAL; + + if (!ver_table || ver_table_size < 0) + return -EINVAL; + + if (!name) + return -EINVAL; + + spin_lock_init(&vio->lock); + + vio->name = name; + + vio->dev_class = dev_class; + vio->vdev = vdev; + + vio->ver_table = ver_table; + vio->ver_table_entries = ver_table_size; + + vio->ops = ops; + + setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio); + + return 0; +} +EXPORT_SYMBOL(vio_driver_init); diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c new file mode 100644 index 000000000000..b956fd71c131 --- /dev/null +++ b/arch/sparc/kernel/visemul.c @@ -0,0 +1,890 @@ +/* visemul.c: Emulation of VIS instructions. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/thread_info.h> + +#include <asm/ptrace.h> +#include <asm/pstate.h> +#include <asm/system.h> +#include <asm/fpumacro.h> +#include <asm/uaccess.h> + +/* OPF field of various VIS instructions. */ + +/* 000111011 - four 16-bit packs */ +#define FPACK16_OPF 0x03b + +/* 000111010 - two 32-bit packs */ +#define FPACK32_OPF 0x03a + +/* 000111101 - four 16-bit packs */ +#define FPACKFIX_OPF 0x03d + +/* 001001101 - four 16-bit expands */ +#define FEXPAND_OPF 0x04d + +/* 001001011 - two 32-bit merges */ +#define FPMERGE_OPF 0x04b + +/* 000110001 - 8-by-16-bit partitoned product */ +#define FMUL8x16_OPF 0x031 + +/* 000110011 - 8-by-16-bit upper alpha partitioned product */ +#define FMUL8x16AU_OPF 0x033 + +/* 000110101 - 8-by-16-bit lower alpha partitioned product */ +#define FMUL8x16AL_OPF 0x035 + +/* 000110110 - upper 8-by-16-bit partitioned product */ +#define FMUL8SUx16_OPF 0x036 + +/* 000110111 - lower 8-by-16-bit partitioned product */ +#define FMUL8ULx16_OPF 0x037 + +/* 000111000 - upper 8-by-16-bit partitioned product */ +#define FMULD8SUx16_OPF 0x038 + +/* 000111001 - lower unsigned 8-by-16-bit partitioned product */ +#define FMULD8ULx16_OPF 0x039 + +/* 000101000 - four 16-bit compare; set rd if src1 > src2 */ +#define FCMPGT16_OPF 0x028 + +/* 000101100 - two 32-bit compare; set rd if src1 > src2 */ +#define FCMPGT32_OPF 0x02c + +/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ +#define FCMPLE16_OPF 0x020 + +/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ +#define FCMPLE32_OPF 0x024 + +/* 000100010 - four 16-bit compare; set rd if src1 != src2 */ +#define FCMPNE16_OPF 0x022 + +/* 000100110 - two 32-bit compare; set rd if src1 != src2 */ +#define FCMPNE32_OPF 0x026 + +/* 000101010 - four 16-bit compare; set rd if src1 == src2 */ +#define FCMPEQ16_OPF 0x02a + +/* 000101110 - two 32-bit compare; set rd if src1 == src2 */ +#define FCMPEQ32_OPF 0x02e + +/* 000000000 - Eight 8-bit edge boundary processing */ +#define EDGE8_OPF 0x000 + +/* 000000001 - Eight 8-bit edge boundary processing, no CC */ +#define EDGE8N_OPF 0x001 + +/* 000000010 - Eight 8-bit edge boundary processing, little-endian */ +#define EDGE8L_OPF 0x002 + +/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ +#define EDGE8LN_OPF 0x003 + +/* 000000100 - Four 16-bit edge boundary processing */ +#define EDGE16_OPF 0x004 + +/* 000000101 - Four 16-bit edge boundary processing, no CC */ +#define EDGE16N_OPF 0x005 + +/* 000000110 - Four 16-bit edge boundary processing, little-endian */ +#define EDGE16L_OPF 0x006 + +/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ +#define EDGE16LN_OPF 0x007 + +/* 000001000 - Two 32-bit edge boundary processing */ +#define EDGE32_OPF 0x008 + +/* 000001001 - Two 32-bit edge boundary processing, no CC */ +#define EDGE32N_OPF 0x009 + +/* 000001010 - Two 32-bit edge boundary processing, little-endian */ +#define EDGE32L_OPF 0x00a + +/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ +#define EDGE32LN_OPF 0x00b + +/* 000111110 - distance between 8 8-bit components */ +#define PDIST_OPF 0x03e + +/* 000010000 - convert 8-bit 3-D address to blocked byte address */ +#define ARRAY8_OPF 0x010 + +/* 000010010 - convert 16-bit 3-D address to blocked byte address */ +#define ARRAY16_OPF 0x012 + +/* 000010100 - convert 32-bit 3-D address to blocked byte address */ +#define ARRAY32_OPF 0x014 + +/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ +#define BMASK_OPF 0x019 + +/* 001001100 - Permute bytes as specified by GSR.MASK */ +#define BSHUFFLE_OPF 0x04c + +#define VIS_OPF_SHIFT 5 +#define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) + +#define RS1(INSN) (((INSN) >> 14) & 0x1f) +#define RS2(INSN) (((INSN) >> 0) & 0x1f) +#define RD(INSN) (((INSN) >> 25) & 0x1f) + +static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, + unsigned int rd, int from_kernel) +{ + if (rs2 >= 16 || rs1 >= 16 || rd >= 16) { + if (from_kernel != 0) + __asm__ __volatile__("flushw"); + else + flushw_user(); + } +} + +static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) +{ + unsigned long value; + + if (reg < 16) + return (!reg ? 0 : regs->u_regs[reg]); + if (regs->tstate & TSTATE_PRIV) { + struct reg_window *win; + win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); + value = win->locals[reg - 16]; + } else if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + get_user(value, &win32->locals[reg - 16]); + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + get_user(value, &win->locals[reg - 16]); + } + return value; +} + +static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, + struct pt_regs *regs) +{ + BUG_ON(reg < 16); + BUG_ON(regs->tstate & TSTATE_PRIV); + + if (test_thread_flag(TIF_32BIT)) { + struct reg_window32 __user *win32; + win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); + return (unsigned long __user *)&win32->locals[reg - 16]; + } else { + struct reg_window __user *win; + win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); + return &win->locals[reg - 16]; + } +} + +static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, + struct pt_regs *regs) +{ + BUG_ON(reg >= 16); + BUG_ON(regs->tstate & TSTATE_PRIV); + + return ®s->u_regs[reg]; +} + +static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) +{ + if (rd < 16) { + unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); + + *rd_kern = val; + } else { + unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); + + if (test_thread_flag(TIF_32BIT)) + __put_user((u32)val, (u32 __user *)rd_user); + else + __put_user(val, rd_user); + } +} + +static inline unsigned long fpd_regval(struct fpustate *f, + unsigned int insn_regnum) +{ + insn_regnum = (((insn_regnum & 1) << 5) | + (insn_regnum & 0x1e)); + + return *(unsigned long *) &f->regs[insn_regnum]; +} + +static inline unsigned long *fpd_regaddr(struct fpustate *f, + unsigned int insn_regnum) +{ + insn_regnum = (((insn_regnum & 1) << 5) | + (insn_regnum & 0x1e)); + + return (unsigned long *) &f->regs[insn_regnum]; +} + +static inline unsigned int fps_regval(struct fpustate *f, + unsigned int insn_regnum) +{ + return f->regs[insn_regnum]; +} + +static inline unsigned int *fps_regaddr(struct fpustate *f, + unsigned int insn_regnum) +{ + return &f->regs[insn_regnum]; +} + +struct edge_tab { + u16 left, right; +}; +static struct edge_tab edge8_tab[8] = { + { 0xff, 0x80 }, + { 0x7f, 0xc0 }, + { 0x3f, 0xe0 }, + { 0x1f, 0xf0 }, + { 0x0f, 0xf8 }, + { 0x07, 0xfc }, + { 0x03, 0xfe }, + { 0x01, 0xff }, +}; +static struct edge_tab edge8_tab_l[8] = { + { 0xff, 0x01 }, + { 0xfe, 0x03 }, + { 0xfc, 0x07 }, + { 0xf8, 0x0f }, + { 0xf0, 0x1f }, + { 0xe0, 0x3f }, + { 0xc0, 0x7f }, + { 0x80, 0xff }, +}; +static struct edge_tab edge16_tab[4] = { + { 0xf, 0x8 }, + { 0x7, 0xc }, + { 0x3, 0xe }, + { 0x1, 0xf }, +}; +static struct edge_tab edge16_tab_l[4] = { + { 0xf, 0x1 }, + { 0xe, 0x3 }, + { 0xc, 0x7 }, + { 0x8, 0xf }, +}; +static struct edge_tab edge32_tab[2] = { + { 0x3, 0x2 }, + { 0x1, 0x3 }, +}; +static struct edge_tab edge32_tab_l[2] = { + { 0x3, 0x1 }, + { 0x2, 0x3 }, +}; + +static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; + u16 left, right; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); + orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); + + if (test_thread_flag(TIF_32BIT)) { + rs1 = rs1 & 0xffffffff; + rs2 = rs2 & 0xffffffff; + } + switch (opf) { + default: + case EDGE8_OPF: + case EDGE8N_OPF: + left = edge8_tab[rs1 & 0x7].left; + right = edge8_tab[rs2 & 0x7].right; + break; + case EDGE8L_OPF: + case EDGE8LN_OPF: + left = edge8_tab_l[rs1 & 0x7].left; + right = edge8_tab_l[rs2 & 0x7].right; + break; + + case EDGE16_OPF: + case EDGE16N_OPF: + left = edge16_tab[(rs1 >> 1) & 0x3].left; + right = edge16_tab[(rs2 >> 1) & 0x3].right; + break; + + case EDGE16L_OPF: + case EDGE16LN_OPF: + left = edge16_tab_l[(rs1 >> 1) & 0x3].left; + right = edge16_tab_l[(rs2 >> 1) & 0x3].right; + break; + + case EDGE32_OPF: + case EDGE32N_OPF: + left = edge32_tab[(rs1 >> 2) & 0x1].left; + right = edge32_tab[(rs2 >> 2) & 0x1].right; + break; + + case EDGE32L_OPF: + case EDGE32LN_OPF: + left = edge32_tab_l[(rs1 >> 2) & 0x1].left; + right = edge32_tab_l[(rs2 >> 2) & 0x1].right; + break; + }; + + if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) + rd_val = right & left; + else + rd_val = left; + + store_reg(regs, rd_val, RD(insn)); + + switch (opf) { + case EDGE8_OPF: + case EDGE8L_OPF: + case EDGE16_OPF: + case EDGE16L_OPF: + case EDGE32_OPF: + case EDGE32L_OPF: { + unsigned long ccr, tstate; + + __asm__ __volatile__("subcc %1, %2, %%g0\n\t" + "rd %%ccr, %0" + : "=r" (ccr) + : "r" (orig_rs1), "r" (orig_rs2) + : "cc"); + tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); + regs->tstate = tstate | (ccr << 32UL); + } + }; +} + +static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + unsigned long rs1, rs2, rd_val; + unsigned int bits, bits_mask; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + rs1 = fetch_reg(RS1(insn), regs); + rs2 = fetch_reg(RS2(insn), regs); + + bits = (rs2 > 5 ? 5 : rs2); + bits_mask = (1UL << bits) - 1UL; + + rd_val = ((((rs1 >> 11) & 0x3) << 0) | + (((rs1 >> 33) & 0x3) << 2) | + (((rs1 >> 55) & 0x1) << 4) | + (((rs1 >> 13) & 0xf) << 5) | + (((rs1 >> 35) & 0xf) << 9) | + (((rs1 >> 56) & 0xf) << 13) | + (((rs1 >> 17) & bits_mask) << 17) | + (((rs1 >> 39) & bits_mask) << (17 + bits)) | + (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); + + switch (opf) { + case ARRAY16_OPF: + rd_val <<= 1; + break; + + case ARRAY32_OPF: + rd_val <<= 2; + }; + + store_reg(regs, rd_val, RD(insn)); +} + +static void bmask(struct pt_regs *regs, unsigned int insn) +{ + unsigned long rs1, rs2, rd_val, gsr; + + maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); + rs1 = fetch_reg(RS1(insn), regs); + rs2 = fetch_reg(RS2(insn), regs); + rd_val = rs1 + rs2; + + store_reg(regs, rd_val, RD(insn)); + + gsr = current_thread_info()->gsr[0] & 0xffffffff; + gsr |= rd_val << 32UL; + current_thread_info()->gsr[0] = gsr; +} + +static void bshuffle(struct pt_regs *regs, unsigned int insn) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val; + unsigned long bmask, i; + + bmask = current_thread_info()->gsr[0] >> 32UL; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0UL; + for (i = 0; i < 8; i++) { + unsigned long which = (bmask >> (i * 4)) & 0xf; + unsigned long byte; + + if (which < 8) + byte = (rs1 >> (which * 8)) & 0xff; + else + byte = (rs2 >> ((which-8)*8)) & 0xff; + rd_val |= (byte << (i * 8)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; +} + +static void pdist(struct pt_regs *regs, unsigned int insn) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, *rd, rd_val; + unsigned long i; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + rd = fpd_regaddr(f, RD(insn)); + + rd_val = *rd; + + for (i = 0; i < 8; i++) { + s16 s1, s2; + + s1 = (rs1 >> (56 - (i * 8))) & 0xff; + s2 = (rs2 >> (56 - (i * 8))) & 0xff; + + /* Absolute value of difference. */ + s1 -= s2; + if (s1 < 0) + s1 = ~s1 + 1; + + rd_val += s1; + } + + *rd = rd_val; +} + +static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, gsr, scale, rd_val; + + gsr = current_thread_info()->gsr[0]; + scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); + switch (opf) { + case FPACK16_OPF: { + unsigned long byte; + + rs2 = fpd_regval(f, RS2(insn)); + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + unsigned int val; + s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; + int scaled = src << scale; + int from_fixed = scaled >> 7; + + val = ((from_fixed < 0) ? + 0 : + (from_fixed > 255) ? + 255 : from_fixed); + + rd_val |= (val << (8 * byte)); + } + *fps_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPACK32_OPF: { + unsigned long word; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); + for (word = 0; word < 2; word++) { + unsigned long val; + s32 src = (rs2 >> (word * 32UL)); + s64 scaled = src << scale; + s64 from_fixed = scaled >> 23; + + val = ((from_fixed < 0) ? + 0 : + (from_fixed > 255) ? + 255 : from_fixed); + + rd_val |= (val << (32 * word)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPACKFIX_OPF: { + unsigned long word; + + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + for (word = 0; word < 2; word++) { + long val; + s32 src = (rs2 >> (word * 32UL)); + s64 scaled = src << scale; + s64 from_fixed = scaled >> 16; + + val = ((from_fixed < -32768) ? + -32768 : + (from_fixed > 32767) ? + 32767 : from_fixed); + + rd_val |= ((val & 0xffff) << (word * 16)); + } + *fps_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FEXPAND_OPF: { + unsigned long byte; + + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + unsigned long val; + u8 src = (rs2 >> (byte * 8)) & 0xff; + + val = src << 4; + + rd_val |= (val << (byte * 16)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FPMERGE_OPF: { + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = (((rs2 & 0x000000ff) << 0) | + ((rs1 & 0x000000ff) << 8) | + ((rs2 & 0x0000ff00) << 8) | + ((rs1 & 0x0000ff00) << 16) | + ((rs2 & 0x00ff0000) << 16) | + ((rs1 & 0x00ff0000) << 24) | + ((rs2 & 0xff000000) << 24) | + ((rs1 & 0xff000000) << 32)); + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + }; +} + +static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val; + + switch (opf) { + case FMUL8x16_OPF: { + unsigned long byte; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + for (byte = 0; byte < 4; byte++) { + u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; + s16 src2 = (rs2 >> (byte * 16)) & 0xffff; + u32 prod = src1 * src2; + u16 scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMUL8x16AU_OPF: + case FMUL8x16AL_OPF: { + unsigned long byte; + s16 src2; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0); + for (byte = 0; byte < 4; byte++) { + u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; + u32 prod = src1 * src2; + u16 scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMUL8SUx16_OPF: + case FMUL8ULx16_OPF: { + unsigned long byte, ushift; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; + for (byte = 0; byte < 4; byte++) { + u16 src1; + s16 src2; + u32 prod; + u16 scaled; + + src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); + src2 = ((rs2 >> (16 * byte)) & 0xffff); + prod = src1 * src2; + scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); + } + + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + + case FMULD8SUx16_OPF: + case FMULD8ULx16_OPF: { + unsigned long byte, ushift; + + rs1 = fps_regval(f, RS1(insn)); + rs2 = fps_regval(f, RS2(insn)); + + rd_val = 0; + ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; + for (byte = 0; byte < 2; byte++) { + u16 src1; + s16 src2; + u32 prod; + u16 scaled; + + src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); + src2 = ((rs2 >> (16 * byte)) & 0xffff); + prod = src1 * src2; + scaled = ((prod & 0x00ffff00) >> 8); + + /* Round up. */ + if (prod & 0x80) + scaled++; + rd_val |= ((scaled & 0xffffUL) << + ((byte * 32UL) + 7UL)); + } + *fpd_regaddr(f, RD(insn)) = rd_val; + break; + } + }; +} + +static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) +{ + struct fpustate *f = FPUSTATE; + unsigned long rs1, rs2, rd_val, i; + + rs1 = fpd_regval(f, RS1(insn)); + rs2 = fpd_regval(f, RS2(insn)); + + rd_val = 0; + + switch (opf) { + case FCMPGT16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a > b) + rd_val |= 1 << i; + } + break; + + case FCMPGT32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a > b) + rd_val |= 1 << i; + } + break; + + case FCMPLE16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a <= b) + rd_val |= 1 << i; + } + break; + + case FCMPLE32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a <= b) + rd_val |= 1 << i; + } + break; + + case FCMPNE16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a != b) + rd_val |= 1 << i; + } + break; + + case FCMPNE32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a != b) + rd_val |= 1 << i; + } + break; + + case FCMPEQ16_OPF: + for (i = 0; i < 4; i++) { + s16 a = (rs1 >> (i * 16)) & 0xffff; + s16 b = (rs2 >> (i * 16)) & 0xffff; + + if (a == b) + rd_val |= 1 << i; + } + break; + + case FCMPEQ32_OPF: + for (i = 0; i < 2; i++) { + s32 a = (rs1 >> (i * 32)) & 0xffff; + s32 b = (rs2 >> (i * 32)) & 0xffff; + + if (a == b) + rd_val |= 1 << i; + } + break; + }; + + maybe_flush_windows(0, 0, RD(insn), 0); + store_reg(regs, rd_val, RD(insn)); +} + +/* Emulate the VIS instructions which are not implemented in + * hardware on Niagara. + */ +int vis_emul(struct pt_regs *regs, unsigned int insn) +{ + unsigned long pc = regs->tpc; + unsigned int opf; + + BUG_ON(regs->tstate & TSTATE_PRIV); + + if (test_thread_flag(TIF_32BIT)) + pc = (u32)pc; + + if (get_user(insn, (u32 __user *) pc)) + return -EFAULT; + + save_and_clear_fpu(); + + opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; + switch (opf) { + default: + return -EINVAL; + + /* Pixel Formatting Instructions. */ + case FPACK16_OPF: + case FPACK32_OPF: + case FPACKFIX_OPF: + case FEXPAND_OPF: + case FPMERGE_OPF: + pformat(regs, insn, opf); + break; + + /* Partitioned Multiply Instructions */ + case FMUL8x16_OPF: + case FMUL8x16AU_OPF: + case FMUL8x16AL_OPF: + case FMUL8SUx16_OPF: + case FMUL8ULx16_OPF: + case FMULD8SUx16_OPF: + case FMULD8ULx16_OPF: + pmul(regs, insn, opf); + break; + + /* Pixel Compare Instructions */ + case FCMPGT16_OPF: + case FCMPGT32_OPF: + case FCMPLE16_OPF: + case FCMPLE32_OPF: + case FCMPNE16_OPF: + case FCMPNE32_OPF: + case FCMPEQ16_OPF: + case FCMPEQ32_OPF: + pcmp(regs, insn, opf); + break; + + /* Edge Handling Instructions */ + case EDGE8_OPF: + case EDGE8N_OPF: + case EDGE8L_OPF: + case EDGE8LN_OPF: + case EDGE16_OPF: + case EDGE16N_OPF: + case EDGE16L_OPF: + case EDGE16LN_OPF: + case EDGE32_OPF: + case EDGE32N_OPF: + case EDGE32L_OPF: + case EDGE32LN_OPF: + edge(regs, insn, opf); + break; + + /* Pixel Component Distance */ + case PDIST_OPF: + pdist(regs, insn); + break; + + /* Three-Dimensional Array Addressing Instructions */ + case ARRAY8_OPF: + case ARRAY16_OPF: + case ARRAY32_OPF: + array(regs, insn, opf); + break; + + /* Byte Mask and Shuffle Instructions */ + case BMASK_OPF: + bmask(regs, insn); + break; + + case BSHUFFLE_OPF: + bshuffle(regs, insn); + break; + }; + + regs->tpc = regs->tnpc; + regs->tnpc += 4; + return 0; +} diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S new file mode 100644 index 000000000000..a6b0863c27df --- /dev/null +++ b/arch/sparc/kernel/winfixup.S @@ -0,0 +1,156 @@ +/* winfixup.S: Handle cases where user stack pointer is found to be bogus. + * + * Copyright (C) 1997, 2006 David S. Miller (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/head.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/processor.h> +#include <asm/spitfire.h> +#include <asm/thread_info.h> + + .text + + /* It used to be the case that these register window fault + * handlers could run via the save and restore instructions + * done by the trap entry and exit code. They now do the + * window spill/fill by hand, so that case no longer can occur. + */ + + .align 32 +fill_fixup: + TRAP_LOAD_THREAD_REG(%g6, %g1) + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + + /* Be very careful about usage of the trap globals here. + * You cannot touch %g5 as that has the fault information. + */ +spill_fixup: +spill_fixup_mna: +spill_fixup_dax: + TRAP_LOAD_THREAD_REG(%g6, %g1) + ldx [%g6 + TI_FLAGS], %g1 + andcc %g1, _TIF_32BIT, %g0 + ldub [%g6 + TI_WSAVED], %g1 + sll %g1, 3, %g3 + add %g6, %g3, %g3 + stx %sp, [%g3 + TI_RWIN_SPTRS] + sll %g1, 7, %g3 + bne,pt %xcc, 1f + add %g6, %g3, %g3 + stx %l0, [%g3 + TI_REG_WINDOW + 0x00] + stx %l1, [%g3 + TI_REG_WINDOW + 0x08] + stx %l2, [%g3 + TI_REG_WINDOW + 0x10] + stx %l3, [%g3 + TI_REG_WINDOW + 0x18] + stx %l4, [%g3 + TI_REG_WINDOW + 0x20] + stx %l5, [%g3 + TI_REG_WINDOW + 0x28] + stx %l6, [%g3 + TI_REG_WINDOW + 0x30] + stx %l7, [%g3 + TI_REG_WINDOW + 0x38] + stx %i0, [%g3 + TI_REG_WINDOW + 0x40] + stx %i1, [%g3 + TI_REG_WINDOW + 0x48] + stx %i2, [%g3 + TI_REG_WINDOW + 0x50] + stx %i3, [%g3 + TI_REG_WINDOW + 0x58] + stx %i4, [%g3 + TI_REG_WINDOW + 0x60] + stx %i5, [%g3 + TI_REG_WINDOW + 0x68] + stx %i6, [%g3 + TI_REG_WINDOW + 0x70] + ba,pt %xcc, 2f + stx %i7, [%g3 + TI_REG_WINDOW + 0x78] +1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00] + stw %l1, [%g3 + TI_REG_WINDOW + 0x04] + stw %l2, [%g3 + TI_REG_WINDOW + 0x08] + stw %l3, [%g3 + TI_REG_WINDOW + 0x0c] + stw %l4, [%g3 + TI_REG_WINDOW + 0x10] + stw %l5, [%g3 + TI_REG_WINDOW + 0x14] + stw %l6, [%g3 + TI_REG_WINDOW + 0x18] + stw %l7, [%g3 + TI_REG_WINDOW + 0x1c] + stw %i0, [%g3 + TI_REG_WINDOW + 0x20] + stw %i1, [%g3 + TI_REG_WINDOW + 0x24] + stw %i2, [%g3 + TI_REG_WINDOW + 0x28] + stw %i3, [%g3 + TI_REG_WINDOW + 0x2c] + stw %i4, [%g3 + TI_REG_WINDOW + 0x30] + stw %i5, [%g3 + TI_REG_WINDOW + 0x34] + stw %i6, [%g3 + TI_REG_WINDOW + 0x38] + stw %i7, [%g3 + TI_REG_WINDOW + 0x3c] +2: add %g1, 1, %g1 + stb %g1, [%g6 + TI_WSAVED] + rdpr %tstate, %g1 + andcc %g1, TSTATE_PRIV, %g0 + saved + be,pn %xcc, 1f + and %g1, TSTATE_CWP, %g1 + retry +1: mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + +winfix_mna: + andn %g3, 0x7f, %g3 + add %g3, 0x78, %g3 + wrpr %g3, %tnpc + done + +fill_fixup_mna: + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + mov %l4, %o2 + call sun4v_do_mna + mov %l5, %o1 + ba,a,pt %xcc, rtrap +1: mov %l4, %o1 + mov %l5, %o2 + call mem_address_unaligned + nop + ba,a,pt %xcc, rtrap + +winfix_dax: + andn %g3, 0x7f, %g3 + add %g3, 0x74, %g3 + wrpr %g3, %tnpc + done + +fill_fixup_dax: + rdpr %tstate, %g1 + and %g1, TSTATE_CWP, %g1 + wrpr %g1, %cwp + ba,pt %xcc, etrap + rd %pc, %g7 + sethi %hi(tlb_type), %g1 + mov %l4, %o1 + lduw [%g1 + %lo(tlb_type)], %g1 + mov %l5, %o2 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + call sun4v_data_access_exception + nop + ba,a,pt %xcc, rtrap +1: call spitfire_data_access_exception + nop + ba,a,pt %xcc, rtrap |