summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-03-20 11:57:50 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-20 11:57:50 -0800
commitc4a1745aa09fc110afdefea0e5d025043e348bae (patch)
tree6d28dc3a0c1bf18437b3d49f28e5c81b850cdb2f
parent88dcb91177cfa5b26143a29074389a2aa259c7cf (diff)
parentac0eb3eb7e54b700386068be025a43d2a3958ee5 (diff)
downloadlinux-c4a1745aa09fc110afdefea0e5d025043e348bae.tar.bz2
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6: (230 commits) [SPARC64]: Update defconfig. [SPARC64]: Fix 2 bugs in huge page support. [SPARC64]: CONFIG_BLK_DEV_RAM fix [SPARC64]: Optimized TSB table initialization. [SPARC64]: Allow CONFIG_MEMORY_HOTPLUG to build. [SPARC64]: Use SLAB caches for TSB tables. [SPARC64]: Don't kill the page allocator when growing a TSB. [SPARC64]: Randomize mm->mmap_base when PF_RANDOMIZE is set. [SPARC64]: Increase top of 32-bit process stack. [SPARC64]: Top-down address space allocation for 32-bit tasks. [SPARC64] bbc_i2c: Fix cpu check and add missing module license. [SPARC64]: Fix and re-enable dynamic TSB sizing. [SUNSU]: Fix missing spinlock initialization. [TG3]: Do not try to access NIC_SRAM_DATA_SIG on Sun parts. [SPARC64]: First cut at VIS simulator for Niagara. [SPARC64]: Fix system type in /proc/cpuinfo and remove bogus OBP check. [SPARC64]: Add SMT scheduling support for Niagara. [SPARC64]: Fix 32-bit truncation which broke sparsemem. [SPARC64]: Move over to sparsemem. [SPARC64]: Fix new context version SMP handling. ...
-rw-r--r--arch/sparc/kernel/ioport.c40
-rw-r--r--arch/sparc64/Kconfig18
-rw-r--r--arch/sparc64/defconfig25
-rw-r--r--arch/sparc64/kernel/Makefile8
-rw-r--r--arch/sparc64/kernel/binfmt_aout32.c14
-rw-r--r--arch/sparc64/kernel/binfmt_elf32.c4
-rw-r--r--arch/sparc64/kernel/cpu.c7
-rw-r--r--arch/sparc64/kernel/devices.c189
-rw-r--r--arch/sparc64/kernel/dtlb_backend.S170
-rw-r--r--arch/sparc64/kernel/dtlb_base.S109
-rw-r--r--arch/sparc64/kernel/dtlb_miss.S39
-rw-r--r--arch/sparc64/kernel/ebus.c3
-rw-r--r--arch/sparc64/kernel/entry.S331
-rw-r--r--arch/sparc64/kernel/etrap.S170
-rw-r--r--arch/sparc64/kernel/head.S254
-rw-r--r--arch/sparc64/kernel/irq.c339
-rw-r--r--arch/sparc64/kernel/itlb_base.S79
-rw-r--r--arch/sparc64/kernel/itlb_miss.S39
-rw-r--r--arch/sparc64/kernel/ktlb.S363
-rw-r--r--arch/sparc64/kernel/pci.c13
-rw-r--r--arch/sparc64/kernel/pci_common.c301
-rw-r--r--arch/sparc64/kernel/pci_iommu.c36
-rw-r--r--arch/sparc64/kernel/pci_psycho.c23
-rw-r--r--arch/sparc64/kernel/pci_sabre.c23
-rw-r--r--arch/sparc64/kernel/pci_schizo.c24
-rw-r--r--arch/sparc64/kernel/pci_sun4v.c1147
-rw-r--r--arch/sparc64/kernel/pci_sun4v.h31
-rw-r--r--arch/sparc64/kernel/pci_sun4v_asm.S95
-rw-r--r--arch/sparc64/kernel/process.c133
-rw-r--r--arch/sparc64/kernel/ptrace.c3
-rw-r--r--arch/sparc64/kernel/rtrap.S115
-rw-r--r--arch/sparc64/kernel/sbus.c10
-rw-r--r--arch/sparc64/kernel/setup.c409
-rw-r--r--arch/sparc64/kernel/smp.c418
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c26
-rw-r--r--arch/sparc64/kernel/sun4v_ivec.S334
-rw-r--r--arch/sparc64/kernel/sun4v_tlb_miss.S421
-rw-r--r--arch/sparc64/kernel/sys_sparc.c297
-rw-r--r--arch/sparc64/kernel/sys_sparc32.c9
-rw-r--r--arch/sparc64/kernel/time.c373
-rw-r--r--arch/sparc64/kernel/trampoline.S238
-rw-r--r--arch/sparc64/kernel/traps.c403
-rw-r--r--arch/sparc64/kernel/tsb.S442
-rw-r--r--arch/sparc64/kernel/ttable.S63
-rw-r--r--arch/sparc64/kernel/unaligned.c45
-rw-r--r--arch/sparc64/kernel/us2e_cpufreq.c11
-rw-r--r--arch/sparc64/kernel/us3_cpufreq.c11
-rw-r--r--arch/sparc64/kernel/visemul.c894
-rw-r--r--arch/sparc64/kernel/vmlinux.lds.S16
-rw-r--r--arch/sparc64/kernel/winfixup.S480
-rw-r--r--arch/sparc64/lib/Makefile2
-rw-r--r--arch/sparc64/lib/NGbzero.S163
-rw-r--r--arch/sparc64/lib/NGcopy_from_user.S37
-rw-r--r--arch/sparc64/lib/NGcopy_to_user.S40
-rw-r--r--arch/sparc64/lib/NGmemcpy.S368
-rw-r--r--arch/sparc64/lib/NGpage.S96
-rw-r--r--arch/sparc64/lib/NGpatch.S33
-rw-r--r--arch/sparc64/lib/U3patch.S3
-rw-r--r--arch/sparc64/lib/bzero.S18
-rw-r--r--arch/sparc64/lib/clear_page.S12
-rw-r--r--arch/sparc64/lib/copy_page.S7
-rw-r--r--arch/sparc64/lib/delay.c19
-rw-r--r--arch/sparc64/lib/xor.S300
-rw-r--r--arch/sparc64/math-emu/math.c24
-rw-r--r--arch/sparc64/mm/Makefile2
-rw-r--r--arch/sparc64/mm/fault.c15
-rw-r--r--arch/sparc64/mm/generic.c40
-rw-r--r--arch/sparc64/mm/hugetlbpage.c179
-rw-r--r--arch/sparc64/mm/init.c1431
-rw-r--r--arch/sparc64/mm/tlb.c64
-rw-r--r--arch/sparc64/mm/tsb.c440
-rw-r--r--arch/sparc64/mm/ultra.S374
-rw-r--r--arch/sparc64/prom/cif.S211
-rw-r--r--arch/sparc64/prom/console.c6
-rw-r--r--arch/sparc64/prom/init.c60
-rw-r--r--arch/sparc64/prom/misc.c44
-rw-r--r--arch/sparc64/prom/p1275.c11
-rw-r--r--arch/sparc64/prom/tree.c9
-rw-r--r--arch/sparc64/solaris/misc.c4
-rw-r--r--drivers/net/tg3.c4
-rw-r--r--drivers/sbus/char/bbc_i2c.c4
-rw-r--r--drivers/serial/Kconfig7
-rw-r--r--drivers/serial/Makefile1
-rw-r--r--drivers/serial/sunhv.c550
-rw-r--r--drivers/serial/sunsab.c19
-rw-r--r--drivers/serial/sunsu.c26
-rw-r--r--drivers/serial/sunzilog.c35
-rw-r--r--include/asm-sparc/idprom.h26
-rw-r--r--include/asm-sparc/oplib.h2
-rw-r--r--include/asm-sparc/uaccess.h47
-rw-r--r--include/asm-sparc64/a.out.h6
-rw-r--r--include/asm-sparc64/asi.h18
-rw-r--r--include/asm-sparc64/cpudata.h203
-rw-r--r--include/asm-sparc64/elf.h22
-rw-r--r--include/asm-sparc64/head.h15
-rw-r--r--include/asm-sparc64/hypervisor.h2128
-rw-r--r--include/asm-sparc64/idprom.h12
-rw-r--r--include/asm-sparc64/intr_queue.h15
-rw-r--r--include/asm-sparc64/irq.h4
-rw-r--r--include/asm-sparc64/mmu.h36
-rw-r--r--include/asm-sparc64/mmu_context.h162
-rw-r--r--include/asm-sparc64/numnodes.h6
-rw-r--r--include/asm-sparc64/oplib.h43
-rw-r--r--include/asm-sparc64/page.h13
-rw-r--r--include/asm-sparc64/pbm.h3
-rw-r--r--include/asm-sparc64/pci.h56
-rw-r--r--include/asm-sparc64/pgalloc.h166
-rw-r--r--include/asm-sparc64/pgtable.h704
-rw-r--r--include/asm-sparc64/pil.h4
-rw-r--r--include/asm-sparc64/processor.h23
-rw-r--r--include/asm-sparc64/pstate.h9
-rw-r--r--include/asm-sparc64/scratchpad.h14
-rw-r--r--include/asm-sparc64/smp.h30
-rw-r--r--include/asm-sparc64/sparsemem.h12
-rw-r--r--include/asm-sparc64/spitfire.h1
-rw-r--r--include/asm-sparc64/system.h7
-rw-r--r--include/asm-sparc64/thread_info.h9
-rw-r--r--include/asm-sparc64/timex.h6
-rw-r--r--include/asm-sparc64/tlbflush.h25
-rw-r--r--include/asm-sparc64/tsb.h281
-rw-r--r--include/asm-sparc64/ttable.h272
-rw-r--r--include/asm-sparc64/uaccess.h46
-rw-r--r--include/asm-sparc64/vdev.h16
-rw-r--r--include/asm-sparc64/xor.h34
-rw-r--r--include/linux/serial_core.h3
125 files changed, 14175 insertions, 4007 deletions
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index d39c9f206271..460f72e640e6 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -217,7 +217,7 @@ static void _sparc_free_io(struct resource *res)
unsigned long plen;
plen = res->end - res->start + 1;
- if ((plen & (PAGE_SIZE-1)) != 0) BUG();
+ BUG_ON((plen & (PAGE_SIZE-1)) != 0);
sparc_unmapiorange(res->start, plen);
release_resource(res);
}
@@ -512,8 +512,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba)
dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
int direction)
{
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
/* IIep is write-through, not flushing. */
return virt_to_phys(ptr);
}
@@ -528,8 +527,7 @@ dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size,
int direction)
{
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
mmu_inval_dma_area((unsigned long)phys_to_virt(ba),
(size + PAGE_SIZE-1) & PAGE_MASK);
@@ -542,8 +540,7 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size,
dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
unsigned long offset, size_t size, int direction)
{
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
/* IIep is write-through, not flushing. */
return page_to_phys(page) + offset;
}
@@ -551,8 +548,7 @@ dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
void pci_unmap_page(struct pci_dev *hwdev,
dma_addr_t dma_address, size_t size, int direction)
{
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
/* mmu_inval_dma_area XXX */
}
@@ -576,11 +572,10 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents,
{
int n;
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
/* IIep is write-through, not flushing. */
for (n = 0; n < nents; n++) {
- if (page_address(sg->page) == NULL) BUG();
+ BUG_ON(page_address(sg->page) == NULL);
sg->dvma_address = virt_to_phys(page_address(sg->page));
sg->dvma_length = sg->length;
sg++;
@@ -597,11 +592,10 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents,
{
int n;
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
for (n = 0; n < nents; n++) {
- if (page_address(sg->page) == NULL) BUG();
+ BUG_ON(page_address(sg->page) == NULL);
mmu_inval_dma_area(
(unsigned long) page_address(sg->page),
(sg->length + PAGE_SIZE-1) & PAGE_MASK);
@@ -622,8 +616,7 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents,
*/
void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction)
{
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
mmu_inval_dma_area((unsigned long)phys_to_virt(ba),
(size + PAGE_SIZE-1) & PAGE_MASK);
@@ -632,8 +625,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t si
void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction)
{
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
mmu_inval_dma_area((unsigned long)phys_to_virt(ba),
(size + PAGE_SIZE-1) & PAGE_MASK);
@@ -650,11 +642,10 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int
{
int n;
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
for (n = 0; n < nents; n++) {
- if (page_address(sg->page) == NULL) BUG();
+ BUG_ON(page_address(sg->page) == NULL);
mmu_inval_dma_area(
(unsigned long) page_address(sg->page),
(sg->length + PAGE_SIZE-1) & PAGE_MASK);
@@ -667,11 +658,10 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, i
{
int n;
- if (direction == PCI_DMA_NONE)
- BUG();
+ BUG_ON(direction == PCI_DMA_NONE);
if (direction != PCI_DMA_TODEVICE) {
for (n = 0; n < nents; n++) {
- if (page_address(sg->page) == NULL) BUG();
+ BUG_ON(page_address(sg->page) == NULL);
mmu_inval_dma_area(
(unsigned long) page_address(sg->page),
(sg->length + PAGE_SIZE-1) & PAGE_MASK);
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 4c0a50a76554..c3685b314d71 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -186,6 +186,15 @@ endchoice
endmenu
+config ARCH_SPARSEMEM_ENABLE
+ def_bool y
+
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
+
+config LARGE_ALLOCS
+ def_bool y
+
source "mm/Kconfig"
config GENERIC_ISA_DMA
@@ -350,6 +359,15 @@ config SOLARIS_EMUL
endmenu
+config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+ default y
+ help
+ SMT scheduler support improves the CPU scheduler's decision making
+ when dealing with UltraSPARC cpus at a cost of slightly increased
+ overhead in some places. If unsure say N here.
+
config CMDLINE_BOOL
bool "Default bootloader kernel arguments"
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 069d49777b2a..f819a9663a8d 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc2
-# Tue Feb 7 17:47:18 2006
+# Linux kernel version: 2.6.16
+# Mon Mar 20 01:23:21 2006
#
CONFIG_SPARC=y
CONFIG_SPARC64=y
@@ -115,14 +115,20 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_HUGETLB_PAGE_SIZE_4MB=y
# CONFIG_HUGETLB_PAGE_SIZE_512K is not set
# CONFIG_HUGETLB_PAGE_SIZE_64K is not set
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_LARGE_ALLOCS=y
CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
+# CONFIG_FLATMEM_MANUAL is not set
# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPARSEMEM_EXTREME=y
+CONFIG_MEMORY_HOTPLUG=y
CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_SBUS=y
CONFIG_SBUSCHAR=y
@@ -655,6 +661,7 @@ CONFIG_SERIAL_SUNCORE=y
CONFIG_SERIAL_SUNSU=y
CONFIG_SERIAL_SUNSU_CONSOLE=y
CONFIG_SERIAL_SUNSAB=m
+CONFIG_SERIAL_SUNHV=y
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
@@ -1116,11 +1123,7 @@ CONFIG_USB_HIDDEV=y
# CONFIG_INFINIBAND is not set
#
-# SN Devices
-#
-
-#
-# EDAC - error detection and reporting (RAS)
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
#
#
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 83d67eb18895..6f6816488b04 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -11,10 +11,12 @@ obj-y := process.o setup.o cpu.o idprom.o \
traps.o devices.o auxio.o una_asm.o \
irq.o ptrace.o time.o sys_sparc.o signal.o \
unaligned.o central.o pci.o starfire.o semaphore.o \
- power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o
+ power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \
+ visemul.o
obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \
- pci_psycho.o pci_sabre.o pci_schizo.o
+ pci_psycho.o pci_sabre.o pci_schizo.o \
+ pci_sun4v.o pci_sun4v_asm.o
obj-$(CONFIG_SMP) += smp.o trampoline.o
obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
@@ -38,5 +40,5 @@ else
CMODEL_CFLAG := -m64 -mcmodel=medlow
endif
-head.o: head.S ttable.S itlb_base.S dtlb_base.S dtlb_backend.S dtlb_prot.S \
+head.o: head.S ttable.S itlb_miss.S dtlb_miss.S ktlb.S tsb.S \
etrap.S rtrap.S winfixup.S entry.S
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index 202a80c24b6f..d7caa60a0074 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
@@ -31,6 +31,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs);
static int load_aout32_library(struct file*);
@@ -238,6 +239,8 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
+ current->mm->free_area_cache = current->mm->mmap_base;
+ current->mm->cached_hole_size = 0;
current->mm->mmap = NULL;
compute_creds(bprm);
@@ -329,15 +332,8 @@ beyond_if:
current->mm->start_stack =
(unsigned long) create_aout32_tables((char __user *)bprm->p, bprm);
- if (!(orig_thr_flags & _TIF_32BIT)) {
- unsigned long pgd_cache = get_pgd_cache(current->mm->pgd);
-
- __asm__ __volatile__("stxa\t%0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (pgd_cache),
- "r" (TSB_REG), "i" (ASI_DMMU));
- }
+ tsb_context_switch(current->mm);
+
start_thread32(regs, ex.a_entry, current->mm->start_stack);
if (current->ptrace & PT_PTRACED)
send_sig(SIGTRAP, current, 0);
diff --git a/arch/sparc64/kernel/binfmt_elf32.c b/arch/sparc64/kernel/binfmt_elf32.c
index a1a12d2aa353..8a2abcce2737 100644
--- a/arch/sparc64/kernel/binfmt_elf32.c
+++ b/arch/sparc64/kernel/binfmt_elf32.c
@@ -153,7 +153,9 @@ MODULE_AUTHOR("Eric Youngdale, David S. Miller, Jakub Jelinek");
#undef MODULE_DESCRIPTION
#undef MODULE_AUTHOR
+#include <asm/a.out.h>
+
#undef TASK_SIZE
-#define TASK_SIZE 0xf0000000
+#define TASK_SIZE STACK_TOP32
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c
index 00eed88ef2e8..11cc0caef592 100644
--- a/arch/sparc64/kernel/cpu.c
+++ b/arch/sparc64/kernel/cpu.c
@@ -13,6 +13,7 @@
#include <asm/system.h>
#include <asm/fpumacro.h>
#include <asm/cpudata.h>
+#include <asm/spitfire.h>
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
@@ -71,6 +72,12 @@ void __init cpu_probe(void)
unsigned long ver, fpu_vers, manuf, impl, fprs;
int i;
+ if (tlb_type == hypervisor) {
+ sparc_cpu_type = "UltraSparc T1 (Niagara)";
+ sparc_fpu_type = "UltraSparc T1 integrated FPU";
+ return;
+ }
+
fprs = fprs_read();
fprs_write(FPRS_FEF);
__asm__ __volatile__ ("rdpr %%ver, %0; stx %%fsr, [%1]"
diff --git a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c
index df9a1ca8fd77..007e8922cd16 100644
--- a/arch/sparc64/kernel/devices.c
+++ b/arch/sparc64/kernel/devices.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
+#include <linux/bootmem.h>
#include <asm/page.h>
#include <asm/oplib.h>
@@ -20,6 +21,8 @@
#include <asm/spitfire.h>
#include <asm/timer.h>
#include <asm/cpudata.h>
+#include <asm/vdev.h>
+#include <asm/irq.h>
/* Used to synchronize acceses to NatSemi SUPER I/O chip configure
* operations in asm/ns87303.h
@@ -29,13 +32,158 @@ DEFINE_SPINLOCK(ns87303_lock);
extern void cpu_probe(void);
extern void central_probe(void);
-static char *cpu_mid_prop(void)
+u32 sun4v_vdev_devhandle;
+int sun4v_vdev_root;
+
+struct vdev_intmap {
+ unsigned int phys;
+ unsigned int irq;
+ unsigned int cnode;
+ unsigned int cinterrupt;
+};
+
+struct vdev_intmask {
+ unsigned int phys;
+ unsigned int interrupt;
+ unsigned int __unused;
+};
+
+static struct vdev_intmap *vdev_intmap;
+static int vdev_num_intmap;
+static struct vdev_intmask vdev_intmask;
+
+static void __init sun4v_virtual_device_probe(void)
+{
+ struct linux_prom64_registers regs;
+ struct vdev_intmap *ip;
+ int node, sz, err;
+
+ if (tlb_type != hypervisor)
+ return;
+
+ node = prom_getchild(prom_root_node);
+ node = prom_searchsiblings(node, "virtual-devices");
+ if (!node) {
+ prom_printf("SUN4V: Fatal error, no virtual-devices node.\n");
+ prom_halt();
+ }
+
+ sun4v_vdev_root = node;
+
+ prom_getproperty(node, "reg", (char *)&regs, sizeof(regs));
+ sun4v_vdev_devhandle = (regs.phys_addr >> 32UL) & 0x0fffffff;
+
+ sz = prom_getproplen(node, "interrupt-map");
+ if (sz <= 0) {
+ prom_printf("SUN4V: Error, no vdev interrupt-map.\n");
+ prom_halt();
+ }
+
+ if ((sz % sizeof(*ip)) != 0) {
+ prom_printf("SUN4V: Bogus interrupt-map property size %d\n",
+ sz);
+ prom_halt();
+ }
+
+ vdev_intmap = ip = alloc_bootmem_low_pages(sz);
+ if (!vdev_intmap) {
+ prom_printf("SUN4V: Error, cannot allocate vdev_intmap.\n");
+ prom_halt();
+ }
+
+ err = prom_getproperty(node, "interrupt-map", (char *) ip, sz);
+ if (err == -1) {
+ prom_printf("SUN4V: Fatal error, no vdev interrupt-map.\n");
+ prom_halt();
+ }
+ if (err != sz) {
+ prom_printf("SUN4V: Inconsistent interrupt-map size, "
+ "proplen(%d) vs getprop(%d).\n", sz,err);
+ prom_halt();
+ }
+
+ vdev_num_intmap = err / sizeof(*ip);
+
+ err = prom_getproperty(node, "interrupt-map-mask",
+ (char *) &vdev_intmask,
+ sizeof(vdev_intmask));
+ if (err <= 0) {
+ prom_printf("SUN4V: Fatal error, no vdev "
+ "interrupt-map-mask.\n");
+ prom_halt();
+ }
+ if (err % sizeof(vdev_intmask)) {
+ prom_printf("SUN4V: Bogus interrupt-map-mask "
+ "property size %d\n", err);
+ prom_halt();
+ }
+
+ printk("SUN4V: virtual-devices devhandle[%x]\n",
+ sun4v_vdev_devhandle);
+}
+
+unsigned int sun4v_vdev_device_interrupt(unsigned int dev_node)
+{
+ unsigned int irq, reg;
+ int err, i;
+
+ err = prom_getproperty(dev_node, "interrupts",
+ (char *) &irq, sizeof(irq));
+ if (err <= 0) {
+ printk("VDEV: Cannot get \"interrupts\" "
+ "property for OBP node %x\n", dev_node);
+ return 0;
+ }
+
+ err = prom_getproperty(dev_node, "reg",
+ (char *) &reg, sizeof(reg));
+ if (err <= 0) {
+ printk("VDEV: Cannot get \"reg\" "
+ "property for OBP node %x\n", dev_node);
+ return 0;
+ }
+
+ for (i = 0; i < vdev_num_intmap; i++) {
+ if (vdev_intmap[i].phys == (reg & vdev_intmask.phys) &&
+ vdev_intmap[i].irq == (irq & vdev_intmask.interrupt)) {
+ irq = vdev_intmap[i].cinterrupt;
+ break;
+ }
+ }
+
+ if (i == vdev_num_intmap) {
+ printk("VDEV: No matching interrupt map entry "
+ "for OBP node %x\n", dev_node);
+ return 0;
+ }
+
+ return sun4v_build_irq(sun4v_vdev_devhandle, irq, 5, 0);
+}
+
+static const char *cpu_mid_prop(void)
{
if (tlb_type == spitfire)
return "upa-portid";
return "portid";
}
+static int get_cpu_mid(int prom_node)
+{
+ if (tlb_type == hypervisor) {
+ struct linux_prom64_registers reg;
+
+ if (prom_getproplen(prom_node, "cpuid") == 4)
+ return prom_getintdefault(prom_node, "cpuid", 0);
+
+ prom_getproperty(prom_node, "reg", (char *) &reg, sizeof(reg));
+ return (reg.phys_addr >> 32) & 0x0fffffffUL;
+ } else {
+ const char *prop_name = cpu_mid_prop();
+
+ return prom_getintdefault(prom_node, prop_name, 0);
+ }
+}
+
static int check_cpu_node(int nd, int *cur_inst,
int (*compare)(int, int, void *), void *compare_arg,
int *prom_node, int *mid)
@@ -50,7 +198,7 @@ static int check_cpu_node(int nd, int *cur_inst,
if (prom_node)
*prom_node = nd;
if (mid)
- *mid = prom_getintdefault(nd, cpu_mid_prop(), 0);
+ *mid = get_cpu_mid(nd);
return 0;
}
@@ -105,7 +253,7 @@ static int cpu_mid_compare(int nd, int instance, void *_arg)
int desired_mid = (int) (long) _arg;
int this_mid;
- this_mid = prom_getintdefault(nd, cpu_mid_prop(), 0);
+ this_mid = get_cpu_mid(nd);
if (this_mid == desired_mid)
return 0;
return -ENODEV;
@@ -126,7 +274,8 @@ void __init device_scan(void)
#ifndef CONFIG_SMP
{
- int err, cpu_node;
+ int err, cpu_node, def;
+
err = cpu_find_by_instance(0, &cpu_node, NULL);
if (err) {
prom_printf("No cpu nodes, cannot continue\n");
@@ -135,21 +284,40 @@ void __init device_scan(void)
cpu_data(0).clock_tick = prom_getintdefault(cpu_node,
"clock-frequency",
0);
+
+ def = ((tlb_type == hypervisor) ?
+ (8 * 1024) :
+ (16 * 1024));
cpu_data(0).dcache_size = prom_getintdefault(cpu_node,
"dcache-size",
- 16 * 1024);
+ def);
+
+ def = 32;
cpu_data(0).dcache_line_size =
- prom_getintdefault(cpu_node, "dcache-line-size", 32);
+ prom_getintdefault(cpu_node, "dcache-line-size",
+ def);
+
+ def = 16 * 1024;
cpu_data(0).icache_size = prom_getintdefault(cpu_node,
"icache-size",
- 16 * 1024);
+ def);
+
+ def = 32;
cpu_data(0).icache_line_size =
- prom_getintdefault(cpu_node, "icache-line-size", 32);
+ prom_getintdefault(cpu_node, "icache-line-size",
+ def);
+
+ def = ((tlb_type == hypervisor) ?
+ (3 * 1024 * 1024) :
+ (4 * 1024 * 1024));
cpu_data(0).ecache_size = prom_getintdefault(cpu_node,
"ecache-size",
- 4 * 1024 * 1024);
+ def);
+
+ def = 64;
cpu_data(0).ecache_line_size =
- prom_getintdefault(cpu_node, "ecache-line-size", 64);
+ prom_getintdefault(cpu_node, "ecache-line-size",
+ def);
printk("CPU[0]: Caches "
"D[sz(%d):line_sz(%d)] "
"I[sz(%d):line_sz(%d)] "
@@ -160,6 +328,7 @@ void __init device_scan(void)
}
#endif
+ sun4v_virtual_device_probe();
central_probe();
cpu_probe();
diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
deleted file mode 100644
index acc889a7f9c1..000000000000
--- a/arch/sparc64/kernel/dtlb_backend.S
+++ /dev/null
@@ -1,170 +0,0 @@
-/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $
- * dtlb_backend.S: Back end to DTLB miss replacement strategy.
- * This is included directly into the trap table.
- *
- * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-
-#define VALID_SZ_BITS (_PAGE_VALID | _PAGE_SZBITS)
-
-#define VPTE_BITS (_PAGE_CP | _PAGE_CV | _PAGE_P )
-#define VPTE_SHIFT (PAGE_SHIFT - 3)
-
-/* Ways we can get here:
- *
- * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
- * 2) Nucleus loads and stores to/from user/kernel window save areas.
- * 3) VPTE misses from dtlb_base and itlb_base.
- *
- * We need to extract out the PMD and PGDIR indexes from the
- * linear virtual page table access address. The PTE index
- * is at the bottom, but we are not concerned with it. Bits
- * 0 to 2 are clear since each PTE is 8 bytes in size. Each
- * PMD and PGDIR entry are 4 bytes in size. Thus, this
- * address looks something like:
- *
- * |---------------------------------------------------------------|
- * | ... | PGDIR index | PMD index | PTE index | |
- * |---------------------------------------------------------------|
- * 63 F E D C B A 3 2 0 <- bit nr
- *
- * The variable bits above are defined as:
- * A --> 3 + (PAGE_SHIFT - log2(8))
- * --> 3 + (PAGE_SHIFT - 3) - 1
- * (ie. this is "bit 3" + PAGE_SIZE - size of PTE entry in bits - 1)
- * B --> A + 1
- * C --> B + (PAGE_SHIFT - log2(4))
- * --> B + (PAGE_SHIFT - 2) - 1
- * (ie. this is "bit B" + PAGE_SIZE - size of PMD entry in bits - 1)
- * D --> C + 1
- * E --> D + (PAGE_SHIFT - log2(4))
- * --> D + (PAGE_SHIFT - 2) - 1
- * (ie. this is "bit D" + PAGE_SIZE - size of PGDIR entry in bits - 1)
- * F --> E + 1
- *
- * (Note how "B" always evalutes to PAGE_SHIFT, all the other constants
- * cancel out.)
- *
- * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
- * A --> 12
- * B --> 13
- * C --> 23
- * D --> 24
- * E --> 34
- * F --> 35
- *
- * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
- * A --> 15
- * B --> 16
- * C --> 29
- * D --> 30
- * E --> 43
- * F --> 44
- *
- * Because bits both above and below each PGDIR and PMD index need to
- * be masked out, and the index can be as long as 14 bits (when using a
- * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
- * to extract each index out.
- *
- * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
- * we try to avoid using them for the entire operation. We could setup
- * a mask anywhere from bit 31 down to bit 10 using the sethi instruction.
- *
- * We need a mask covering bits B --> C and one covering D --> E.
- * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
- * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
- * The second in each set cannot be loaded with a single sethi
- * instruction, because the upper bits are past bit 32. We would
- * need to use a sethi + a shift.
- *
- * For the time being, we use 2 shifts and a simple "and" mask.
- * We shift left to clear the bits above the index, we shift down
- * to clear the bits below the index (sans the log2(4 or 8) bits)
- * and a mask to clear the log2(4 or 8) bits. We need therefore
- * define 4 shift counts, all of which are relative to PAGE_SHIFT.
- *
- * Although unsupportable for other reasons, this does mean that
- * 512K and 4MB page sizes would be generaally supported by the
- * kernel. (ELF binaries would break with > 64K PAGE_SIZE since
- * the sections are only aligned that strongly).
- *
- * The operations performed for extraction are thus:
- *
- * ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
- *
- */
-
-#define A (3 + (PAGE_SHIFT - 3) - 1)
-#define B (A + 1)
-#define C (B + (PAGE_SHIFT - 2) - 1)
-#define D (C + 1)
-#define E (D + (PAGE_SHIFT - 2) - 1)
-#define F (E + 1)
-
-#define PMD_SHIFT_LEFT (64 - D)
-#define PMD_SHIFT_RIGHT (64 - (D - B) - 2)
-#define PGDIR_SHIFT_LEFT (64 - F)
-#define PGDIR_SHIFT_RIGHT (64 - (F - D) - 2)
-#define LOW_MASK_BITS 0x3
-
-/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss */
- ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS
- add %g3, %g3, %g5 ! Compute VPTE base
- cmp %g4, %g5 ! VPTE miss?
- bgeu,pt %xcc, 1f ! Continue here
- andcc %g4, TAG_CONTEXT_BITS, %g5 ! tl0 miss Nucleus test
- ba,a,pt %xcc, from_tl1_trap ! Fall to tl0 miss
-1: sllx %g6, VPTE_SHIFT, %g4 ! Position TAG_ACCESS
- or %g4, %g5, %g4 ! Prepare TAG_ACCESS
-
-/* TLB1 ** ICACHE line 2: Quick VPTE miss */
- mov TSB_REG, %g1 ! Grab TSB reg
- ldxa [%g1] ASI_DMMU, %g5 ! Doing PGD caching?
- sllx %g6, PMD_SHIFT_LEFT, %g1 ! Position PMD offset
- be,pn %xcc, sparc64_vpte_nucleus ! Is it from Nucleus?
- srlx %g1, PMD_SHIFT_RIGHT, %g1 ! Mask PMD offset bits
- brnz,pt %g5, sparc64_vpte_continue ! Yep, go like smoke
- andn %g1, LOW_MASK_BITS, %g1 ! Final PMD mask
- sllx %g6, PGDIR_SHIFT_LEFT, %g5 ! Position PGD offset
-
-/* TLB1 ** ICACHE line 3: Quick VPTE miss */
- srlx %g5, PGDIR_SHIFT_RIGHT, %g5 ! Mask PGD offset bits
- andn %g5, LOW_MASK_BITS, %g5 ! Final PGD mask
- lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD
- brz,pn %g5, vpte_noent ! Valid?
-sparc64_kpte_continue:
- sllx %g5, 11, %g5 ! Shift into place
-sparc64_vpte_continue:
- lduwa [%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD
- sllx %g5, 11, %g5 ! Shift into place
- brz,pn %g5, vpte_noent ! Valid?
-
-/* TLB1 ** ICACHE line 4: Quick VPTE miss */
- mov (VALID_SZ_BITS >> 61), %g1 ! upper vpte into %g1
- sllx %g1, 61, %g1 ! finish calc
- or %g5, VPTE_BITS, %g5 ! Prepare VPTE data
- or %g5, %g1, %g5 ! ...
- mov TLB_SFSR, %g1 ! Restore %g1 value
- stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB
- stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS
- retry ! Load PTE once again
-
-#undef VALID_SZ_BITS
-#undef VPTE_SHIFT
-#undef VPTE_BITS
-#undef A
-#undef B
-#undef C
-#undef D
-#undef E
-#undef F
-#undef PMD_SHIFT_LEFT
-#undef PMD_SHIFT_RIGHT
-#undef PGDIR_SHIFT_LEFT
-#undef PGDIR_SHIFT_RIGHT
-#undef LOW_MASK_BITS
-
diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S
deleted file mode 100644
index 6528786840c0..000000000000
--- a/arch/sparc64/kernel/dtlb_base.S
+++ /dev/null
@@ -1,109 +0,0 @@
-/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $
- * dtlb_base.S: Front end to DTLB miss replacement strategy.
- * This is included directly into the trap table.
- *
- * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-
-/* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS)
- * %g2 (KERN_HIGHBITS | KERN_LOWBITS)
- * %g3 VPTE base (0xfffffffe00000000) Spitfire/Blackbird (44-bit VA space)
- * (0xffe0000000000000) Cheetah (64-bit VA space)
- * %g7 __pa(current->mm->pgd)
- *
- * The VPTE base value is completely magic, but note that
- * few places in the kernel other than these TLB miss
- * handlers know anything about the VPTE mechanism or
- * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD).
- * Consider the 44-bit VADDR Ultra-I/II case as an example:
- *
- * VA[0 : (1<<43)] produce VPTE index [%g3 : 0]
- * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3]
- *
- * For Cheetah's 64-bit VADDR space this is:
- *
- * VA[0 : (1<<63)] produce VPTE index [%g3 : 0]
- * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3]
- *
- * If you're paying attention you'll notice that this means half of
- * the VPTE table is above %g3 and half is below, low VA addresses
- * map progressively upwards from %g3, and high VA addresses map
- * progressively upwards towards %g3. This trick was needed to make
- * the same 8 instruction handler work both for Spitfire/Blackbird's
- * peculiar VA space hole configuration and the full 64-bit VA space
- * one of Cheetah at the same time.
- */
-
-/* Ways we can get here:
- *
- * 1) Nucleus loads and stores to/from PA-->VA direct mappings.
- * 2) Nucleus loads and stores to/from vmalloc() areas.
- * 3) User loads and stores.
- * 4) User space accesses by nucleus at tl0
- */
-
-#if PAGE_SHIFT == 13
-/*
- * To compute vpte offset, we need to do ((addr >> 13) << 3),
- * which can be optimized to (addr >> 10) if bits 10/11/12 can
- * be guaranteed to be 0 ... mmu_context.h does guarantee this
- * by only using 10 bits in the hwcontext value.
- */
-#define CREATE_VPTE_OFFSET1(r1, r2) nop
-#define CREATE_VPTE_OFFSET2(r1, r2) \
- srax r1, 10, r2
-#else
-#define CREATE_VPTE_OFFSET1(r1, r2) \
- srax r1, PAGE_SHIFT, r2
-#define CREATE_VPTE_OFFSET2(r1, r2) \
- sllx r2, 3, r2
-#endif
-
-/* DTLB ** ICACHE line 1: Quick user TLB misses */
- mov TLB_SFSR, %g1
- ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS
- andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus?
-from_tl1_trap:
- rdpr %tl, %g5 ! For TL==3 test
- CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset
- be,pn %xcc, kvmap ! Yep, special processing
- CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset
- cmp %g5, 4 ! Last trap level?
-
-/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */
- be,pn %xcc, longpath ! Yep, cannot risk VPTE miss
- nop ! delay slot
- ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE
-1: brgez,pn %g5, longpath ! Invalid, branch out
- nop ! Delay-slot
-9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
- retry ! Trap return
- nop
-
-/* DTLB ** ICACHE line 3: winfixups+real_faults */
-longpath:
- rdpr %pstate, %g5 ! Move into alternate globals
- wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate
- rdpr %tl, %g4 ! See where we came from.
- cmp %g4, 1 ! Is etrap/rtrap window fault?
- mov TLB_TAG_ACCESS, %g4 ! Prepare for fault processing
- ldxa [%g4] ASI_DMMU, %g5 ! Load faulting VA page
- be,pt %xcc, sparc64_realfault_common ! Jump to normal fault handling
- mov FAULT_CODE_DTLB, %g4 ! It was read from DTLB
-
-/* DTLB ** ICACHE line 4: Unused... */
- ba,a,pt %xcc, winfix_trampoline ! Call window fixup code
- nop
- nop
- nop
- nop
- nop
- nop
- nop
-
-#undef CREATE_VPTE_OFFSET1
-#undef CREATE_VPTE_OFFSET2
diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S
new file mode 100644
index 000000000000..09a6a15a7105
--- /dev/null
+++ b/arch/sparc64/kernel/dtlb_miss.S
@@ -0,0 +1,39 @@
+/* DTLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ sllx %g6, 22, %g6 ! Zero out context
+ brz,pn %g5, kvmap_dtlb ! Context 0 processing
+ srlx %g6, 22, %g6 ! Delay slot
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+
+/* DTLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_dtlb ! Miss
+ mov FAULT_CODE_DTLB, %g3
+ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB
+ retry ! Trap done
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c
index 7991e919d8ab..c69504aa638f 100644
--- a/arch/sparc64/kernel/ebus.c
+++ b/arch/sparc64/kernel/ebus.c
@@ -277,10 +277,9 @@ static inline void *ebus_alloc(size_t size)
{
void *mem;
- mem = kmalloc(size, GFP_ATOMIC);
+ mem = kzalloc(size, GFP_ATOMIC);
if (!mem)
panic("ebus_alloc: out of memory");
- memset((char *)mem, 0, size);
return mem;
}
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index a73553ae7e53..6d0b3ed77a02 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -50,7 +50,8 @@ do_fpdis:
add %g0, %g0, %g0
ba,a,pt %xcc, rtrap_clr_l6
-1: ldub [%g6 + TI_FPSAVED], %g5
+1: TRAP_LOAD_THREAD_REG(%g6, %g1)
+ ldub [%g6 + TI_FPSAVED], %g5
wr %g0, FPRS_FEF, %fprs
andcc %g5, FPRS_FEF, %g0
be,a,pt %icc, 1f
@@ -96,10 +97,22 @@ do_fpdis:
add %g6, TI_FPREGS + 0x80, %g1
faddd %f0, %f2, %f4
fmuld %f0, %f2, %f6
- ldxa [%g3] ASI_DMMU, %g5
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
sethi %hi(sparc64_kern_sec_context), %g2
ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
- stxa %g2, [%g3] ASI_DMMU
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
membar #Sync
add %g6, TI_FPREGS + 0xc0, %g2
faddd %f0, %f2, %f8
@@ -125,11 +138,23 @@ do_fpdis:
fzero %f32
mov SECONDARY_CONTEXT, %g3
fzero %f34
- ldxa [%g3] ASI_DMMU, %g5
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
add %g6, TI_FPREGS, %g1
sethi %hi(sparc64_kern_sec_context), %g2
ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
- stxa %g2, [%g3] ASI_DMMU
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
membar #Sync
add %g6, TI_FPREGS + 0x40, %g2
faddd %f32, %f34, %f36
@@ -154,10 +179,22 @@ do_fpdis:
nop
3: mov SECONDARY_CONTEXT, %g3
add %g6, TI_FPREGS, %g1
- ldxa [%g3] ASI_DMMU, %g5
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
sethi %hi(sparc64_kern_sec_context), %g2
ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
- stxa %g2, [%g3] ASI_DMMU
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
membar #Sync
mov 0x40, %g2
membar #Sync
@@ -168,7 +205,13 @@ do_fpdis:
ldda [%g1 + %g2] ASI_BLK_S, %f48
membar #Sync
fpdis_exit:
- stxa %g5, [%g3] ASI_DMMU
+
+661: stxa %g5, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g5, [%g3] ASI_MMU
+ .previous
+
membar #Sync
fpdis_exit2:
wr %g7, 0, %gsr
@@ -189,6 +232,7 @@ fp_other_bounce:
.globl do_fpother_check_fitos
.align 32
do_fpother_check_fitos:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
sethi %hi(fp_other_bounce - 4), %g7
or %g7, %lo(fp_other_bounce - 4), %g7
@@ -312,6 +356,7 @@ fitos_emul_fini:
.globl do_fptrap
.align 32
do_fptrap:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
stx %fsr, [%g6 + TI_XFSR]
do_fptrap_after_fsr:
ldub [%g6 + TI_FPSAVED], %g3
@@ -321,10 +366,22 @@ do_fptrap_after_fsr:
rd %gsr, %g3
stx %g3, [%g6 + TI_GSR]
mov SECONDARY_CONTEXT, %g3
- ldxa [%g3] ASI_DMMU, %g5
+
+661: ldxa [%g3] ASI_DMMU, %g5
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%g3] ASI_MMU, %g5
+ .previous
+
sethi %hi(sparc64_kern_sec_context), %g2
ldx [%g2 + %lo(sparc64_kern_sec_context)], %g2
- stxa %g2, [%g3] ASI_DMMU
+
+661: stxa %g2, [%g3] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g3] ASI_MMU
+ .previous
+
membar #Sync
add %g6, TI_FPREGS, %g2
andcc %g1, FPRS_DL, %g0
@@ -339,7 +396,13 @@ do_fptrap_after_fsr:
stda %f48, [%g2 + %g3] ASI_BLK_S
5: mov SECONDARY_CONTEXT, %g1
membar #Sync
- stxa %g5, [%g1] ASI_DMMU
+
+661: stxa %g5, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g5, [%g1] ASI_MMU
+ .previous
+
membar #Sync
ba,pt %xcc, etrap
wr %g0, 0, %fprs
@@ -353,8 +416,6 @@ do_fptrap_after_fsr:
*
* With this method we can do most of the cross-call tlb/cache
* flushing very quickly.
- *
- * Current CPU's IRQ worklist table is locked into %g6, don't touch.
*/
.text
.align 32
@@ -378,6 +439,8 @@ do_ivec:
sllx %g2, %g4, %g2
sllx %g4, 2, %g4
+ TRAP_LOAD_IRQ_WORK(%g6, %g1)
+
lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */
stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */
stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */
@@ -399,76 +462,6 @@ do_ivec_xcall:
1: jmpl %g3, %g0
nop
- .globl save_alternate_globals
-save_alternate_globals: /* %o0 = save_area */
- rdpr %pstate, %o5
- andn %o5, PSTATE_IE, %o1
- wrpr %o1, PSTATE_AG, %pstate
- stx %g0, [%o0 + 0x00]
- stx %g1, [%o0 + 0x08]
- stx %g2, [%o0 + 0x10]
- stx %g3, [%o0 + 0x18]
- stx %g4, [%o0 + 0x20]
- stx %g5, [%o0 + 0x28]
- stx %g6, [%o0 + 0x30]
- stx %g7, [%o0 + 0x38]
- wrpr %o1, PSTATE_IG, %pstate
- stx %g0, [%o0 + 0x40]
- stx %g1, [%o0 + 0x48]
- stx %g2, [%o0 + 0x50]
- stx %g3, [%o0 + 0x58]
- stx %g4, [%o0 + 0x60]
- stx %g5, [%o0 + 0x68]
- stx %g6, [%o0 + 0x70]
- stx %g7, [%o0 + 0x78]
- wrpr %o1, PSTATE_MG, %pstate
- stx %g0, [%o0 + 0x80]
- stx %g1, [%o0 + 0x88]
- stx %g2, [%o0 + 0x90]
- stx %g3, [%o0 + 0x98]
- stx %g4, [%o0 + 0xa0]
- stx %g5, [%o0 + 0xa8]
- stx %g6, [%o0 + 0xb0]
- stx %g7, [%o0 + 0xb8]
- wrpr %o5, 0x0, %pstate
- retl
- nop
-
- .globl restore_alternate_globals
-restore_alternate_globals: /* %o0 = save_area */
- rdpr %pstate, %o5
- andn %o5, PSTATE_IE, %o1
- wrpr %o1, PSTATE_AG, %pstate
- ldx [%o0 + 0x00], %g0
- ldx [%o0 + 0x08], %g1
- ldx [%o0 + 0x10], %g2
- ldx [%o0 + 0x18], %g3
- ldx [%o0 + 0x20], %g4
- ldx [%o0 + 0x28], %g5
- ldx [%o0 + 0x30], %g6
- ldx [%o0 + 0x38], %g7
- wrpr %o1, PSTATE_IG, %pstate
- ldx [%o0 + 0x40], %g0
- ldx [%o0 + 0x48], %g1
- ldx [%o0 + 0x50], %g2
- ldx [%o0 + 0x58], %g3
- ldx [%o0 + 0x60], %g4
- ldx [%o0 + 0x68], %g5
- ldx [%o0 + 0x70], %g6
- ldx [%o0 + 0x78], %g7
- wrpr %o1, PSTATE_MG, %pstate
- ldx [%o0 + 0x80], %g0
- ldx [%o0 + 0x88], %g1
- ldx [%o0 + 0x90], %g2
- ldx [%o0 + 0x98], %g3
- ldx [%o0 + 0xa0], %g4
- ldx [%o0 + 0xa8], %g5
- ldx [%o0 + 0xb0], %g6
- ldx [%o0 + 0xb8], %g7
- wrpr %o5, 0x0, %pstate
- retl
- nop
-
.globl getcc, setcc
getcc:
ldx [%o0 + PT_V9_TSTATE], %o1
@@ -488,9 +481,24 @@ setcc:
retl
stx %o1, [%o0 + PT_V9_TSTATE]
- .globl utrap, utrap_ill
-utrap: brz,pn %g1, etrap
+ .globl utrap_trap
+utrap_trap: /* %g3=handler,%g4=level */
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ ldx [%g6 + TI_UTRAPS], %g1
+ brnz,pt %g1, invoke_utrap
nop
+
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ call bad_trap
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ clr %l6
+
+invoke_utrap:
+ sllx %g3, 3, %g3
+ ldx [%g1 + %g3], %g1
save %sp, -128, %sp
rdpr %tstate, %l6
rdpr %cwp, %l7
@@ -500,17 +508,6 @@ utrap: brz,pn %g1, etrap
rdpr %tnpc, %l7
wrpr %g1, 0, %tnpc
done
-utrap_ill:
- call bad_trap
- add %sp, PTREGS_OFF, %o0
- ba,pt %xcc, rtrap
- clr %l6
-
- /* XXX Here is stuff we still need to write... -DaveM XXX */
- .globl netbsd_syscall
-netbsd_syscall:
- retl
- nop
/* We need to carefully read the error status, ACK
* the errors, prevent recursive traps, and pass the
@@ -1001,7 +998,7 @@ dcpe_icpe_tl1_common:
* %g3: scratch
* %g4: AFSR
* %g5: AFAR
- * %g6: current thread ptr
+ * %g6: unused, will have current thread ptr after etrap
* %g7: scratch
*/
__cheetah_log_error:
@@ -1539,13 +1536,14 @@ ret_from_syscall:
1: b,pt %xcc, ret_sys_call
ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
-sparc_exit: wrpr %g0, (PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV), %pstate
+sparc_exit: rdpr %pstate, %g2
+ wrpr %g2, PSTATE_IE, %pstate
rdpr %otherwin, %g1
rdpr %cansave, %g3
add %g3, %g1, %g3
wrpr %g3, 0x0, %cansave
wrpr %g0, 0x0, %otherwin
- wrpr %g0, (PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE), %pstate
+ wrpr %g2, 0x0, %pstate
ba,pt %xcc, sys_exit
stb %g0, [%g6 + TI_WSAVED]
@@ -1690,3 +1688,138 @@ __flushw_user:
restore %g0, %g0, %g0
2: retl
nop
+
+#ifdef CONFIG_SMP
+ .globl hard_smp_processor_id
+hard_smp_processor_id:
+#endif
+ .globl real_hard_smp_processor_id
+real_hard_smp_processor_id:
+ __GET_CPUID(%o0)
+ retl
+ nop
+
+ /* %o0: devhandle
+ * %o1: devino
+ *
+ * returns %o0: sysino
+ */
+ .globl sun4v_devino_to_sysino
+sun4v_devino_to_sysino:
+ mov HV_FAST_INTR_DEVINO2SYSINO, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+
+ /* %o0: sysino
+ *
+ * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ */
+ .globl sun4v_intr_getenabled
+sun4v_intr_getenabled:
+ mov HV_FAST_INTR_GETENABLED, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+
+ /* %o0: sysino
+ * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ */
+ .globl sun4v_intr_setenabled
+sun4v_intr_setenabled:
+ mov HV_FAST_INTR_SETENABLED, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+
+ /* %o0: sysino
+ *
+ * returns %o0: intr_state (HV_INTR_STATE_*)
+ */
+ .globl sun4v_intr_getstate
+sun4v_intr_getstate:
+ mov HV_FAST_INTR_GETSTATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+
+ /* %o0: sysino
+ * %o1: intr_state (HV_INTR_STATE_*)
+ */
+ .globl sun4v_intr_setstate
+sun4v_intr_setstate:
+ mov HV_FAST_INTR_SETSTATE, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+
+ /* %o0: sysino
+ *
+ * returns %o0: cpuid
+ */
+ .globl sun4v_intr_gettarget
+sun4v_intr_gettarget:
+ mov HV_FAST_INTR_GETTARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+
+ /* %o0: sysino
+ * %o1: cpuid
+ */
+ .globl sun4v_intr_settarget
+sun4v_intr_settarget:
+ mov HV_FAST_INTR_SETTARGET, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+
+ /* %o0: type
+ * %o1: queue paddr
+ * %o2: num queue entries
+ *
+ * returns %o0: status
+ */
+ .globl sun4v_cpu_qconf
+sun4v_cpu_qconf:
+ mov HV_FAST_CPU_QCONF, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+
+ /* returns %o0: status
+ */
+ .globl sun4v_cpu_yield
+sun4v_cpu_yield:
+ mov HV_FAST_CPU_YIELD, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+
+ /* %o0: num cpus in cpu list
+ * %o1: cpu list paddr
+ * %o2: mondo block paddr
+ *
+ * returns %o0: status
+ */
+ .globl sun4v_cpu_mondo_send
+sun4v_cpu_mondo_send:
+ mov HV_FAST_CPU_MONDO_SEND, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+
+ /* %o0: CPU ID
+ *
+ * returns %o0: -status if status non-zero, else
+ * %o0: cpu state as HV_CPU_STATE_*
+ */
+ .globl sun4v_cpu_state
+sun4v_cpu_state:
+ mov HV_FAST_CPU_STATE, %o5
+ ta HV_FAST_TRAP
+ brnz,pn %o0, 1f
+ sub %g0, %o0, %o0
+ mov %o1, %o0
+1: retl
+ nop
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 0d8eba21111b..149383835c25 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -31,6 +31,7 @@
.globl etrap, etrap_irq, etraptl1
etrap: rdpr %pil, %g2
etrap_irq:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
rdpr %tstate, %g1
sllx %g2, 20, %g3
andcc %g1, TSTATE_PRIV, %g0
@@ -54,7 +55,31 @@ etrap_irq:
rd %y, %g3
stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
- save %g2, -STACK_BIAS, %sp ! Ordering here is critical
+
+ rdpr %cansave, %g1
+ brnz,pt %g1, etrap_save
+ nop
+
+ rdpr %cwp, %g1
+ add %g1, 2, %g1
+ wrpr %g1, %cwp
+ be,pt %xcc, etrap_user_spill
+ mov ASI_AIUP, %g3
+
+ rdpr %otherwin, %g3
+ brz %g3, etrap_kernel_spill
+ mov ASI_AIUS, %g3
+
+etrap_user_spill:
+
+ wr %g3, 0x0, %asi
+ ldx [%g6 + TI_FLAGS], %g3
+ and %g3, _TIF_32BIT, %g3
+ brnz,pt %g3, etrap_user_spill_32bit
+ nop
+ ba,a,pt %xcc, etrap_user_spill_64bit
+
+etrap_save: save %g2, -STACK_BIAS, %sp
mov %g6, %l6
bne,pn %xcc, 3f
@@ -70,42 +95,56 @@ etrap_irq:
wrpr %g2, 0, %wstate
sethi %hi(sparc64_kern_pri_context), %g2
ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
- stxa %g3, [%l4] ASI_DMMU
- flush %l6
- wr %g0, ASI_AIUS, %asi
-2: wrpr %g0, 0x0, %tl
- mov %g4, %l4
+
+661: stxa %g3, [%l4] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g3, [%l4] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %l4
+ flush %l4
+ mov ASI_AIUS, %l7
+2: mov %g4, %l4
mov %g5, %l5
+ add %g7, 4, %l2
+
+ /* Go to trap time globals so we can save them. */
+661: wrpr %g0, ETRAP_PSTATE1, %pstate
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(0)
+ .previous
- mov %g7, %l2
- wrpr %g0, ETRAP_PSTATE1, %pstate
stx %g1, [%sp + PTREGS_OFF + PT_V9_G1]
stx %g2, [%sp + PTREGS_OFF + PT_V9_G2]
+ sllx %l7, 24, %l7
stx %g3, [%sp + PTREGS_OFF + PT_V9_G3]
+ rdpr %cwp, %l0
stx %g4, [%sp + PTREGS_OFF + PT_V9_G4]
stx %g5, [%sp + PTREGS_OFF + PT_V9_G5]
stx %g6, [%sp + PTREGS_OFF + PT_V9_G6]
-
stx %g7, [%sp + PTREGS_OFF + PT_V9_G7]
+ or %l7, %l0, %l7
+ sethi %hi(TSTATE_RMO | TSTATE_PEF), %l0
+ or %l7, %l0, %l7
+ wrpr %l2, %tnpc
+ wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate
stx %i0, [%sp + PTREGS_OFF + PT_V9_I0]
stx %i1, [%sp + PTREGS_OFF + PT_V9_I1]
stx %i2, [%sp + PTREGS_OFF + PT_V9_I2]
stx %i3, [%sp + PTREGS_OFF + PT_V9_I3]
stx %i4, [%sp + PTREGS_OFF + PT_V9_I4]
stx %i5, [%sp + PTREGS_OFF + PT_V9_I5]
-
stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
- stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
- wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
-#ifdef CONFIG_SMP
- mov TSB_REG, %g3
- ldxa [%g3] ASI_IMMU, %g5
-#endif
- jmpl %l2 + 0x4, %g0
- ldx [%g6 + TI_TASK], %g4
+ stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
+ LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
+ ldx [%g6 + TI_TASK], %g4
+ done
-3: ldub [%l6 + TI_FPDEPTH], %l5
+3: mov ASI_P, %l7
+ ldub [%l6 + TI_FPDEPTH], %l5
add %l6, TI_FPSAVED + 1, %l4
srl %l5, 1, %l3
add %l5, 2, %l5
@@ -125,6 +164,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
* 0x58 TL4's TT
* 0x60 TL
*/
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
sub %sp, ((4 * 8) * 4) + 8, %g2
rdpr %tl, %g1
@@ -148,6 +188,11 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
rdpr %tt, %g3
stx %g3, [%g2 + STACK_BIAS + 0x38]
+ sethi %hi(is_sun4v), %g3
+ lduw [%g3 + %lo(is_sun4v)], %g3
+ brnz,pn %g3, finish_tl1_capture
+ nop
+
wrpr %g0, 3, %tl
rdpr %tstate, %g3
stx %g3, [%g2 + STACK_BIAS + 0x40]
@@ -168,91 +213,20 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
rdpr %tt, %g3
stx %g3, [%g2 + STACK_BIAS + 0x78]
- wrpr %g1, %tl
stx %g1, [%g2 + STACK_BIAS + 0x80]
+finish_tl1_capture:
+ wrpr %g0, 1, %tl
+661: nop
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ .previous
+
rdpr %tstate, %g1
sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2
ba,pt %xcc, 1b
andcc %g1, TSTATE_PRIV, %g0
- .align 64
- .globl scetrap
-scetrap: rdpr %pil, %g2
- rdpr %tstate, %g1
- sllx %g2, 20, %g3
- andcc %g1, TSTATE_PRIV, %g0
- or %g1, %g3, %g1
- bne,pn %xcc, 1f
- sub %sp, (STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS), %g2
- wrpr %g0, 7, %cleanwin
-
- sllx %g1, 51, %g3
- sethi %hi(TASK_REGOFF), %g2
- or %g2, %lo(TASK_REGOFF), %g2
- brlz,pn %g3, 1f
- add %g6, %g2, %g2
- wr %g0, 0, %fprs
-1: rdpr %tpc, %g3
- stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE]
-
- rdpr %tnpc, %g1
- stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
- stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
- save %g2, -STACK_BIAS, %sp ! Ordering here is critical
- mov %g6, %l6
- bne,pn %xcc, 2f
- mov ASI_P, %l7
- rdpr %canrestore, %g3
-
- rdpr %wstate, %g2
- wrpr %g0, 0, %canrestore
- sll %g2, 3, %g2
- mov PRIMARY_CONTEXT, %l4
- wrpr %g3, 0, %otherwin
- wrpr %g2, 0, %wstate
- sethi %hi(sparc64_kern_pri_context), %g2
- ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
- stxa %g3, [%l4] ASI_DMMU
- flush %l6
-
- mov ASI_AIUS, %l7
-2: mov %g4, %l4
- mov %g5, %l5
- add %g7, 0x4, %l2
- wrpr %g0, ETRAP_PSTATE1, %pstate
- stx %g1, [%sp + PTREGS_OFF + PT_V9_G1]
- stx %g2, [%sp + PTREGS_OFF + PT_V9_G2]
- sllx %l7, 24, %l7
-
- stx %g3, [%sp + PTREGS_OFF + PT_V9_G3]
- rdpr %cwp, %l0
- stx %g4, [%sp + PTREGS_OFF + PT_V9_G4]
- stx %g5, [%sp + PTREGS_OFF + PT_V9_G5]
- stx %g6, [%sp + PTREGS_OFF + PT_V9_G6]
- stx %g7, [%sp + PTREGS_OFF + PT_V9_G7]
- or %l7, %l0, %l7
- sethi %hi(TSTATE_RMO | TSTATE_PEF), %l0
-
- or %l7, %l0, %l7
- wrpr %l2, %tnpc
- wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate
- stx %i0, [%sp + PTREGS_OFF + PT_V9_I0]
- stx %i1, [%sp + PTREGS_OFF + PT_V9_I1]
- stx %i2, [%sp + PTREGS_OFF + PT_V9_I2]
- stx %i3, [%sp + PTREGS_OFF + PT_V9_I3]
- stx %i4, [%sp + PTREGS_OFF + PT_V9_I4]
-
- stx %i5, [%sp + PTREGS_OFF + PT_V9_I5]
- stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
- mov %l6, %g6
- stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
-#ifdef CONFIG_SMP
- mov TSB_REG, %g3
- ldxa [%g3] ASI_IMMU, %g5
-#endif
- ldx [%g6 + TI_TASK], %g4
- done
-
#undef TASK_REGOFF
#undef ETRAP_PSTATE1
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index b49dcd4504b0..3eadac5e171e 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -26,6 +26,7 @@
#include <asm/head.h>
#include <asm/ttable.h>
#include <asm/mmu.h>
+#include <asm/cpudata.h>
/* This section from from _start to sparc64_boot_end should fit into
* 0x0000000000404000 to 0x0000000000408000.
@@ -94,12 +95,17 @@ sparc64_boot:
wrpr %g1, 0x0, %pstate
ba,a,pt %xcc, 1f
- .globl prom_finddev_name, prom_chosen_path
- .globl prom_getprop_name, prom_mmu_name
- .globl prom_callmethod_name, prom_translate_name
+ .globl prom_finddev_name, prom_chosen_path, prom_root_node
+ .globl prom_getprop_name, prom_mmu_name, prom_peer_name
+ .globl prom_callmethod_name, prom_translate_name, prom_root_compatible
.globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
.globl prom_boot_mapped_pc, prom_boot_mapping_mode
.globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
+ .globl is_sun4v
+prom_peer_name:
+ .asciz "peer"
+prom_compatible_name:
+ .asciz "compatible"
prom_finddev_name:
.asciz "finddevice"
prom_chosen_path:
@@ -116,7 +122,13 @@ prom_map_name:
.asciz "map"
prom_unmap_name:
.asciz "unmap"
+prom_sun4v_name:
+ .asciz "sun4v"
.align 4
+prom_root_compatible:
+ .skip 64
+prom_root_node:
+ .word 0
prom_mmu_ihandle_cache:
.word 0
prom_boot_mapped_pc:
@@ -128,8 +140,54 @@ prom_boot_mapping_phys_high:
.xword 0
prom_boot_mapping_phys_low:
.xword 0
+is_sun4v:
+ .word 0
1:
rd %pc, %l0
+
+ mov (1b - prom_peer_name), %l1
+ sub %l0, %l1, %l1
+ mov 0, %l2
+
+ /* prom_root_node = prom_peer(0) */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "peer"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, 0
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! prom root node
+ mov (1b - prom_root_node), %l1
+ sub %l0, %l1, %l1
+ stw %l4, [%l1]
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_root_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(prom_root_node, "compatible",
+ * &prom_root_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, prom_root_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_root_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
mov (1b - prom_finddev_name), %l1
mov (1b - prom_chosen_path), %l2
mov (1b - prom_boot_mapped_pc), %l3
@@ -238,6 +296,27 @@ prom_boot_mapping_phys_low:
add %sp, (192 + 128), %sp
sparc64_boot_after_remap:
+ sethi %hi(prom_root_compatible), %g1
+ or %g1, %lo(prom_root_compatible), %g1
+ sethi %hi(prom_sun4v_name), %g7
+ or %g7, %lo(prom_sun4v_name), %g7
+ mov 5, %g3
+1: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 2f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 1b
+ add %g1, 1, %g1
+
+ sethi %hi(is_sun4v), %g1
+ or %g1, %lo(is_sun4v), %g1
+ mov 1, %g7
+ stw %g7, [%g1]
+
+2:
+ BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
ba,pt %xcc, spitfire_boot
@@ -301,20 +380,58 @@ jump_to_sun4u_init:
nop
sun4u_init:
+ BRANCH_IF_SUN4V(g1, sun4v_init)
+
/* Set ctx 0 */
- mov PRIMARY_CONTEXT, %g7
- stxa %g0, [%g7] ASI_DMMU
- membar #Sync
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_DMMU
+ membar #Sync
- mov SECONDARY_CONTEXT, %g7
- stxa %g0, [%g7] ASI_DMMU
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_DMMU
membar #Sync
- BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup)
+ ba,pt %xcc, sun4u_continue
+ nop
+
+sun4v_init:
+ /* Set ctx 0 */
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+ ba,pt %xcc, niagara_tlb_fixup
+ nop
+
+sun4u_continue:
+ BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup)
ba,pt %xcc, spitfire_tlb_fixup
nop
+niagara_tlb_fixup:
+ mov 3, %g2 /* Set TLB type to hypervisor. */
+ sethi %hi(tlb_type), %g1
+ stw %g2, [%g1 + %lo(tlb_type)]
+
+ /* Patch copy/clear ops. */
+ call niagara_patch_copyops
+ nop
+ call niagara_patch_bzero
+ nop
+ call niagara_patch_pageops
+ nop
+
+ /* Patch TLB/cache ops. */
+ call hypervisor_patch_cachetlbops
+ nop
+
+ ba,pt %xcc, tlb_fixup_done
+ nop
+
cheetah_tlb_fixup:
mov 2, %g2 /* Set TLB type to cheetah+. */
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
@@ -411,85 +528,55 @@ setup_trap_table:
wrpr %g0, 15, %pil
/* Make the firmware call to jump over to the Linux trap table. */
- call prom_set_trap_table
- sethi %hi(sparc64_ttable_tl0), %o0
+ sethi %hi(is_sun4v), %o0
+ lduw [%o0 + %lo(is_sun4v)], %o0
+ brz,pt %o0, 1f
+ nop
- /* Start using proper page size encodings in ctx register. */
- sethi %hi(sparc64_kern_pri_context), %g3
- ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
- mov PRIMARY_CONTEXT, %g1
- stxa %g2, [%g1] ASI_DMMU
- membar #Sync
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+ add %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD
- /* The Linux trap handlers expect various trap global registers
- * to be setup with some fixed values. So here we set these
- * up very carefully. These globals are:
- *
- * Alternate Globals (PSTATE_AG):
- *
- * %g6 --> current_thread_info()
- *
- * MMU Globals (PSTATE_MG):
- *
- * %g1 --> TLB_SFSR
- * %g2 --> ((_PAGE_VALID | _PAGE_SZ4MB |
- * _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
- * ^ 0xfffff80000000000)
- * (this %g2 value is used for computing the PAGE_OFFSET kernel
- * TLB entries quickly, the virtual address of the fault XOR'd
- * with this %g2 value is the PTE to load into the TLB)
- * %g3 --> VPTE_BASE_CHEETAH or VPTE_BASE_SPITFIRE
+ /* Compute physical address:
*
- * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
- *
- * %g6 --> __irq_work[smp_processor_id()]
+ * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
*/
+ sethi %hi(KERNBASE), %g3
+ sub %g2, %g3, %g2
+ sethi %hi(kern_base), %g3
+ ldx [%g3 + %lo(kern_base)], %g3
+ add %g2, %g3, %o1
- rdpr %pstate, %o1
- mov %g6, %o2
- wrpr %o1, PSTATE_AG, %pstate
- mov %o2, %g6
-
-#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
-#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
- wrpr %o1, PSTATE_MG, %pstate
- mov TSB_REG, %g1
- stxa %g0, [%g1] ASI_DMMU
- membar #Sync
- stxa %g0, [%g1] ASI_IMMU
- membar #Sync
- mov TLB_SFSR, %g1
- sethi %uhi(KERN_HIGHBITS), %g2
- or %g2, %ulo(KERN_HIGHBITS), %g2
- sllx %g2, 32, %g2
- or %g2, KERN_LOWBITS, %g2
-
- BRANCH_IF_ANY_CHEETAH(g3,g7,8f)
- ba,pt %xcc, 9f
+ call prom_set_trap_table_sun4v
+ sethi %hi(sparc64_ttable_tl0), %o0
+
+ ba,pt %xcc, 2f
nop
-8:
- sethi %uhi(VPTE_BASE_CHEETAH), %g3
- or %g3, %ulo(VPTE_BASE_CHEETAH), %g3
- ba,pt %xcc, 2f
- sllx %g3, 32, %g3
+1: call prom_set_trap_table
+ sethi %hi(sparc64_ttable_tl0), %o0
-9:
- sethi %uhi(VPTE_BASE_SPITFIRE), %g3
- or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3
- sllx %g3, 32, %g3
+ /* Start using proper page size encodings in ctx register. */
+2: sethi %hi(sparc64_kern_pri_context), %g3
+ ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
-2:
- clr %g7
-#undef KERN_HIGHBITS
-#undef KERN_LOWBITS
+ mov PRIMARY_CONTEXT, %g1
+
+661: stxa %g2, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g1] ASI_MMU
+ .previous
+
+ membar #Sync
/* Kill PROM timer */
sethi %hi(0x80000000), %o2
sllx %o2, 32, %o2
wr %o2, 0, %tick_cmpr
- BRANCH_IF_ANY_CHEETAH(o2,o3,1f)
+ BRANCH_IF_SUN4V(o2, 1f)
+ BRANCH_IF_ANY_CHEETAH(o2, o3, 1f)
ba,pt %xcc, 2f
nop
@@ -502,7 +589,6 @@ setup_trap_table:
2:
wrpr %g0, %g0, %wstate
- wrpr %o1, 0x0, %pstate
call init_irqwork_curcpu
nop
@@ -517,7 +603,7 @@ setup_trap_table:
restore
.globl setup_tba
-setup_tba: /* i0 = is_starfire */
+setup_tba:
save %sp, -192, %sp
/* The boot processor is the only cpu which invokes this
@@ -536,31 +622,35 @@ setup_tba: /* i0 = is_starfire */
restore
sparc64_boot_end:
-#include "systbls.S"
#include "ktlb.S"
+#include "tsb.S"
#include "etrap.S"
#include "rtrap.S"
#include "winfixup.S"
#include "entry.S"
+#include "sun4v_tlb_miss.S"
+#include "sun4v_ivec.S"
/*
* The following skip makes sure the trap table in ttable.S is aligned
* on a 32K boundary as required by the v9 specs for TBA register.
+ *
+ * We align to a 32K boundary, then we have the 32K kernel TSB,
+ * then the 32K aligned trap table.
*/
1:
.skip 0x4000 + _start - 1b
-#ifdef CONFIG_SBUS
-/* This is just a hack to fool make depend config.h discovering
- strategy: As the .S files below need config.h, but
- make depend does not find it for them, we include config.h
- in head.S */
-#endif
+ .globl swapper_tsb
+swapper_tsb:
+ .skip (32 * 1024)
! 0x0000000000408000
#include "ttable.S"
+#include "systbls.S"
+
.data
.align 8
.globl prom_tba, tlb_type
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 233526ba3abe..8c93ba655b33 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/delay.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/bootmem.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
@@ -39,6 +40,7 @@
#include <asm/cache.h>
#include <asm/cpudata.h>
#include <asm/auxio.h>
+#include <asm/head.h>
#ifdef CONFIG_SMP
static void distribute_irqs(void);
@@ -136,12 +138,48 @@ out_unlock:
return 0;
}
+extern unsigned long real_hard_smp_processor_id(void);
+
+static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
+{
+ unsigned int tid;
+
+ if (this_is_starfire) {
+ tid = starfire_translate(imap, cpuid);
+ tid <<= IMAP_TID_SHIFT;
+ tid &= IMAP_TID_UPA;
+ } else {
+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+ unsigned long ver;
+
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ if ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID) {
+ tid = cpuid << IMAP_TID_SHIFT;
+ tid &= IMAP_TID_JBUS;
+ } else {
+ unsigned int a = cpuid & 0x1f;
+ unsigned int n = (cpuid >> 5) & 0x1f;
+
+ tid = ((a << IMAP_AID_SHIFT) |
+ (n << IMAP_NID_SHIFT));
+ tid &= (IMAP_AID_SAFARI |
+ IMAP_NID_SAFARI);;
+ }
+ } else {
+ tid = cpuid << IMAP_TID_SHIFT;
+ tid &= IMAP_TID_UPA;
+ }
+ }
+
+ return tid;
+}
+
/* Now these are always passed a true fully specified sun4u INO. */
void enable_irq(unsigned int irq)
{
struct ino_bucket *bucket = __bucket(irq);
- unsigned long imap;
- unsigned long tid;
+ unsigned long imap, cpuid;
imap = bucket->imap;
if (imap == 0UL)
@@ -149,47 +187,38 @@ void enable_irq(unsigned int irq)
preempt_disable();
- if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- unsigned long ver;
-
- __asm__ ("rdpr %%ver, %0" : "=r" (ver));
- if ((ver >> 32) == 0x003e0016) {
- /* We set it to our JBUS ID. */
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (tid)
- : "i" (ASI_JBUS_CONFIG));
- tid = ((tid & (0x1fUL<<17)) << 9);
- tid &= IMAP_TID_JBUS;
- } else {
- /* We set it to our Safari AID. */
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (tid)
- : "i" (ASI_SAFARI_CONFIG));
- tid = ((tid & (0x3ffUL<<17)) << 9);
- tid &= IMAP_AID_SAFARI;
- }
- } else if (this_is_starfire == 0) {
- /* We set it to our UPA MID. */
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (tid)
- : "i" (ASI_UPA_CONFIG));
- tid = ((tid & UPA_CONFIG_MID) << 9);
- tid &= IMAP_TID_UPA;
+ /* This gets the physical processor ID, even on uniprocessor,
+ * so we can always program the interrupt target correctly.
+ */
+ cpuid = real_hard_smp_processor_id();
+
+ if (tlb_type == hypervisor) {
+ unsigned int ino = __irq_ino(irq);
+ int err;
+
+ err = sun4v_intr_settarget(ino, cpuid);
+ if (err != HV_EOK)
+ printk("sun4v_intr_settarget(%x,%lu): err(%d)\n",
+ ino, cpuid, err);
+ err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
+ if (err != HV_EOK)
+ printk("sun4v_intr_setenabled(%x): err(%d)\n",
+ ino, err);
} else {
- tid = (starfire_translate(imap, smp_processor_id()) << 26);
- tid &= IMAP_TID_UPA;
+ unsigned int tid = sun4u_compute_tid(imap, cpuid);
+
+ /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product
+ * of this SYSIO's preconfigured IGN in the SYSIO Control
+ * Register, the hardware just mirrors that value here.
+ * However for Graphics and UPA Slave devices the full
+ * IMAP_INR field can be set by the programmer here.
+ *
+ * Things like FFB can now be handled via the new IRQ
+ * mechanism.
+ */
+ upa_writel(tid | IMAP_VALID, imap);
}
- /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product
- * of this SYSIO's preconfigured IGN in the SYSIO Control
- * Register, the hardware just mirrors that value here.
- * However for Graphics and UPA Slave devices the full
- * IMAP_INR field can be set by the programmer here.
- *
- * Things like FFB can now be handled via the new IRQ mechanism.
- */
- upa_writel(tid | IMAP_VALID, imap);
-
preempt_enable();
}
@@ -201,16 +230,26 @@ void disable_irq(unsigned int irq)
imap = bucket->imap;
if (imap != 0UL) {
- u32 tmp;
+ if (tlb_type == hypervisor) {
+ unsigned int ino = __irq_ino(irq);
+ int err;
+
+ err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+ if (err != HV_EOK)
+ printk("sun4v_intr_setenabled(%x): "
+ "err(%d)\n", ino, err);
+ } else {
+ u32 tmp;
- /* NOTE: We do not want to futz with the IRQ clear registers
- * and move the state to IDLE, the SCSI code does call
- * disable_irq() to assure atomicity in the queue cmd
- * SCSI adapter driver code. Thus we'd lose interrupts.
- */
- tmp = upa_readl(imap);
- tmp &= ~IMAP_VALID;
- upa_writel(tmp, imap);
+ /* NOTE: We do not want to futz with the IRQ clear registers
+ * and move the state to IDLE, the SCSI code does call
+ * disable_irq() to assure atomicity in the queue cmd
+ * SCSI adapter driver code. Thus we'd lose interrupts.
+ */
+ tmp = upa_readl(imap);
+ tmp &= ~IMAP_VALID;
+ upa_writel(tmp, imap);
+ }
}
}
@@ -248,6 +287,8 @@ unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long
return __irq(&pil0_dummy_bucket);
}
+ BUG_ON(tlb_type == hypervisor);
+
/* RULE: Both must be specified in all other cases. */
if (iclr == 0UL || imap == 0UL) {
prom_printf("Invalid build_irq %d %d %016lx %016lx\n",
@@ -275,12 +316,11 @@ unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long
goto out;
}
- bucket->irq_info = kmalloc(sizeof(struct irq_desc), GFP_ATOMIC);
+ bucket->irq_info = kzalloc(sizeof(struct irq_desc), GFP_ATOMIC);
if (!bucket->irq_info) {
prom_printf("IRQ: Error, kmalloc(irq_desc) failed.\n");
prom_halt();
}
- memset(bucket->irq_info, 0, sizeof(struct irq_desc));
/* Ok, looks good, set it up. Don't touch the irq_chain or
* the pending flag.
@@ -294,6 +334,37 @@ out:
return __irq(bucket);
}
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags)
+{
+ struct ino_bucket *bucket;
+ unsigned long sysino;
+
+ sysino = sun4v_devino_to_sysino(devhandle, devino);
+
+ bucket = &ivector_table[sysino];
+
+ /* Catch accidental accesses to these things. IMAP/ICLR handling
+ * is done by hypervisor calls on sun4v platforms, not by direct
+ * register accesses.
+ *
+ * But we need to make them look unique for the disable_irq() logic
+ * in free_irq().
+ */
+ bucket->imap = ~0UL - sysino;
+ bucket->iclr = ~0UL - sysino;
+
+ bucket->pil = pil;
+ bucket->flags = flags;
+
+ bucket->irq_info = kzalloc(sizeof(struct irq_desc), GFP_ATOMIC);
+ if (!bucket->irq_info) {
+ prom_printf("IRQ: Error, kmalloc(irq_desc) failed.\n");
+ prom_halt();
+ }
+
+ return __irq(bucket);
+}
+
static void atomic_bucket_insert(struct ino_bucket *bucket)
{
unsigned long pstate;
@@ -482,7 +553,6 @@ void free_irq(unsigned int irq, void *dev_id)
bucket = __bucket(irq);
if (bucket != &pil0_dummy_bucket) {
struct irq_desc *desc = bucket->irq_info;
- unsigned long imap = bucket->imap;
int ent, i;
for (i = 0; i < MAX_IRQ_DESC_ACTION; i++) {
@@ -495,6 +565,8 @@ void free_irq(unsigned int irq, void *dev_id)
}
if (!desc->action_active_mask) {
+ unsigned long imap = bucket->imap;
+
/* This unique interrupt source is now inactive. */
bucket->flags &= ~IBF_ACTIVE;
@@ -592,7 +664,18 @@ static void process_bucket(int irq, struct ino_bucket *bp, struct pt_regs *regs)
break;
}
if (bp->pil != 0) {
- upa_writel(ICLR_IDLE, bp->iclr);
+ if (tlb_type == hypervisor) {
+ unsigned int ino = __irq_ino(bp);
+ int err;
+
+ err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+ if (err != HV_EOK)
+ printk("sun4v_intr_setstate(%x): "
+ "err(%d)\n", ino, err);
+ } else {
+ upa_writel(ICLR_IDLE, bp->iclr);
+ }
+
/* Test and add entropy */
if (random & SA_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
@@ -694,7 +777,7 @@ irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
val = readb(auxio_register);
val |= AUXIO_AUX1_FTCNT;
writeb(val, auxio_register);
- val &= AUXIO_AUX1_FTCNT;
+ val &= ~AUXIO_AUX1_FTCNT;
writeb(val, auxio_register);
doing_pdma = 0;
@@ -727,25 +810,23 @@ EXPORT_SYMBOL(probe_irq_off);
static int retarget_one_irq(struct irqaction *p, int goal_cpu)
{
struct ino_bucket *bucket = get_ino_in_irqaction(p) + ivector_table;
- unsigned long imap = bucket->imap;
- unsigned int tid;
while (!cpu_online(goal_cpu)) {
if (++goal_cpu >= NR_CPUS)
goal_cpu = 0;
}
- if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- tid = goal_cpu << 26;
- tid &= IMAP_AID_SAFARI;
- } else if (this_is_starfire == 0) {
- tid = goal_cpu << 26;
- tid &= IMAP_TID_UPA;
+ if (tlb_type == hypervisor) {
+ unsigned int ino = __irq_ino(bucket);
+
+ sun4v_intr_settarget(ino, goal_cpu);
+ sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
} else {
- tid = (starfire_translate(imap, goal_cpu) << 26);
- tid &= IMAP_TID_UPA;
+ unsigned long imap = bucket->imap;
+ unsigned int tid = sun4u_compute_tid(imap, goal_cpu);
+
+ upa_writel(tid | IMAP_VALID, imap);
}
- upa_writel(tid | IMAP_VALID, imap);
do {
if (++goal_cpu >= NR_CPUS)
@@ -848,33 +929,114 @@ static void kill_prom_timer(void)
void init_irqwork_curcpu(void)
{
- register struct irq_work_struct *workp asm("o2");
- register unsigned long tmp asm("o3");
int cpu = hard_smp_processor_id();
- memset(__irq_work + cpu, 0, sizeof(*workp));
-
- /* Make sure we are called with PSTATE_IE disabled. */
- __asm__ __volatile__("rdpr %%pstate, %0\n\t"
- : "=r" (tmp));
- if (tmp & PSTATE_IE) {
- prom_printf("BUG: init_irqwork_curcpu() called with "
- "PSTATE_IE enabled, bailing.\n");
- __asm__ __volatile__("mov %%i7, %0\n\t"
- : "=r" (tmp));
- prom_printf("BUG: Called from %lx\n", tmp);
+ memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct));
+}
+
+static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type)
+{
+ unsigned long num_entries = 128;
+ unsigned long status;
+
+ status = sun4v_cpu_qconf(type, paddr, num_entries);
+ if (status != HV_EOK) {
+ prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
+ "err %lu\n", type, paddr, num_entries, status);
prom_halt();
}
+}
- /* Set interrupt globals. */
- workp = &__irq_work[cpu];
- __asm__ __volatile__(
- "rdpr %%pstate, %0\n\t"
- "wrpr %0, %1, %%pstate\n\t"
- "mov %2, %%g6\n\t"
- "wrpr %0, 0x0, %%pstate\n\t"
- : "=&r" (tmp)
- : "i" (PSTATE_IG), "r" (workp));
+static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
+{
+ struct trap_per_cpu *tb = &trap_block[this_cpu];
+
+ register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO);
+ register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO);
+ register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR);
+ register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR);
+}
+
+static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, int use_bootmem)
+{
+ void *page;
+
+ if (use_bootmem)
+ page = alloc_bootmem_low_pages(PAGE_SIZE);
+ else
+ page = (void *) get_zeroed_page(GFP_ATOMIC);
+
+ if (!page) {
+ prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
+ prom_halt();
+ }
+
+ *pa_ptr = __pa(page);
+}
+
+static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, int use_bootmem)
+{
+ void *page;
+
+ if (use_bootmem)
+ page = alloc_bootmem_low_pages(PAGE_SIZE);
+ else
+ page = (void *) get_zeroed_page(GFP_ATOMIC);
+
+ if (!page) {
+ prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
+ prom_halt();
+ }
+
+ *pa_ptr = __pa(page);
+}
+
+static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem)
+{
+#ifdef CONFIG_SMP
+ void *page;
+
+ BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+
+ if (use_bootmem)
+ page = alloc_bootmem_low_pages(PAGE_SIZE);
+ else
+ page = (void *) get_zeroed_page(GFP_ATOMIC);
+
+ if (!page) {
+ prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+ prom_halt();
+ }
+
+ tb->cpu_mondo_block_pa = __pa(page);
+ tb->cpu_list_pa = __pa(page + 64);
+#endif
+}
+
+/* Allocate and register the mondo and error queues for this cpu. */
+void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load)
+{
+ struct trap_per_cpu *tb = &trap_block[cpu];
+
+ if (alloc) {
+ alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem);
+ alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem);
+ alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem);
+ alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem);
+ alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem);
+ alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem);
+
+ init_cpu_send_mondo_info(tb, use_bootmem);
+ }
+
+ if (load) {
+ if (cpu != hard_smp_processor_id()) {
+ prom_printf("SUN4V: init mondo on cpu %d not %d\n",
+ cpu, hard_smp_processor_id());
+ prom_halt();
+ }
+ sun4v_register_mondo_queues(cpu);
+ }
}
/* Only invoked on boot processor. */
@@ -884,6 +1046,9 @@ void __init init_IRQ(void)
kill_prom_timer();
memset(&ivector_table[0], 0, sizeof(ivector_table));
+ if (tlb_type == hypervisor)
+ sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1);
+
/* We need to clear any IRQ's pending in the soft interrupt
* registers, a spurious one could be left around from the
* PROM timer which we just disabled.
diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S
deleted file mode 100644
index 4951ff8f6877..000000000000
--- a/arch/sparc64/kernel/itlb_base.S
+++ /dev/null
@@ -1,79 +0,0 @@
-/* $Id: itlb_base.S,v 1.12 2002/02/09 19:49:30 davem Exp $
- * itlb_base.S: Front end to ITLB miss replacement strategy.
- * This is included directly into the trap table.
- *
- * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#if PAGE_SHIFT == 13
-/*
- * To compute vpte offset, we need to do ((addr >> 13) << 3),
- * which can be optimized to (addr >> 10) if bits 10/11/12 can
- * be guaranteed to be 0 ... mmu_context.h does guarantee this
- * by only using 10 bits in the hwcontext value.
- */
-#define CREATE_VPTE_OFFSET1(r1, r2) \
- srax r1, 10, r2
-#define CREATE_VPTE_OFFSET2(r1, r2) nop
-#else /* PAGE_SHIFT */
-#define CREATE_VPTE_OFFSET1(r1, r2) \
- srax r1, PAGE_SHIFT, r2
-#define CREATE_VPTE_OFFSET2(r1, r2) \
- sllx r2, 3, r2
-#endif /* PAGE_SHIFT */
-
-
-/* Ways we can get here:
- *
- * 1) Nucleus instruction misses from module code.
- * 2) All user instruction misses.
- *
- * All real page faults merge their code paths to the
- * sparc64_realfault_common label below.
- */
-
-/* ITLB ** ICACHE line 1: Quick user TLB misses */
- mov TLB_SFSR, %g1
- ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS
- CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset
- CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset
- ldxa [%g3 + %g6] ASI_P, %g5 ! Load VPTE
-1: brgez,pn %g5, 3f ! Not valid, branch out
- sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot
- andcc %g5, %g4, %g0 ! Executable?
-
-/* ITLB ** ICACHE line 2: Real faults */
- be,pn %xcc, 3f ! Nope, branch.
- nop ! Delay-slot
-2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB
- retry ! Trap return
-3: rdpr %pstate, %g4 ! Move into alt-globals
- wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate
- rdpr %tpc, %g5 ! And load faulting VA
- mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB
-
-/* ITLB ** ICACHE line 3: Finish faults */
-sparc64_realfault_common: ! Called by dtlb_miss
- stb %g4, [%g6 + TI_FAULT_CODE]
- stx %g5, [%g6 + TI_FAULT_ADDR]
- ba,pt %xcc, etrap ! Save state
-1: rd %pc, %g7 ! ...
- call do_sparc64_fault ! Call fault handler
- add %sp, PTREGS_OFF, %o0! Compute pt_regs arg
- ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state
- nop
-
-/* ITLB ** ICACHE line 4: Window fixups */
-winfix_trampoline:
- rdpr %tpc, %g3 ! Prepare winfixup TNPC
- or %g3, 0x7c, %g3 ! Compute branch offset
- wrpr %g3, %tnpc ! Write it into TNPC
- done ! Do it to it
- nop
- nop
- nop
- nop
-
-#undef CREATE_VPTE_OFFSET1
-#undef CREATE_VPTE_OFFSET2
diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S
new file mode 100644
index 000000000000..ad46e2024f4b
--- /dev/null
+++ b/arch/sparc64/kernel/itlb_miss.S
@@ -0,0 +1,39 @@
+/* ITLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ sllx %g6, 22, %g6 ! Zero out context
+ brz,pn %g5, kvmap_itlb ! Context 0 processing
+ srlx %g6, 22, %g6 ! Delay slot
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+
+/* ITLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_itlb ! Miss
+ mov FAULT_CODE_ITLB, %g3
+ andcc %g5, _PAGE_EXEC_4U, %g0 ! Executable?
+ be,pn %xcc, tsb_do_fault
+ nop ! Delay slot, fill me
+ stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB
+ retry ! Trap done
+ nop
+
+/* ITLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* ITLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index d9244d3c9f73..31da1e564c95 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -4,191 +4,276 @@
* Copyright (C) 1996 Eddie C. Dost (ecd@brainaid.de)
* Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
* Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
-*/
+ */
#include <linux/config.h>
#include <asm/head.h>
#include <asm/asi.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/tsb.h>
.text
.align 32
-/*
- * On a second level vpte miss, check whether the original fault is to the OBP
- * range (note that this is only possible for instruction miss, data misses to
- * obp range do not use vpte). If so, go back directly to the faulting address.
- * This is because we want to read the tpc, otherwise we have no way of knowing
- * the 8k aligned faulting address if we are using >8k kernel pagesize. This
- * also ensures no vpte range addresses are dropped into tlb while obp is
- * executing (see inherit_locked_prom_mappings() rant).
- */
-sparc64_vpte_nucleus:
- /* Note that kvmap below has verified that the address is
- * in the range MODULES_VADDR --> VMALLOC_END already. So
- * here we need only check if it is an OBP address or not.
+kvmap_itlb:
+ /* g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_IMMU, %g4
+
+ /* sun4v_itlb_miss branches here with the missing virtual
+ * address already loaded into %g4
*/
+kvmap_itlb_4v:
+
+kvmap_itlb_nonlinear:
+ /* Catch kernel NULL pointer calls. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath
+ nop
+
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load)
+
+kvmap_itlb_tsb_miss:
sethi %hi(LOW_OBP_ADDRESS), %g5
cmp %g4, %g5
- blu,pn %xcc, kern_vpte
+ blu,pn %xcc, kvmap_itlb_vmalloc_addr
mov 0x1, %g5
sllx %g5, 32, %g5
cmp %g4, %g5
- blu,pn %xcc, vpte_insn_obp
+ blu,pn %xcc, kvmap_itlb_obp
nop
- /* These two instructions are patched by paginig_init(). */
-kern_vpte:
- sethi %hi(swapper_pgd_zero), %g5
- lduw [%g5 + %lo(swapper_pgd_zero)], %g5
+kvmap_itlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
- /* With kernel PGD in %g5, branch back into dtlb_backend. */
- ba,pt %xcc, sparc64_kpte_continue
- andn %g1, 0x3, %g1 /* Finish PMD offset adjustment. */
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
-vpte_noent:
- /* Restore previous TAG_ACCESS, %g5 is zero, and we will
- * skip over the trap instruction so that the top level
- * TLB miss handler will thing this %g5 value is just an
- * invalid PTE, thus branching to full fault processing.
- */
- mov TLB_SFSR, %g1
- stxa %g4, [%g1 + %g1] ASI_DMMU
- done
-
-vpte_insn_obp:
- /* Behave as if we are at TL0. */
- wrpr %g0, 1, %tl
- rdpr %tpc, %g4 /* Find original faulting iaddr */
- srlx %g4, 13, %g4 /* Throw out context bits */
- sllx %g4, 13, %g4 /* g4 has vpn + ctx0 now */
-
- /* Restore previous TAG_ACCESS. */
- mov TLB_SFSR, %g1
- stxa %g4, [%g1 + %g1] ASI_IMMU
-
- sethi %hi(prom_trans), %g5
- or %g5, %lo(prom_trans), %g5
-
-1: ldx [%g5 + 0x00], %g6 ! base
- brz,a,pn %g6, longpath ! no more entries, fail
- mov TLB_SFSR, %g1 ! and restore %g1
- ldx [%g5 + 0x08], %g1 ! len
- add %g6, %g1, %g1 ! end
- cmp %g6, %g4
- bgu,pt %xcc, 2f
- cmp %g4, %g1
- bgeu,pt %xcc, 2f
- ldx [%g5 + 0x10], %g1 ! PTE
-
- /* TLB load, restore %g1, and return from trap. */
- sub %g4, %g6, %g6
- add %g1, %g6, %g5
- mov TLB_SFSR, %g1
- stxa %g5, [%g0] ASI_ITLB_DATA_IN
- retry
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7
+ brgez,a,pn %g5, kvmap_itlb_longpath
+ KTSB_STORE(%g1, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
-2: ba,pt %xcc, 1b
- add %g5, (3 * 8), %g5 ! next entry
-
-kvmap_do_obp:
- sethi %hi(prom_trans), %g5
- or %g5, %lo(prom_trans), %g5
- srlx %g4, 13, %g4
- sllx %g4, 13, %g4
-
-1: ldx [%g5 + 0x00], %g6 ! base
- brz,a,pn %g6, longpath ! no more entries, fail
- mov TLB_SFSR, %g1 ! and restore %g1
- ldx [%g5 + 0x08], %g1 ! len
- add %g6, %g1, %g1 ! end
- cmp %g6, %g4
- bgu,pt %xcc, 2f
- cmp %g4, %g1
- bgeu,pt %xcc, 2f
- ldx [%g5 + 0x10], %g1 ! PTE
-
- /* TLB load, restore %g1, and return from trap. */
- sub %g4, %g6, %g6
- add %g1, %g6, %g5
- mov TLB_SFSR, %g1
- stxa %g5, [%g0] ASI_DTLB_DATA_IN
+kvmap_itlb_load:
+
+661: stxa %g5, [%g0] ASI_ITLB_DATA_IN
retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
+ /* For sun4v the ASI_ITLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_itlb_load
+ mov %g5, %g3
-2: ba,pt %xcc, 1b
- add %g5, (3 * 8), %g5 ! next entry
+kvmap_itlb_longpath:
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ nop
+ .previous
+
+ rdpr %tpc, %g5
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_ITLB, %g4
+
+kvmap_itlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_itlb_load
+ nop
+
+kvmap_dtlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_dtlb_load
+ nop
-/*
- * On a first level data miss, check whether this is to the OBP range (note
- * that such accesses can be made by prom, as well as by kernel using
- * prom_getproperty on "address"), and if so, do not use vpte access ...
- * rather, use information saved during inherit_prom_mappings() using 8k
- * pagesize.
- */
.align 32
-kvmap:
- brgez,pn %g4, kvmap_nonlinear
+kvmap_dtlb_tsb4m_load:
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+ KTSB_WRITE(%g1, %g5, %g6)
+ ba,pt %xcc, kvmap_dtlb_load
nop
-#ifdef CONFIG_DEBUG_PAGEALLOC
+kvmap_dtlb:
+ /* %g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+
+ /* sun4v_dtlb_miss branches here with the missing virtual
+ * address already loaded into %g4
+ */
+kvmap_dtlb_4v:
+ brgez,pn %g4, kvmap_dtlb_nonlinear
+ nop
+
+ /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */
+ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+
+ /* TSB entry address left in %g1, lookup linear PTE.
+ * Must preserve %g1 and %g6 (TAG).
+ */
+kvmap_dtlb_tsb4m_miss:
+ sethi %hi(kpte_linear_bitmap), %g2
+ or %g2, %lo(kpte_linear_bitmap), %g2
+
+ /* Clear the PAGE_OFFSET top virtual bits, then shift
+ * down to get a 256MB physical address index.
+ */
+ sllx %g4, 21, %g5
+ mov 1, %g7
+ srlx %g5, 21 + 28, %g5
+
+ /* Don't try this at home kids... this depends upon srlx
+ * only taking the low 6 bits of the shift count in %g5.
+ */
+ sllx %g7, %g5, %g7
+
+ /* Divide by 64 to get the offset into the bitmask. */
+ srlx %g5, 6, %g5
+ sllx %g5, 3, %g5
+
+ /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
+ ldx [%g2 + %g5], %g2
+ andcc %g2, %g7, %g0
+ sethi %hi(kern_linear_pte_xor), %g5
+ or %g5, %lo(kern_linear_pte_xor), %g5
+ bne,a,pt %xcc, 1f
+ add %g5, 8, %g5
+
+1: ldx [%g5], %g2
+
.globl kvmap_linear_patch
kvmap_linear_patch:
-#endif
- ba,pt %xcc, kvmap_load
+ ba,pt %xcc, kvmap_dtlb_tsb4m_load
xor %g2, %g4, %g5
-#ifdef CONFIG_DEBUG_PAGEALLOC
- sethi %hi(swapper_pg_dir), %g5
- or %g5, %lo(swapper_pg_dir), %g5
- sllx %g4, 64 - (PGDIR_SHIFT + PGDIR_BITS), %g6
- srlx %g6, 64 - PAGE_SHIFT, %g6
- andn %g6, 0x3, %g6
- lduw [%g5 + %g6], %g5
- brz,pn %g5, longpath
- sllx %g4, 64 - (PMD_SHIFT + PMD_BITS), %g6
- srlx %g6, 64 - PAGE_SHIFT, %g6
- sllx %g5, 11, %g5
- andn %g6, 0x3, %g6
- lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5
- brz,pn %g5, longpath
- sllx %g4, 64 - PMD_SHIFT, %g6
- srlx %g6, 64 - PAGE_SHIFT, %g6
- sllx %g5, 11, %g5
- andn %g6, 0x7, %g6
- ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5
- brz,pn %g5, longpath
+kvmap_dtlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+
+ KTSB_LOCK_TAG(%g1, %g2, %g7)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7
+ brgez,a,pn %g5, kvmap_dtlb_longpath
+ KTSB_STORE(%g1, %g7)
+
+ KTSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+kvmap_dtlb_load:
+
+661: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
+ /* For sun4v the ASI_DTLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_dtlb_load
+ mov %g5, %g3
+
+kvmap_dtlb_nonlinear:
+ /* Catch kernel NULL pointer derefs. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath
nop
- ba,a,pt %xcc, kvmap_load
-#endif
-kvmap_nonlinear:
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+
+kvmap_dtlb_tsbmiss:
sethi %hi(MODULES_VADDR), %g5
cmp %g4, %g5
- blu,pn %xcc, longpath
+ blu,pn %xcc, kvmap_dtlb_longpath
mov (VMALLOC_END >> 24), %g5
sllx %g5, 24, %g5
cmp %g4, %g5
- bgeu,pn %xcc, longpath
+ bgeu,pn %xcc, kvmap_dtlb_longpath
nop
kvmap_check_obp:
sethi %hi(LOW_OBP_ADDRESS), %g5
cmp %g4, %g5
- blu,pn %xcc, kvmap_vmalloc_addr
+ blu,pn %xcc, kvmap_dtlb_vmalloc_addr
mov 0x1, %g5
sllx %g5, 32, %g5
cmp %g4, %g5
- blu,pn %xcc, kvmap_do_obp
+ blu,pn %xcc, kvmap_dtlb_obp
nop
-
-kvmap_vmalloc_addr:
- /* If we get here, a vmalloc addr was accessed, load kernel VPTE. */
- ldxa [%g3 + %g6] ASI_N, %g5
- brgez,pn %g5, longpath
+ ba,pt %xcc, kvmap_dtlb_vmalloc_addr
nop
-kvmap_load:
- /* PTE is valid, load into TLB and return from trap. */
- stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
- retry
+kvmap_dtlb_longpath:
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ ldxa [%g0] ASI_SCRATCHPAD, %g5
+ .previous
+
+ rdpr %tl, %g3
+ cmp %g3, 1
+
+661: mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5
+ nop
+ .previous
+
+ be,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_DTLB, %g4
+ ba,pt %xcc, winfix_trampoline
+ nop
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 2ff7c32ab0ce..95ffa9418620 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -188,6 +188,7 @@ extern void psycho_init(int, char *);
extern void schizo_init(int, char *);
extern void schizo_plus_init(int, char *);
extern void tomatillo_init(int, char *);
+extern void sun4v_pci_init(int, char *);
static struct {
char *model_name;
@@ -204,6 +205,7 @@ static struct {
{ "pci108e,8002", schizo_plus_init },
{ "SUNW,tomatillo", tomatillo_init },
{ "pci108e,a801", tomatillo_init },
+ { "SUNW,sun4v-pci", sun4v_pci_init },
};
#define PCI_NUM_CONTROLLER_TYPES (sizeof(pci_controller_table) / \
sizeof(pci_controller_table[0]))
@@ -283,6 +285,12 @@ int __init pcic_present(void)
return pci_controller_scan(pci_is_controller);
}
+struct pci_iommu_ops *pci_iommu_ops;
+EXPORT_SYMBOL(pci_iommu_ops);
+
+extern struct pci_iommu_ops pci_sun4u_iommu_ops,
+ pci_sun4v_iommu_ops;
+
/* Find each controller in the system, attach and initialize
* software state structure for each and link into the
* pci_controller_root. Setup the controller enough such
@@ -290,6 +298,11 @@ int __init pcic_present(void)
*/
static void __init pci_controller_probe(void)
{
+ if (tlb_type == hypervisor)
+ pci_iommu_ops = &pci_sun4v_iommu_ops;
+ else
+ pci_iommu_ops = &pci_sun4u_iommu_ops;
+
printk("PCI: Probing for controllers.\n");
pci_controller_scan(pci_controller_init);
diff --git a/arch/sparc64/kernel/pci_common.c b/arch/sparc64/kernel/pci_common.c
index 58310aacea28..33dedb1aacd4 100644
--- a/arch/sparc64/kernel/pci_common.c
+++ b/arch/sparc64/kernel/pci_common.c
@@ -39,6 +39,8 @@ static int __init find_device_prom_node(struct pci_pbm_info *pbm,
{
int node;
+ *nregs = 0;
+
/*
* Return the PBM's PROM node in case we are it's PCI device,
* as the PBM's reg property is different to standard PCI reg
@@ -51,10 +53,8 @@ static int __init find_device_prom_node(struct pci_pbm_info *pbm,
pdev->device == PCI_DEVICE_ID_SUN_SCHIZO ||
pdev->device == PCI_DEVICE_ID_SUN_TOMATILLO ||
pdev->device == PCI_DEVICE_ID_SUN_SABRE ||
- pdev->device == PCI_DEVICE_ID_SUN_HUMMINGBIRD)) {
- *nregs = 0;
+ pdev->device == PCI_DEVICE_ID_SUN_HUMMINGBIRD))
return bus_prom_node;
- }
node = prom_getchild(bus_prom_node);
while (node != 0) {
@@ -541,135 +541,183 @@ void __init pci_assign_unassigned(struct pci_pbm_info *pbm,
pci_assign_unassigned(pbm, bus);
}
-static int __init pci_intmap_match(struct pci_dev *pdev, unsigned int *interrupt)
+static inline unsigned int pci_slot_swivel(struct pci_pbm_info *pbm,
+ struct pci_dev *toplevel_pdev,
+ struct pci_dev *pdev,
+ unsigned int interrupt)
{
- struct linux_prom_pci_intmap bridge_local_intmap[PROM_PCIIMAP_MAX], *intmap;
- struct linux_prom_pci_intmask bridge_local_intmask, *intmask;
- struct pcidev_cookie *dev_pcp = pdev->sysdata;
- struct pci_pbm_info *pbm = dev_pcp->pbm;
- struct linux_prom_pci_registers *pregs = dev_pcp->prom_regs;
- unsigned int hi, mid, lo, irq;
- int i, num_intmap, map_slot;
+ unsigned int ret;
- intmap = &pbm->pbm_intmap[0];
- intmask = &pbm->pbm_intmask;
- num_intmap = pbm->num_pbm_intmap;
- map_slot = 0;
+ if (unlikely(interrupt < 1 || interrupt > 4)) {
+ printk("%s: Device %s interrupt value of %u is strange.\n",
+ pbm->name, pci_name(pdev), interrupt);
+ return interrupt;
+ }
- /* If we are underneath a PCI bridge, use PROM register
- * property of the parent bridge which is closest to
- * the PBM.
- *
- * However if that parent bridge has interrupt map/mask
- * properties of its own we use the PROM register property
- * of the next child device on the path to PDEV.
- *
- * In detail the two cases are (note that the 'X' below is the
- * 'next child on the path to PDEV' mentioned above):
- *
- * 1) PBM --> PCI bus lacking int{map,mask} --> X ... PDEV
- *
- * Here we use regs of 'PCI bus' device.
- *
- * 2) PBM --> PCI bus with int{map,mask} --> X ... PDEV
- *
- * Here we use regs of 'X'. Note that X can be PDEV.
- */
- if (pdev->bus->number != pbm->pci_first_busno) {
- struct pcidev_cookie *bus_pcp, *regs_pcp;
- struct pci_dev *bus_dev, *regs_dev;
- int plen;
+ ret = ((interrupt - 1 + (PCI_SLOT(pdev->devfn) & 3)) & 3) + 1;
+
+ printk("%s: %s IRQ Swivel %s [%x:%x] -> [%x]\n",
+ pbm->name, pci_name(toplevel_pdev), pci_name(pdev),
+ interrupt, PCI_SLOT(pdev->devfn), ret);
+
+ return ret;
+}
+
+static inline unsigned int pci_apply_intmap(struct pci_pbm_info *pbm,
+ struct pci_dev *toplevel_pdev,
+ struct pci_dev *pbus,
+ struct pci_dev *pdev,
+ unsigned int interrupt,
+ unsigned int *cnode)
+{
+ struct linux_prom_pci_intmap imap[PROM_PCIIMAP_MAX];
+ struct linux_prom_pci_intmask imask;
+ struct pcidev_cookie *pbus_pcp = pbus->sysdata;
+ struct pcidev_cookie *pdev_pcp = pdev->sysdata;
+ struct linux_prom_pci_registers *pregs = pdev_pcp->prom_regs;
+ int plen, num_imap, i;
+ unsigned int hi, mid, lo, irq, orig_interrupt;
+
+ *cnode = pbus_pcp->prom_node;
+
+ plen = prom_getproperty(pbus_pcp->prom_node, "interrupt-map",
+ (char *) &imap[0], sizeof(imap));
+ if (plen <= 0 ||
+ (plen % sizeof(struct linux_prom_pci_intmap)) != 0) {
+ printk("%s: Device %s interrupt-map has bad len %d\n",
+ pbm->name, pci_name(pbus), plen);
+ goto no_intmap;
+ }
+ num_imap = plen / sizeof(struct linux_prom_pci_intmap);
+
+ plen = prom_getproperty(pbus_pcp->prom_node, "interrupt-map-mask",
+ (char *) &imask, sizeof(imask));
+ if (plen <= 0 ||
+ (plen % sizeof(struct linux_prom_pci_intmask)) != 0) {
+ printk("%s: Device %s interrupt-map-mask has bad len %d\n",
+ pbm->name, pci_name(pbus), plen);
+ goto no_intmap;
+ }
+
+ orig_interrupt = interrupt;
- bus_dev = pdev->bus->self;
- regs_dev = pdev;
+ hi = pregs->phys_hi & imask.phys_hi;
+ mid = pregs->phys_mid & imask.phys_mid;
+ lo = pregs->phys_lo & imask.phys_lo;
+ irq = interrupt & imask.interrupt;
- while (bus_dev->bus &&
- bus_dev->bus->number != pbm->pci_first_busno) {
- regs_dev = bus_dev;
- bus_dev = bus_dev->bus->self;
+ for (i = 0; i < num_imap; i++) {
+ if (imap[i].phys_hi == hi &&
+ imap[i].phys_mid == mid &&
+ imap[i].phys_lo == lo &&
+ imap[i].interrupt == irq) {
+ *cnode = imap[i].cnode;
+ interrupt = imap[i].cinterrupt;
}
+ }
- regs_pcp = regs_dev->sysdata;
- pregs = regs_pcp->prom_regs;
+ printk("%s: %s MAP BUS %s DEV %s [%x] -> [%x]\n",
+ pbm->name, pci_name(toplevel_pdev),
+ pci_name(pbus), pci_name(pdev),
+ orig_interrupt, interrupt);
- bus_pcp = bus_dev->sysdata;
+no_intmap:
+ return interrupt;
+}
- /* But if the PCI bridge has it's own interrupt map
- * and mask properties, use that and the regs of the
- * PCI entity at the next level down on the path to the
- * device.
- */
- plen = prom_getproperty(bus_pcp->prom_node, "interrupt-map",
- (char *) &bridge_local_intmap[0],
- sizeof(bridge_local_intmap));
- if (plen != -1) {
- intmap = &bridge_local_intmap[0];
- num_intmap = plen / sizeof(struct linux_prom_pci_intmap);
- plen = prom_getproperty(bus_pcp->prom_node,
- "interrupt-map-mask",
- (char *) &bridge_local_intmask,
- sizeof(bridge_local_intmask));
- if (plen == -1) {
- printk("pci_intmap_match: Warning! Bridge has intmap "
- "but no intmask.\n");
- printk("pci_intmap_match: Trying to recover.\n");
- return 0;
- }
+/* For each PCI bus on the way to the root:
+ * 1) If it has an interrupt-map property, apply it.
+ * 2) Else, swivel the interrupt number based upon the PCI device number.
+ *
+ * Return the "IRQ controller" node. If this is the PBM's device node,
+ * all interrupt translations are complete, else we should use that node's
+ * "reg" property to apply the PBM's "interrupt-{map,mask}" to the interrupt.
+ */
+static unsigned int __init pci_intmap_match_to_root(struct pci_pbm_info *pbm,
+ struct pci_dev *pdev,
+ unsigned int *interrupt)
+{
+ struct pci_dev *toplevel_pdev = pdev;
+ struct pcidev_cookie *toplevel_pcp = toplevel_pdev->sysdata;
+ unsigned int cnode = toplevel_pcp->prom_node;
+
+ while (pdev->bus->number != pbm->pci_first_busno) {
+ struct pci_dev *pbus = pdev->bus->self;
+ struct pcidev_cookie *pcp = pbus->sysdata;
+ int plen;
- if (pdev->bus->self != bus_dev)
- map_slot = 1;
+ plen = prom_getproplen(pcp->prom_node, "interrupt-map");
+ if (plen <= 0) {
+ *interrupt = pci_slot_swivel(pbm, toplevel_pdev,
+ pdev, *interrupt);
+ cnode = pcp->prom_node;
} else {
- pregs = bus_pcp->prom_regs;
- map_slot = 1;
+ *interrupt = pci_apply_intmap(pbm, toplevel_pdev,
+ pbus, pdev,
+ *interrupt, &cnode);
+
+ while (pcp->prom_node != cnode &&
+ pbus->bus->number != pbm->pci_first_busno) {
+ pbus = pbus->bus->self;
+ pcp = pbus->sysdata;
+ }
}
- }
+ pdev = pbus;
- if (map_slot) {
- *interrupt = ((*interrupt
- - 1
- + PCI_SLOT(pdev->devfn)) & 0x3) + 1;
+ if (cnode == pbm->prom_node)
+ break;
}
- hi = pregs->phys_hi & intmask->phys_hi;
- mid = pregs->phys_mid & intmask->phys_mid;
- lo = pregs->phys_lo & intmask->phys_lo;
- irq = *interrupt & intmask->interrupt;
-
- for (i = 0; i < num_intmap; i++) {
- if (intmap[i].phys_hi == hi &&
- intmap[i].phys_mid == mid &&
- intmap[i].phys_lo == lo &&
- intmap[i].interrupt == irq) {
- *interrupt = intmap[i].cinterrupt;
- printk("PCI-IRQ: Routing bus[%2x] slot[%2x] map[%d] to INO[%02x]\n",
- pdev->bus->number, PCI_SLOT(pdev->devfn),
- map_slot, *interrupt);
- return 1;
- }
+ return cnode;
+}
+
+static int __init pci_intmap_match(struct pci_dev *pdev, unsigned int *interrupt)
+{
+ struct pcidev_cookie *dev_pcp = pdev->sysdata;
+ struct pci_pbm_info *pbm = dev_pcp->pbm;
+ struct linux_prom_pci_registers reg[PROMREG_MAX];
+ unsigned int hi, mid, lo, irq;
+ int i, cnode, plen;
+
+ cnode = pci_intmap_match_to_root(pbm, pdev, interrupt);
+ if (cnode == pbm->prom_node)
+ goto success;
+
+ plen = prom_getproperty(cnode, "reg", (char *) reg, sizeof(reg));
+ if (plen <= 0 ||
+ (plen % sizeof(struct linux_prom_pci_registers)) != 0) {
+ printk("%s: OBP node %x reg property has bad len %d\n",
+ pbm->name, cnode, plen);
+ goto fail;
}
- /* We will run this code even if pbm->num_pbm_intmap is zero, just so
- * we can apply the slot mapping to the PROM interrupt property value.
- * So do not spit out these warnings in that case.
- */
- if (num_intmap != 0) {
- /* Print it both to OBP console and kernel one so that if bootup
- * hangs here the user has the information to report.
- */
- prom_printf("pci_intmap_match: bus %02x, devfn %02x: ",
- pdev->bus->number, pdev->devfn);
- prom_printf("IRQ [%08x.%08x.%08x.%08x] not found in interrupt-map\n",
- pregs->phys_hi, pregs->phys_mid, pregs->phys_lo, *interrupt);
- prom_printf("Please email this information to davem@redhat.com\n");
-
- printk("pci_intmap_match: bus %02x, devfn %02x: ",
- pdev->bus->number, pdev->devfn);
- printk("IRQ [%08x.%08x.%08x.%08x] not found in interrupt-map\n",
- pregs->phys_hi, pregs->phys_mid, pregs->phys_lo, *interrupt);
- printk("Please email this information to davem@redhat.com\n");
+ hi = reg[0].phys_hi & pbm->pbm_intmask.phys_hi;
+ mid = reg[0].phys_mid & pbm->pbm_intmask.phys_mid;
+ lo = reg[0].phys_lo & pbm->pbm_intmask.phys_lo;
+ irq = *interrupt & pbm->pbm_intmask.interrupt;
+
+ for (i = 0; i < pbm->num_pbm_intmap; i++) {
+ struct linux_prom_pci_intmap *intmap;
+
+ intmap = &pbm->pbm_intmap[i];
+
+ if (intmap->phys_hi == hi &&
+ intmap->phys_mid == mid &&
+ intmap->phys_lo == lo &&
+ intmap->interrupt == irq) {
+ *interrupt = intmap->cinterrupt;
+ goto success;
+ }
}
+fail:
return 0;
+
+success:
+ printk("PCI-IRQ: Routing bus[%2x] slot[%2x] to INO[%02x]\n",
+ pdev->bus->number, PCI_SLOT(pdev->devfn),
+ *interrupt);
+ return 1;
}
static void __init pdev_fixup_irq(struct pci_dev *pdev)
@@ -703,16 +751,18 @@ static void __init pdev_fixup_irq(struct pci_dev *pdev)
return;
}
- /* Fully specified already? */
- if (((prom_irq & PCI_IRQ_IGN) >> 6) == portid) {
- pdev->irq = p->irq_build(pbm, pdev, prom_irq);
- goto have_irq;
- }
+ if (tlb_type != hypervisor) {
+ /* Fully specified already? */
+ if (((prom_irq & PCI_IRQ_IGN) >> 6) == portid) {
+ pdev->irq = p->irq_build(pbm, pdev, prom_irq);
+ goto have_irq;
+ }
- /* An onboard device? (bit 5 set) */
- if ((prom_irq & PCI_IRQ_INO) & 0x20) {
- pdev->irq = p->irq_build(pbm, pdev, (portid << 6 | prom_irq));
- goto have_irq;
+ /* An onboard device? (bit 5 set) */
+ if ((prom_irq & PCI_IRQ_INO) & 0x20) {
+ pdev->irq = p->irq_build(pbm, pdev, (portid << 6 | prom_irq));
+ goto have_irq;
+ }
}
/* Can we find a matching entry in the interrupt-map? */
@@ -927,33 +977,30 @@ void pci_register_legacy_regions(struct resource *io_res,
struct resource *p;
/* VGA Video RAM. */
- p = kmalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return;
- memset(p, 0, sizeof(*p));
p->name = "Video RAM area";
p->start = mem_res->start + 0xa0000UL;
p->end = p->start + 0x1ffffUL;
p->flags = IORESOURCE_BUSY;
request_resource(mem_res, p);
- p = kmalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return;
- memset(p, 0, sizeof(*p));
p->name = "System ROM";
p->start = mem_res->start + 0xf0000UL;
p->end = p->start + 0xffffUL;
p->flags = IORESOURCE_BUSY;
request_resource(mem_res, p);
- p = kmalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return;
- memset(p, 0, sizeof(*p));
p->name = "Video ROM";
p->start = mem_res->start + 0xc0000UL;
p->end = p->start + 0x7fffUL;
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
index a11910be1013..8efbc139769d 100644
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -139,12 +139,11 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset,
/* Allocate and initialize the free area map. */
sz = num_tsb_entries / 8;
sz = (sz + 7UL) & ~7UL;
- iommu->arena.map = kmalloc(sz, GFP_KERNEL);
+ iommu->arena.map = kzalloc(sz, GFP_KERNEL);
if (!iommu->arena.map) {
prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
prom_halt();
}
- memset(iommu->arena.map, 0, sz);
iommu->arena.limit = num_tsb_entries;
/* Allocate and initialize the dummy page which we
@@ -219,7 +218,7 @@ static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx)
* DMA for PCI device PDEV. Return non-NULL cpu-side address if
* successful and set *DMA_ADDRP to the PCI side dma address.
*/
-void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
+static void *pci_4u_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -267,7 +266,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
}
/* Free and unmap a consistent DMA translation. */
-void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
+static void pci_4u_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -294,7 +293,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
/* Map a single buffer at PTR of SZ bytes for PCI DMA
* in streaming mode.
*/
-dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
+static dma_addr_t pci_4u_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -415,7 +414,7 @@ do_flush_sync:
}
/* Unmap a single streaming mode DMA translation. */
-void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
+static void pci_4u_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -548,7 +547,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
* When making changes here, inspect the assembly output. I was having
* hard time to kepp this routine out of using stack slots for holding variables.
*/
-int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+static int pci_4u_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -562,9 +561,9 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
/* Fast path single entry scatterlists. */
if (nelems == 1) {
sglist->dma_address =
- pci_map_single(pdev,
- (page_address(sglist->page) + sglist->offset),
- sglist->length, direction);
+ pci_4u_map_single(pdev,
+ (page_address(sglist->page) + sglist->offset),
+ sglist->length, direction);
if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
return 0;
sglist->dma_length = sglist->length;
@@ -635,7 +634,7 @@ bad_no_ctx:
}
/* Unmap a set of streaming mode DMA translations. */
-void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+static void pci_4u_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -695,7 +694,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
/* Make physical memory consistent for a single
* streaming mode DMA translation after a transfer.
*/
-void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
+static void pci_4u_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -735,7 +734,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size
/* Make physical memory consistent for a set of streaming
* mode DMA translations after a transfer.
*/
-void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+static void pci_4u_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
struct pcidev_cookie *pcp;
struct pci_iommu *iommu;
@@ -776,6 +775,17 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
spin_unlock_irqrestore(&iommu->lock, flags);
}
+struct pci_iommu_ops pci_sun4u_iommu_ops = {
+ .alloc_consistent = pci_4u_alloc_consistent,
+ .free_consistent = pci_4u_free_consistent,
+ .map_single = pci_4u_map_single,
+ .unmap_single = pci_4u_unmap_single,
+ .map_sg = pci_4u_map_sg,
+ .unmap_sg = pci_4u_unmap_sg,
+ .dma_sync_single_for_cpu = pci_4u_dma_sync_single_for_cpu,
+ .dma_sync_sg_for_cpu = pci_4u_dma_sync_sg_for_cpu,
+};
+
static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
{
struct pci_dev *ali_isa_bridge;
diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c
index c03ed5f49d31..d17878b145c2 100644
--- a/arch/sparc64/kernel/pci_psycho.c
+++ b/arch/sparc64/kernel/pci_psycho.c
@@ -286,17 +286,17 @@ static unsigned char psycho_pil_table[] = {
/*0x14*/0, 0, 0, 0, /* PCI B slot 1 Int A, B, C, D */
/*0x18*/0, 0, 0, 0, /* PCI B slot 2 Int A, B, C, D */
/*0x1c*/0, 0, 0, 0, /* PCI B slot 3 Int A, B, C, D */
-/*0x20*/4, /* SCSI */
+/*0x20*/5, /* SCSI */
/*0x21*/5, /* Ethernet */
/*0x22*/8, /* Parallel Port */
/*0x23*/13, /* Audio Record */
/*0x24*/14, /* Audio Playback */
/*0x25*/15, /* PowerFail */
-/*0x26*/4, /* second SCSI */
+/*0x26*/5, /* second SCSI */
/*0x27*/11, /* Floppy */
-/*0x28*/4, /* Spare Hardware */
+/*0x28*/5, /* Spare Hardware */
/*0x29*/9, /* Keyboard */
-/*0x2a*/4, /* Mouse */
+/*0x2a*/5, /* Mouse */
/*0x2b*/12, /* Serial */
/*0x2c*/10, /* Timer 0 */
/*0x2d*/11, /* Timer 1 */
@@ -313,11 +313,11 @@ static int psycho_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
ret = psycho_pil_table[ino];
if (ret == 0 && pdev == NULL) {
- ret = 4;
+ ret = 5;
} else if (ret == 0) {
switch ((pdev->class >> 16) & 0xff) {
case PCI_BASE_CLASS_STORAGE:
- ret = 4;
+ ret = 5;
break;
case PCI_BASE_CLASS_NETWORK:
@@ -336,7 +336,7 @@ static int psycho_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
break;
default:
- ret = 4;
+ ret = 5;
break;
};
}
@@ -1164,7 +1164,7 @@ static void pbm_config_busmastering(struct pci_pbm_info *pbm)
static void pbm_scan_bus(struct pci_controller_info *p,
struct pci_pbm_info *pbm)
{
- struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL);
+ struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (!cookie) {
prom_printf("PSYCHO: Critical allocation failure.\n");
@@ -1172,7 +1172,6 @@ static void pbm_scan_bus(struct pci_controller_info *p,
}
/* All we care about is the PBM. */
- memset(cookie, 0, sizeof(*cookie));
cookie->pbm = pbm;
pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno,
@@ -1465,18 +1464,16 @@ void psycho_init(int node, char *model_name)
}
}
- p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
+ p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
if (!p) {
prom_printf("PSYCHO: Fatal memory allocation error.\n");
prom_halt();
}
- memset(p, 0, sizeof(*p));
- iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
+ iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
if (!iommu) {
prom_printf("PSYCHO: Fatal memory allocation error.\n");
prom_halt();
}
- memset(iommu, 0, sizeof(*iommu));
p->pbm_A.iommu = p->pbm_B.iommu = iommu;
p->next = pci_controller_root;
diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c
index da8e1364194f..f67bb7f078cf 100644
--- a/arch/sparc64/kernel/pci_sabre.c
+++ b/arch/sparc64/kernel/pci_sabre.c
@@ -533,17 +533,17 @@ static unsigned char sabre_pil_table[] = {
/*0x14*/0, 0, 0, 0, /* PCI B slot 1 Int A, B, C, D */
/*0x18*/0, 0, 0, 0, /* PCI B slot 2 Int A, B, C, D */
/*0x1c*/0, 0, 0, 0, /* PCI B slot 3 Int A, B, C, D */
-/*0x20*/4, /* SCSI */
+/*0x20*/5, /* SCSI */
/*0x21*/5, /* Ethernet */
/*0x22*/8, /* Parallel Port */
/*0x23*/13, /* Audio Record */
/*0x24*/14, /* Audio Playback */
/*0x25*/15, /* PowerFail */
-/*0x26*/4, /* second SCSI */
+/*0x26*/5, /* second SCSI */
/*0x27*/11, /* Floppy */
-/*0x28*/4, /* Spare Hardware */
+/*0x28*/5, /* Spare Hardware */
/*0x29*/9, /* Keyboard */
-/*0x2a*/4, /* Mouse */
+/*0x2a*/5, /* Mouse */
/*0x2b*/12, /* Serial */
/*0x2c*/10, /* Timer 0 */
/*0x2d*/11, /* Timer 1 */
@@ -565,11 +565,11 @@ static int sabre_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
ret = sabre_pil_table[ino];
if (ret == 0 && pdev == NULL) {
- ret = 4;
+ ret = 5;
} else if (ret == 0) {
switch ((pdev->class >> 16) & 0xff) {
case PCI_BASE_CLASS_STORAGE:
- ret = 4;
+ ret = 5;
break;
case PCI_BASE_CLASS_NETWORK:
@@ -588,7 +588,7 @@ static int sabre_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
break;
default:
- ret = 4;
+ ret = 5;
break;
};
}
@@ -1167,7 +1167,7 @@ static void apb_init(struct pci_controller_info *p, struct pci_bus *sabre_bus)
static struct pcidev_cookie *alloc_bridge_cookie(struct pci_pbm_info *pbm)
{
- struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL);
+ struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (!cookie) {
prom_printf("SABRE: Critical allocation failure.\n");
@@ -1175,7 +1175,6 @@ static struct pcidev_cookie *alloc_bridge_cookie(struct pci_pbm_info *pbm)
}
/* All we care about is the PBM. */
- memset(cookie, 0, sizeof(*cookie));
cookie->pbm = pbm;
return cookie;
@@ -1556,19 +1555,17 @@ void sabre_init(int pnode, char *model_name)
}
}
- p = kmalloc(sizeof(*p), GFP_ATOMIC);
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
if (!p) {
prom_printf("SABRE: Error, kmalloc(pci_controller_info) failed.\n");
prom_halt();
}
- memset(p, 0, sizeof(*p));
- iommu = kmalloc(sizeof(*iommu), GFP_ATOMIC);
+ iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC);
if (!iommu) {
prom_printf("SABRE: Error, kmalloc(pci_iommu) failed.\n");
prom_halt();
}
- memset(iommu, 0, sizeof(*iommu));
p->pbm_A.iommu = p->pbm_B.iommu = iommu;
upa_portid = prom_getintdefault(pnode, "upa-portid", 0xff);
diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c
index d8c4e0919b4e..7fe4de03ac2e 100644
--- a/arch/sparc64/kernel/pci_schizo.c
+++ b/arch/sparc64/kernel/pci_schizo.c
@@ -243,8 +243,8 @@ static unsigned char schizo_pil_table[] = {
/*0x0c*/0, 0, 0, 0, /* PCI slot 3 Int A, B, C, D */
/*0x10*/0, 0, 0, 0, /* PCI slot 4 Int A, B, C, D */
/*0x14*/0, 0, 0, 0, /* PCI slot 5 Int A, B, C, D */
-/*0x18*/4, /* SCSI */
-/*0x19*/4, /* second SCSI */
+/*0x18*/5, /* SCSI */
+/*0x19*/5, /* second SCSI */
/*0x1a*/0, /* UNKNOWN */
/*0x1b*/0, /* UNKNOWN */
/*0x1c*/8, /* Parallel */
@@ -254,7 +254,7 @@ static unsigned char schizo_pil_table[] = {
/*0x20*/13, /* Audio Record */
/*0x21*/14, /* Audio Playback */
/*0x22*/12, /* Serial */
-/*0x23*/4, /* EBUS I2C */
+/*0x23*/5, /* EBUS I2C */
/*0x24*/10, /* RTC Clock */
/*0x25*/11, /* Floppy */
/*0x26*/0, /* UNKNOWN */
@@ -296,11 +296,11 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
ret = schizo_pil_table[ino];
if (ret == 0 && pdev == NULL) {
- ret = 4;
+ ret = 5;
} else if (ret == 0) {
switch ((pdev->class >> 16) & 0xff) {
case PCI_BASE_CLASS_STORAGE:
- ret = 4;
+ ret = 5;
break;
case PCI_BASE_CLASS_NETWORK:
@@ -319,7 +319,7 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
break;
default:
- ret = 4;
+ ret = 5;
break;
};
}
@@ -1525,7 +1525,7 @@ static void pbm_config_busmastering(struct pci_pbm_info *pbm)
static void pbm_scan_bus(struct pci_controller_info *p,
struct pci_pbm_info *pbm)
{
- struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL);
+ struct pcidev_cookie *cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (!cookie) {
prom_printf("%s: Critical allocation failure.\n", pbm->name);
@@ -1533,7 +1533,6 @@ static void pbm_scan_bus(struct pci_controller_info *p,
}
/* All we care about is the PBM. */
- memset(cookie, 0, sizeof(*cookie));
cookie->pbm = pbm;
pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno,
@@ -2120,27 +2119,24 @@ static void __schizo_init(int node, char *model_name, int chip_type)
}
}
- p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
+ p = kzalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
if (!p) {
prom_printf("SCHIZO: Fatal memory allocation error.\n");
prom_halt();
}
- memset(p, 0, sizeof(*p));
- iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
+ iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
if (!iommu) {
prom_printf("SCHIZO: Fatal memory allocation error.\n");
prom_halt();
}
- memset(iommu, 0, sizeof(*iommu));
p->pbm_A.iommu = iommu;
- iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
+ iommu = kzalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
if (!iommu) {
prom_printf("SCHIZO: Fatal memory allocation error.\n");
prom_halt();
}
- memset(iommu, 0, sizeof(*iommu));
p->pbm_B.iommu = iommu;
p->next = pci_controller_root;
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
new file mode 100644
index 000000000000..9372d4f376d5
--- /dev/null
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -0,0 +1,1147 @@
+/* pci_sun4v.c: SUN4V specific PCI controller support.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+
+#include <asm/pbm.h>
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/upa.h>
+#include <asm/pstate.h>
+#include <asm/oplib.h>
+#include <asm/hypervisor.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+
+#include "pci_sun4v.h"
+
+#define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
+
+struct pci_iommu_batch {
+ struct pci_dev *pdev; /* Device mapping is for. */
+ unsigned long prot; /* IOMMU page protections */
+ unsigned long entry; /* Index into IOTSB. */
+ u64 *pglist; /* List of physical pages */
+ unsigned long npages; /* Number of pages in list. */
+};
+
+static DEFINE_PER_CPU(struct pci_iommu_batch, pci_iommu_batch);
+
+/* Interrupts must be disabled. */
+static inline void pci_iommu_batch_start(struct pci_dev *pdev, unsigned long prot, unsigned long entry)
+{
+ struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch);
+
+ p->pdev = pdev;
+ p->prot = prot;
+ p->entry = entry;
+ p->npages = 0;
+}
+
+/* Interrupts must be disabled. */
+static long pci_iommu_batch_flush(struct pci_iommu_batch *p)
+{
+ struct pcidev_cookie *pcp = p->pdev->sysdata;
+ unsigned long devhandle = pcp->pbm->devhandle;
+ unsigned long prot = p->prot;
+ unsigned long entry = p->entry;
+ u64 *pglist = p->pglist;
+ unsigned long npages = p->npages;
+
+ while (npages != 0) {
+ long num;
+
+ num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist));
+ if (unlikely(num < 0)) {
+ if (printk_ratelimit())
+ printk("pci_iommu_batch_flush: IOMMU map of "
+ "[%08lx:%08lx:%lx:%lx:%lx] failed with "
+ "status %ld\n",
+ devhandle, HV_PCI_TSBID(0, entry),
+ npages, prot, __pa(pglist), num);
+ return -1;
+ }
+
+ entry += num;
+ npages -= num;
+ pglist += num;
+ }
+
+ p->entry = entry;
+ p->npages = 0;
+
+ return 0;
+}
+
+/* Interrupts must be disabled. */
+static inline long pci_iommu_batch_add(u64 phys_page)
+{
+ struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch);
+
+ BUG_ON(p->npages >= PGLIST_NENTS);
+
+ p->pglist[p->npages++] = phys_page;
+ if (p->npages == PGLIST_NENTS)
+ return pci_iommu_batch_flush(p);
+
+ return 0;
+}
+
+/* Interrupts must be disabled. */
+static inline long pci_iommu_batch_end(void)
+{
+ struct pci_iommu_batch *p = &__get_cpu_var(pci_iommu_batch);
+
+ BUG_ON(p->npages >= PGLIST_NENTS);
+
+ return pci_iommu_batch_flush(p);
+}
+
+static long pci_arena_alloc(struct pci_iommu_arena *arena, unsigned long npages)
+{
+ unsigned long n, i, start, end, limit;
+ int pass;
+
+ limit = arena->limit;
+ start = arena->hint;
+ pass = 0;
+
+again:
+ n = find_next_zero_bit(arena->map, limit, start);
+ end = n + npages;
+ if (unlikely(end >= limit)) {
+ if (likely(pass < 1)) {
+ limit = start;
+ start = 0;
+ pass++;
+ goto again;
+ } else {
+ /* Scanned the whole thing, give up. */
+ return -1;
+ }
+ }
+
+ for (i = n; i < end; i++) {
+ if (test_bit(i, arena->map)) {
+ start = i + 1;
+ goto again;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, arena->map);
+
+ arena->hint = end;
+
+ return n;
+}
+
+static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages)
+{
+ unsigned long i;
+
+ for (i = base; i < (base + npages); i++)
+ __clear_bit(i, arena->map);
+}
+
+static void *pci_4v_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
+{
+ struct pcidev_cookie *pcp;
+ struct pci_iommu *iommu;
+ unsigned long flags, order, first_page, npages, n;
+ void *ret;
+ long entry;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (unlikely(order >= MAX_ORDER))
+ return NULL;
+
+ npages = size >> IO_PAGE_SHIFT;
+
+ first_page = __get_free_pages(GFP_ATOMIC, order);
+ if (unlikely(first_page == 0UL))
+ return NULL;
+
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ pcp = pdev->sysdata;
+ iommu = pcp->pbm->iommu;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ entry = pci_arena_alloc(&iommu->arena, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(entry < 0L))
+ goto arena_alloc_fail;
+
+ *dma_addrp = (iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ first_page = __pa(first_page);
+
+ local_irq_save(flags);
+
+ pci_iommu_batch_start(pdev,
+ (HV_PCI_MAP_ATTR_READ |
+ HV_PCI_MAP_ATTR_WRITE),
+ entry);
+
+ for (n = 0; n < npages; n++) {
+ long err = pci_iommu_batch_add(first_page + (n * PAGE_SIZE));
+ if (unlikely(err < 0L))
+ goto iommu_map_fail;
+ }
+
+ if (unlikely(pci_iommu_batch_end() < 0L))
+ goto iommu_map_fail;
+
+ local_irq_restore(flags);
+
+ return ret;
+
+iommu_map_fail:
+ /* Interrupts are disabled. */
+ spin_lock(&iommu->lock);
+ pci_arena_free(&iommu->arena, entry, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+arena_alloc_fail:
+ free_pages(first_page, order);
+ return NULL;
+}
+
+static void pci_4v_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
+{
+ struct pcidev_cookie *pcp;
+ struct pci_iommu *iommu;
+ unsigned long flags, order, npages, entry;
+ u32 devhandle;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ pcp = pdev->sysdata;
+ iommu = pcp->pbm->iommu;
+ devhandle = pcp->pbm->devhandle;
+ entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ pci_arena_free(&iommu->arena, entry, npages);
+
+ do {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ } while (npages != 0);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+}
+
+static dma_addr_t pci_4v_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
+{
+ struct pcidev_cookie *pcp;
+ struct pci_iommu *iommu;
+ unsigned long flags, npages, oaddr;
+ unsigned long i, base_paddr;
+ u32 bus_addr, ret;
+ unsigned long prot;
+ long entry;
+
+ pcp = pdev->sysdata;
+ iommu = pcp->pbm->iommu;
+
+ if (unlikely(direction == PCI_DMA_NONE))
+ goto bad;
+
+ oaddr = (unsigned long)ptr;
+ npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ entry = pci_arena_alloc(&iommu->arena, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(entry < 0L))
+ goto bad;
+
+ bus_addr = (iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT));
+ ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+ base_paddr = __pa(oaddr & IO_PAGE_MASK);
+ prot = HV_PCI_MAP_ATTR_READ;
+ if (direction != PCI_DMA_TODEVICE)
+ prot |= HV_PCI_MAP_ATTR_WRITE;
+
+ local_irq_save(flags);
+
+ pci_iommu_batch_start(pdev, prot, entry);
+
+ for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
+ long err = pci_iommu_batch_add(base_paddr);
+ if (unlikely(err < 0L))
+ goto iommu_map_fail;
+ }
+ if (unlikely(pci_iommu_batch_end() < 0L))
+ goto iommu_map_fail;
+
+ local_irq_restore(flags);
+
+ return ret;
+
+bad:
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return PCI_DMA_ERROR_CODE;
+
+iommu_map_fail:
+ /* Interrupts are disabled. */
+ spin_lock(&iommu->lock);
+ pci_arena_free(&iommu->arena, entry, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return PCI_DMA_ERROR_CODE;
+}
+
+static void pci_4v_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
+{
+ struct pcidev_cookie *pcp;
+ struct pci_iommu *iommu;
+ unsigned long flags, npages;
+ long entry;
+ u32 devhandle;
+
+ if (unlikely(direction == PCI_DMA_NONE)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return;
+ }
+
+ pcp = pdev->sysdata;
+ iommu = pcp->pbm->iommu;
+ devhandle = pcp->pbm->devhandle;
+
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+ npages >>= IO_PAGE_SHIFT;
+ bus_addr &= IO_PAGE_MASK;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+ pci_arena_free(&iommu->arena, entry, npages);
+
+ do {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ } while (npages != 0);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+#define SG_ENT_PHYS_ADDRESS(SG) \
+ (__pa(page_address((SG)->page)) + (SG)->offset)
+
+static inline long fill_sg(long entry, struct pci_dev *pdev,
+ struct scatterlist *sg,
+ int nused, int nelems, unsigned long prot)
+{
+ struct scatterlist *dma_sg = sg;
+ struct scatterlist *sg_end = sg + nelems;
+ unsigned long flags;
+ int i;
+
+ local_irq_save(flags);
+
+ pci_iommu_batch_start(pdev, prot, entry);
+
+ for (i = 0; i < nused; i++) {
+ unsigned long pteval = ~0UL;
+ u32 dma_npages;
+
+ dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
+ dma_sg->dma_length +
+ ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
+ do {
+ unsigned long offset;
+ signed int len;
+
+ /* If we are here, we know we have at least one
+ * more page to map. So walk forward until we
+ * hit a page crossing, and begin creating new
+ * mappings from that spot.
+ */
+ for (;;) {
+ unsigned long tmp;
+
+ tmp = SG_ENT_PHYS_ADDRESS(sg);
+ len = sg->length;
+ if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
+ pteval = tmp & IO_PAGE_MASK;
+ offset = tmp & (IO_PAGE_SIZE - 1UL);
+ break;
+ }
+ if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
+ pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
+ offset = 0UL;
+ len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
+ break;
+ }
+ sg++;
+ }
+
+ pteval = (pteval & IOPTE_PAGE);
+ while (len > 0) {
+ long err;
+
+ err = pci_iommu_batch_add(pteval);
+ if (unlikely(err < 0L))
+ goto iommu_map_failed;
+
+ pteval += IO_PAGE_SIZE;
+ len -= (IO_PAGE_SIZE - offset);
+ offset = 0;
+ dma_npages--;
+ }
+
+ pteval = (pteval & IOPTE_PAGE) + len;
+ sg++;
+
+ /* Skip over any tail mappings we've fully mapped,
+ * adjusting pteval along the way. Stop when we
+ * detect a page crossing event.
+ */
+ while (sg < sg_end &&
+ (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
+ (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
+ ((pteval ^
+ (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
+ pteval += sg->length;
+ sg++;
+ }
+ if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
+ pteval = ~0UL;
+ } while (dma_npages != 0);
+ dma_sg++;
+ }
+
+ if (unlikely(pci_iommu_batch_end() < 0L))
+ goto iommu_map_failed;
+
+ local_irq_restore(flags);
+ return 0;
+
+iommu_map_failed:
+ local_irq_restore(flags);
+ return -1L;
+}
+
+static int pci_4v_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+{
+ struct pcidev_cookie *pcp;
+ struct pci_iommu *iommu;
+ unsigned long flags, npages, prot;
+ u32 dma_base;
+ struct scatterlist *sgtmp;
+ long entry, err;
+ int used;
+
+ /* Fast path single entry scatterlists. */
+ if (nelems == 1) {
+ sglist->dma_address =
+ pci_4v_map_single(pdev,
+ (page_address(sglist->page) + sglist->offset),
+ sglist->length, direction);
+ if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
+ return 0;
+ sglist->dma_length = sglist->length;
+ return 1;
+ }
+
+ pcp = pdev->sysdata;
+ iommu = pcp->pbm->iommu;
+
+ if (unlikely(direction == PCI_DMA_NONE))
+ goto bad;
+
+ /* Step 1: Prepare scatter list. */
+ npages = prepare_sg(sglist, nelems);
+
+ /* Step 2: Allocate a cluster and context, if necessary. */
+ spin_lock_irqsave(&iommu->lock, flags);
+ entry = pci_arena_alloc(&iommu->arena, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (unlikely(entry < 0L))
+ goto bad;
+
+ dma_base = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+
+ /* Step 3: Normalize DMA addresses. */
+ used = nelems;
+
+ sgtmp = sglist;
+ while (used && sgtmp->dma_length) {
+ sgtmp->dma_address += dma_base;
+ sgtmp++;
+ used--;
+ }
+ used = nelems - used;
+
+ /* Step 4: Create the mappings. */
+ prot = HV_PCI_MAP_ATTR_READ;
+ if (direction != PCI_DMA_TODEVICE)
+ prot |= HV_PCI_MAP_ATTR_WRITE;
+
+ err = fill_sg(entry, pdev, sglist, used, nelems, prot);
+ if (unlikely(err < 0L))
+ goto iommu_map_failed;
+
+ return used;
+
+bad:
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return 0;
+
+iommu_map_failed:
+ spin_lock_irqsave(&iommu->lock, flags);
+ pci_arena_free(&iommu->arena, entry, npages);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ return 0;
+}
+
+static void pci_4v_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+{
+ struct pcidev_cookie *pcp;
+ struct pci_iommu *iommu;
+ unsigned long flags, i, npages;
+ long entry;
+ u32 devhandle, bus_addr;
+
+ if (unlikely(direction == PCI_DMA_NONE)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ }
+
+ pcp = pdev->sysdata;
+ iommu = pcp->pbm->iommu;
+ devhandle = pcp->pbm->devhandle;
+
+ bus_addr = sglist->dma_address & IO_PAGE_MASK;
+
+ for (i = 1; i < nelems; i++)
+ if (sglist[i].dma_length == 0)
+ break;
+ i--;
+ npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
+ bus_addr) >> IO_PAGE_SHIFT;
+
+ entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ pci_arena_free(&iommu->arena, entry, npages);
+
+ do {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ } while (npages != 0);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void pci_4v_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
+{
+ /* Nothing to do... */
+}
+
+static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+{
+ /* Nothing to do... */
+}
+
+struct pci_iommu_ops pci_sun4v_iommu_ops = {
+ .alloc_consistent = pci_4v_alloc_consistent,
+ .free_consistent = pci_4v_free_consistent,
+ .map_single = pci_4v_map_single,
+ .unmap_single = pci_4v_unmap_single,
+ .map_sg = pci_4v_map_sg,
+ .unmap_sg = pci_4v_unmap_sg,
+ .dma_sync_single_for_cpu = pci_4v_dma_sync_single_for_cpu,
+ .dma_sync_sg_for_cpu = pci_4v_dma_sync_sg_for_cpu,
+};
+
+/* SUN4V PCI configuration space accessors. */
+
+static inline int pci_sun4v_out_of_range(struct pci_pbm_info *pbm, unsigned int bus, unsigned int device, unsigned int func)
+{
+ if (bus == pbm->pci_first_busno) {
+ if (device == 0 && func == 0)
+ return 0;
+ return 1;
+ }
+
+ if (bus < pbm->pci_first_busno ||
+ bus > pbm->pci_last_busno)
+ return 1;
+ return 0;
+}
+
+static int pci_sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ struct pci_pbm_info *pbm = bus_dev->sysdata;
+ u32 devhandle = pbm->devhandle;
+ unsigned int bus = bus_dev->number;
+ unsigned int device = PCI_SLOT(devfn);
+ unsigned int func = PCI_FUNC(devfn);
+ unsigned long ret;
+
+ if (pci_sun4v_out_of_range(pbm, bus, device, func)) {
+ ret = ~0UL;
+ } else {
+ ret = pci_sun4v_config_get(devhandle,
+ HV_PCI_DEVICE_BUILD(bus, device, func),
+ where, size);
+#if 0
+ printk("rcfg: [%x:%x:%x:%d]=[%lx]\n",
+ devhandle, HV_PCI_DEVICE_BUILD(bus, device, func),
+ where, size, ret);
+#endif
+ }
+ switch (size) {
+ case 1:
+ *value = ret & 0xff;
+ break;
+ case 2:
+ *value = ret & 0xffff;
+ break;
+ case 4:
+ *value = ret & 0xffffffff;
+ break;
+ };
+
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ struct pci_pbm_info *pbm = bus_dev->sysdata;
+ u32 devhandle = pbm->devhandle;
+ unsigned int bus = bus_dev->number;
+ unsigned int device = PCI_SLOT(devfn);
+ unsigned int func = PCI_FUNC(devfn);
+ unsigned long ret;
+
+ if (pci_sun4v_out_of_range(pbm, bus, device, func)) {
+ /* Do nothing. */
+ } else {
+ ret = pci_sun4v_config_put(devhandle,
+ HV_PCI_DEVICE_BUILD(bus, device, func),
+ where, size, value);
+#if 0
+ printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n",
+ devhandle, HV_PCI_DEVICE_BUILD(bus, device, func),
+ where, size, value, ret);
+#endif
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops pci_sun4v_ops = {
+ .read = pci_sun4v_read_pci_cfg,
+ .write = pci_sun4v_write_pci_cfg,
+};
+
+
+static void pbm_scan_bus(struct pci_controller_info *p,
+ struct pci_pbm_info *pbm)
+{
+ struct pcidev_cookie *cookie = kmalloc(sizeof(*cookie), GFP_KERNEL);
+
+ if (!cookie) {
+ prom_printf("%s: Critical allocation failure.\n", pbm->name);
+ prom_halt();
+ }
+
+ /* All we care about is the PBM. */
+ memset(cookie, 0, sizeof(*cookie));
+ cookie->pbm = pbm;
+
+ pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, p->pci_ops, pbm);
+#if 0
+ pci_fixup_host_bridge_self(pbm->pci_bus);
+ pbm->pci_bus->self->sysdata = cookie;
+#endif
+ pci_fill_in_pbm_cookies(pbm->pci_bus, pbm,
+ pbm->prom_node);
+ pci_record_assignments(pbm, pbm->pci_bus);
+ pci_assign_unassigned(pbm, pbm->pci_bus);
+ pci_fixup_irq(pbm, pbm->pci_bus);
+ pci_determine_66mhz_disposition(pbm, pbm->pci_bus);
+ pci_setup_busmastering(pbm, pbm->pci_bus);
+}
+
+static void pci_sun4v_scan_bus(struct pci_controller_info *p)
+{
+ if (p->pbm_A.prom_node) {
+ p->pbm_A.is_66mhz_capable =
+ prom_getbool(p->pbm_A.prom_node, "66mhz-capable");
+
+ pbm_scan_bus(p, &p->pbm_A);
+ }
+ if (p->pbm_B.prom_node) {
+ p->pbm_B.is_66mhz_capable =
+ prom_getbool(p->pbm_B.prom_node, "66mhz-capable");
+
+ pbm_scan_bus(p, &p->pbm_B);
+ }
+
+ /* XXX register error interrupt handlers XXX */
+}
+
+static unsigned int pci_sun4v_irq_build(struct pci_pbm_info *pbm,
+ struct pci_dev *pdev,
+ unsigned int devino)
+{
+ u32 devhandle = pbm->devhandle;
+ int pil;
+
+ pil = 5;
+ if (pdev) {
+ switch ((pdev->class >> 16) & 0xff) {
+ case PCI_BASE_CLASS_STORAGE:
+ pil = 5;
+ break;
+
+ case PCI_BASE_CLASS_NETWORK:
+ pil = 6;
+ break;
+
+ case PCI_BASE_CLASS_DISPLAY:
+ pil = 9;
+ break;
+
+ case PCI_BASE_CLASS_MULTIMEDIA:
+ case PCI_BASE_CLASS_MEMORY:
+ case PCI_BASE_CLASS_BRIDGE:
+ case PCI_BASE_CLASS_SERIAL:
+ pil = 10;
+ break;
+
+ default:
+ pil = 5;
+ break;
+ };
+ }
+ BUG_ON(PIL_RESERVED(pil));
+
+ return sun4v_build_irq(devhandle, devino, pil, IBF_PCI);
+}
+
+static void pci_sun4v_base_address_update(struct pci_dev *pdev, int resource)
+{
+ struct pcidev_cookie *pcp = pdev->sysdata;
+ struct pci_pbm_info *pbm = pcp->pbm;
+ struct resource *res, *root;
+ u32 reg;
+ int where, size, is_64bit;
+
+ res = &pdev->resource[resource];
+ if (resource < 6) {
+ where = PCI_BASE_ADDRESS_0 + (resource * 4);
+ } else if (resource == PCI_ROM_RESOURCE) {
+ where = pdev->rom_base_reg;
+ } else {
+ /* Somebody might have asked allocation of a non-standard resource */
+ return;
+ }
+
+ /* XXX 64-bit MEM handling is not %100 correct... XXX */
+ is_64bit = 0;
+ if (res->flags & IORESOURCE_IO)
+ root = &pbm->io_space;
+ else {
+ root = &pbm->mem_space;
+ if ((res->flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK)
+ == PCI_BASE_ADDRESS_MEM_TYPE_64)
+ is_64bit = 1;
+ }
+
+ size = res->end - res->start;
+ pci_read_config_dword(pdev, where, &reg);
+ reg = ((reg & size) |
+ (((u32)(res->start - root->start)) & ~size));
+ if (resource == PCI_ROM_RESOURCE) {
+ reg |= PCI_ROM_ADDRESS_ENABLE;
+ res->flags |= IORESOURCE_ROM_ENABLE;
+ }
+ pci_write_config_dword(pdev, where, reg);
+
+ /* This knows that the upper 32-bits of the address
+ * must be zero. Our PCI common layer enforces this.
+ */
+ if (is_64bit)
+ pci_write_config_dword(pdev, where + 4, 0);
+}
+
+static void pci_sun4v_resource_adjust(struct pci_dev *pdev,
+ struct resource *res,
+ struct resource *root)
+{
+ res->start += root->start;
+ res->end += root->start;
+}
+
+/* Use ranges property to determine where PCI MEM, I/O, and Config
+ * space are for this PCI bus module.
+ */
+static void pci_sun4v_determine_mem_io_space(struct pci_pbm_info *pbm)
+{
+ int i, saw_mem, saw_io;
+
+ saw_mem = saw_io = 0;
+ for (i = 0; i < pbm->num_pbm_ranges; i++) {
+ struct linux_prom_pci_ranges *pr = &pbm->pbm_ranges[i];
+ unsigned long a;
+ int type;
+
+ type = (pr->child_phys_hi >> 24) & 0x3;
+ a = (((unsigned long)pr->parent_phys_hi << 32UL) |
+ ((unsigned long)pr->parent_phys_lo << 0UL));
+
+ switch (type) {
+ case 1:
+ /* 16-bit IO space, 16MB */
+ pbm->io_space.start = a;
+ pbm->io_space.end = a + ((16UL*1024UL*1024UL) - 1UL);
+ pbm->io_space.flags = IORESOURCE_IO;
+ saw_io = 1;
+ break;
+
+ case 2:
+ /* 32-bit MEM space, 2GB */
+ pbm->mem_space.start = a;
+ pbm->mem_space.end = a + (0x80000000UL - 1UL);
+ pbm->mem_space.flags = IORESOURCE_MEM;
+ saw_mem = 1;
+ break;
+
+ case 3:
+ /* XXX 64-bit MEM handling XXX */
+
+ default:
+ break;
+ };
+ }
+
+ if (!saw_io || !saw_mem) {
+ prom_printf("%s: Fatal error, missing %s PBM range.\n",
+ pbm->name,
+ (!saw_io ? "IO" : "MEM"));
+ prom_halt();
+ }
+
+ printk("%s: PCI IO[%lx] MEM[%lx]\n",
+ pbm->name,
+ pbm->io_space.start,
+ pbm->mem_space.start);
+}
+
+static void pbm_register_toplevel_resources(struct pci_controller_info *p,
+ struct pci_pbm_info *pbm)
+{
+ pbm->io_space.name = pbm->mem_space.name = pbm->name;
+
+ request_resource(&ioport_resource, &pbm->io_space);
+ request_resource(&iomem_resource, &pbm->mem_space);
+ pci_register_legacy_regions(&pbm->io_space,
+ &pbm->mem_space);
+}
+
+static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
+ struct pci_iommu *iommu)
+{
+ struct pci_iommu_arena *arena = &iommu->arena;
+ unsigned long i, cnt = 0;
+ u32 devhandle;
+
+ devhandle = pbm->devhandle;
+ for (i = 0; i < arena->limit; i++) {
+ unsigned long ret, io_attrs, ra;
+
+ ret = pci_sun4v_iommu_getmap(devhandle,
+ HV_PCI_TSBID(0, i),
+ &io_attrs, &ra);
+ if (ret == HV_EOK) {
+ cnt++;
+ __set_bit(i, arena->map);
+ }
+ }
+
+ return cnt;
+}
+
+static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
+{
+ struct pci_iommu *iommu = pbm->iommu;
+ unsigned long num_tsb_entries, sz;
+ u32 vdma[2], dma_mask, dma_offset;
+ int err, tsbsize;
+
+ err = prom_getproperty(pbm->prom_node, "virtual-dma",
+ (char *)&vdma[0], sizeof(vdma));
+ if (err == 0 || err == -1) {
+ /* No property, use default values. */
+ vdma[0] = 0x80000000;
+ vdma[1] = 0x80000000;
+ }
+
+ dma_mask = vdma[0];
+ switch (vdma[1]) {
+ case 0x20000000:
+ dma_mask |= 0x1fffffff;
+ tsbsize = 64;
+ break;
+
+ case 0x40000000:
+ dma_mask |= 0x3fffffff;
+ tsbsize = 128;
+ break;
+
+ case 0x80000000:
+ dma_mask |= 0x7fffffff;
+ tsbsize = 256;
+ break;
+
+ default:
+ prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
+ prom_halt();
+ };
+
+ tsbsize *= (8 * 1024);
+
+ num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+ dma_offset = vdma[0];
+
+ /* Setup initial software IOMMU state. */
+ spin_lock_init(&iommu->lock);
+ iommu->ctx_lowest_free = 1;
+ iommu->page_table_map_base = dma_offset;
+ iommu->dma_addr_mask = dma_mask;
+
+ /* Allocate and initialize the free area map. */
+ sz = num_tsb_entries / 8;
+ sz = (sz + 7UL) & ~7UL;
+ iommu->arena.map = kmalloc(sz, GFP_KERNEL);
+ if (!iommu->arena.map) {
+ prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
+ prom_halt();
+ }
+ memset(iommu->arena.map, 0, sz);
+ iommu->arena.limit = num_tsb_entries;
+
+ sz = probe_existing_entries(pbm, iommu);
+
+ printk("%s: TSB entries [%lu], existing mapings [%lu]\n",
+ pbm->name, num_tsb_entries, sz);
+}
+
+static void pci_sun4v_get_bus_range(struct pci_pbm_info *pbm)
+{
+ unsigned int busrange[2];
+ int prom_node = pbm->prom_node;
+ int err;
+
+ err = prom_getproperty(prom_node, "bus-range",
+ (char *)&busrange[0],
+ sizeof(busrange));
+ if (err == 0 || err == -1) {
+ prom_printf("%s: Fatal error, no bus-range.\n", pbm->name);
+ prom_halt();
+ }
+
+ pbm->pci_first_busno = busrange[0];
+ pbm->pci_last_busno = busrange[1];
+
+}
+
+static void pci_sun4v_pbm_init(struct pci_controller_info *p, int prom_node, u32 devhandle)
+{
+ struct pci_pbm_info *pbm;
+ int err, i;
+
+ if (devhandle & 0x40)
+ pbm = &p->pbm_B;
+ else
+ pbm = &p->pbm_A;
+
+ pbm->parent = p;
+ pbm->prom_node = prom_node;
+ pbm->pci_first_slot = 1;
+
+ pbm->devhandle = devhandle;
+
+ sprintf(pbm->name, "SUN4V-PCI%d PBM%c",
+ p->index, (pbm == &p->pbm_A ? 'A' : 'B'));
+
+ printk("%s: devhandle[%x] prom_node[%x:%x]\n",
+ pbm->name, pbm->devhandle,
+ pbm->prom_node, prom_getchild(pbm->prom_node));
+
+ prom_getstring(prom_node, "name",
+ pbm->prom_name, sizeof(pbm->prom_name));
+
+ err = prom_getproperty(prom_node, "ranges",
+ (char *) pbm->pbm_ranges,
+ sizeof(pbm->pbm_ranges));
+ if (err == 0 || err == -1) {
+ prom_printf("%s: Fatal error, no ranges property.\n",
+ pbm->name);
+ prom_halt();
+ }
+
+ pbm->num_pbm_ranges =
+ (err / sizeof(struct linux_prom_pci_ranges));
+
+ /* Mask out the top 8 bits of the ranges, leaving the real
+ * physical address.
+ */
+ for (i = 0; i < pbm->num_pbm_ranges; i++)
+ pbm->pbm_ranges[i].parent_phys_hi &= 0x0fffffff;
+
+ pci_sun4v_determine_mem_io_space(pbm);
+ pbm_register_toplevel_resources(p, pbm);
+
+ err = prom_getproperty(prom_node, "interrupt-map",
+ (char *)pbm->pbm_intmap,
+ sizeof(pbm->pbm_intmap));
+ if (err == 0 || err == -1) {
+ prom_printf("%s: Fatal error, no interrupt-map property.\n",
+ pbm->name);
+ prom_halt();
+ }
+
+ pbm->num_pbm_intmap = (err / sizeof(struct linux_prom_pci_intmap));
+ err = prom_getproperty(prom_node, "interrupt-map-mask",
+ (char *)&pbm->pbm_intmask,
+ sizeof(pbm->pbm_intmask));
+ if (err == 0 || err == -1) {
+ prom_printf("%s: Fatal error, no interrupt-map-mask.\n",
+ pbm->name);
+ prom_halt();
+ }
+
+ pci_sun4v_get_bus_range(pbm);
+ pci_sun4v_iommu_init(pbm);
+}
+
+void sun4v_pci_init(int node, char *model_name)
+{
+ struct pci_controller_info *p;
+ struct pci_iommu *iommu;
+ struct linux_prom64_registers regs;
+ u32 devhandle;
+ int i;
+
+ prom_getproperty(node, "reg", (char *)&regs, sizeof(regs));
+ devhandle = (regs.phys_addr >> 32UL) & 0x0fffffff;
+
+ for (p = pci_controller_root; p; p = p->next) {
+ struct pci_pbm_info *pbm;
+
+ if (p->pbm_A.prom_node && p->pbm_B.prom_node)
+ continue;
+
+ pbm = (p->pbm_A.prom_node ?
+ &p->pbm_A :
+ &p->pbm_B);
+
+ if (pbm->devhandle == (devhandle ^ 0x40)) {
+ pci_sun4v_pbm_init(p, node, devhandle);
+ return;
+ }
+ }
+
+ for_each_cpu(i) {
+ unsigned long page = get_zeroed_page(GFP_ATOMIC);
+
+ if (!page)
+ goto fatal_memory_error;
+
+ per_cpu(pci_iommu_batch, i).pglist = (u64 *) page;
+ }
+
+ p = kmalloc(sizeof(struct pci_controller_info), GFP_ATOMIC);
+ if (!p)
+ goto fatal_memory_error;
+
+ memset(p, 0, sizeof(*p));
+
+ iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
+ if (!iommu)
+ goto fatal_memory_error;
+
+ memset(iommu, 0, sizeof(*iommu));
+ p->pbm_A.iommu = iommu;
+
+ iommu = kmalloc(sizeof(struct pci_iommu), GFP_ATOMIC);
+ if (!iommu)
+ goto fatal_memory_error;
+
+ memset(iommu, 0, sizeof(*iommu));
+ p->pbm_B.iommu = iommu;
+
+ p->next = pci_controller_root;
+ pci_controller_root = p;
+
+ p->index = pci_num_controllers++;
+ p->pbms_same_domain = 0;
+
+ p->scan_bus = pci_sun4v_scan_bus;
+ p->irq_build = pci_sun4v_irq_build;
+ p->base_address_update = pci_sun4v_base_address_update;
+ p->resource_adjust = pci_sun4v_resource_adjust;
+ p->pci_ops = &pci_sun4v_ops;
+
+ /* Like PSYCHO and SCHIZO we have a 2GB aligned area
+ * for memory space.
+ */
+ pci_memspace_mask = 0x7fffffffUL;
+
+ pci_sun4v_pbm_init(p, node, devhandle);
+ return;
+
+fatal_memory_error:
+ prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");
+ prom_halt();
+}
diff --git a/arch/sparc64/kernel/pci_sun4v.h b/arch/sparc64/kernel/pci_sun4v.h
new file mode 100644
index 000000000000..884d25f6158d
--- /dev/null
+++ b/arch/sparc64/kernel/pci_sun4v.h
@@ -0,0 +1,31 @@
+/* pci_sun4v.h: SUN4V specific PCI controller support.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _PCI_SUN4V_H
+#define _PCI_SUN4V_H
+
+extern long pci_sun4v_iommu_map(unsigned long devhandle,
+ unsigned long tsbid,
+ unsigned long num_ttes,
+ unsigned long io_attributes,
+ unsigned long io_page_list_pa);
+extern unsigned long pci_sun4v_iommu_demap(unsigned long devhandle,
+ unsigned long tsbid,
+ unsigned long num_ttes);
+extern unsigned long pci_sun4v_iommu_getmap(unsigned long devhandle,
+ unsigned long tsbid,
+ unsigned long *io_attributes,
+ unsigned long *real_address);
+extern unsigned long pci_sun4v_config_get(unsigned long devhandle,
+ unsigned long pci_device,
+ unsigned long config_offset,
+ unsigned long size);
+extern int pci_sun4v_config_put(unsigned long devhandle,
+ unsigned long pci_device,
+ unsigned long config_offset,
+ unsigned long size,
+ unsigned long data);
+
+#endif /* !(_PCI_SUN4V_H) */
diff --git a/arch/sparc64/kernel/pci_sun4v_asm.S b/arch/sparc64/kernel/pci_sun4v_asm.S
new file mode 100644
index 000000000000..6604fdbf746c
--- /dev/null
+++ b/arch/sparc64/kernel/pci_sun4v_asm.S
@@ -0,0 +1,95 @@
+/* pci_sun4v_asm: Hypervisor calls for PCI support.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/hypervisor.h>
+
+ /* %o0: devhandle
+ * %o1: tsbid
+ * %o2: num ttes
+ * %o3: io_attributes
+ * %o4: io_page_list phys address
+ *
+ * returns %o0: -status if status was non-zero, else
+ * %o0: num pages mapped
+ */
+ .globl pci_sun4v_iommu_map
+pci_sun4v_iommu_map:
+ mov %o5, %g1
+ mov HV_FAST_PCI_IOMMU_MAP, %o5
+ ta HV_FAST_TRAP
+ brnz,pn %o0, 1f
+ sub %g0, %o0, %o0
+ mov %o1, %o0
+1: retl
+ nop
+
+ /* %o0: devhandle
+ * %o1: tsbid
+ * %o2: num ttes
+ *
+ * returns %o0: num ttes demapped
+ */
+ .globl pci_sun4v_iommu_demap
+pci_sun4v_iommu_demap:
+ mov HV_FAST_PCI_IOMMU_DEMAP, %o5
+ ta HV_FAST_TRAP
+ retl
+ mov %o1, %o0
+
+ /* %o0: devhandle
+ * %o1: tsbid
+ * %o2: &io_attributes
+ * %o3: &real_address
+ *
+ * returns %o0: status
+ */
+ .globl pci_sun4v_iommu_getmap
+pci_sun4v_iommu_getmap:
+ mov %o2, %o4
+ mov HV_FAST_PCI_IOMMU_GETMAP, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ stx %o2, [%o3]
+ retl
+ mov %o0, %o0
+
+ /* %o0: devhandle
+ * %o1: pci_device
+ * %o2: pci_config_offset
+ * %o3: size
+ *
+ * returns %o0: data
+ *
+ * If there is an error, the data will be returned
+ * as all 1's.
+ */
+ .globl pci_sun4v_config_get
+pci_sun4v_config_get:
+ mov HV_FAST_PCI_CONFIG_GET, %o5
+ ta HV_FAST_TRAP
+ brnz,a,pn %o1, 1f
+ mov -1, %o2
+1: retl
+ mov %o2, %o0
+
+ /* %o0: devhandle
+ * %o1: pci_device
+ * %o2: pci_config_offset
+ * %o3: size
+ * %o4: data
+ *
+ * returns %o0: status
+ *
+ * status will be zero if the operation completed
+ * successfully, else -1 if not
+ */
+ .globl pci_sun4v_config_put
+pci_sun4v_config_put:
+ mov HV_FAST_PCI_CONFIG_PUT, %o5
+ ta HV_FAST_TRAP
+ brnz,a,pn %o1, 1f
+ mov -1, %o1
+1: retl
+ mov %o1, %o0
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 059b0d025224..1c7ca2f712d9 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -44,83 +44,61 @@
#include <asm/fpumacro.h>
#include <asm/head.h>
#include <asm/cpudata.h>
+#include <asm/mmu_context.h>
#include <asm/unistd.h>
+#include <asm/hypervisor.h>
/* #define VERBOSE_SHOWREGS */
-/*
- * Nothing special yet...
- */
-void default_idle(void)
-{
-}
-
-#ifndef CONFIG_SMP
-
-/*
- * the idle loop on a Sparc... ;)
- */
-void cpu_idle(void)
+static void sparc64_yield(void)
{
- /* endless idle loop with no priority at all */
- for (;;) {
- /* If current->work.need_resched is zero we should really
- * setup for a system wakup event and execute a shutdown
- * instruction.
- *
- * But this requires writing back the contents of the
- * L2 cache etc. so implement this later. -DaveM
- */
- while (!need_resched())
- barrier();
+ if (tlb_type != hypervisor)
+ return;
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- check_pgt_cache();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
+
+ while (!need_resched()) {
+ unsigned long pstate;
+
+ /* Disable interrupts. */
+ __asm__ __volatile__(
+ "rdpr %%pstate, %0\n\t"
+ "andn %0, %1, %0\n\t"
+ "wrpr %0, %%g0, %%pstate"
+ : "=&r" (pstate)
+ : "i" (PSTATE_IE));
+
+ if (!need_resched())
+ sun4v_cpu_yield();
+
+ /* Re-enable interrupts. */
+ __asm__ __volatile__(
+ "rdpr %%pstate, %0\n\t"
+ "or %0, %1, %0\n\t"
+ "wrpr %0, %%g0, %%pstate"
+ : "=&r" (pstate)
+ : "i" (PSTATE_IE));
}
-}
-#else
+ set_thread_flag(TIF_POLLING_NRFLAG);
+}
-/*
- * the idle loop on a UltraMultiPenguin...
- *
- * TIF_POLLING_NRFLAG is set because we do not sleep the cpu
- * inside of the idler task, so an interrupt is not needed
- * to get a clean fast response.
- *
- * XXX Reverify this assumption... -DaveM
- *
- * Addendum: We do want it to do something for the signal
- * delivery case, we detect that by just seeing
- * if we are trying to send this to an idler or not.
- */
+/* The idle loop on sparc64. */
void cpu_idle(void)
{
- cpuinfo_sparc *cpuinfo = &local_cpu_data();
set_thread_flag(TIF_POLLING_NRFLAG);
while(1) {
if (need_resched()) {
- cpuinfo->idle_volume = 0;
preempt_enable_no_resched();
schedule();
preempt_disable();
- check_pgt_cache();
}
- cpuinfo->idle_volume++;
-
- /* The store ordering is so that IRQ handlers on
- * other cpus see our increasing idleness for the buddy
- * redistribution algorithm. -DaveM
- */
- membar_storeload_storestore();
+ sparc64_yield();
}
}
-#endif
-
extern char reboot_command [];
extern void (*prom_palette)(int);
@@ -354,6 +332,7 @@ void show_regs(struct pt_regs *regs)
extern long etrap, etraptl1;
#endif
__show_regs(regs);
+#if 0
#ifdef CONFIG_SMP
{
extern void smp_report_regs(void);
@@ -361,6 +340,7 @@ void show_regs(struct pt_regs *regs)
smp_report_regs();
}
#endif
+#endif
#ifdef VERBOSE_SHOWREGS
if (regs->tpc >= &etrap && regs->tpc < &etraptl1 &&
@@ -433,30 +413,15 @@ void exit_thread(void)
void flush_thread(void)
{
struct thread_info *t = current_thread_info();
+ struct mm_struct *mm;
if (t->flags & _TIF_ABI_PENDING)
t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
- if (t->task->mm) {
- unsigned long pgd_cache = 0UL;
- if (test_thread_flag(TIF_32BIT)) {
- struct mm_struct *mm = t->task->mm;
- pgd_t *pgd0 = &mm->pgd[0];
- pud_t *pud0 = pud_offset(pgd0, 0);
+ mm = t->task->mm;
+ if (mm)
+ tsb_context_switch(mm);
- if (pud_none(*pud0)) {
- pmd_t *page = pmd_alloc_one(mm, 0);
- pud_set(pud0, page);
- }
- pgd_cache = get_pgd_cache(pgd0);
- }
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (pgd_cache),
- "r" (TSB_REG),
- "i" (ASI_DMMU));
- }
set_thread_wsaved(0);
/* Turn off performance counters if on. */
@@ -555,6 +520,18 @@ void synchronize_user_stack(void)
}
}
+static void stack_unaligned(unsigned long sp)
+{
+ siginfo_t info;
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *) sp;
+ info.si_trapno = 0;
+ force_sig_info(SIGBUS, &info, current);
+}
+
void fault_in_user_windows(void)
{
struct thread_info *t = current_thread_info();
@@ -570,13 +547,17 @@ void fault_in_user_windows(void)
flush_user_windows();
window = get_thread_wsaved();
- if (window != 0) {
+ if (likely(window != 0)) {
window -= 1;
do {
unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
struct reg_window *rwin = &t->reg_window[window];
- if (copy_to_user((char __user *)sp, rwin, winsize))
+ if (unlikely(sp & 0x7UL))
+ stack_unaligned(sp);
+
+ if (unlikely(copy_to_user((char __user *)sp,
+ rwin, winsize)))
goto barf;
} while (window--);
}
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index 3f9746f856d2..eb93e9c52846 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -124,6 +124,9 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
{
BUG_ON(len > PAGE_SIZE);
+ if (tlb_type == hypervisor)
+ return;
+
#ifdef DCACHE_ALIASING_POSSIBLE
/* If bit 13 of the kernel address we used to access the
* user page is the same as the virtual address that page
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index b80eba0081ca..7130e866f935 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -223,12 +223,26 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
- mov TSB_REG, %g6
- brnz,a,pn %l3, 1f
- ldxa [%g6] ASI_IMMU, %g5
-1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
+ brz,pt %l3, 1f
+ mov %g6, %l2
+
+ /* Must do this before thread reg is clobbered below. */
+ LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2)
+1:
+ ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
- wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
+
+ /* Normal globals are restored, go to trap globals. */
+661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
+ nop
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
+ SET_GL(1)
+ .previous
+
+ mov %l2, %g6
+
ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1
@@ -252,27 +266,108 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
brnz,pn %l3, kern_rtt
mov PRIMARY_CONTEXT, %l7
- ldxa [%l7 + %l7] ASI_DMMU, %l0
+
+661: ldxa [%l7 + %l7] ASI_DMMU, %l0
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ ldxa [%l7 + %l7] ASI_MMU, %l0
+ .previous
+
sethi %hi(sparc64_kern_pri_nuc_bits), %l1
ldx [%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1
or %l0, %l1, %l0
- stxa %l0, [%l7] ASI_DMMU
- flush %g6
+
+661: stxa %l0, [%l7] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %l0, [%l7] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %l7
+ flush %l7
rdpr %wstate, %l1
rdpr %otherwin, %l2
srl %l1, 3, %l1
wrpr %l2, %g0, %canrestore
wrpr %l1, %g0, %wstate
- wrpr %g0, %g0, %otherwin
+ brnz,pt %l2, user_rtt_restore
+ wrpr %g0, %g0, %otherwin
+
+ ldx [%g6 + TI_FLAGS], %g3
+ wr %g0, ASI_AIUP, %asi
+ rdpr %cwp, %g1
+ andcc %g3, _TIF_32BIT, %g0
+ sub %g1, 1, %g1
+ bne,pt %xcc, user_rtt_fill_32bit
+ wrpr %g1, %cwp
+ ba,a,pt %xcc, user_rtt_fill_64bit
+
+user_rtt_fill_fixup:
+ rdpr %cwp, %g1
+ add %g1, 1, %g1
+ wrpr %g1, 0x0, %cwp
+
+ rdpr %wstate, %g2
+ sll %g2, 3, %g2
+ wrpr %g2, 0x0, %wstate
+
+ /* We know %canrestore and %otherwin are both zero. */
+
+ sethi %hi(sparc64_kern_pri_context), %g2
+ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
+ mov PRIMARY_CONTEXT, %g1
+
+661: stxa %g2, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g1] ASI_MMU
+ .previous
+
+ sethi %hi(KERNBASE), %g1
+ flush %g1
+
+ or %g4, FAULT_CODE_WINFIXUP, %g4
+ stb %g4, [%g6 + TI_FAULT_CODE]
+ stx %g5, [%g6 + TI_FAULT_ADDR]
+
+ mov %g6, %l1
+ wrpr %g0, 0x0, %tl
+
+661: nop
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ SET_GL(0)
+ .previous
+
+ wrpr %g0, RTRAP_PSTATE, %pstate
+
+ mov %l1, %g6
+ ldx [%g6 + TI_TASK], %g4
+ LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+ call do_sparc64_fault
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ nop
+
+user_rtt_pre_restore:
+ add %g1, 1, %g1
+ wrpr %g1, 0x0, %cwp
+
+user_rtt_restore:
restore
rdpr %canrestore, %g1
wrpr %g1, 0x0, %cleanwin
retry
nop
-kern_rtt: restore
+kern_rtt: rdpr %canrestore, %g1
+ brz,pn %g1, kern_rtt_fill
+ nop
+kern_rtt_restore:
+ restore
retry
+
to_kernel:
#ifdef CONFIG_PREEMPT
ldsw [%g6 + TI_PRE_COUNT], %l5
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index d95a1bcf163d..1d6ffdeabd4c 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -693,11 +693,11 @@ void sbus_set_sbus64(struct sbus_dev *sdev, int bursts)
/* SBUS SYSIO INO number to Sparc PIL level. */
static unsigned char sysio_ino_to_pil[] = {
- 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 0 */
- 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 1 */
- 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 2 */
- 0, 4, 4, 7, 5, 7, 8, 9, /* SBUS slot 3 */
- 4, /* Onboard SCSI */
+ 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 0 */
+ 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 1 */
+ 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 2 */
+ 0, 5, 5, 7, 5, 7, 8, 9, /* SBUS slot 3 */
+ 5, /* Onboard SCSI */
5, /* Onboard Ethernet */
/*XXX*/ 8, /* Onboard BPP */
0, /* Bogon */
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 158bd31e15b7..7d0e67c1ce50 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -64,12 +64,6 @@ struct screen_info screen_info = {
16 /* orig-video-points */
};
-/* Typing sync at the prom prompt calls the function pointed to by
- * the sync callback which I set to the following function.
- * This should sync all filesystems and return, for now it just
- * prints out pretty messages and returns.
- */
-
void (*prom_palette)(int);
void (*prom_keyboard)(void);
@@ -79,259 +73,6 @@ prom_console_write(struct console *con, const char *s, unsigned n)
prom_write(s, n);
}
-static struct console prom_console = {
- .name = "prom",
- .write = prom_console_write,
- .flags = CON_CONSDEV | CON_ENABLED,
- .index = -1,
-};
-
-#define PROM_TRUE -1
-#define PROM_FALSE 0
-
-/* Pretty sick eh? */
-int prom_callback(long *args)
-{
- struct console *cons, *saved_console = NULL;
- unsigned long flags;
- char *cmd;
- extern spinlock_t prom_entry_lock;
-
- if (!args)
- return -1;
- if (!(cmd = (char *)args[0]))
- return -1;
-
- /*
- * The callback can be invoked on the cpu that first dropped
- * into prom_cmdline after taking the serial interrupt, or on
- * a slave processor that was smp_captured() if the
- * administrator has done a switch-cpu inside obp. In either
- * case, the cpu is marked as in-interrupt. Drop IRQ locks.
- */
- irq_exit();
-
- /* XXX Revisit the locking here someday. This is a debugging
- * XXX feature so it isnt all that critical. -DaveM
- */
- local_irq_save(flags);
-
- spin_unlock(&prom_entry_lock);
- cons = console_drivers;
- while (cons) {
- unregister_console(cons);
- cons->flags &= ~(CON_PRINTBUFFER);
- cons->next = saved_console;
- saved_console = cons;
- cons = console_drivers;
- }
- register_console(&prom_console);
- if (!strcmp(cmd, "sync")) {
- prom_printf("PROM `%s' command...\n", cmd);
- show_free_areas();
- if (current->pid != 0) {
- local_irq_enable();
- sys_sync();
- local_irq_disable();
- }
- args[2] = 0;
- args[args[1] + 3] = -1;
- prom_printf("Returning to PROM\n");
- } else if (!strcmp(cmd, "va>tte-data")) {
- unsigned long ctx, va;
- unsigned long tte = 0;
- long res = PROM_FALSE;
-
- ctx = args[3];
- va = args[4];
- if (ctx) {
- /*
- * Find process owning ctx, lookup mapping.
- */
- struct task_struct *p;
- struct mm_struct *mm = NULL;
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
- pte_t pte;
-
- for_each_process(p) {
- mm = p->mm;
- if (CTX_NRBITS(mm->context) == ctx)
- break;
- }
- if (!mm ||
- CTX_NRBITS(mm->context) != ctx)
- goto done;
-
- pgdp = pgd_offset(mm, va);
- if (pgd_none(*pgdp))
- goto done;
- pudp = pud_offset(pgdp, va);
- if (pud_none(*pudp))
- goto done;
- pmdp = pmd_offset(pudp, va);
- if (pmd_none(*pmdp))
- goto done;
-
- /* Preemption implicitly disabled by virtue of
- * being called from inside OBP.
- */
- ptep = pte_offset_map(pmdp, va);
- pte = *ptep;
- if (pte_present(pte)) {
- tte = pte_val(pte);
- res = PROM_TRUE;
- }
- pte_unmap(ptep);
- goto done;
- }
-
- if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) {
- extern unsigned long sparc64_kern_pri_context;
-
- /* Spitfire Errata #32 workaround */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (sparc64_kern_pri_context),
- "r" (PRIMARY_CONTEXT),
- "i" (ASI_DMMU));
-
- /*
- * Locked down tlb entry.
- */
-
- if (tlb_type == spitfire)
- tte = spitfire_get_dtlb_data(SPITFIRE_HIGHEST_LOCKED_TLBENT);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- tte = cheetah_get_ldtlb_data(CHEETAH_HIGHEST_LOCKED_TLBENT);
-
- res = PROM_TRUE;
- goto done;
- }
-
- if (va < PGDIR_SIZE) {
- /*
- * vmalloc or prom_inherited mapping.
- */
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
- pte_t pte;
- int error;
-
- if ((va >= LOW_OBP_ADDRESS) && (va < HI_OBP_ADDRESS)) {
- tte = prom_virt_to_phys(va, &error);
- if (!error)
- res = PROM_TRUE;
- goto done;
- }
- pgdp = pgd_offset_k(va);
- if (pgd_none(*pgdp))
- goto done;
- pudp = pud_offset(pgdp, va);
- if (pud_none(*pudp))
- goto done;
- pmdp = pmd_offset(pudp, va);
- if (pmd_none(*pmdp))
- goto done;
-
- /* Preemption implicitly disabled by virtue of
- * being called from inside OBP.
- */
- ptep = pte_offset_kernel(pmdp, va);
- pte = *ptep;
- if (pte_present(pte)) {
- tte = pte_val(pte);
- res = PROM_TRUE;
- }
- goto done;
- }
-
- if (va < PAGE_OFFSET) {
- /*
- * No mappings here.
- */
- goto done;
- }
-
- if (va & (1UL << 40)) {
- /*
- * I/O page.
- */
-
- tte = (__pa(va) & _PAGE_PADDR) |
- _PAGE_VALID | _PAGE_SZ4MB |
- _PAGE_E | _PAGE_P | _PAGE_W;
- res = PROM_TRUE;
- goto done;
- }
-
- /*
- * Normal page.
- */
- tte = (__pa(va) & _PAGE_PADDR) |
- _PAGE_VALID | _PAGE_SZ4MB |
- _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W;
- res = PROM_TRUE;
-
- done:
- if (res == PROM_TRUE) {
- args[2] = 3;
- args[args[1] + 3] = 0;
- args[args[1] + 4] = res;
- args[args[1] + 5] = tte;
- } else {
- args[2] = 2;
- args[args[1] + 3] = 0;
- args[args[1] + 4] = res;
- }
- } else if (!strcmp(cmd, ".soft1")) {
- unsigned long tte;
-
- tte = args[3];
- prom_printf("%lx:\"%s%s%s%s%s\" ",
- (tte & _PAGE_SOFT) >> 7,
- tte & _PAGE_MODIFIED ? "M" : "-",
- tte & _PAGE_ACCESSED ? "A" : "-",
- tte & _PAGE_READ ? "W" : "-",
- tte & _PAGE_WRITE ? "R" : "-",
- tte & _PAGE_PRESENT ? "P" : "-");
-
- args[2] = 2;
- args[args[1] + 3] = 0;
- args[args[1] + 4] = PROM_TRUE;
- } else if (!strcmp(cmd, ".soft2")) {
- unsigned long tte;
-
- tte = args[3];
- prom_printf("%lx ", (tte & 0x07FC000000000000UL) >> 50);
-
- args[2] = 2;
- args[args[1] + 3] = 0;
- args[args[1] + 4] = PROM_TRUE;
- } else {
- prom_printf("unknown PROM `%s' command...\n", cmd);
- }
- unregister_console(&prom_console);
- while (saved_console) {
- cons = saved_console;
- saved_console = cons->next;
- register_console(cons);
- }
- spin_lock(&prom_entry_lock);
- local_irq_restore(flags);
-
- /*
- * Restore in-interrupt status for a resume from obp.
- */
- irq_enter();
- return 0;
-}
-
unsigned int boot_flags = 0;
#define BOOTME_DEBUG 0x1
#define BOOTME_SINGLE 0x2
@@ -479,15 +220,99 @@ char reboot_command[COMMAND_LINE_SIZE];
static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
-void register_prom_callbacks(void)
+static void __init per_cpu_patch(void)
{
- prom_setcallback(prom_callback);
- prom_feval(": linux-va>tte-data 2 \" va>tte-data\" $callback drop ; "
- "' linux-va>tte-data to va>tte-data");
- prom_feval(": linux-.soft1 1 \" .soft1\" $callback 2drop ; "
- "' linux-.soft1 to .soft1");
- prom_feval(": linux-.soft2 1 \" .soft2\" $callback 2drop ; "
- "' linux-.soft2 to .soft2");
+ struct cpuid_patch_entry *p;
+ unsigned long ver;
+ int is_jbus;
+
+ if (tlb_type == spitfire && !this_is_starfire)
+ return;
+
+ is_jbus = 0;
+ if (tlb_type != hypervisor) {
+ __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+ is_jbus = ((ver >> 32UL) == __JALAPENO_ID ||
+ (ver >> 32UL) == __SERRANO_ID);
+ }
+
+ p = &__cpuid_patch;
+ while (p < &__cpuid_patch_end) {
+ unsigned long addr = p->addr;
+ unsigned int *insns;
+
+ switch (tlb_type) {
+ case spitfire:
+ insns = &p->starfire[0];
+ break;
+ case cheetah:
+ case cheetah_plus:
+ if (is_jbus)
+ insns = &p->cheetah_jbus[0];
+ else
+ insns = &p->cheetah_safari[0];
+ break;
+ case hypervisor:
+ insns = &p->sun4v[0];
+ break;
+ default:
+ prom_printf("Unknown cpu type, halting.\n");
+ prom_halt();
+ };
+
+ *(unsigned int *) (addr + 0) = insns[0];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ *(unsigned int *) (addr + 4) = insns[1];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 4));
+
+ *(unsigned int *) (addr + 8) = insns[2];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 8));
+
+ *(unsigned int *) (addr + 12) = insns[3];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 12));
+
+ p++;
+ }
+}
+
+static void __init sun4v_patch(void)
+{
+ struct sun4v_1insn_patch_entry *p1;
+ struct sun4v_2insn_patch_entry *p2;
+
+ if (tlb_type != hypervisor)
+ return;
+
+ p1 = &__sun4v_1insn_patch;
+ while (p1 < &__sun4v_1insn_patch_end) {
+ unsigned long addr = p1->addr;
+
+ *(unsigned int *) (addr + 0) = p1->insn;
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ p1++;
+ }
+
+ p2 = &__sun4v_2insn_patch;
+ while (p2 < &__sun4v_2insn_patch_end) {
+ unsigned long addr = p2->addr;
+
+ *(unsigned int *) (addr + 0) = p2->insns[0];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 0));
+
+ *(unsigned int *) (addr + 4) = p2->insns[1];
+ wmb();
+ __asm__ __volatile__("flush %0" : : "r" (addr + 4));
+
+ p2++;
+ }
}
void __init setup_arch(char **cmdline_p)
@@ -496,7 +321,10 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = prom_getbootargs();
strcpy(saved_command_line, *cmdline_p);
- printk("ARCH: SUN4U\n");
+ if (tlb_type == hypervisor)
+ printk("ARCH: SUN4V\n");
+ else
+ printk("ARCH: SUN4U\n");
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
@@ -507,6 +335,13 @@ void __init setup_arch(char **cmdline_p)
/* Work out if we are starfire early on */
check_if_starfire();
+ /* Now we know enough to patch the get_cpuid sequences
+ * used by trap code.
+ */
+ per_cpu_patch();
+
+ sun4v_patch();
+
boot_flags_init(*cmdline_p);
idprom_init();
@@ -514,7 +349,7 @@ void __init setup_arch(char **cmdline_p)
if (!root_flags)
root_mountflags &= ~MS_RDONLY;
ROOT_DEV = old_decode_dev(root_dev);
-#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
@@ -544,6 +379,9 @@ void __init setup_arch(char **cmdline_p)
smp_setup_cpu_possible_map();
+ /* Get boot processor trap_block[] setup. */
+ init_cur_cpu_trap(current_thread_info());
+
paging_init();
}
@@ -565,6 +403,12 @@ static int __init set_preferred_console(void)
serial_console = 2;
} else if (idev == PROMDEV_IRSC && odev == PROMDEV_ORSC) {
serial_console = 3;
+ } else if (idev == PROMDEV_IVCONS && odev == PROMDEV_OVCONS) {
+ /* sunhv_console_init() doesn't check the serial_console
+ * value anyways...
+ */
+ serial_console = 4;
+ return add_preferred_console("ttyHV", 0, NULL);
} else {
prom_printf("Inconsistent console: "
"input %d, output %d\n",
@@ -598,9 +442,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
seq_printf(m,
"cpu\t\t: %s\n"
"fpu\t\t: %s\n"
- "promlib\t\t: Version 3 Revision %d\n"
- "prom\t\t: %d.%d.%d\n"
- "type\t\t: sun4u\n"
+ "prom\t\t: %s\n"
+ "type\t\t: %s\n"
"ncpus probed\t: %d\n"
"ncpus active\t: %d\n"
"D$ parity tl1\t: %u\n"
@@ -612,10 +455,10 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
,
sparc_cpu_type,
sparc_fpu_type,
- prom_rev,
- prom_prev >> 16,
- (prom_prev >> 8) & 0xff,
- prom_prev & 0xff,
+ prom_version,
+ ((tlb_type == hypervisor) ?
+ "sun4v" :
+ "sun4u"),
ncpus_probed,
num_online_cpus(),
dcache_parity_tl1_occurred,
@@ -692,15 +535,11 @@ static int __init topology_init(void)
while (!cpu_find_by_instance(ncpus_probed, NULL, NULL))
ncpus_probed++;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_possible(i)) {
- struct cpu *p = kmalloc(sizeof(*p), GFP_KERNEL);
-
- if (p) {
- memset(p, 0, sizeof(*p));
- register_cpu(p, i, NULL);
- err = 0;
- }
+ for_each_cpu(i) {
+ struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (p) {
+ register_cpu(p, i, NULL);
+ err = 0;
}
}
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1f7ad8a69052..373a701c90a5 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -38,6 +38,7 @@
#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
+#include <asm/sections.h>
extern void calibrate_delay(void);
@@ -46,6 +47,8 @@ static unsigned char boot_cpu_id;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
+ { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
static cpumask_t smp_commenced_mask;
static cpumask_t cpu_callout_map;
@@ -77,7 +80,7 @@ void smp_bogo(struct seq_file *m)
void __init smp_store_cpu_info(int id)
{
- int cpu_node;
+ int cpu_node, def;
/* multiplier and counter set by
smp_setup_percpu_timer() */
@@ -87,24 +90,32 @@ void __init smp_store_cpu_info(int id)
cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
"clock-frequency", 0);
- cpu_data(id).pgcache_size = 0;
- cpu_data(id).pte_cache[0] = NULL;
- cpu_data(id).pte_cache[1] = NULL;
- cpu_data(id).pgd_cache = NULL;
- cpu_data(id).idle_volume = 1;
-
+ def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024));
cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
- 16 * 1024);
+ def);
+
+ def = 32;
cpu_data(id).dcache_line_size =
- prom_getintdefault(cpu_node, "dcache-line-size", 32);
+ prom_getintdefault(cpu_node, "dcache-line-size", def);
+
+ def = 16 * 1024;
cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size",
- 16 * 1024);
+ def);
+
+ def = 32;
cpu_data(id).icache_line_size =
- prom_getintdefault(cpu_node, "icache-line-size", 32);
+ prom_getintdefault(cpu_node, "icache-line-size", def);
+
+ def = ((tlb_type == hypervisor) ?
+ (3 * 1024 * 1024) :
+ (4 * 1024 * 1024));
cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size",
- 4 * 1024 * 1024);
+ def);
+
+ def = 64;
cpu_data(id).ecache_line_size =
- prom_getintdefault(cpu_node, "ecache-line-size", 64);
+ prom_getintdefault(cpu_node, "ecache-line-size", def);
+
printk("CPU[%d]: Caches "
"D[sz(%d):line_sz(%d)] "
"I[sz(%d):line_sz(%d)] "
@@ -119,27 +130,16 @@ static void smp_setup_percpu_timer(void);
static volatile unsigned long callin_flag = 0;
-extern void inherit_locked_prom_mappings(int save_p);
-
-static inline void cpu_setup_percpu_base(unsigned long cpu_id)
-{
- __asm__ __volatile__("mov %0, %%g5\n\t"
- "stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (__per_cpu_offset(cpu_id)),
- "r" (TSB_REG), "i" (ASI_IMMU));
-}
-
void __init smp_callin(void)
{
int cpuid = hard_smp_processor_id();
- inherit_locked_prom_mappings(0);
+ __local_per_cpu_offset = __per_cpu_offset(cpuid);
- __flush_tlb_all();
+ if (tlb_type == hypervisor)
+ sun4v_ktsb_register();
- cpu_setup_percpu_base(cpuid);
+ __flush_tlb_all();
smp_setup_percpu_timer();
@@ -316,6 +316,8 @@ static void smp_synchronize_one_tick(int cpu)
spin_unlock_irqrestore(&itc_sync_lock, flags);
}
+extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
+
extern unsigned long sparc64_cpu_startup;
/* The OBP cpu startup callback truncates the 3rd arg cookie to
@@ -331,21 +333,31 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
unsigned long cookie =
(unsigned long)(&cpu_new_thread);
struct task_struct *p;
- int timeout, ret, cpu_node;
+ int timeout, ret;
p = fork_idle(cpu);
callin_flag = 0;
cpu_new_thread = task_thread_info(p);
cpu_set(cpu, cpu_callout_map);
- cpu_find_by_mid(cpu, &cpu_node);
- prom_startcpu(cpu_node, entry, cookie);
+ if (tlb_type == hypervisor) {
+ /* Alloc the mondo queues, cpu will load them. */
+ sun4v_init_mondo_queues(0, cpu, 1, 0);
+
+ prom_startcpu_cpuid(cpu, entry, cookie);
+ } else {
+ int cpu_node;
+
+ cpu_find_by_mid(cpu, &cpu_node);
+ prom_startcpu(cpu_node, entry, cookie);
+ }
for (timeout = 0; timeout < 5000000; timeout++) {
if (callin_flag)
break;
udelay(100);
}
+
if (callin_flag) {
ret = 0;
} else {
@@ -441,7 +453,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
u64 pstate, ver;
- int nack_busy_id, is_jalapeno;
+ int nack_busy_id, is_jbus;
if (cpus_empty(mask))
return;
@@ -451,7 +463,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
* derivative processor.
*/
__asm__ ("rdpr %%ver, %0" : "=r" (ver));
- is_jalapeno = ((ver >> 32) == 0x003e0016);
+ is_jbus = ((ver >> 32) == __JALAPENO_ID ||
+ (ver >> 32) == __SERRANO_ID);
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
@@ -476,7 +489,7 @@ retry:
for_each_cpu_mask(i, mask) {
u64 target = (i << 14) | 0x70;
- if (!is_jalapeno)
+ if (!is_jbus)
target |= (nack_busy_id << 24);
__asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t"
@@ -529,7 +542,7 @@ retry:
for_each_cpu_mask(i, mask) {
u64 check_mask;
- if (is_jalapeno)
+ if (is_jbus)
check_mask = (0x2UL << (2*i));
else
check_mask = (0x2UL <<
@@ -544,6 +557,155 @@ retry:
}
}
+/* Multi-cpu list version. */
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+{
+ struct trap_per_cpu *tb;
+ u16 *cpu_list;
+ u64 *mondo;
+ cpumask_t error_mask;
+ unsigned long flags, status;
+ int cnt, retries, this_cpu, prev_sent, i;
+
+ /* We have to do this whole thing with interrupts fully disabled.
+ * Otherwise if we send an xcall from interrupt context it will
+ * corrupt both our mondo block and cpu list state.
+ *
+ * One consequence of this is that we cannot use timeout mechanisms
+ * that depend upon interrupts being delivered locally. So, for
+ * example, we cannot sample jiffies and expect it to advance.
+ *
+ * Fortunately, udelay() uses %stick/%tick so we can use that.
+ */
+ local_irq_save(flags);
+
+ this_cpu = smp_processor_id();
+ tb = &trap_block[this_cpu];
+
+ mondo = __va(tb->cpu_mondo_block_pa);
+ mondo[0] = data0;
+ mondo[1] = data1;
+ mondo[2] = data2;
+ wmb();
+
+ cpu_list = __va(tb->cpu_list_pa);
+
+ /* Setup the initial cpu list. */
+ cnt = 0;
+ for_each_cpu_mask(i, mask)
+ cpu_list[cnt++] = i;
+
+ cpus_clear(error_mask);
+ retries = 0;
+ prev_sent = 0;
+ do {
+ int forward_progress, n_sent;
+
+ status = sun4v_cpu_mondo_send(cnt,
+ tb->cpu_list_pa,
+ tb->cpu_mondo_block_pa);
+
+ /* HV_EOK means all cpus received the xcall, we're done. */
+ if (likely(status == HV_EOK))
+ break;
+
+ /* First, see if we made any forward progress.
+ *
+ * The hypervisor indicates successful sends by setting
+ * cpu list entries to the value 0xffff.
+ */
+ n_sent = 0;
+ for (i = 0; i < cnt; i++) {
+ if (likely(cpu_list[i] == 0xffff))
+ n_sent++;
+ }
+
+ forward_progress = 0;
+ if (n_sent > prev_sent)
+ forward_progress = 1;
+
+ prev_sent = n_sent;
+
+ /* If we get a HV_ECPUERROR, then one or more of the cpus
+ * in the list are in error state. Use the cpu_state()
+ * hypervisor call to find out which cpus are in error state.
+ */
+ if (unlikely(status == HV_ECPUERROR)) {
+ for (i = 0; i < cnt; i++) {
+ long err;
+ u16 cpu;
+
+ cpu = cpu_list[i];
+ if (cpu == 0xffff)
+ continue;
+
+ err = sun4v_cpu_state(cpu);
+ if (err >= 0 &&
+ err == HV_CPU_STATE_ERROR) {
+ cpu_list[i] = 0xffff;
+ cpu_set(cpu, error_mask);
+ }
+ }
+ } else if (unlikely(status != HV_EWOULDBLOCK))
+ goto fatal_mondo_error;
+
+ /* Don't bother rewriting the CPU list, just leave the
+ * 0xffff and non-0xffff entries in there and the
+ * hypervisor will do the right thing.
+ *
+ * Only advance timeout state if we didn't make any
+ * forward progress.
+ */
+ if (unlikely(!forward_progress)) {
+ if (unlikely(++retries > 10000))
+ goto fatal_mondo_timeout;
+
+ /* Delay a little bit to let other cpus catch up
+ * on their cpu mondo queue work.
+ */
+ udelay(2 * cnt);
+ }
+ } while (1);
+
+ local_irq_restore(flags);
+
+ if (unlikely(!cpus_empty(error_mask)))
+ goto fatal_mondo_cpu_error;
+
+ return;
+
+fatal_mondo_cpu_error:
+ printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
+ "were in error state\n",
+ this_cpu);
+ printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
+ for_each_cpu_mask(i, error_mask)
+ printk("%d ", i);
+ printk("]\n");
+ return;
+
+fatal_mondo_timeout:
+ local_irq_restore(flags);
+ printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
+ " progress after %d retries.\n",
+ this_cpu, retries);
+ goto dump_cpu_list_and_out;
+
+fatal_mondo_error:
+ local_irq_restore(flags);
+ printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
+ this_cpu, status);
+ printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
+ "mondo_block_pa(%lx)\n",
+ this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+
+dump_cpu_list_and_out:
+ printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
+ for (i = 0; i < cnt; i++)
+ printk("%u ", cpu_list[i]);
+ printk("]\n");
+}
+
/* Send cross call to all processors mentioned in MASK
* except self.
*/
@@ -557,8 +719,10 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d
if (tlb_type == spitfire)
spitfire_xcall_deliver(data0, data1, data2, mask);
- else
+ else if (tlb_type == cheetah || tlb_type == cheetah_plus)
cheetah_xcall_deliver(data0, data1, data2, mask);
+ else
+ hypervisor_xcall_deliver(data0, data1, data2, mask);
/* NOTE: Caller runs local copy on master. */
put_cpu();
@@ -594,16 +758,13 @@ extern unsigned long xcall_call_function;
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
+static int smp_call_function_mask(void (*func)(void *info), void *info,
+ int nonatomic, int wait, cpumask_t mask)
{
struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
+ int cpus;
long timeout;
- if (!cpus)
- return 0;
-
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -614,9 +775,14 @@ int smp_call_function(void (*func)(void *info), void *info,
spin_lock(&call_lock);
+ cpu_clear(smp_processor_id(), mask);
+ cpus = cpus_weight(mask);
+ if (!cpus)
+ goto out_unlock;
+
call_data = &data;
- smp_cross_call(&xcall_call_function, 0, 0, 0);
+ smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
/*
* Wait for other cpus to complete function or at
@@ -630,18 +796,25 @@ int smp_call_function(void (*func)(void *info), void *info,
udelay(1);
}
+out_unlock:
spin_unlock(&call_lock);
return 0;
out_timeout:
spin_unlock(&call_lock);
- printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n",
- (long) num_online_cpus() - 1L,
- (long) atomic_read(&data.finished));
+ printk("XCALL: Remote cpus not responding, ncpus=%d finished=%d\n",
+ cpus, atomic_read(&data.finished));
return 0;
}
+int smp_call_function(void (*func)(void *info), void *info,
+ int nonatomic, int wait)
+{
+ return smp_call_function_mask(func, info, nonatomic, wait,
+ cpu_online_map);
+}
+
void smp_call_function_client(int irq, struct pt_regs *regs)
{
void (*func) (void *info) = call_data->func;
@@ -659,13 +832,25 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
}
}
+static void tsb_sync(void *info)
+{
+ struct mm_struct *mm = info;
+
+ if (current->active_mm == mm)
+ tsb_context_switch(mm);
+}
+
+void smp_tsb_sync(struct mm_struct *mm)
+{
+ smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+}
+
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
-extern unsigned long xcall_flush_tlb_all_spitfire;
-extern unsigned long xcall_flush_tlb_all_cheetah;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
+extern unsigned long xcall_new_mmu_context_version;
#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
@@ -693,11 +878,17 @@ static __inline__ void __local_flush_dcache_page(struct page *page)
void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
cpumask_t mask = cpumask_of_cpu(cpu);
- int this_cpu = get_cpu();
+ int this_cpu;
+
+ if (tlb_type == hypervisor)
+ return;
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes);
#endif
+
+ this_cpu = get_cpu();
+
if (cpu == this_cpu) {
__local_flush_dcache_page(page);
} else if (cpu_online(cpu)) {
@@ -713,7 +904,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
__pa(pg_addr),
(u64) pg_addr,
mask);
- } else {
+ } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
data0 =
((u64)&xcall_flush_dcache_page_cheetah);
@@ -735,7 +926,12 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
void *pg_addr = page_address(page);
cpumask_t mask = cpu_online_map;
u64 data0;
- int this_cpu = get_cpu();
+ int this_cpu;
+
+ if (tlb_type == hypervisor)
+ return;
+
+ this_cpu = get_cpu();
cpu_clear(this_cpu, mask);
@@ -752,7 +948,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
__pa(pg_addr),
(u64) pg_addr,
mask);
- } else {
+ } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
data0 = ((u64)&xcall_flush_dcache_page_cheetah);
cheetah_xcall_deliver(data0,
@@ -769,38 +965,58 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
put_cpu();
}
+static void __smp_receive_signal_mask(cpumask_t mask)
+{
+ smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
+}
+
void smp_receive_signal(int cpu)
{
cpumask_t mask = cpumask_of_cpu(cpu);
- if (cpu_online(cpu)) {
- u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
-
- if (tlb_type == spitfire)
- spitfire_xcall_deliver(data0, 0, 0, mask);
- else
- cheetah_xcall_deliver(data0, 0, 0, mask);
- }
+ if (cpu_online(cpu))
+ __smp_receive_signal_mask(mask);
}
void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
- /* Just return, rtrap takes care of the rest. */
clear_softint(1 << irq);
}
-void smp_report_regs(void)
+void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
- smp_cross_call(&xcall_report_regs, 0, 0, 0);
+ struct mm_struct *mm;
+ unsigned long flags;
+
+ clear_softint(1 << irq);
+
+ /* See if we need to allocate a new TLB context because
+ * the version of the one we are using is now out of date.
+ */
+ mm = current->active_mm;
+ if (unlikely(!mm || (mm == &init_mm)))
+ return;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+ if (unlikely(!CTX_VALID(mm->context)))
+ get_new_mmu_context(mm);
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+
+ load_secondary_context(mm);
+ __flush_tlb_mm(CTX_HWBITS(mm->context),
+ SECONDARY_CONTEXT);
}
-void smp_flush_tlb_all(void)
+void smp_new_mmu_context_version(void)
{
- if (tlb_type == spitfire)
- smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0);
- else
- smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0);
- __flush_tlb_all();
+ smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
+}
+
+void smp_report_regs(void)
+{
+ smp_cross_call(&xcall_report_regs, 0, 0, 0);
}
/* We know that the window frames of the user have been flushed
@@ -944,24 +1160,19 @@ void smp_release(void)
* can service tlb flush xcalls...
*/
extern void prom_world(int);
-extern void save_alternate_globals(unsigned long *);
-extern void restore_alternate_globals(unsigned long *);
+
void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
- unsigned long global_save[24];
-
clear_softint(1 << irq);
preempt_disable();
__asm__ __volatile__("flushw");
- save_alternate_globals(global_save);
prom_world(1);
atomic_inc(&smp_capture_registry);
membar_storeload_storestore();
while (penguins_are_doing_time)
rmb();
- restore_alternate_globals(global_save);
atomic_dec(&smp_capture_registry);
prom_world(0);
@@ -1082,6 +1293,8 @@ int setup_profiling_timer(unsigned int multiplier)
/* Constrain the number of cpus to max_cpus. */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ int i;
+
if (num_possible_cpus() > max_cpus) {
int instance, mid;
@@ -1096,6 +1309,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
+ for_each_cpu(i) {
+ if (tlb_type == hypervisor) {
+ int j;
+
+ /* XXX get this mapping from machine description */
+ for_each_cpu(j) {
+ if ((j >> 2) == (i >> 2))
+ cpu_set(j, cpu_sibling_map[i]);
+ }
+ } else {
+ cpu_set(i, cpu_sibling_map[i]);
+ }
+ }
+
smp_store_cpu_info(boot_cpu_id);
}
@@ -1117,12 +1344,15 @@ void __init smp_setup_cpu_possible_map(void)
void __devinit smp_prepare_boot_cpu(void)
{
- if (hard_smp_processor_id() >= NR_CPUS) {
+ int cpu = hard_smp_processor_id();
+
+ if (cpu >= NR_CPUS) {
prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
prom_halt();
}
- current_thread_info()->cpu = hard_smp_processor_id();
+ current_thread_info()->cpu = cpu;
+ __local_per_cpu_offset = __per_cpu_offset(cpu);
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), phys_cpu_present_map);
@@ -1139,7 +1369,11 @@ int __devinit __cpu_up(unsigned int cpu)
if (!cpu_isset(cpu, cpu_online_map)) {
ret = -ENODEV;
} else {
- smp_synchronize_one_tick(cpu);
+ /* On SUN4V, writes to %tick and %stick are
+ * not allowed.
+ */
+ if (tlb_type != hypervisor)
+ smp_synchronize_one_tick(cpu);
}
}
return ret;
@@ -1183,12 +1417,9 @@ void __init setup_per_cpu_areas(void)
{
unsigned long goal, size, i;
char *ptr;
- /* Created by linker magic */
- extern char __per_cpu_start[], __per_cpu_end[];
/* Copy section for each CPU (we discard the original) */
- goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-
+ goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
#ifdef CONFIG_MODULES
if (goal < PERCPU_ENOUGH_ROOM)
goal = PERCPU_ENOUGH_ROOM;
@@ -1197,31 +1428,10 @@ void __init setup_per_cpu_areas(void)
for (size = 1UL; size < goal; size <<= 1UL)
__per_cpu_shift++;
- /* Make sure the resulting __per_cpu_base value
- * will fit in the 43-bit sign extended IMMU
- * TSB register.
- */
- ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE,
- (unsigned long) __per_cpu_start);
+ ptr = alloc_bootmem(size * NR_CPUS);
__per_cpu_base = ptr - __per_cpu_start;
- if ((__per_cpu_shift < PAGE_SHIFT) ||
- (__per_cpu_base & ~PAGE_MASK) ||
- (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) {
- prom_printf("PER_CPU: Invalid layout, "
- "ptr[%p] shift[%lx] base[%lx]\n",
- ptr, __per_cpu_shift, __per_cpu_base);
- prom_halt();
- }
-
for (i = 0; i < NR_CPUS; i++, ptr += size)
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
- /* Finally, load in the boot cpu's base value.
- * We abuse the IMMU TSB register for trap handler
- * entry and exit loading of %g5. That is why it
- * has to be page aligned.
- */
- cpu_setup_percpu_base(hard_smp_processor_id());
}
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 3c06bfb92a8c..9914a17651b4 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -95,9 +95,6 @@ extern int __ashrdi3(int, int);
extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs);
-extern unsigned long phys_base;
-extern unsigned long pfn_base;
-
extern unsigned int sys_call_table[];
extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
@@ -108,6 +105,14 @@ extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
unsigned long *, unsigned long *, unsigned long *);
+extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
+extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *);
+extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *);
+extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *, unsigned long *);
+
/* Per-CPU information table */
EXPORT_PER_CPU_SYMBOL(__cpu_data);
@@ -241,10 +246,6 @@ EXPORT_SYMBOL(verify_compat_iovec);
#endif
EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(pte_alloc_one_kernel);
-#ifndef CONFIG_SMP
-EXPORT_SYMBOL(pgt_quicklists);
-#endif
EXPORT_SYMBOL(put_fs_struct);
/* math-emu wants this */
@@ -339,14 +340,10 @@ EXPORT_SYMBOL(copy_to_user_fixup);
EXPORT_SYMBOL(copy_from_user_fixup);
EXPORT_SYMBOL(copy_in_user_fixup);
EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__bzero_noasi);
+EXPORT_SYMBOL(__clear_user);
/* Various address conversion macros use this. */
-EXPORT_SYMBOL(phys_base);
-EXPORT_SYMBOL(pfn_base);
EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-EXPORT_SYMBOL(page_to_pfn);
-EXPORT_SYMBOL(pfn_to_page);
/* No version information on this, heavily used in inline asm,
* and will always be 'void __ret_efault(void)'.
@@ -392,4 +389,9 @@ EXPORT_SYMBOL(xor_vis_3);
EXPORT_SYMBOL(xor_vis_4);
EXPORT_SYMBOL(xor_vis_5);
+EXPORT_SYMBOL(xor_niagara_2);
+EXPORT_SYMBOL(xor_niagara_3);
+EXPORT_SYMBOL(xor_niagara_4);
+EXPORT_SYMBOL(xor_niagara_5);
+
EXPORT_SYMBOL(prom_palette);
diff --git a/arch/sparc64/kernel/sun4v_ivec.S b/arch/sparc64/kernel/sun4v_ivec.S
new file mode 100644
index 000000000000..b49a68bdda43
--- /dev/null
+++ b/arch/sparc64/kernel/sun4v_ivec.S
@@ -0,0 +1,334 @@
+/* sun4v_ivec.S: Sun4v interrupt vector handling.
+ *
+ * Copyright (C) 2006 <davem@davemloft.net>
+ */
+
+#include <asm/cpudata.h>
+#include <asm/intr_queue.h>
+
+ .text
+ .align 32
+
+sun4v_cpu_mondo:
+ /* Head offset in %g2, tail offset in %g4.
+ * If they are the same, no work.
+ */
+ mov INTRQ_CPU_MONDO_HEAD, %g2
+ ldxa [%g2] ASI_QUEUE, %g2
+ mov INTRQ_CPU_MONDO_TAIL, %g4
+ ldxa [%g4] ASI_QUEUE, %g4
+ cmp %g2, %g4
+ be,pn %xcc, sun4v_cpu_mondo_queue_empty
+ nop
+
+ /* Get &trap_block[smp_processor_id()] into %g3. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g3
+ sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3
+
+ /* Get CPU mondo queue base phys address into %g7. */
+ ldx [%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
+
+ /* Now get the cross-call arguments and handler PC, same
+ * layout as sun4u:
+ *
+ * 1st 64-bit word: low half is 32-bit PC, put into %g3 and jmpl to it
+ * high half is context arg to MMU flushes, into %g5
+ * 2nd 64-bit word: 64-bit arg, load into %g1
+ * 3rd 64-bit word: 64-bit arg, load into %g7
+ */
+ ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g3
+ add %g2, 0x8, %g2
+ srlx %g3, 32, %g5
+ ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1
+ add %g2, 0x8, %g2
+ srl %g3, 0, %g3
+ ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g7
+ add %g2, 0x40 - 0x8 - 0x8, %g2
+
+ /* Update queue head pointer. */
+ sethi %hi(8192 - 1), %g4
+ or %g4, %lo(8192 - 1), %g4
+ and %g2, %g4, %g2
+
+ mov INTRQ_CPU_MONDO_HEAD, %g4
+ stxa %g2, [%g4] ASI_QUEUE
+ membar #Sync
+
+ jmpl %g3, %g0
+ nop
+
+sun4v_cpu_mondo_queue_empty:
+ retry
+
+sun4v_dev_mondo:
+ /* Head offset in %g2, tail offset in %g4. */
+ mov INTRQ_DEVICE_MONDO_HEAD, %g2
+ ldxa [%g2] ASI_QUEUE, %g2
+ mov INTRQ_DEVICE_MONDO_TAIL, %g4
+ ldxa [%g4] ASI_QUEUE, %g4
+ cmp %g2, %g4
+ be,pn %xcc, sun4v_dev_mondo_queue_empty
+ nop
+
+ /* Get &trap_block[smp_processor_id()] into %g3. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g3
+ sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3
+
+ /* Get DEV mondo queue base phys address into %g5. */
+ ldx [%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5
+
+ /* Load IVEC into %g3. */
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ add %g2, 0x40, %g2
+
+ /* XXX There can be a full 64-byte block of data here.
+ * XXX This is how we can get at MSI vector data.
+ * XXX Current we do not capture this, but when we do we'll
+ * XXX need to add a 64-byte storage area in the struct ino_bucket
+ * XXX or the struct irq_desc.
+ */
+
+ /* Update queue head pointer, this frees up some registers. */
+ sethi %hi(8192 - 1), %g4
+ or %g4, %lo(8192 - 1), %g4
+ and %g2, %g4, %g2
+
+ mov INTRQ_DEVICE_MONDO_HEAD, %g4
+ stxa %g2, [%g4] ASI_QUEUE
+ membar #Sync
+
+ /* Get &__irq_work[smp_processor_id()] into %g1. */
+ TRAP_LOAD_IRQ_WORK(%g1, %g4)
+
+ /* Get &ivector_table[IVEC] into %g4. */
+ sethi %hi(ivector_table), %g4
+ sllx %g3, 5, %g3
+ or %g4, %lo(ivector_table), %g4
+ add %g4, %g3, %g4
+
+ /* Load IRQ %pil into %g5. */
+ ldub [%g4 + 0x04], %g5
+
+ /* Insert ivector_table[] entry into __irq_work[] queue. */
+ sllx %g5, 2, %g3
+ lduw [%g1 + %g3], %g2 /* g2 = irq_work(cpu, pil) */
+ stw %g2, [%g4 + 0x00] /* bucket->irq_chain = g2 */
+ stw %g4, [%g1 + %g3] /* irq_work(cpu, pil) = bucket */
+
+ /* Signal the interrupt by setting (1 << pil) in %softint. */
+ mov 1, %g2
+ sllx %g2, %g5, %g2
+ wr %g2, 0x0, %set_softint
+
+sun4v_dev_mondo_queue_empty:
+ retry
+
+sun4v_res_mondo:
+ /* Head offset in %g2, tail offset in %g4. */
+ mov INTRQ_RESUM_MONDO_HEAD, %g2
+ ldxa [%g2] ASI_QUEUE, %g2
+ mov INTRQ_RESUM_MONDO_TAIL, %g4
+ ldxa [%g4] ASI_QUEUE, %g4
+ cmp %g2, %g4
+ be,pn %xcc, sun4v_res_mondo_queue_empty
+ nop
+
+ /* Get &trap_block[smp_processor_id()] into %g3. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g3
+ sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3
+
+ /* Get RES mondo queue base phys address into %g5. */
+ ldx [%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5
+
+ /* Get RES kernel buffer base phys address into %g7. */
+ ldx [%g3 + TRAP_PER_CPU_RESUM_KBUF_PA], %g7
+
+ /* If the first word is non-zero, queue is full. */
+ ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1
+ brnz,pn %g1, sun4v_res_mondo_queue_full
+ nop
+
+ /* Remember this entry's offset in %g1. */
+ mov %g2, %g1
+
+ /* Copy 64-byte queue entry into kernel buffer. */
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+
+ /* Update queue head pointer. */
+ sethi %hi(8192 - 1), %g4
+ or %g4, %lo(8192 - 1), %g4
+ and %g2, %g4, %g2
+
+ mov INTRQ_RESUM_MONDO_HEAD, %g4
+ stxa %g2, [%g4] ASI_QUEUE
+ membar #Sync
+
+ /* Disable interrupts and save register state so we can call
+ * C code. The etrap handling will leave %g4 in %l4 for us
+ * when it's done.
+ */
+ rdpr %pil, %g2
+ wrpr %g0, 15, %pil
+ mov %g1, %g4
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+
+ /* Log the event. */
+ add %sp, PTREGS_OFF, %o0
+ call sun4v_resum_error
+ mov %l4, %o1
+
+ /* Return from trap. */
+ ba,pt %xcc, rtrap_irq
+ nop
+
+sun4v_res_mondo_queue_empty:
+ retry
+
+sun4v_res_mondo_queue_full:
+ /* The queue is full, consolidate our damage by setting
+ * the head equal to the tail. We'll just trap again otherwise.
+ * Call C code to log the event.
+ */
+ mov INTRQ_RESUM_MONDO_HEAD, %g2
+ stxa %g4, [%g2] ASI_QUEUE
+ membar #Sync
+
+ rdpr %pil, %g2
+ wrpr %g0, 15, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+
+ call sun4v_resum_overflow
+ add %sp, PTREGS_OFF, %o0
+
+ ba,pt %xcc, rtrap_irq
+ nop
+
+sun4v_nonres_mondo:
+ /* Head offset in %g2, tail offset in %g4. */
+ mov INTRQ_NONRESUM_MONDO_HEAD, %g2
+ ldxa [%g2] ASI_QUEUE, %g2
+ mov INTRQ_NONRESUM_MONDO_TAIL, %g4
+ ldxa [%g4] ASI_QUEUE, %g4
+ cmp %g2, %g4
+ be,pn %xcc, sun4v_nonres_mondo_queue_empty
+ nop
+
+ /* Get &trap_block[smp_processor_id()] into %g3. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g3
+ sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3
+
+ /* Get RES mondo queue base phys address into %g5. */
+ ldx [%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5
+
+ /* Get RES kernel buffer base phys address into %g7. */
+ ldx [%g3 + TRAP_PER_CPU_NONRESUM_KBUF_PA], %g7
+
+ /* If the first word is non-zero, queue is full. */
+ ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1
+ brnz,pn %g1, sun4v_nonres_mondo_queue_full
+ nop
+
+ /* Remember this entry's offset in %g1. */
+ mov %g2, %g1
+
+ /* Copy 64-byte queue entry into kernel buffer. */
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+ ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3
+ stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC
+ add %g2, 0x08, %g2
+
+ /* Update queue head pointer. */
+ sethi %hi(8192 - 1), %g4
+ or %g4, %lo(8192 - 1), %g4
+ and %g2, %g4, %g2
+
+ mov INTRQ_NONRESUM_MONDO_HEAD, %g4
+ stxa %g2, [%g4] ASI_QUEUE
+ membar #Sync
+
+ /* Disable interrupts and save register state so we can call
+ * C code. The etrap handling will leave %g4 in %l4 for us
+ * when it's done.
+ */
+ rdpr %pil, %g2
+ wrpr %g0, 15, %pil
+ mov %g1, %g4
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+
+ /* Log the event. */
+ add %sp, PTREGS_OFF, %o0
+ call sun4v_nonresum_error
+ mov %l4, %o1
+
+ /* Return from trap. */
+ ba,pt %xcc, rtrap_irq
+ nop
+
+sun4v_nonres_mondo_queue_empty:
+ retry
+
+sun4v_nonres_mondo_queue_full:
+ /* The queue is full, consolidate our damage by setting
+ * the head equal to the tail. We'll just trap again otherwise.
+ * Call C code to log the event.
+ */
+ mov INTRQ_NONRESUM_MONDO_HEAD, %g2
+ stxa %g4, [%g2] ASI_QUEUE
+ membar #Sync
+
+ rdpr %pil, %g2
+ wrpr %g0, 15, %pil
+ ba,pt %xcc, etrap_irq
+ rd %pc, %g7
+
+ call sun4v_nonresum_overflow
+ add %sp, PTREGS_OFF, %o0
+
+ ba,pt %xcc, rtrap_irq
+ nop
diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S
new file mode 100644
index 000000000000..ab23ddb7116e
--- /dev/null
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -0,0 +1,421 @@
+/* sun4v_tlb_miss.S: Sun4v TLB miss handlers.
+ *
+ * Copyright (C) 2006 <davem@davemloft.net>
+ */
+
+ .text
+ .align 32
+
+ /* Load ITLB fault information into VADDR and CTX, using BASE. */
+#define LOAD_ITLB_INFO(BASE, VADDR, CTX) \
+ ldx [BASE + HV_FAULT_I_ADDR_OFFSET], VADDR; \
+ ldx [BASE + HV_FAULT_I_CTX_OFFSET], CTX;
+
+ /* Load DTLB fault information into VADDR and CTX, using BASE. */
+#define LOAD_DTLB_INFO(BASE, VADDR, CTX) \
+ ldx [BASE + HV_FAULT_D_ADDR_OFFSET], VADDR; \
+ ldx [BASE + HV_FAULT_D_CTX_OFFSET], CTX;
+
+ /* DEST = (VADDR >> 22)
+ *
+ * Branch to ZERO_CTX_LABEL if context is zero.
+ */
+#define COMPUTE_TAG_TARGET(DEST, VADDR, CTX, ZERO_CTX_LABEL) \
+ srlx VADDR, 22, DEST; \
+ brz,pn CTX, ZERO_CTX_LABEL; \
+ nop;
+
+ /* Create TSB pointer. This is something like:
+ *
+ * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
+ * tsb_base = tsb_reg & ~0x7UL;
+ * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask);
+ * tsb_ptr = tsb_base + (tsb_index * 16);
+ */
+#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, TMP1, TMP2) \
+ and TSB_PTR, 0x7, TMP1; \
+ mov 512, TMP2; \
+ andn TSB_PTR, 0x7, TSB_PTR; \
+ sllx TMP2, TMP1, TMP2; \
+ srlx VADDR, PAGE_SHIFT, TMP1; \
+ sub TMP2, 1, TMP2; \
+ and TMP1, TMP2, TMP1; \
+ sllx TMP1, 4, TMP1; \
+ add TSB_PTR, TMP1, TSB_PTR;
+
+sun4v_itlb_miss:
+ /* Load MMU Miss base into %g2. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+
+ /* Load UTSB reg into %g1. */
+ mov SCRATCHPAD_UTSBREG1, %g1
+ ldxa [%g1] ASI_SCRATCHPAD, %g1
+
+ LOAD_ITLB_INFO(%g2, %g4, %g5)
+ COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v)
+ COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7)
+
+ /* Load TSB tag/pte into %g2/%g3 and compare the tag. */
+ ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
+ cmp %g2, %g6
+ bne,a,pn %xcc, tsb_miss_page_table_walk
+ mov FAULT_CODE_ITLB, %g3
+ andcc %g3, _PAGE_EXEC_4V, %g0
+ be,a,pn %xcc, tsb_do_fault
+ mov FAULT_CODE_ITLB, %g3
+
+ /* We have a valid entry, make hypervisor call to load
+ * I-TLB and return from trap.
+ *
+ * %g3: PTE
+ * %g4: vaddr
+ */
+sun4v_itlb_load:
+ ldxa [%g0] ASI_SCRATCHPAD, %g6
+ mov %o0, %g1 ! save %o0
+ mov %o1, %g2 ! save %o1
+ mov %o2, %g5 ! save %o2
+ mov %o3, %g7 ! save %o3
+ mov %g4, %o0 ! vaddr
+ ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1 ! ctx
+ mov %g3, %o2 ! PTE
+ mov HV_MMU_IMMU, %o3 ! flags
+ ta HV_MMU_MAP_ADDR_TRAP
+ brnz,pn %o0, sun4v_itlb_error
+ mov %g2, %o1 ! restore %o1
+ mov %g1, %o0 ! restore %o0
+ mov %g5, %o2 ! restore %o2
+ mov %g7, %o3 ! restore %o3
+
+ retry
+
+sun4v_dtlb_miss:
+ /* Load MMU Miss base into %g2. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+
+ /* Load UTSB reg into %g1. */
+ mov SCRATCHPAD_UTSBREG1, %g1
+ ldxa [%g1] ASI_SCRATCHPAD, %g1
+
+ LOAD_DTLB_INFO(%g2, %g4, %g5)
+ COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v)
+ COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7)
+
+ /* Load TSB tag/pte into %g2/%g3 and compare the tag. */
+ ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
+ cmp %g2, %g6
+ bne,a,pn %xcc, tsb_miss_page_table_walk
+ mov FAULT_CODE_DTLB, %g3
+
+ /* We have a valid entry, make hypervisor call to load
+ * D-TLB and return from trap.
+ *
+ * %g3: PTE
+ * %g4: vaddr
+ */
+sun4v_dtlb_load:
+ ldxa [%g0] ASI_SCRATCHPAD, %g6
+ mov %o0, %g1 ! save %o0
+ mov %o1, %g2 ! save %o1
+ mov %o2, %g5 ! save %o2
+ mov %o3, %g7 ! save %o3
+ mov %g4, %o0 ! vaddr
+ ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1 ! ctx
+ mov %g3, %o2 ! PTE
+ mov HV_MMU_DMMU, %o3 ! flags
+ ta HV_MMU_MAP_ADDR_TRAP
+ brnz,pn %o0, sun4v_dtlb_error
+ mov %g2, %o1 ! restore %o1
+ mov %g1, %o0 ! restore %o0
+ mov %g5, %o2 ! restore %o2
+ mov %g7, %o3 ! restore %o3
+
+ retry
+
+sun4v_dtlb_prot:
+ SET_GL(1)
+
+ /* Load MMU Miss base into %g5. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g5
+
+ ldx [%g5 + HV_FAULT_D_ADDR_OFFSET], %g5
+ rdpr %tl, %g1
+ cmp %g1, 1
+ bgu,pn %xcc, winfix_trampoline
+ nop
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
+
+ /* Called from trap table:
+ * %g4: vaddr
+ * %g5: context
+ * %g6: TAG TARGET
+ */
+sun4v_itsb_miss:
+ mov SCRATCHPAD_UTSBREG1, %g1
+ ldxa [%g1] ASI_SCRATCHPAD, %g1
+ brz,pn %g5, kvmap_itlb_4v
+ mov FAULT_CODE_ITLB, %g3
+ ba,a,pt %xcc, sun4v_tsb_miss_common
+
+ /* Called from trap table:
+ * %g4: vaddr
+ * %g5: context
+ * %g6: TAG TARGET
+ */
+sun4v_dtsb_miss:
+ mov SCRATCHPAD_UTSBREG1, %g1
+ ldxa [%g1] ASI_SCRATCHPAD, %g1
+ brz,pn %g5, kvmap_dtlb_4v
+ mov FAULT_CODE_DTLB, %g3
+
+ /* fallthrough */
+
+ /* Create TSB pointer into %g1. This is something like:
+ *
+ * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
+ * tsb_base = tsb_reg & ~0x7UL;
+ * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask);
+ * tsb_ptr = tsb_base + (tsb_index * 16);
+ */
+sun4v_tsb_miss_common:
+ COMPUTE_TSB_PTR(%g1, %g4, %g5, %g7)
+
+ /* Branch directly to page table lookup. We have SCRATCHPAD_MMU_MISS
+ * still in %g2, so it's quite trivial to get at the PGD PHYS value
+ * so we can preload it into %g7.
+ */
+ sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+ ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath
+ ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
+
+sun4v_itlb_error:
+ sethi %hi(sun4v_err_itlb_vaddr), %g1
+ stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
+ sethi %hi(sun4v_err_itlb_ctx), %g1
+ ldxa [%g0] ASI_SCRATCHPAD, %g6
+ ldx [%g6 + HV_FAULT_I_CTX_OFFSET], %o1
+ stx %o1, [%g1 + %lo(sun4v_err_itlb_ctx)]
+ sethi %hi(sun4v_err_itlb_pte), %g1
+ stx %g3, [%g1 + %lo(sun4v_err_itlb_pte)]
+ sethi %hi(sun4v_err_itlb_error), %g1
+ stx %o0, [%g1 + %lo(sun4v_err_itlb_error)]
+
+ rdpr %tl, %g4
+ cmp %g4, 1
+ ble,pt %icc, 1f
+ sethi %hi(2f), %g7
+ ba,pt %xcc, etraptl1
+ or %g7, %lo(2f), %g7
+
+1: ba,pt %xcc, etrap
+2: or %g7, %lo(2b), %g7
+ call sun4v_itlb_error_report
+ add %sp, PTREGS_OFF, %o0
+
+ /* NOTREACHED */
+
+sun4v_dtlb_error:
+ sethi %hi(sun4v_err_dtlb_vaddr), %g1
+ stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
+ sethi %hi(sun4v_err_dtlb_ctx), %g1
+ ldxa [%g0] ASI_SCRATCHPAD, %g6
+ ldx [%g6 + HV_FAULT_D_CTX_OFFSET], %o1
+ stx %o1, [%g1 + %lo(sun4v_err_dtlb_ctx)]
+ sethi %hi(sun4v_err_dtlb_pte), %g1
+ stx %g3, [%g1 + %lo(sun4v_err_dtlb_pte)]
+ sethi %hi(sun4v_err_dtlb_error), %g1
+ stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)]
+
+ rdpr %tl, %g4
+ cmp %g4, 1
+ ble,pt %icc, 1f
+ sethi %hi(2f), %g7
+ ba,pt %xcc, etraptl1
+ or %g7, %lo(2f), %g7
+
+1: ba,pt %xcc, etrap
+2: or %g7, %lo(2b), %g7
+ call sun4v_dtlb_error_report
+ add %sp, PTREGS_OFF, %o0
+
+ /* NOTREACHED */
+
+ /* Instruction Access Exception, tl0. */
+sun4v_iacc:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3
+ ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call sun4v_insn_access_exception
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Instruction Access Exception, tl1. */
+sun4v_iacc_tl1:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3
+ ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+ ba,pt %xcc, etraptl1
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call sun4v_insn_access_exception_tl1
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Data Access Exception, tl0. */
+sun4v_dacc:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call sun4v_data_access_exception
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Data Access Exception, tl1. */
+sun4v_dacc_tl1:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+ ba,pt %xcc, etraptl1
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call sun4v_data_access_exception_tl1
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Memory Address Unaligned. */
+sun4v_mna:
+ /* Window fixup? */
+ rdpr %tl, %g2
+ cmp %g2, 1
+ ble,pt %icc, 1f
+ nop
+
+ SET_GL(1)
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5
+ mov HV_FAULT_TYPE_UNALIGNED, %g3
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g4
+ sllx %g3, 16, %g3
+ or %g4, %g3, %g4
+ ba,pt %xcc, winfix_mna
+ rdpr %tpc, %g3
+ /* not reached */
+
+1: ldxa [%g0] ASI_SCRATCHPAD, %g2
+ mov HV_FAULT_TYPE_UNALIGNED, %g3
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call sun4v_do_mna
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Privileged Action. */
+sun4v_privact:
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ call do_privact
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Unaligned ldd float, tl0. */
+sun4v_lddfmna:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call handle_lddfmna
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ /* Unaligned std float, tl0. */
+sun4v_stdfmna:
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+ sllx %g3, 16, %g3
+ or %g5, %g3, %g5
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ mov %l5, %o2
+ call handle_stdfmna
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define SUN4V_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl sun4v_patch_tlb_handlers
+ .type sun4v_patch_tlb_handlers,#function
+sun4v_patch_tlb_handlers:
+ SUN4V_DO_PATCH(tl0_iamiss, sun4v_itlb_miss)
+ SUN4V_DO_PATCH(tl1_iamiss, sun4v_itlb_miss)
+ SUN4V_DO_PATCH(tl0_damiss, sun4v_dtlb_miss)
+ SUN4V_DO_PATCH(tl1_damiss, sun4v_dtlb_miss)
+ SUN4V_DO_PATCH(tl0_daprot, sun4v_dtlb_prot)
+ SUN4V_DO_PATCH(tl1_daprot, sun4v_dtlb_prot)
+ SUN4V_DO_PATCH(tl0_iax, sun4v_iacc)
+ SUN4V_DO_PATCH(tl1_iax, sun4v_iacc_tl1)
+ SUN4V_DO_PATCH(tl0_dax, sun4v_dacc)
+ SUN4V_DO_PATCH(tl1_dax, sun4v_dacc_tl1)
+ SUN4V_DO_PATCH(tl0_mna, sun4v_mna)
+ SUN4V_DO_PATCH(tl1_mna, sun4v_mna)
+ SUN4V_DO_PATCH(tl0_lddfmna, sun4v_lddfmna)
+ SUN4V_DO_PATCH(tl0_stdfmna, sun4v_stdfmna)
+ SUN4V_DO_PATCH(tl0_privact, sun4v_privact)
+ retl
+ nop
+ .size sun4v_patch_tlb_handlers,.-sun4v_patch_tlb_handlers
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 5f8c822a2b4a..7a869138c37f 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -25,25 +25,93 @@
#include <linux/syscalls.h>
#include <linux/ipc.h>
#include <linux/personality.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/ipc.h>
#include <asm/utrap.h>
#include <asm/perfctr.h>
+#include <asm/a.out.h>
/* #define DEBUG_UNIMP_SYSCALL */
-/* XXX Make this per-binary type, this way we can detect the type of
- * XXX a binary. Every Sparc executable calls this very early on.
- */
asmlinkage unsigned long sys_getpagesize(void)
{
return PAGE_SIZE;
}
-#define COLOUR_ALIGN(addr,pgoff) \
- ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \
- (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
+#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
+#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
+
+/* Does addr --> addr+len fall within 4GB of the VA-space hole or
+ * overflow past the end of the 64-bit address space?
+ */
+static inline int invalid_64bit_range(unsigned long addr, unsigned long len)
+{
+ unsigned long va_exclude_start, va_exclude_end;
+
+ va_exclude_start = VA_EXCLUDE_START;
+ va_exclude_end = VA_EXCLUDE_END;
+
+ if (unlikely(len >= va_exclude_start))
+ return 1;
+
+ if (unlikely((addr + len) < addr))
+ return 1;
+
+ if (unlikely((addr >= va_exclude_start && addr < va_exclude_end) ||
+ ((addr + len) >= va_exclude_start &&
+ (addr + len) < va_exclude_end)))
+ return 1;
+
+ return 0;
+}
+
+/* Does start,end straddle the VA-space hole? */
+static inline int straddles_64bit_va_hole(unsigned long start, unsigned long end)
+{
+ unsigned long va_exclude_start, va_exclude_end;
+
+ va_exclude_start = VA_EXCLUDE_START;
+ va_exclude_end = VA_EXCLUDE_END;
+
+ if (likely(start < va_exclude_start && end < va_exclude_start))
+ return 0;
+
+ if (likely(start >= va_exclude_end && end >= va_exclude_end))
+ return 0;
+
+ return 1;
+}
+
+/* These functions differ from the default implementations in
+ * mm/mmap.c in two ways:
+ *
+ * 1) For file backed MAP_SHARED mmap()'s we D-cache color align,
+ * for fixed such mappings we just validate what the user gave us.
+ * 2) For 64-bit tasks we avoid mapping anything within 4GB of
+ * the spitfire/niagara VA-hole.
+ */
+
+static inline unsigned long COLOUR_ALIGN(unsigned long addr,
+ unsigned long pgoff)
+{
+ unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1);
+ unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1);
+
+ return base + off;
+}
+
+static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
+ unsigned long pgoff)
+{
+ unsigned long base = addr & ~(SHMLBA-1);
+ unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1);
+
+ if (base + off <= addr)
+ return base + off;
+ return base - off;
+}
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
@@ -64,8 +132,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
}
if (test_thread_flag(TIF_32BIT))
- task_size = 0xf0000000UL;
- if (len > task_size || len > -PAGE_OFFSET)
+ task_size = STACK_TOP32;
+ if (unlikely(len > task_size || len >= VA_EXCLUDE_START))
return -ENOMEM;
do_color_align = 0;
@@ -84,11 +152,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
return addr;
}
- if (len <= mm->cached_hole_size) {
+ if (len > mm->cached_hole_size) {
+ start_addr = addr = mm->free_area_cache;
+ } else {
+ start_addr = addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
- mm->free_area_cache = TASK_UNMAPPED_BASE;
}
- start_addr = addr = mm->free_area_cache;
task_size -= len;
@@ -100,11 +169,12 @@ full_search:
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
- if (addr < PAGE_OFFSET && -PAGE_OFFSET - len < addr) {
- addr = PAGE_OFFSET;
- vma = find_vma(mm, PAGE_OFFSET);
+ if (addr < VA_EXCLUDE_START &&
+ (addr + len) >= VA_EXCLUDE_START) {
+ addr = VA_EXCLUDE_END;
+ vma = find_vma(mm, VA_EXCLUDE_END);
}
- if (task_size < addr) {
+ if (unlikely(task_size < addr)) {
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
@@ -112,7 +182,7 @@ full_search:
}
return -ENOMEM;
}
- if (!vma || addr + len <= vma->vm_start) {
+ if (likely(!vma || addr + len <= vma->vm_start)) {
/*
* Remember the place where we stopped the search:
*/
@@ -128,6 +198,121 @@ full_search:
}
}
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long task_size = STACK_TOP32;
+ unsigned long addr = addr0;
+ int do_color_align;
+
+ /* This should only ever run for 32-bit processes. */
+ BUG_ON(!test_thread_flag(TIF_32BIT));
+
+ if (flags & MAP_FIXED) {
+ /* We do not accept a shared mapping if it would violate
+ * cache aliasing constraints.
+ */
+ if ((flags & MAP_SHARED) &&
+ ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (unlikely(len > task_size))
+ return -ENOMEM;
+
+ do_color_align = 0;
+ if (filp || (flags & MAP_SHARED))
+ do_color_align = 1;
+
+ /* requesting a specific address */
+ if (addr) {
+ if (do_color_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ /* check if free_area_cache is useful for us */
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache;
+ if (do_color_align) {
+ unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
+
+ addr = base + len;
+ }
+
+ /* make sure it can fit in the remaining address space */
+ if (likely(addr > len)) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start) {
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+ }
+
+ if (unlikely(mm->mmap_base < len))
+ goto bottomup;
+
+ addr = mm->mmap_base-len;
+ if (do_color_align)
+ addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+
+ do {
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (likely(!vma || addr+len <= vma->vm_start)) {
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr);
+ }
+
+ /* remember the largest hole we saw so far */
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = vma->vm_start-len;
+ if (do_color_align)
+ addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+ } while (likely(len < vma->vm_start));
+
+bottomup:
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ mm->cached_hole_size = ~0UL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+ /*
+ * Restore the topdown base:
+ */
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+
+ return addr;
+}
+
/* Try to align mapping such that we align it as much as possible. */
unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
@@ -171,15 +356,57 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, u
return addr;
}
+/* Essentially the same as PowerPC... */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ unsigned long random_factor = 0UL;
+
+ if (current->flags & PF_RANDOMIZE) {
+ random_factor = get_random_int();
+ if (test_thread_flag(TIF_32BIT))
+ random_factor &= ((1 * 1024 * 1024) - 1);
+ else
+ random_factor = ((random_factor << PAGE_SHIFT) &
+ 0xffffffffUL);
+ }
+
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (!test_thread_flag(TIF_32BIT) ||
+ (current->personality & ADDR_COMPAT_LAYOUT) ||
+ current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY ||
+ sysctl_legacy_va_layout) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ /* We know it's 32-bit */
+ unsigned long task_size = STACK_TOP32;
+ unsigned long gap;
+
+ gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+ if (gap < 128 * 1024 * 1024)
+ gap = 128 * 1024 * 1024;
+ if (gap > (task_size / 6 * 5))
+ gap = (task_size / 6 * 5);
+
+ mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+}
+
asmlinkage unsigned long sparc_brk(unsigned long brk)
{
/* People could try to be nasty and use ta 0x6d in 32bit programs */
- if (test_thread_flag(TIF_32BIT) &&
- brk >= 0xf0000000UL)
+ if (test_thread_flag(TIF_32BIT) && brk >= STACK_TOP32)
return current->mm->brk;
- if ((current->mm->brk & PAGE_OFFSET) != (brk & PAGE_OFFSET))
+ if (unlikely(straddles_64bit_va_hole(current->mm->brk, brk)))
return current->mm->brk;
+
return sys_brk(brk);
}
@@ -340,13 +567,16 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
retval = -EINVAL;
if (test_thread_flag(TIF_32BIT)) {
- if (len > 0xf0000000UL ||
- ((flags & MAP_FIXED) && addr > 0xf0000000UL - len))
+ if (len >= STACK_TOP32)
+ goto out_putf;
+
+ if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
goto out_putf;
} else {
- if (len > -PAGE_OFFSET ||
- ((flags & MAP_FIXED) &&
- addr < PAGE_OFFSET && addr + len > -PAGE_OFFSET))
+ if (len >= VA_EXCLUDE_START)
+ goto out_putf;
+
+ if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
goto out_putf;
}
@@ -365,9 +595,9 @@ asmlinkage long sys64_munmap(unsigned long addr, size_t len)
{
long ret;
- if (len > -PAGE_OFFSET ||
- (addr < PAGE_OFFSET && addr + len > -PAGE_OFFSET))
+ if (invalid_64bit_range(addr, len))
return -EINVAL;
+
down_write(&current->mm->mmap_sem);
ret = do_munmap(current->mm, addr, len);
up_write(&current->mm->mmap_sem);
@@ -384,18 +614,19 @@ asmlinkage unsigned long sys64_mremap(unsigned long addr,
{
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
+
if (test_thread_flag(TIF_32BIT))
goto out;
- if (old_len > -PAGE_OFFSET || new_len > -PAGE_OFFSET)
+ if (unlikely(new_len >= VA_EXCLUDE_START))
goto out;
- if (addr < PAGE_OFFSET && addr + old_len > -PAGE_OFFSET)
+ if (unlikely(invalid_64bit_range(addr, old_len)))
goto out;
+
down_write(&current->mm->mmap_sem);
if (flags & MREMAP_FIXED) {
- if (new_addr < PAGE_OFFSET &&
- new_addr + new_len > -PAGE_OFFSET)
+ if (invalid_64bit_range(new_addr, new_len))
goto out_sem;
- } else if (addr < PAGE_OFFSET && addr + new_len > -PAGE_OFFSET) {
+ } else if (invalid_64bit_range(addr, new_len)) {
unsigned long map_flags = 0;
struct file *file = NULL;
@@ -554,12 +785,10 @@ asmlinkage long sys_utrap_install(utrap_entry_t type,
}
if (!current_thread_info()->utraps) {
current_thread_info()->utraps =
- kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL);
+ kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL);
if (!current_thread_info()->utraps)
return -ENOMEM;
current_thread_info()->utraps[0] = 1;
- memset(current_thread_info()->utraps+1, 0,
- UT_TRAP_INSTRUCTION_31*sizeof(long));
} else {
if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p &&
current_thread_info()->utraps[0] > 1) {
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index 417727bd87ba..0e41df024489 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -62,6 +62,7 @@
#include <asm/fpumacro.h>
#include <asm/semaphore.h>
#include <asm/mmu_context.h>
+#include <asm/a.out.h>
asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group)
{
@@ -1039,15 +1040,15 @@ asmlinkage unsigned long sys32_mremap(unsigned long addr,
unsigned long ret = -EINVAL;
unsigned long new_addr = __new_addr;
- if (old_len > 0xf0000000UL || new_len > 0xf0000000UL)
+ if (old_len > STACK_TOP32 || new_len > STACK_TOP32)
goto out;
- if (addr > 0xf0000000UL - old_len)
+ if (addr > STACK_TOP32 - old_len)
goto out;
down_write(&current->mm->mmap_sem);
if (flags & MREMAP_FIXED) {
- if (new_addr > 0xf0000000UL - new_len)
+ if (new_addr > STACK_TOP32 - new_len)
goto out_sem;
- } else if (addr > 0xf0000000UL - new_len) {
+ } else if (addr > STACK_TOP32 - new_len) {
unsigned long map_flags = 0;
struct file *file = NULL;
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index a22930d62adf..7d61f1bfd3d3 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -30,6 +30,8 @@
#include <linux/cpufreq.h>
#include <linux/percpu.h>
#include <linux/profile.h>
+#include <linux/miscdevice.h>
+#include <linux/rtc.h>
#include <asm/oplib.h>
#include <asm/mostek.h>
@@ -45,6 +47,7 @@
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/cpudata.h>
+#include <asm/uaccess.h>
DEFINE_SPINLOCK(mostek_lock);
DEFINE_SPINLOCK(rtc_lock);
@@ -193,16 +196,22 @@ struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations;
static void stick_init_tick(unsigned long offset)
{
- tick_disable_protection();
-
- /* Let the user get at STICK too. */
- __asm__ __volatile__(
- " rd %%asr24, %%g2\n"
- " andn %%g2, %0, %%g2\n"
- " wr %%g2, 0, %%asr24"
- : /* no outputs */
- : "r" (TICK_PRIV_BIT)
- : "g1", "g2");
+ /* Writes to the %tick and %stick register are not
+ * allowed on sun4v. The Hypervisor controls that
+ * bit, per-strand.
+ */
+ if (tlb_type != hypervisor) {
+ tick_disable_protection();
+
+ /* Let the user get at STICK too. */
+ __asm__ __volatile__(
+ " rd %%asr24, %%g2\n"
+ " andn %%g2, %0, %%g2\n"
+ " wr %%g2, 0, %%asr24"
+ : /* no outputs */
+ : "r" (TICK_PRIV_BIT)
+ : "g1", "g2");
+ }
__asm__ __volatile__(
" rd %%asr24, %%g1\n"
@@ -683,6 +692,83 @@ static void __init set_system_time(void)
}
}
+/* davem suggests we keep this within the 4M locked kernel image */
+static u32 starfire_get_time(void)
+{
+ static char obp_gettod[32];
+ static u32 unix_tod;
+
+ sprintf(obp_gettod, "h# %08x unix-gettod",
+ (unsigned int) (long) &unix_tod);
+ prom_feval(obp_gettod);
+
+ return unix_tod;
+}
+
+static int starfire_set_time(u32 val)
+{
+ /* Do nothing, time is set using the service processor
+ * console on this platform.
+ */
+ return 0;
+}
+
+static u32 hypervisor_get_time(void)
+{
+ register unsigned long func asm("%o5");
+ register unsigned long arg0 asm("%o0");
+ register unsigned long arg1 asm("%o1");
+ int retries = 10000;
+
+retry:
+ func = HV_FAST_TOD_GET;
+ arg0 = 0;
+ arg1 = 0;
+ __asm__ __volatile__("ta %6"
+ : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
+ : "0" (func), "1" (arg0), "2" (arg1),
+ "i" (HV_FAST_TRAP));
+ if (arg0 == HV_EOK)
+ return arg1;
+ if (arg0 == HV_EWOULDBLOCK) {
+ if (--retries > 0) {
+ udelay(100);
+ goto retry;
+ }
+ printk(KERN_WARNING "SUN4V: tod_get() timed out.\n");
+ return 0;
+ }
+ printk(KERN_WARNING "SUN4V: tod_get() not supported.\n");
+ return 0;
+}
+
+static int hypervisor_set_time(u32 secs)
+{
+ register unsigned long func asm("%o5");
+ register unsigned long arg0 asm("%o0");
+ int retries = 10000;
+
+retry:
+ func = HV_FAST_TOD_SET;
+ arg0 = secs;
+ __asm__ __volatile__("ta %4"
+ : "=&r" (func), "=&r" (arg0)
+ : "0" (func), "1" (arg0),
+ "i" (HV_FAST_TRAP));
+ if (arg0 == HV_EOK)
+ return 0;
+ if (arg0 == HV_EWOULDBLOCK) {
+ if (--retries > 0) {
+ udelay(100);
+ goto retry;
+ }
+ printk(KERN_WARNING "SUN4V: tod_set() timed out.\n");
+ return -EAGAIN;
+ }
+ printk(KERN_WARNING "SUN4V: tod_set() not supported.\n");
+ return -EOPNOTSUPP;
+}
+
void __init clock_probe(void)
{
struct linux_prom_registers clk_reg[2];
@@ -702,14 +788,14 @@ void __init clock_probe(void)
if (this_is_starfire) {
- /* davem suggests we keep this within the 4M locked kernel image */
- static char obp_gettod[256];
- static u32 unix_tod;
-
- sprintf(obp_gettod, "h# %08x unix-gettod",
- (unsigned int) (long) &unix_tod);
- prom_feval(obp_gettod);
- xtime.tv_sec = unix_tod;
+ xtime.tv_sec = starfire_get_time();
+ xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
+ return;
+ }
+ if (tlb_type == hypervisor) {
+ xtime.tv_sec = hypervisor_get_time();
xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
@@ -981,11 +1067,10 @@ static void sparc64_start_timers(irqreturn_t (*cfunc)(int, void *, struct pt_reg
}
struct freq_table {
- unsigned long udelay_val_ref;
unsigned long clock_tick_ref;
unsigned int ref_freq;
};
-static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0, 0 };
+static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 };
unsigned long sparc64_get_clock_tick(unsigned int cpu)
{
@@ -1007,16 +1092,11 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val
if (!ft->ref_freq) {
ft->ref_freq = freq->old;
- ft->udelay_val_ref = cpu_data(cpu).udelay_val;
ft->clock_tick_ref = cpu_data(cpu).clock_tick;
}
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
(val == CPUFREQ_RESUMECHANGE)) {
- cpu_data(cpu).udelay_val =
- cpufreq_scale(ft->udelay_val_ref,
- ft->ref_freq,
- freq->new);
cpu_data(cpu).clock_tick =
cpufreq_scale(ft->clock_tick_ref,
ft->ref_freq,
@@ -1179,3 +1259,246 @@ static int set_rtc_mmss(unsigned long nowtime)
return retval;
}
}
+
+#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */
+static unsigned char mini_rtc_status; /* bitmapped status byte. */
+
+/* months start at 0 now */
+static unsigned char days_in_mo[] =
+{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+#define FEBRUARY 2
+#define STARTOFTIME 1970
+#define SECDAY 86400L
+#define SECYR (SECDAY * 365)
+#define leapyear(year) ((year) % 4 == 0 && \
+ ((year) % 100 != 0 || (year) % 400 == 0))
+#define days_in_year(a) (leapyear(a) ? 366 : 365)
+#define days_in_month(a) (month_days[(a) - 1])
+
+static int month_days[12] = {
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+/*
+ * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
+ */
+static void GregorianDay(struct rtc_time * tm)
+{
+ int leapsToDate;
+ int lastYear;
+ int day;
+ int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
+
+ lastYear = tm->tm_year - 1;
+
+ /*
+ * Number of leap corrections to apply up to end of last year
+ */
+ leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
+
+ /*
+ * This year is a leap year if it is divisible by 4 except when it is
+ * divisible by 100 unless it is divisible by 400
+ *
+ * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was
+ */
+ day = tm->tm_mon > 2 && leapyear(tm->tm_year);
+
+ day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
+ tm->tm_mday;
+
+ tm->tm_wday = day % 7;
+}
+
+static void to_tm(int tim, struct rtc_time *tm)
+{
+ register int i;
+ register long hms, day;
+
+ day = tim / SECDAY;
+ hms = tim % SECDAY;
+
+ /* Hours, minutes, seconds are easy */
+ tm->tm_hour = hms / 3600;
+ tm->tm_min = (hms % 3600) / 60;
+ tm->tm_sec = (hms % 3600) % 60;
+
+ /* Number of years in days */
+ for (i = STARTOFTIME; day >= days_in_year(i); i++)
+ day -= days_in_year(i);
+ tm->tm_year = i;
+
+ /* Number of months in days left */
+ if (leapyear(tm->tm_year))
+ days_in_month(FEBRUARY) = 29;
+ for (i = 1; day >= days_in_month(i); i++)
+ day -= days_in_month(i);
+ days_in_month(FEBRUARY) = 28;
+ tm->tm_mon = i;
+
+ /* Days are what is left over (+1) from all that. */
+ tm->tm_mday = day + 1;
+
+ /*
+ * Determine the day of week
+ */
+ GregorianDay(tm);
+}
+
+/* Both Starfire and SUN4V give us seconds since Jan 1st, 1970,
+ * aka Unix time. So we have to convert to/from rtc_time.
+ */
+static inline void mini_get_rtc_time(struct rtc_time *time)
+{
+ unsigned long flags;
+ u32 seconds;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ seconds = 0;
+ if (this_is_starfire)
+ seconds = starfire_get_time();
+ else if (tlb_type == hypervisor)
+ seconds = hypervisor_get_time();
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ to_tm(seconds, time);
+ time->tm_year -= 1900;
+ time->tm_mon -= 1;
+}
+
+static inline int mini_set_rtc_time(struct rtc_time *time)
+{
+ u32 seconds = mktime(time->tm_year + 1900, time->tm_mon + 1,
+ time->tm_mday, time->tm_hour,
+ time->tm_min, time->tm_sec);
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ err = -ENODEV;
+ if (this_is_starfire)
+ err = starfire_set_time(seconds);
+ else if (tlb_type == hypervisor)
+ err = hypervisor_set_time(seconds);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ return err;
+}
+
+static int mini_rtc_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct rtc_time wtime;
+ void __user *argp = (void __user *)arg;
+
+ switch (cmd) {
+
+ case RTC_PLL_GET:
+ return -EINVAL;
+
+ case RTC_PLL_SET:
+ return -EINVAL;
+
+ case RTC_UIE_OFF: /* disable ints from RTC updates. */
+ return 0;
+
+ case RTC_UIE_ON: /* enable ints for RTC updates. */
+ return -EINVAL;
+
+ case RTC_RD_TIME: /* Read the time/date from RTC */
+ /* this doesn't get week-day, who cares */
+ memset(&wtime, 0, sizeof(wtime));
+ mini_get_rtc_time(&wtime);
+
+ return copy_to_user(argp, &wtime, sizeof(wtime)) ? -EFAULT : 0;
+
+ case RTC_SET_TIME: /* Set the RTC */
+ {
+ int year;
+ unsigned char leap_yr;
+
+ if (!capable(CAP_SYS_TIME))
+ return -EACCES;
+
+ if (copy_from_user(&wtime, argp, sizeof(wtime)))
+ return -EFAULT;
+
+ year = wtime.tm_year + 1900;
+ leap_yr = ((!(year % 4) && (year % 100)) ||
+ !(year % 400));
+
+ if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1))
+ return -EINVAL;
+
+ if (wtime.tm_mday < 0 || wtime.tm_mday >
+ (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr)))
+ return -EINVAL;
+
+ if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 ||
+ wtime.tm_min < 0 || wtime.tm_min >= 60 ||
+ wtime.tm_sec < 0 || wtime.tm_sec >= 60)
+ return -EINVAL;
+
+ return mini_set_rtc_time(&wtime);
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int mini_rtc_open(struct inode *inode, struct file *file)
+{
+ if (mini_rtc_status & RTC_IS_OPEN)
+ return -EBUSY;
+
+ mini_rtc_status |= RTC_IS_OPEN;
+
+ return 0;
+}
+
+static int mini_rtc_release(struct inode *inode, struct file *file)
+{
+ mini_rtc_status &= ~RTC_IS_OPEN;
+ return 0;
+}
+
+
+static struct file_operations mini_rtc_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = mini_rtc_ioctl,
+ .open = mini_rtc_open,
+ .release = mini_rtc_release,
+};
+
+static struct miscdevice rtc_mini_dev =
+{
+ .minor = RTC_MINOR,
+ .name = "rtc",
+ .fops = &mini_rtc_fops,
+};
+
+static int __init rtc_mini_init(void)
+{
+ int retval;
+
+ if (tlb_type != hypervisor && !this_is_starfire)
+ return -ENODEV;
+
+ printk(KERN_INFO "Mini RTC Driver\n");
+
+ retval = misc_register(&rtc_mini_dev);
+ if (retval < 0)
+ return retval;
+
+ return 0;
+}
+
+static void __exit rtc_mini_exit(void)
+{
+ misc_deregister(&rtc_mini_dev);
+}
+
+
+module_init(rtc_mini_init);
+module_exit(rtc_mini_exit);
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 9478551cb020..a4dc01a3d238 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -16,6 +16,8 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/mmu.h>
+#include <asm/hypervisor.h>
+#include <asm/cpudata.h>
.data
.align 8
@@ -28,14 +30,19 @@ itlb_load:
dtlb_load:
.asciz "SUNW,dtlb-load"
+ /* XXX __cpuinit this thing XXX */
+#define TRAMP_STACK_SIZE 1024
+ .align 16
+tramp_stack:
+ .skip TRAMP_STACK_SIZE
+
.text
.align 8
.globl sparc64_cpu_startup, sparc64_cpu_startup_end
sparc64_cpu_startup:
- flushw
-
- BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_startup)
- BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_startup)
+ BRANCH_IF_SUN4V(g1, niagara_startup)
+ BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup)
+ BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup)
ba,pt %xcc, spitfire_startup
nop
@@ -55,6 +62,7 @@ cheetah_startup:
or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
stxa %g5, [%g0] ASI_DCU_CONTROL_REG
membar #Sync
+ /* fallthru */
cheetah_generic_startup:
mov TSB_EXTENSION_P, %g3
@@ -70,7 +78,9 @@ cheetah_generic_startup:
stxa %g0, [%g3] ASI_DMMU
stxa %g0, [%g3] ASI_IMMU
membar #Sync
+ /* fallthru */
+niagara_startup:
/* Disable STICK_INT interrupts. */
sethi %hi(0x80000000), %g5
sllx %g5, 32, %g5
@@ -85,17 +95,17 @@ spitfire_startup:
membar #Sync
startup_continue:
- wrpr %g0, 15, %pil
-
sethi %hi(0x80000000), %g2
sllx %g2, 32, %g2
wr %g2, 0, %tick_cmpr
+ mov %o0, %l0
+
+ BRANCH_IF_SUN4V(g1, niagara_lock_tlb)
+
/* Call OBP by hand to lock KERNBASE into i/d tlbs.
* We lock 2 consequetive entries if we are 'bigkernel'.
*/
- mov %o0, %l0
-
sethi %hi(prom_entry_lock), %g2
1: ldstub [%g2 + %lo(prom_entry_lock)], %g1
membar #StoreLoad | #StoreStore
@@ -105,7 +115,6 @@ startup_continue:
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
ldx [%g2 + 0x10], %l2
- mov %sp, %l1
add %l2, -(192 + 128), %sp
flushw
@@ -142,8 +151,7 @@ startup_continue:
sethi %hi(bigkernel), %g2
lduw [%g2 + %lo(bigkernel)], %g2
- cmp %g2, 0
- be,pt %icc, do_dtlb
+ brz,pt %g2, do_dtlb
nop
sethi %hi(call_method), %g2
@@ -214,8 +222,7 @@ do_dtlb:
sethi %hi(bigkernel), %g2
lduw [%g2 + %lo(bigkernel)], %g2
- cmp %g2, 0
- be,pt %icc, do_unlock
+ brz,pt %g2, do_unlock
nop
sethi %hi(call_method), %g2
@@ -257,99 +264,180 @@ do_unlock:
stb %g0, [%g2 + %lo(prom_entry_lock)]
membar #StoreStore | #StoreLoad
- mov %l1, %sp
- flushw
+ ba,pt %xcc, after_lock_tlb
+ nop
+
+niagara_lock_tlb:
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ sethi %hi(KERNBASE), %o0
+ clr %o1
+ sethi %hi(kern_locked_tte_data), %o2
+ ldx [%o2 + %lo(kern_locked_tte_data)], %o2
+ mov HV_MMU_IMMU, %o3
+ ta HV_FAST_TRAP
+
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ sethi %hi(KERNBASE), %o0
+ clr %o1
+ sethi %hi(kern_locked_tte_data), %o2
+ ldx [%o2 + %lo(kern_locked_tte_data)], %o2
+ mov HV_MMU_DMMU, %o3
+ ta HV_FAST_TRAP
- mov %l0, %o0
+ sethi %hi(bigkernel), %g2
+ lduw [%g2 + %lo(bigkernel)], %g2
+ brz,pt %g2, after_lock_tlb
+ nop
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ sethi %hi(KERNBASE + 0x400000), %o0
+ clr %o1
+ sethi %hi(kern_locked_tte_data), %o2
+ ldx [%o2 + %lo(kern_locked_tte_data)], %o2
+ sethi %hi(0x400000), %o3
+ add %o2, %o3, %o2
+ mov HV_MMU_IMMU, %o3
+ ta HV_FAST_TRAP
+
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ sethi %hi(KERNBASE + 0x400000), %o0
+ clr %o1
+ sethi %hi(kern_locked_tte_data), %o2
+ ldx [%o2 + %lo(kern_locked_tte_data)], %o2
+ sethi %hi(0x400000), %o3
+ add %o2, %o3, %o2
+ mov HV_MMU_DMMU, %o3
+ ta HV_FAST_TRAP
+
+after_lock_tlb:
wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
wr %g0, 0, %fprs
- /* XXX Buggy PROM... */
- srl %o0, 0, %o0
- ldx [%o0], %g6
-
wr %g0, ASI_P, %asi
mov PRIMARY_CONTEXT, %g7
- stxa %g0, [%g7] ASI_DMMU
+
+661: stxa %g0, [%g7] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g0, [%g7] ASI_MMU
+ .previous
+
membar #Sync
mov SECONDARY_CONTEXT, %g7
- stxa %g0, [%g7] ASI_DMMU
+
+661: stxa %g0, [%g7] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g0, [%g7] ASI_MMU
+ .previous
+
membar #Sync
- mov 1, %g5
- sllx %g5, THREAD_SHIFT, %g5
- sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
- add %g6, %g5, %sp
+ /* Everything we do here, until we properly take over the
+ * trap table, must be done with extreme care. We cannot
+ * make any references to %g6 (current thread pointer),
+ * %g4 (current task pointer), or %g5 (base of current cpu's
+ * per-cpu area) until we properly take over the trap table
+ * from the firmware and hypervisor.
+ *
+ * Get onto temporary stack which is in the locked kernel image.
+ */
+ sethi %hi(tramp_stack), %g1
+ or %g1, %lo(tramp_stack), %g1
+ add %g1, TRAMP_STACK_SIZE, %g1
+ sub %g1, STACKFRAME_SZ + STACK_BIAS, %sp
mov 0, %fp
- wrpr %g0, 0, %wstate
- wrpr %g0, 0, %tl
+ /* Put garbage in these registers to trap any access to them. */
+ set 0xdeadbeef, %g4
+ set 0xdeadbeef, %g5
+ set 0xdeadbeef, %g6
- /* Setup the trap globals, then we can resurface. */
- rdpr %pstate, %o1
- mov %g6, %o2
- wrpr %o1, PSTATE_AG, %pstate
- sethi %hi(sparc64_ttable_tl0), %g5
- wrpr %g5, %tba
- mov %o2, %g6
-
- wrpr %o1, PSTATE_MG, %pstate
-#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
-#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
-
- mov TSB_REG, %g1
- stxa %g0, [%g1] ASI_DMMU
- membar #Sync
- mov TLB_SFSR, %g1
- sethi %uhi(KERN_HIGHBITS), %g2
- or %g2, %ulo(KERN_HIGHBITS), %g2
- sllx %g2, 32, %g2
- or %g2, KERN_LOWBITS, %g2
+ call init_irqwork_curcpu
+ nop
- BRANCH_IF_ANY_CHEETAH(g3,g7,9f)
+ sethi %hi(tlb_type), %g3
+ lduw [%g3 + %lo(tlb_type)], %g2
+ cmp %g2, 3
+ bne,pt %icc, 1f
+ nop
- ba,pt %xcc, 1f
+ call hard_smp_processor_id
nop
+
+ mov %o0, %o1
+ mov 0, %o0
+ mov 0, %o2
+ call sun4v_init_mondo_queues
+ mov 1, %o3
-9:
- sethi %uhi(VPTE_BASE_CHEETAH), %g3
- or %g3, %ulo(VPTE_BASE_CHEETAH), %g3
- ba,pt %xcc, 2f
- sllx %g3, 32, %g3
-1:
- sethi %uhi(VPTE_BASE_SPITFIRE), %g3
- or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3
- sllx %g3, 32, %g3
+1: call init_cur_cpu_trap
+ ldx [%l0], %o0
+
+ /* Start using proper page size encodings in ctx register. */
+ sethi %hi(sparc64_kern_pri_context), %g3
+ ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
+ mov PRIMARY_CONTEXT, %g1
-2:
- clr %g7
-#undef KERN_HIGHBITS
-#undef KERN_LOWBITS
+661: stxa %g2, [%g1] ASI_DMMU
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ stxa %g2, [%g1] ASI_MMU
+ .previous
- wrpr %o1, 0x0, %pstate
- ldx [%g6 + TI_TASK], %g4
+ membar #Sync
wrpr %g0, 0, %wstate
- call init_irqwork_curcpu
+ /* As a hack, put &init_thread_union into %g6.
+ * prom_world() loads from here to restore the %asi
+ * register.
+ */
+ sethi %hi(init_thread_union), %g6
+ or %g6, %lo(init_thread_union), %g6
+
+ sethi %hi(is_sun4v), %o0
+ lduw [%o0 + %lo(is_sun4v)], %o0
+ brz,pt %o0, 1f
nop
- /* Start using proper page size encodings in ctx register. */
- sethi %hi(sparc64_kern_pri_context), %g3
- ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2
- mov PRIMARY_CONTEXT, %g1
- stxa %g2, [%g1] ASI_DMMU
- membar #Sync
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+ add %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD
+
+ /* Compute physical address:
+ *
+ * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+ */
+ sethi %hi(KERNBASE), %g3
+ sub %g2, %g3, %g2
+ sethi %hi(kern_base), %g3
+ ldx [%g3 + %lo(kern_base)], %g3
+ add %g2, %g3, %o1
+
+ call prom_set_trap_table_sun4v
+ sethi %hi(sparc64_ttable_tl0), %o0
+
+ ba,pt %xcc, 2f
+ nop
+
+1: call prom_set_trap_table
+ sethi %hi(sparc64_ttable_tl0), %o0
+
+2: ldx [%l0], %g6
+ ldx [%g6 + TI_TASK], %g4
+
+ mov 1, %g5
+ sllx %g5, THREAD_SHIFT, %g5
+ sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+ add %g6, %g5, %sp
+ mov 0, %fp
rdpr %pstate, %o1
or %o1, PSTATE_IE, %o1
wrpr %o1, 0, %pstate
- call prom_set_trap_table
- sethi %hi(sparc64_ttable_tl0), %o0
-
call smp_callin
nop
call cpu_idle
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 8d44ae5a15e3..7f7dba0ca96a 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/kdebug.h>
+#include <asm/head.h>
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif
@@ -72,12 +73,14 @@ struct tl1_traplog {
static void dump_tl1_traplog(struct tl1_traplog *p)
{
- int i;
+ int i, limit;
+
+ printk(KERN_EMERG "TRAPLOG: Error at trap level 0x%lx, "
+ "dumping track stack.\n", p->tl);
- printk("TRAPLOG: Error at trap level 0x%lx, dumping track stack.\n",
- p->tl);
- for (i = 0; i < 4; i++) {
- printk(KERN_CRIT
+ limit = (tlb_type == hypervisor) ? 2 : 4;
+ for (i = 0; i < limit; i++) {
+ printk(KERN_EMERG
"TRAPLOG: Trap level %d TSTATE[%016lx] TPC[%016lx] "
"TNPC[%016lx] TT[%lx]\n",
i + 1,
@@ -179,6 +182,45 @@ void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr
spitfire_insn_access_exception(regs, sfsr, sfar);
}
+void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+ unsigned short type = (type_ctx >> 16);
+ unsigned short ctx = (type_ctx & 0xffff);
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "instruction access exception", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ printk("sun4v_insn_access_exception: ADDR[%016lx] "
+ "CTX[%04x] TYPE[%04x], going.\n",
+ addr, ctx, type);
+ die_if_kernel("Iax", regs);
+ }
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *) addr;
+ info.si_trapno = 0;
+ force_sig_info(SIGSEGV, &info, current);
+}
+
+void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+ if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ sun4v_insn_access_exception(regs, addr, type_ctx);
+}
+
void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
{
siginfo_t info;
@@ -227,6 +269,45 @@ void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr
spitfire_data_access_exception(regs, sfsr, sfar);
}
+void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+ unsigned short type = (type_ctx >> 16);
+ unsigned short ctx = (type_ctx & 0xffff);
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "data access exception", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ printk("sun4v_data_access_exception: ADDR[%016lx] "
+ "CTX[%04x] TYPE[%04x], going.\n",
+ addr, ctx, type);
+ die_if_kernel("Dax", regs);
+ }
+
+ if (test_thread_flag(TIF_32BIT)) {
+ regs->tpc &= 0xffffffff;
+ regs->tnpc &= 0xffffffff;
+ }
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *) addr;
+ info.si_trapno = 0;
+ force_sig_info(SIGSEGV, &info, current);
+}
+
+void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+ if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+ 0, 0x8, SIGTRAP) == NOTIFY_STOP)
+ return;
+
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ sun4v_data_access_exception(regs, addr, type_ctx);
+}
+
#ifdef CONFIG_PCI
/* This is really pathetic... */
extern volatile int pci_poke_in_progress;
@@ -788,7 +869,8 @@ void __init cheetah_ecache_flush_init(void)
cheetah_error_log[i].afsr = CHAFSR_INVALID;
__asm__ ("rdpr %%ver, %0" : "=r" (ver));
- if ((ver >> 32) == 0x003e0016) {
+ if ((ver >> 32) == __JALAPENO_ID ||
+ (ver >> 32) == __SERRANO_ID) {
cheetah_error_table = &__jalapeno_error_table[0];
cheetah_afsr_errors = JPAFSR_ERRORS;
} else if ((ver >> 32) == 0x003e0015) {
@@ -1666,6 +1748,238 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs)
regs->tpc);
}
+struct sun4v_error_entry {
+ u64 err_handle;
+ u64 err_stick;
+
+ u32 err_type;
+#define SUN4V_ERR_TYPE_UNDEFINED 0
+#define SUN4V_ERR_TYPE_UNCORRECTED_RES 1
+#define SUN4V_ERR_TYPE_PRECISE_NONRES 2
+#define SUN4V_ERR_TYPE_DEFERRED_NONRES 3
+#define SUN4V_ERR_TYPE_WARNING_RES 4
+
+ u32 err_attrs;
+#define SUN4V_ERR_ATTRS_PROCESSOR 0x00000001
+#define SUN4V_ERR_ATTRS_MEMORY 0x00000002
+#define SUN4V_ERR_ATTRS_PIO 0x00000004
+#define SUN4V_ERR_ATTRS_INT_REGISTERS 0x00000008
+#define SUN4V_ERR_ATTRS_FPU_REGISTERS 0x00000010
+#define SUN4V_ERR_ATTRS_USER_MODE 0x01000000
+#define SUN4V_ERR_ATTRS_PRIV_MODE 0x02000000
+#define SUN4V_ERR_ATTRS_RES_QUEUE_FULL 0x80000000
+
+ u64 err_raddr;
+ u32 err_size;
+ u16 err_cpu;
+ u16 err_pad;
+};
+
+static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0);
+static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0);
+
+static const char *sun4v_err_type_to_str(u32 type)
+{
+ switch (type) {
+ case SUN4V_ERR_TYPE_UNDEFINED:
+ return "undefined";
+ case SUN4V_ERR_TYPE_UNCORRECTED_RES:
+ return "uncorrected resumable";
+ case SUN4V_ERR_TYPE_PRECISE_NONRES:
+ return "precise nonresumable";
+ case SUN4V_ERR_TYPE_DEFERRED_NONRES:
+ return "deferred nonresumable";
+ case SUN4V_ERR_TYPE_WARNING_RES:
+ return "warning resumable";
+ default:
+ return "unknown";
+ };
+}
+
+static void sun4v_log_error(struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt)
+{
+ int cnt;
+
+ printk("%s: Reporting on cpu %d\n", pfx, cpu);
+ printk("%s: err_handle[%lx] err_stick[%lx] err_type[%08x:%s]\n",
+ pfx,
+ ent->err_handle, ent->err_stick,
+ ent->err_type,
+ sun4v_err_type_to_str(ent->err_type));
+ printk("%s: err_attrs[%08x:%s %s %s %s %s %s %s %s]\n",
+ pfx,
+ ent->err_attrs,
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_PROCESSOR) ?
+ "processor" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_MEMORY) ?
+ "memory" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_PIO) ?
+ "pio" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_INT_REGISTERS) ?
+ "integer-regs" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_FPU_REGISTERS) ?
+ "fpu-regs" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_USER_MODE) ?
+ "user" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_PRIV_MODE) ?
+ "privileged" : ""),
+ ((ent->err_attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL) ?
+ "queue-full" : ""));
+ printk("%s: err_raddr[%016lx] err_size[%u] err_cpu[%u]\n",
+ pfx,
+ ent->err_raddr, ent->err_size, ent->err_cpu);
+
+ if ((cnt = atomic_read(ocnt)) != 0) {
+ atomic_set(ocnt, 0);
+ wmb();
+ printk("%s: Queue overflowed %d times.\n",
+ pfx, cnt);
+ }
+}
+
+/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate.
+ * Log the event and clear the first word of the entry.
+ */
+void sun4v_resum_error(struct pt_regs *regs, unsigned long offset)
+{
+ struct sun4v_error_entry *ent, local_copy;
+ struct trap_per_cpu *tb;
+ unsigned long paddr;
+ int cpu;
+
+ cpu = get_cpu();
+
+ tb = &trap_block[cpu];
+ paddr = tb->resum_kernel_buf_pa + offset;
+ ent = __va(paddr);
+
+ memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry));
+
+ /* We have a local copy now, so release the entry. */
+ ent->err_handle = 0;
+ wmb();
+
+ put_cpu();
+
+ sun4v_log_error(&local_copy, cpu,
+ KERN_ERR "RESUMABLE ERROR",
+ &sun4v_resum_oflow_cnt);
+}
+
+/* If we try to printk() we'll probably make matters worse, by trying
+ * to retake locks this cpu already holds or causing more errors. So
+ * just bump a counter, and we'll report these counter bumps above.
+ */
+void sun4v_resum_overflow(struct pt_regs *regs)
+{
+ atomic_inc(&sun4v_resum_oflow_cnt);
+}
+
+/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate.
+ * Log the event, clear the first word of the entry, and die.
+ */
+void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset)
+{
+ struct sun4v_error_entry *ent, local_copy;
+ struct trap_per_cpu *tb;
+ unsigned long paddr;
+ int cpu;
+
+ cpu = get_cpu();
+
+ tb = &trap_block[cpu];
+ paddr = tb->nonresum_kernel_buf_pa + offset;
+ ent = __va(paddr);
+
+ memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry));
+
+ /* We have a local copy now, so release the entry. */
+ ent->err_handle = 0;
+ wmb();
+
+ put_cpu();
+
+#ifdef CONFIG_PCI
+ /* Check for the special PCI poke sequence. */
+ if (pci_poke_in_progress && pci_poke_cpu == cpu) {
+ pci_poke_faulted = 1;
+ regs->tpc += 4;
+ regs->tnpc = regs->tpc + 4;
+ return;
+ }
+#endif
+
+ sun4v_log_error(&local_copy, cpu,
+ KERN_EMERG "NON-RESUMABLE ERROR",
+ &sun4v_nonresum_oflow_cnt);
+
+ panic("Non-resumable error.");
+}
+
+/* If we try to printk() we'll probably make matters worse, by trying
+ * to retake locks this cpu already holds or causing more errors. So
+ * just bump a counter, and we'll report these counter bumps above.
+ */
+void sun4v_nonresum_overflow(struct pt_regs *regs)
+{
+ /* XXX Actually even this can make not that much sense. Perhaps
+ * XXX we should just pull the plug and panic directly from here?
+ */
+ atomic_inc(&sun4v_nonresum_oflow_cnt);
+}
+
+unsigned long sun4v_err_itlb_vaddr;
+unsigned long sun4v_err_itlb_ctx;
+unsigned long sun4v_err_itlb_pte;
+unsigned long sun4v_err_itlb_error;
+
+void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
+{
+ if (tl > 1)
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
+ regs->tpc, tl);
+ printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] "
+ "pte[%lx] error[%lx]\n",
+ sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
+ sun4v_err_itlb_pte, sun4v_err_itlb_error);
+
+ prom_halt();
+}
+
+unsigned long sun4v_err_dtlb_vaddr;
+unsigned long sun4v_err_dtlb_ctx;
+unsigned long sun4v_err_dtlb_pte;
+unsigned long sun4v_err_dtlb_error;
+
+void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
+{
+ if (tl > 1)
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+ printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
+ regs->tpc, tl);
+ printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] "
+ "pte[%lx] error[%lx]\n",
+ sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
+ sun4v_err_dtlb_pte, sun4v_err_dtlb_error);
+
+ prom_halt();
+}
+
+void hypervisor_tlbop_error(unsigned long err, unsigned long op)
+{
+ printk(KERN_CRIT "SUN4V: TLB hv call error %lu for op %lu\n",
+ err, op);
+}
+
+void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op)
+{
+ printk(KERN_CRIT "SUN4V: XCALL TLB hv call error %lu for op %lu\n",
+ err, op);
+}
+
void do_fpe_common(struct pt_regs *regs)
{
if (regs->tstate & TSTATE_PRIV) {
@@ -1924,10 +2238,11 @@ void die_if_kernel(char *str, struct pt_regs *regs)
}
user_instruction_dump ((unsigned int __user *) regs->tpc);
}
+#if 0
#ifdef CONFIG_SMP
smp_report_regs();
#endif
-
+#endif
if (regs->tstate & TSTATE_PRIV)
do_exit(SIGKILL);
do_exit(SIGSEGV);
@@ -1958,6 +2273,11 @@ void do_illegal_instruction(struct pt_regs *regs)
} else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ {
if (handle_ldf_stq(insn, regs))
return;
+ } else if (tlb_type == hypervisor) {
+ extern int vis_emul(struct pt_regs *, unsigned int);
+
+ if (!vis_emul(regs, insn))
+ return;
}
}
info.si_signo = SIGILL;
@@ -1968,6 +2288,8 @@ void do_illegal_instruction(struct pt_regs *regs)
force_sig_info(SIGILL, &info, current);
}
+extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn);
+
void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
{
siginfo_t info;
@@ -1977,13 +2299,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo
return;
if (regs->tstate & TSTATE_PRIV) {
- extern void kernel_unaligned_trap(struct pt_regs *regs,
- unsigned int insn,
- unsigned long sfar,
- unsigned long sfsr);
-
- kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc),
- sfar, sfsr);
+ kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
return;
}
info.si_signo = SIGBUS;
@@ -1994,6 +2310,26 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo
force_sig_info(SIGBUS, &info, current);
}
+void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+ siginfo_t info;
+
+ if (notify_die(DIE_TRAP, "memory address unaligned", regs,
+ 0, 0x34, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (regs->tstate & TSTATE_PRIV) {
+ kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
+ return;
+ }
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *) addr;
+ info.si_trapno = 0;
+ force_sig_info(SIGBUS, &info, current);
+}
+
void do_privop(struct pt_regs *regs)
{
siginfo_t info;
@@ -2130,7 +2466,22 @@ void do_getpsr(struct pt_regs *regs)
}
}
+struct trap_per_cpu trap_block[NR_CPUS];
+
+/* This can get invoked before sched_init() so play it super safe
+ * and use hard_smp_processor_id().
+ */
+void init_cur_cpu_trap(struct thread_info *t)
+{
+ int cpu = hard_smp_processor_id();
+ struct trap_per_cpu *p = &trap_block[cpu];
+
+ p->thread = t;
+ p->pgd_paddr = 0;
+}
+
extern void thread_info_offsets_are_bolixed_dave(void);
+extern void trap_per_cpu_offsets_are_bolixed_dave(void);
/* Only invoked on boot processor. */
void __init trap_init(void)
@@ -2154,7 +2505,6 @@ void __init trap_init(void)
TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
TI_PCR != offsetof(struct thread_info, pcr_reg) ||
- TI_CEE_STUFF != offsetof(struct thread_info, cee_stuff) ||
TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
@@ -2165,6 +2515,29 @@ void __init trap_init(void)
(TI_FPREGS & (64 - 1)))
thread_info_offsets_are_bolixed_dave();
+ if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
+ (TRAP_PER_CPU_PGD_PADDR !=
+ offsetof(struct trap_per_cpu, pgd_paddr)) ||
+ (TRAP_PER_CPU_CPU_MONDO_PA !=
+ offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
+ (TRAP_PER_CPU_DEV_MONDO_PA !=
+ offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
+ (TRAP_PER_CPU_RESUM_MONDO_PA !=
+ offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
+ (TRAP_PER_CPU_RESUM_KBUF_PA !=
+ offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
+ (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
+ offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
+ (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
+ offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
+ (TRAP_PER_CPU_FAULT_INFO !=
+ offsetof(struct trap_per_cpu, fault_info)) ||
+ (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
+ offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
+ (TRAP_PER_CPU_CPU_LIST_PA !=
+ offsetof(struct trap_per_cpu, cpu_list_pa)))
+ trap_per_cpu_offsets_are_bolixed_dave();
+
/* Attach to the address space of init_task. On SMP we
* do this in smp.c:smp_callin for other cpus.
*/
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
new file mode 100644
index 000000000000..118baea44f69
--- /dev/null
+++ b/arch/sparc64/kernel/tsb.S
@@ -0,0 +1,442 @@
+/* tsb.S: Sparc64 TSB table handling.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/tsb.h>
+#include <asm/hypervisor.h>
+
+ .text
+ .align 32
+
+ /* Invoked from TLB miss handler, we are in the
+ * MMU global registers and they are setup like
+ * this:
+ *
+ * %g1: TSB entry pointer
+ * %g2: available temporary
+ * %g3: FAULT_CODE_{D,I}TLB
+ * %g4: available temporary
+ * %g5: available temporary
+ * %g6: TAG TARGET
+ * %g7: available temporary, will be loaded by us with
+ * the physical address base of the linux page
+ * tables for the current address space
+ */
+tsb_miss_dtlb:
+ mov TLB_TAG_ACCESS, %g4
+ ba,pt %xcc, tsb_miss_page_table_walk
+ ldxa [%g4] ASI_DMMU, %g4
+
+tsb_miss_itlb:
+ mov TLB_TAG_ACCESS, %g4
+ ba,pt %xcc, tsb_miss_page_table_walk
+ ldxa [%g4] ASI_IMMU, %g4
+
+ /* At this point we have:
+ * %g1 -- TSB entry address
+ * %g3 -- FAULT_CODE_{D,I}TLB
+ * %g4 -- missing virtual address
+ * %g6 -- TAG TARGET (vaddr >> 22)
+ */
+tsb_miss_page_table_walk:
+ TRAP_LOAD_PGD_PHYS(%g7, %g5)
+
+ /* And now we have the PGD base physical address in %g7. */
+tsb_miss_page_table_walk_sun4v_fastpath:
+ USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
+
+ /* At this point we have:
+ * %g1 -- TSB entry address
+ * %g3 -- FAULT_CODE_{D,I}TLB
+ * %g5 -- physical address of PTE in Linux page tables
+ * %g6 -- TAG TARGET (vaddr >> 22)
+ */
+tsb_reload:
+ TSB_LOCK_TAG(%g1, %g2, %g7)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ mov 1, %g7
+ sllx %g7, TSB_TAG_INVALID_BIT, %g7
+ brgez,a,pn %g5, tsb_do_fault
+ TSB_STORE(%g1, %g7)
+
+ TSB_WRITE(%g1, %g5, %g6)
+
+ /* Finally, load TLB and return from trap. */
+tsb_tlb_reload:
+ cmp %g3, FAULT_CODE_DTLB
+ bne,pn %xcc, tsb_itlb_load
+ nop
+
+tsb_dtlb_load:
+
+661: stxa %g5, [%g0] ASI_DTLB_DATA_IN
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
+ /* For sun4v the ASI_DTLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_dtlb_load
+ mov %g5, %g3
+
+tsb_itlb_load:
+ /* Executable bit must be set. */
+661: andcc %g5, _PAGE_EXEC_4U, %g0
+ .section .sun4v_1insn_patch, "ax"
+ .word 661b
+ andcc %g5, _PAGE_EXEC_4V, %g0
+ .previous
+
+ be,pn %xcc, tsb_do_fault
+ nop
+
+661: stxa %g5, [%g0] ASI_ITLB_DATA_IN
+ retry
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
+ /* For sun4v the ASI_ITLB_DATA_IN store and the retry
+ * instruction get nop'd out and we get here to branch
+ * to the sun4v tlb load code. The registers are setup
+ * as follows:
+ *
+ * %g4: vaddr
+ * %g5: PTE
+ * %g6: TAG
+ *
+ * The sun4v TLB load wants the PTE in %g3 so we fix that
+ * up here.
+ */
+ ba,pt %xcc, sun4v_itlb_load
+ mov %g5, %g3
+
+ /* No valid entry in the page tables, do full fault
+ * processing.
+ */
+
+ .globl tsb_do_fault
+tsb_do_fault:
+ cmp %g3, FAULT_CODE_DTLB
+
+661: rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ SET_GL(1)
+ ldxa [%g0] ASI_SCRATCHPAD, %g4
+ .previous
+
+ bne,pn %xcc, tsb_do_itlb_fault
+ nop
+
+tsb_do_dtlb_fault:
+ rdpr %tl, %g3
+ cmp %g3, 1
+
+661: mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ ldx [%g4 + HV_FAULT_D_ADDR_OFFSET], %g5
+ nop
+ .previous
+
+ be,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_DTLB, %g4
+ ba,pt %xcc, winfix_trampoline
+ nop
+
+tsb_do_itlb_fault:
+ rdpr %tpc, %g5
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_ITLB, %g4
+
+ .globl sparc64_realfault_common
+sparc64_realfault_common:
+ /* fault code in %g4, fault address in %g5, etrap will
+ * preserve these two values in %l4 and %l5 respectively
+ */
+ ba,pt %xcc, etrap ! Save trap state
+1: rd %pc, %g7 ! ...
+ stb %l4, [%g6 + TI_FAULT_CODE] ! Save fault code
+ stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address
+ call do_sparc64_fault ! Call fault handler
+ add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg
+ ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state
+ nop ! Delay slot (fill me)
+
+winfix_trampoline:
+ rdpr %tpc, %g3 ! Prepare winfixup TNPC
+ or %g3, 0x7c, %g3 ! Compute branch offset
+ wrpr %g3, %tnpc ! Write it into TNPC
+ done ! Trap return
+
+ /* Insert an entry into the TSB.
+ *
+ * %o0: TSB entry pointer (virt or phys address)
+ * %o1: tag
+ * %o2: pte
+ */
+ .align 32
+ .globl __tsb_insert
+__tsb_insert:
+ rdpr %pstate, %o5
+ wrpr %o5, PSTATE_IE, %pstate
+ TSB_LOCK_TAG(%o0, %g2, %g3)
+ TSB_WRITE(%o0, %o2, %o1)
+ wrpr %o5, %pstate
+ retl
+ nop
+ .size __tsb_insert, .-__tsb_insert
+
+ /* Flush the given TSB entry if it has the matching
+ * tag.
+ *
+ * %o0: TSB entry pointer (virt or phys address)
+ * %o1: tag
+ */
+ .align 32
+ .globl tsb_flush
+ .type tsb_flush,#function
+tsb_flush:
+ sethi %hi(TSB_TAG_LOCK_HIGH), %g2
+1: TSB_LOAD_TAG(%o0, %g1)
+ srlx %g1, 32, %o3
+ andcc %o3, %g2, %g0
+ bne,pn %icc, 1b
+ membar #LoadLoad
+ cmp %g1, %o1
+ mov 1, %o3
+ bne,pt %xcc, 2f
+ sllx %o3, TSB_TAG_INVALID_BIT, %o3
+ TSB_CAS_TAG(%o0, %g1, %o3)
+ cmp %g1, %o3
+ bne,pn %xcc, 1b
+ nop
+2: retl
+ TSB_MEMBAR
+ .size tsb_flush, .-tsb_flush
+
+ /* Reload MMU related context switch state at
+ * schedule() time.
+ *
+ * %o0: page table physical address
+ * %o1: TSB register value
+ * %o2: TSB virtual address
+ * %o3: TSB mapping locked PTE
+ * %o4: Hypervisor TSB descriptor physical address
+ *
+ * We have to run this whole thing with interrupts
+ * disabled so that the current cpu doesn't change
+ * due to preemption.
+ */
+ .align 32
+ .globl __tsb_context_switch
+ .type __tsb_context_switch,#function
+__tsb_context_switch:
+ rdpr %pstate, %o5
+ wrpr %o5, PSTATE_IE, %pstate
+
+ ldub [%g6 + TI_CPU], %g1
+ sethi %hi(trap_block), %g2
+ sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1
+ or %g2, %lo(trap_block), %g2
+ add %g2, %g1, %g2
+ stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
+
+ sethi %hi(tlb_type), %g1
+ lduw [%g1 + %lo(tlb_type)], %g1
+ cmp %g1, 3
+ bne,pt %icc, 1f
+ nop
+
+ /* Hypervisor TSB switch. */
+ mov SCRATCHPAD_UTSBREG1, %g1
+ stxa %o1, [%g1] ASI_SCRATCHPAD
+ mov -1, %g2
+ mov SCRATCHPAD_UTSBREG2, %g1
+ stxa %g2, [%g1] ASI_SCRATCHPAD
+
+ /* Save away %o5's %pstate, we have to use %o5 for
+ * the hypervisor call.
+ */
+ mov %o5, %g1
+
+ mov HV_FAST_MMU_TSB_CTXNON0, %o5
+ mov 1, %o0
+ mov %o4, %o1
+ ta HV_FAST_TRAP
+
+ /* Finish up and restore %o5. */
+ ba,pt %xcc, 9f
+ mov %g1, %o5
+
+ /* SUN4U TSB switch. */
+1: mov TSB_REG, %g1
+ stxa %o1, [%g1] ASI_DMMU
+ membar #Sync
+ stxa %o1, [%g1] ASI_IMMU
+ membar #Sync
+
+2: brz %o2, 9f
+ nop
+
+ sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2
+ mov TLB_TAG_ACCESS, %g1
+ lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2
+ stxa %o2, [%g1] ASI_DMMU
+ membar #Sync
+ sllx %g2, 3, %g2
+ stxa %o3, [%g2] ASI_DTLB_DATA_ACCESS
+ membar #Sync
+9:
+ wrpr %o5, %pstate
+
+ retl
+ nop
+ .size __tsb_context_switch, .-__tsb_context_switch
+
+#define TSB_PASS_BITS ((1 << TSB_TAG_LOCK_BIT) | \
+ (1 << TSB_TAG_INVALID_BIT))
+
+ .align 32
+ .globl copy_tsb
+ .type copy_tsb,#function
+copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
+ * %o2=new_tsb_base, %o3=new_tsb_size
+ */
+ sethi %uhi(TSB_PASS_BITS), %g7
+ srlx %o3, 4, %o3
+ add %o0, %o1, %g1 /* end of old tsb */
+ sllx %g7, 32, %g7
+ sub %o3, 1, %o3 /* %o3 == new tsb hash mask */
+
+661: prefetcha [%o0] ASI_N, #one_read
+ .section .tsb_phys_patch, "ax"
+ .word 661b
+ prefetcha [%o0] ASI_PHYS_USE_EC, #one_read
+ .previous
+
+90: andcc %o0, (64 - 1), %g0
+ bne 1f
+ add %o0, 64, %o5
+
+661: prefetcha [%o5] ASI_N, #one_read
+ .section .tsb_phys_patch, "ax"
+ .word 661b
+ prefetcha [%o5] ASI_PHYS_USE_EC, #one_read
+ .previous
+
+1: TSB_LOAD_QUAD(%o0, %g2) /* %g2/%g3 == TSB entry */
+ andcc %g2, %g7, %g0 /* LOCK or INVALID set? */
+ bne,pn %xcc, 80f /* Skip it */
+ sllx %g2, 22, %o4 /* TAG --> VADDR */
+
+ /* This can definitely be computed faster... */
+ srlx %o0, 4, %o5 /* Build index */
+ and %o5, 511, %o5 /* Mask index */
+ sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
+ or %o4, %o5, %o4 /* Full VADDR. */
+ srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */
+ and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */
+ sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */
+ TSB_STORE(%o2 + %o4, %g2) /* Store TAG */
+ add %o4, 0x8, %o4 /* Advance to TTE */
+ TSB_STORE(%o2 + %o4, %g3) /* Store TTE */
+
+80: add %o0, 16, %o0
+ cmp %o0, %g1
+ bne,pt %xcc, 90b
+ nop
+
+ retl
+ TSB_MEMBAR
+ .size copy_tsb, .-copy_tsb
+
+ /* Set the invalid bit in all TSB entries. */
+ .align 32
+ .globl tsb_init
+ .type tsb_init,#function
+tsb_init: /* %o0 = TSB vaddr, %o1 = size in bytes */
+ prefetch [%o0 + 0x000], #n_writes
+ mov 1, %g1
+ prefetch [%o0 + 0x040], #n_writes
+ sllx %g1, TSB_TAG_INVALID_BIT, %g1
+ prefetch [%o0 + 0x080], #n_writes
+1: prefetch [%o0 + 0x0c0], #n_writes
+ stx %g1, [%o0 + 0x00]
+ stx %g1, [%o0 + 0x10]
+ stx %g1, [%o0 + 0x20]
+ stx %g1, [%o0 + 0x30]
+ prefetch [%o0 + 0x100], #n_writes
+ stx %g1, [%o0 + 0x40]
+ stx %g1, [%o0 + 0x50]
+ stx %g1, [%o0 + 0x60]
+ stx %g1, [%o0 + 0x70]
+ prefetch [%o0 + 0x140], #n_writes
+ stx %g1, [%o0 + 0x80]
+ stx %g1, [%o0 + 0x90]
+ stx %g1, [%o0 + 0xa0]
+ stx %g1, [%o0 + 0xb0]
+ prefetch [%o0 + 0x180], #n_writes
+ stx %g1, [%o0 + 0xc0]
+ stx %g1, [%o0 + 0xd0]
+ stx %g1, [%o0 + 0xe0]
+ stx %g1, [%o0 + 0xf0]
+ subcc %o1, 0x100, %o1
+ bne,pt %xcc, 1b
+ add %o0, 0x100, %o0
+ retl
+ nop
+ nop
+ nop
+ .size tsb_init, .-tsb_init
+
+ .globl NGtsb_init
+ .type NGtsb_init,#function
+NGtsb_init:
+ rd %asi, %g2
+ mov 1, %g1
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ sllx %g1, TSB_TAG_INVALID_BIT, %g1
+1: stxa %g1, [%o0 + 0x00] %asi
+ stxa %g1, [%o0 + 0x10] %asi
+ stxa %g1, [%o0 + 0x20] %asi
+ stxa %g1, [%o0 + 0x30] %asi
+ stxa %g1, [%o0 + 0x40] %asi
+ stxa %g1, [%o0 + 0x50] %asi
+ stxa %g1, [%o0 + 0x60] %asi
+ stxa %g1, [%o0 + 0x70] %asi
+ stxa %g1, [%o0 + 0x80] %asi
+ stxa %g1, [%o0 + 0x90] %asi
+ stxa %g1, [%o0 + 0xa0] %asi
+ stxa %g1, [%o0 + 0xb0] %asi
+ stxa %g1, [%o0 + 0xc0] %asi
+ stxa %g1, [%o0 + 0xd0] %asi
+ stxa %g1, [%o0 + 0xe0] %asi
+ stxa %g1, [%o0 + 0xf0] %asi
+ subcc %o1, 0x100, %o1
+ bne,pt %xcc, 1b
+ add %o0, 0x100, %o0
+ retl
+ wr %g2, 0x0, %asi
+ .size NGtsb_init, .-NGtsb_init
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
index 8365bc1f81f3..5d901519db55 100644
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -1,7 +1,6 @@
-/* $Id: ttable.S,v 1.38 2002/02/09 19:49:30 davem Exp $
- * ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah extensions.
+/* ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah/SUN4V extensions.
*
- * Copyright (C) 1996, 2001 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 2001, 2006 David S. Miller (davem@davemloft.net)
*/
#include <linux/config.h>
@@ -19,7 +18,7 @@ tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3)
tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7)
tl0_iax: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception)
-tl0_resv009: BTRAP(0x9)
+tl0_itsb_4v: SUN4V_ITSB_MISS
tl0_iae: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_access_error)
tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf)
@@ -38,7 +37,7 @@ tl0_div0: TRAP(do_div0)
tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e)
tl0_resv02f: BTRAP(0x2f)
tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception)
-tl0_resv031: BTRAP(0x31)
+tl0_dtsb_4v: SUN4V_DTSB_MISS
tl0_dae: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_access_error)
tl0_resv033: BTRAP(0x33)
@@ -52,12 +51,13 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40)
tl0_irq1: TRAP_IRQ(smp_call_function_client, 1)
tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2)
tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3)
+tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4)
#else
tl0_irq1: BTRAP(0x41)
tl0_irq2: BTRAP(0x42)
tl0_irq3: BTRAP(0x43)
+tl0_irq4: BTRAP(0x44)
#endif
-tl0_irq4: TRAP_IRQ(handler_irq, 4)
tl0_irq5: TRAP_IRQ(handler_irq, 5) TRAP_IRQ(handler_irq, 6)
tl0_irq7: TRAP_IRQ(handler_irq, 7) TRAP_IRQ(handler_irq, 8)
tl0_irq9: TRAP_IRQ(handler_irq, 9) TRAP_IRQ(handler_irq, 10)
@@ -78,9 +78,9 @@ tl0_vaw: TRAP(do_vaw)
tl0_cee: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_cee_trap)
tl0_iamiss:
-#include "itlb_base.S"
+#include "itlb_miss.S"
tl0_damiss:
-#include "dtlb_base.S"
+#include "dtlb_miss.S"
tl0_daprot:
#include "dtlb_prot.S"
tl0_fecc: BTRAP(0x70) /* Fast-ECC on Cheetah */
@@ -88,15 +88,18 @@ tl0_dcpe: BTRAP(0x71) /* D-cache Parity Error on Cheetah+ */
tl0_icpe: BTRAP(0x72) /* I-cache Parity Error on Cheetah+ */
tl0_resv073: BTRAP(0x73) BTRAP(0x74) BTRAP(0x75)
tl0_resv076: BTRAP(0x76) BTRAP(0x77) BTRAP(0x78) BTRAP(0x79) BTRAP(0x7a) BTRAP(0x7b)
-tl0_resv07c: BTRAP(0x7c) BTRAP(0x7d) BTRAP(0x7e) BTRAP(0x7f)
+tl0_cpu_mondo: TRAP_NOSAVE(sun4v_cpu_mondo)
+tl0_dev_mondo: TRAP_NOSAVE(sun4v_dev_mondo)
+tl0_res_mondo: TRAP_NOSAVE(sun4v_res_mondo)
+tl0_nres_mondo: TRAP_NOSAVE(sun4v_nonres_mondo)
tl0_s0n: SPILL_0_NORMAL
tl0_s1n: SPILL_1_NORMAL
tl0_s2n: SPILL_2_NORMAL
-tl0_s3n: SPILL_3_NORMAL
-tl0_s4n: SPILL_4_NORMAL
-tl0_s5n: SPILL_5_NORMAL
-tl0_s6n: SPILL_6_NORMAL
-tl0_s7n: SPILL_7_NORMAL
+tl0_s3n: SPILL_0_NORMAL_ETRAP
+tl0_s4n: SPILL_1_GENERIC_ETRAP
+tl0_s5n: SPILL_1_GENERIC_ETRAP_FIXUP
+tl0_s6n: SPILL_2_GENERIC_ETRAP
+tl0_s7n: SPILL_2_GENERIC_ETRAP_FIXUP
tl0_s0o: SPILL_0_OTHER
tl0_s1o: SPILL_1_OTHER
tl0_s2o: SPILL_2_OTHER
@@ -110,9 +113,9 @@ tl0_f1n: FILL_1_NORMAL
tl0_f2n: FILL_2_NORMAL
tl0_f3n: FILL_3_NORMAL
tl0_f4n: FILL_4_NORMAL
-tl0_f5n: FILL_5_NORMAL
-tl0_f6n: FILL_6_NORMAL
-tl0_f7n: FILL_7_NORMAL
+tl0_f5n: FILL_0_NORMAL_RTRAP
+tl0_f6n: FILL_1_GENERIC_RTRAP
+tl0_f7n: FILL_2_GENERIC_RTRAP
tl0_f0o: FILL_0_OTHER
tl0_f1o: FILL_1_OTHER
tl0_f2o: FILL_2_OTHER
@@ -128,7 +131,7 @@ tl0_flushw: FLUSH_WINDOW_TRAP
tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107)
.globl tl0_solaris
tl0_solaris: SOLARIS_SYSCALL_TRAP
-tl0_netbsd: NETBSD_SYSCALL_TRAP
+tl0_resv109: BTRAP(0x109)
tl0_resv10a: BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e)
tl0_resv10f: BTRAP(0x10f)
tl0_linux32: LINUX_32BIT_SYSCALL_TRAP
@@ -179,7 +182,7 @@ sparc64_ttable_tl1:
tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3)
tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7)
tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1)
-tl1_resv009: BTRAPTL1(0x9)
+tl1_itsb_4v: SUN4V_ITSB_MISS
tl1_iae: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_access_error)
tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf)
@@ -198,7 +201,7 @@ tl1_div0: TRAPTL1(do_div0_tl1)
tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c)
tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f)
tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1)
-tl1_resv031: BTRAPTL1(0x31)
+tl1_dtsb_4v: SUN4V_DTSB_MISS
tl1_dae: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_access_error)
tl1_resv033: BTRAPTL1(0x33)
@@ -222,26 +225,10 @@ tl1_resv05c: BTRAPTL1(0x5c) BTRAPTL1(0x5d) BTRAPTL1(0x5e) BTRAPTL1(0x5f)
tl1_ivec: TRAP_IVEC
tl1_paw: TRAPTL1(do_paw_tl1)
tl1_vaw: TRAPTL1(do_vaw_tl1)
-
- /* The grotty trick to save %g1 into current->thread.cee_stuff
- * is because when we take this trap we could be interrupting
- * trap code already using the trap alternate global registers.
- *
- * We cross our fingers and pray that this store/load does
- * not cause yet another CEE trap.
- */
-tl1_cee: membar #Sync
- stx %g1, [%g6 + TI_CEE_STUFF]
- ldxa [%g0] ASI_AFSR, %g1
- membar #Sync
- stxa %g1, [%g0] ASI_AFSR
- membar #Sync
- ldx [%g6 + TI_CEE_STUFF], %g1
- retry
-
+tl1_cee: BTRAPTL1(0x63)
tl1_iamiss: BTRAPTL1(0x64) BTRAPTL1(0x65) BTRAPTL1(0x66) BTRAPTL1(0x67)
tl1_damiss:
-#include "dtlb_backend.S"
+#include "dtlb_miss.S"
tl1_daprot:
#include "dtlb_prot.S"
tl1_fecc: BTRAPTL1(0x70) /* Fast-ECC on Cheetah */
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
index 70faf630603b..001e8518331f 100644
--- a/arch/sparc64/kernel/unaligned.c
+++ b/arch/sparc64/kernel/unaligned.c
@@ -277,7 +277,7 @@ static void kernel_mna_trap_fault(void)
regs->tstate |= (ASI_AIUS << 24UL);
}
-asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, unsigned long sfar, unsigned long sfsr)
+asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
{
enum direction dir = decode_direction(insn);
int size = decode_access_size(insn);
@@ -405,6 +405,9 @@ extern void do_privact(struct pt_regs *regs);
extern void spitfire_data_access_exception(struct pt_regs *regs,
unsigned long sfsr,
unsigned long sfar);
+extern void sun4v_data_access_exception(struct pt_regs *regs,
+ unsigned long addr,
+ unsigned long type_ctx);
int handle_ldf_stq(u32 insn, struct pt_regs *regs)
{
@@ -447,14 +450,20 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
break;
}
default:
- spitfire_data_access_exception(regs, 0, addr);
+ if (tlb_type == hypervisor)
+ sun4v_data_access_exception(regs, addr, 0);
+ else
+ spitfire_data_access_exception(regs, 0, addr);
return 1;
}
if (put_user (first >> 32, (u32 __user *)addr) ||
__put_user ((u32)first, (u32 __user *)(addr + 4)) ||
__put_user (second >> 32, (u32 __user *)(addr + 8)) ||
__put_user ((u32)second, (u32 __user *)(addr + 12))) {
- spitfire_data_access_exception(regs, 0, addr);
+ if (tlb_type == hypervisor)
+ sun4v_data_access_exception(regs, addr, 0);
+ else
+ spitfire_data_access_exception(regs, 0, addr);
return 1;
}
} else {
@@ -467,7 +476,10 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
do_privact(regs);
return 1;
} else if (asi > ASI_SNFL) {
- spitfire_data_access_exception(regs, 0, addr);
+ if (tlb_type == hypervisor)
+ sun4v_data_access_exception(regs, addr, 0);
+ else
+ spitfire_data_access_exception(regs, 0, addr);
return 1;
}
switch (insn & 0x180000) {
@@ -484,7 +496,10 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
err |= __get_user (data[i], (u32 __user *)(addr + 4*i));
}
if (err && !(asi & 0x2 /* NF */)) {
- spitfire_data_access_exception(regs, 0, addr);
+ if (tlb_type == hypervisor)
+ sun4v_data_access_exception(regs, addr, 0);
+ else
+ spitfire_data_access_exception(regs, 0, addr);
return 1;
}
if (asi & 0x8) /* Little */ {
@@ -548,7 +563,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
u32 insn;
u32 first, second;
u64 value;
- u8 asi, freg;
+ u8 freg;
int flag;
struct fpustate *f = FPUSTATE;
@@ -557,7 +572,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
if (test_thread_flag(TIF_32BIT))
pc = (u32)pc;
if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
- asi = sfsr >> 16;
+ int asi = decode_asi(insn, regs);
if ((asi > ASI_SNFL) ||
(asi < ASI_P))
goto daex;
@@ -587,7 +602,11 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
*(u64 *)(f->regs + freg) = value;
current_thread_info()->fpsaved[0] |= flag;
} else {
-daex: spitfire_data_access_exception(regs, sfsr, sfar);
+daex:
+ if (tlb_type == hypervisor)
+ sun4v_data_access_exception(regs, sfar, sfsr);
+ else
+ spitfire_data_access_exception(regs, sfsr, sfar);
return;
}
advance(regs);
@@ -600,7 +619,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
unsigned long tstate = regs->tstate;
u32 insn;
u64 value;
- u8 asi, freg;
+ u8 freg;
int flag;
struct fpustate *f = FPUSTATE;
@@ -609,8 +628,8 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
if (test_thread_flag(TIF_32BIT))
pc = (u32)pc;
if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
+ int asi = decode_asi(insn, regs);
freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
- asi = sfsr >> 16;
value = 0;
flag = (freg < 32) ? FPRS_DL : FPRS_DU;
if ((asi > ASI_SNFL) ||
@@ -631,7 +650,11 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
__put_user ((u32)value, (u32 __user *)(sfar + 4)))
goto daex;
} else {
-daex: spitfire_data_access_exception(regs, sfsr, sfar);
+daex:
+ if (tlb_type == hypervisor)
+ sun4v_data_access_exception(regs, sfar, sfsr);
+ else
+ spitfire_data_access_exception(regs, sfsr, sfar);
return;
}
advance(regs);
diff --git a/arch/sparc64/kernel/us2e_cpufreq.c b/arch/sparc64/kernel/us2e_cpufreq.c
index b35dc8dc995a..1f83fe6a82d6 100644
--- a/arch/sparc64/kernel/us2e_cpufreq.c
+++ b/arch/sparc64/kernel/us2e_cpufreq.c
@@ -346,6 +346,9 @@ static int __init us2e_freq_init(void)
unsigned long manuf, impl, ver;
int ret;
+ if (tlb_type != spitfire)
+ return -ENODEV;
+
__asm__("rdpr %%ver, %0" : "=r" (ver));
manuf = ((ver >> 48) & 0xffff);
impl = ((ver >> 32) & 0xffff);
@@ -354,20 +357,16 @@ static int __init us2e_freq_init(void)
struct cpufreq_driver *driver;
ret = -ENOMEM;
- driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
+ driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
if (!driver)
goto err_out;
- memset(driver, 0, sizeof(*driver));
- us2e_freq_table = kmalloc(
+ us2e_freq_table = kzalloc(
(NR_CPUS * sizeof(struct us2e_freq_percpu_info)),
GFP_KERNEL);
if (!us2e_freq_table)
goto err_out;
- memset(us2e_freq_table, 0,
- (NR_CPUS * sizeof(struct us2e_freq_percpu_info)));
-
driver->init = us2e_freq_cpu_init;
driver->verify = us2e_freq_verify;
driver->target = us2e_freq_target;
diff --git a/arch/sparc64/kernel/us3_cpufreq.c b/arch/sparc64/kernel/us3_cpufreq.c
index 6d1f9a3c464f..47e3acafb5be 100644
--- a/arch/sparc64/kernel/us3_cpufreq.c
+++ b/arch/sparc64/kernel/us3_cpufreq.c
@@ -203,6 +203,9 @@ static int __init us3_freq_init(void)
unsigned long manuf, impl, ver;
int ret;
+ if (tlb_type != cheetah && tlb_type != cheetah_plus)
+ return -ENODEV;
+
__asm__("rdpr %%ver, %0" : "=r" (ver));
manuf = ((ver >> 48) & 0xffff);
impl = ((ver >> 32) & 0xffff);
@@ -215,20 +218,16 @@ static int __init us3_freq_init(void)
struct cpufreq_driver *driver;
ret = -ENOMEM;
- driver = kmalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
+ driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL);
if (!driver)
goto err_out;
- memset(driver, 0, sizeof(*driver));
- us3_freq_table = kmalloc(
+ us3_freq_table = kzalloc(
(NR_CPUS * sizeof(struct us3_freq_percpu_info)),
GFP_KERNEL);
if (!us3_freq_table)
goto err_out;
- memset(us3_freq_table, 0,
- (NR_CPUS * sizeof(struct us3_freq_percpu_info)));
-
driver->init = us3_freq_cpu_init;
driver->verify = us3_freq_verify;
driver->target = us3_freq_target;
diff --git a/arch/sparc64/kernel/visemul.c b/arch/sparc64/kernel/visemul.c
new file mode 100644
index 000000000000..84fedaa38aae
--- /dev/null
+++ b/arch/sparc64/kernel/visemul.c
@@ -0,0 +1,894 @@
+/* visemul.c: Emulation of VIS instructions.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/thread_info.h>
+
+#include <asm/ptrace.h>
+#include <asm/pstate.h>
+#include <asm/system.h>
+#include <asm/fpumacro.h>
+#include <asm/uaccess.h>
+
+/* OPF field of various VIS instructions. */
+
+/* 000111011 - four 16-bit packs */
+#define FPACK16_OPF 0x03b
+
+/* 000111010 - two 32-bit packs */
+#define FPACK32_OPF 0x03a
+
+/* 000111101 - four 16-bit packs */
+#define FPACKFIX_OPF 0x03d
+
+/* 001001101 - four 16-bit expands */
+#define FEXPAND_OPF 0x04d
+
+/* 001001011 - two 32-bit merges */
+#define FPMERGE_OPF 0x04b
+
+/* 000110001 - 8-by-16-bit partitoned product */
+#define FMUL8x16_OPF 0x031
+
+/* 000110011 - 8-by-16-bit upper alpha partitioned product */
+#define FMUL8x16AU_OPF 0x033
+
+/* 000110101 - 8-by-16-bit lower alpha partitioned product */
+#define FMUL8x16AL_OPF 0x035
+
+/* 000110110 - upper 8-by-16-bit partitioned product */
+#define FMUL8SUx16_OPF 0x036
+
+/* 000110111 - lower 8-by-16-bit partitioned product */
+#define FMUL8ULx16_OPF 0x037
+
+/* 000111000 - upper 8-by-16-bit partitioned product */
+#define FMULD8SUx16_OPF 0x038
+
+/* 000111001 - lower unsigned 8-by-16-bit partitioned product */
+#define FMULD8ULx16_OPF 0x039
+
+/* 000101000 - four 16-bit compare; set rd if src1 > src2 */
+#define FCMPGT16_OPF 0x028
+
+/* 000101100 - two 32-bit compare; set rd if src1 > src2 */
+#define FCMPGT32_OPF 0x02c
+
+/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
+#define FCMPLE16_OPF 0x020
+
+/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
+#define FCMPLE32_OPF 0x024
+
+/* 000100010 - four 16-bit compare; set rd if src1 != src2 */
+#define FCMPNE16_OPF 0x022
+
+/* 000100110 - two 32-bit compare; set rd if src1 != src2 */
+#define FCMPNE32_OPF 0x026
+
+/* 000101010 - four 16-bit compare; set rd if src1 == src2 */
+#define FCMPEQ16_OPF 0x02a
+
+/* 000101110 - two 32-bit compare; set rd if src1 == src2 */
+#define FCMPEQ32_OPF 0x02e
+
+/* 000000000 - Eight 8-bit edge boundary processing */
+#define EDGE8_OPF 0x000
+
+/* 000000001 - Eight 8-bit edge boundary processing, no CC */
+#define EDGE8N_OPF 0x001
+
+/* 000000010 - Eight 8-bit edge boundary processing, little-endian */
+#define EDGE8L_OPF 0x002
+
+/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
+#define EDGE8LN_OPF 0x003
+
+/* 000000100 - Four 16-bit edge boundary processing */
+#define EDGE16_OPF 0x004
+
+/* 000000101 - Four 16-bit edge boundary processing, no CC */
+#define EDGE16N_OPF 0x005
+
+/* 000000110 - Four 16-bit edge boundary processing, little-endian */
+#define EDGE16L_OPF 0x006
+
+/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
+#define EDGE16LN_OPF 0x007
+
+/* 000001000 - Two 32-bit edge boundary processing */
+#define EDGE32_OPF 0x008
+
+/* 000001001 - Two 32-bit edge boundary processing, no CC */
+#define EDGE32N_OPF 0x009
+
+/* 000001010 - Two 32-bit edge boundary processing, little-endian */
+#define EDGE32L_OPF 0x00a
+
+/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
+#define EDGE32LN_OPF 0x00b
+
+/* 000111110 - distance between 8 8-bit components */
+#define PDIST_OPF 0x03e
+
+/* 000010000 - convert 8-bit 3-D address to blocked byte address */
+#define ARRAY8_OPF 0x010
+
+/* 000010010 - convert 16-bit 3-D address to blocked byte address */
+#define ARRAY16_OPF 0x012
+
+/* 000010100 - convert 32-bit 3-D address to blocked byte address */
+#define ARRAY32_OPF 0x014
+
+/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
+#define BMASK_OPF 0x019
+
+/* 001001100 - Permute bytes as specified by GSR.MASK */
+#define BSHUFFLE_OPF 0x04c
+
+#define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19))
+#define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19))
+
+#define VIS_OPF_SHIFT 5
+#define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT)
+
+#define RS1(INSN) (((INSN) >> 24) & 0x1f)
+#define RS2(INSN) (((INSN) >> 0) & 0x1f)
+#define RD(INSN) (((INSN) >> 25) & 0x1f)
+
+static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
+ unsigned int rd, int from_kernel)
+{
+ if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
+ if (from_kernel != 0)
+ __asm__ __volatile__("flushw");
+ else
+ flushw_user();
+ }
+}
+
+static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
+{
+ unsigned long value;
+
+ if (reg < 16)
+ return (!reg ? 0 : regs->u_regs[reg]);
+ if (regs->tstate & TSTATE_PRIV) {
+ struct reg_window *win;
+ win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+ value = win->locals[reg - 16];
+ } else if (test_thread_flag(TIF_32BIT)) {
+ struct reg_window32 __user *win32;
+ win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+ get_user(value, &win32->locals[reg - 16]);
+ } else {
+ struct reg_window __user *win;
+ win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+ get_user(value, &win->locals[reg - 16]);
+ }
+ return value;
+}
+
+static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
+ struct pt_regs *regs)
+{
+ BUG_ON(reg < 16);
+ BUG_ON(regs->tstate & TSTATE_PRIV);
+
+ if (test_thread_flag(TIF_32BIT)) {
+ struct reg_window32 __user *win32;
+ win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+ return (unsigned long __user *)&win32->locals[reg - 16];
+ } else {
+ struct reg_window __user *win;
+ win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+ return &win->locals[reg - 16];
+ }
+}
+
+static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
+ struct pt_regs *regs)
+{
+ BUG_ON(reg >= 16);
+ BUG_ON(regs->tstate & TSTATE_PRIV);
+
+ return &regs->u_regs[reg];
+}
+
+static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
+{
+ if (rd < 16) {
+ unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
+
+ *rd_kern = val;
+ } else {
+ unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
+
+ if (test_thread_flag(TIF_32BIT))
+ __put_user((u32)val, (u32 __user *)rd_user);
+ else
+ __put_user(val, rd_user);
+ }
+}
+
+static inline unsigned long fpd_regval(struct fpustate *f,
+ unsigned int insn_regnum)
+{
+ insn_regnum = (((insn_regnum & 1) << 5) |
+ (insn_regnum & 0x1e));
+
+ return *(unsigned long *) &f->regs[insn_regnum];
+}
+
+static inline unsigned long *fpd_regaddr(struct fpustate *f,
+ unsigned int insn_regnum)
+{
+ insn_regnum = (((insn_regnum & 1) << 5) |
+ (insn_regnum & 0x1e));
+
+ return (unsigned long *) &f->regs[insn_regnum];
+}
+
+static inline unsigned int fps_regval(struct fpustate *f,
+ unsigned int insn_regnum)
+{
+ return f->regs[insn_regnum];
+}
+
+static inline unsigned int *fps_regaddr(struct fpustate *f,
+ unsigned int insn_regnum)
+{
+ return &f->regs[insn_regnum];
+}
+
+struct edge_tab {
+ u16 left, right;
+};
+struct edge_tab edge8_tab[8] = {
+ { 0xff, 0x80 },
+ { 0x7f, 0xc0 },
+ { 0x3f, 0xe0 },
+ { 0x1f, 0xf0 },
+ { 0x0f, 0xf8 },
+ { 0x07, 0xfc },
+ { 0x03, 0xfe },
+ { 0x01, 0xff },
+};
+struct edge_tab edge8_tab_l[8] = {
+ { 0xff, 0x01 },
+ { 0xfe, 0x03 },
+ { 0xfc, 0x07 },
+ { 0xf8, 0x0f },
+ { 0xf0, 0x1f },
+ { 0xe0, 0x3f },
+ { 0xc0, 0x7f },
+ { 0x80, 0xff },
+};
+struct edge_tab edge16_tab[4] = {
+ { 0xf, 0x8 },
+ { 0x7, 0xc },
+ { 0x3, 0xe },
+ { 0x1, 0xf },
+};
+struct edge_tab edge16_tab_l[4] = {
+ { 0xf, 0x1 },
+ { 0xe, 0x3 },
+ { 0xc, 0x7 },
+ { 0x8, 0xf },
+};
+struct edge_tab edge32_tab[2] = {
+ { 0x3, 0x2 },
+ { 0x1, 0x3 },
+};
+struct edge_tab edge32_tab_l[2] = {
+ { 0x3, 0x1 },
+ { 0x2, 0x3 },
+};
+
+static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+ unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
+ u16 left, right;
+
+ maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
+ orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
+ orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
+
+ if (test_thread_flag(TIF_32BIT)) {
+ rs1 = rs1 & 0xffffffff;
+ rs2 = rs2 & 0xffffffff;
+ }
+ switch (opf) {
+ default:
+ case EDGE8_OPF:
+ case EDGE8N_OPF:
+ left = edge8_tab[rs1 & 0x7].left;
+ right = edge8_tab[rs2 & 0x7].right;
+ break;
+ case EDGE8L_OPF:
+ case EDGE8LN_OPF:
+ left = edge8_tab_l[rs1 & 0x7].left;
+ right = edge8_tab_l[rs2 & 0x7].right;
+ break;
+
+ case EDGE16_OPF:
+ case EDGE16N_OPF:
+ left = edge16_tab[(rs1 >> 1) & 0x3].left;
+ right = edge16_tab[(rs2 >> 1) & 0x3].right;
+ break;
+
+ case EDGE16L_OPF:
+ case EDGE16LN_OPF:
+ left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
+ right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
+ break;
+
+ case EDGE32_OPF:
+ case EDGE32N_OPF:
+ left = edge32_tab[(rs1 >> 2) & 0x1].left;
+ right = edge32_tab[(rs2 >> 2) & 0x1].right;
+ break;
+
+ case EDGE32L_OPF:
+ case EDGE32LN_OPF:
+ left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
+ right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
+ break;
+ };
+
+ if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
+ rd_val = right & left;
+ else
+ rd_val = left;
+
+ store_reg(regs, rd_val, RD(insn));
+
+ switch (opf) {
+ case EDGE8_OPF:
+ case EDGE8L_OPF:
+ case EDGE16_OPF:
+ case EDGE16L_OPF:
+ case EDGE32_OPF:
+ case EDGE32L_OPF: {
+ unsigned long ccr, tstate;
+
+ __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
+ "rd %%ccr, %0"
+ : "=r" (ccr)
+ : "r" (orig_rs1), "r" (orig_rs2)
+ : "cc");
+ tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
+ regs->tstate = tstate | (ccr << 32UL);
+ }
+ };
+}
+
+static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+ unsigned long rs1, rs2, rd_val;
+ unsigned int bits, bits_mask;
+
+ maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
+ rs1 = fetch_reg(RS1(insn), regs);
+ rs2 = fetch_reg(RS2(insn), regs);
+
+ bits = (rs2 > 5 ? 5 : rs2);
+ bits_mask = (1UL << bits) - 1UL;
+
+ rd_val = ((((rs1 >> 11) & 0x3) << 0) |
+ (((rs1 >> 33) & 0x3) << 2) |
+ (((rs1 >> 55) & 0x1) << 4) |
+ (((rs1 >> 13) & 0xf) << 5) |
+ (((rs1 >> 35) & 0xf) << 9) |
+ (((rs1 >> 56) & 0xf) << 13) |
+ (((rs1 >> 17) & bits_mask) << 17) |
+ (((rs1 >> 39) & bits_mask) << (17 + bits)) |
+ (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
+
+ switch (opf) {
+ case ARRAY16_OPF:
+ rd_val <<= 1;
+ break;
+
+ case ARRAY32_OPF:
+ rd_val <<= 2;
+ };
+
+ store_reg(regs, rd_val, RD(insn));
+}
+
+static void bmask(struct pt_regs *regs, unsigned int insn)
+{
+ unsigned long rs1, rs2, rd_val, gsr;
+
+ maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
+ rs1 = fetch_reg(RS1(insn), regs);
+ rs2 = fetch_reg(RS2(insn), regs);
+ rd_val = rs1 + rs2;
+
+ store_reg(regs, rd_val, RD(insn));
+
+ gsr = current_thread_info()->gsr[0] & 0xffffffff;
+ gsr |= rd_val << 32UL;
+ current_thread_info()->gsr[0] = gsr;
+}
+
+static void bshuffle(struct pt_regs *regs, unsigned int insn)
+{
+ struct fpustate *f = FPUSTATE;
+ unsigned long rs1, rs2, rd_val;
+ unsigned long bmask, i;
+
+ bmask = current_thread_info()->gsr[0] >> 32UL;
+
+ rs1 = fpd_regval(f, RS1(insn));
+ rs2 = fpd_regval(f, RS2(insn));
+
+ rd_val = 0UL;
+ for (i = 0; i < 8; i++) {
+ unsigned long which = (bmask >> (i * 4)) & 0xf;
+ unsigned long byte;
+
+ if (which < 8)
+ byte = (rs1 >> (which * 8)) & 0xff;
+ else
+ byte = (rs2 >> ((which-8)*8)) & 0xff;
+ rd_val |= (byte << (i * 8));
+ }
+
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+}
+
+static void pdist(struct pt_regs *regs, unsigned int insn)
+{
+ struct fpustate *f = FPUSTATE;
+ unsigned long rs1, rs2, *rd, rd_val;
+ unsigned long i;
+
+ rs1 = fpd_regval(f, RS1(insn));
+ rs2 = fpd_regval(f, RS1(insn));
+ rd = fpd_regaddr(f, RD(insn));
+
+ rd_val = *rd;
+
+ for (i = 0; i < 8; i++) {
+ s16 s1, s2;
+
+ s1 = (rs1 >> (56 - (i * 8))) & 0xff;
+ s2 = (rs2 >> (56 - (i * 8))) & 0xff;
+
+ /* Absolute value of difference. */
+ s1 -= s2;
+ if (s1 < 0)
+ s1 = ~s1 + 1;
+
+ rd_val += s1;
+ }
+
+ *rd = rd_val;
+}
+
+static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+ struct fpustate *f = FPUSTATE;
+ unsigned long rs1, rs2, gsr, scale, rd_val;
+
+ gsr = current_thread_info()->gsr[0];
+ scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
+ switch (opf) {
+ case FPACK16_OPF: {
+ unsigned long byte;
+
+ rs2 = fpd_regval(f, RS2(insn));
+ rd_val = 0;
+ for (byte = 0; byte < 4; byte++) {
+ unsigned int val;
+ s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
+ int scaled = src << scale;
+ int from_fixed = scaled >> 7;
+
+ val = ((from_fixed < 0) ?
+ 0 :
+ (from_fixed > 255) ?
+ 255 : from_fixed);
+
+ rd_val |= (val << (8 * byte));
+ }
+ *fps_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FPACK32_OPF: {
+ unsigned long word;
+
+ rs1 = fpd_regval(f, RS1(insn));
+ rs2 = fpd_regval(f, RS2(insn));
+ rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
+ for (word = 0; word < 2; word++) {
+ unsigned long val;
+ s32 src = (rs2 >> (word * 32UL));
+ s64 scaled = src << scale;
+ s64 from_fixed = scaled >> 23;
+
+ val = ((from_fixed < 0) ?
+ 0 :
+ (from_fixed > 255) ?
+ 255 : from_fixed);
+
+ rd_val |= (val << (32 * word));
+ }
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FPACKFIX_OPF: {
+ unsigned long word;
+
+ rs2 = fpd_regval(f, RS2(insn));
+
+ rd_val = 0;
+ for (word = 0; word < 2; word++) {
+ long val;
+ s32 src = (rs2 >> (word * 32UL));
+ s64 scaled = src << scale;
+ s64 from_fixed = scaled >> 16;
+
+ val = ((from_fixed < -32768) ?
+ -32768 :
+ (from_fixed > 32767) ?
+ 32767 : from_fixed);
+
+ rd_val |= ((val & 0xffff) << (word * 16));
+ }
+ *fps_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FEXPAND_OPF: {
+ unsigned long byte;
+
+ rs2 = fps_regval(f, RS2(insn));
+
+ rd_val = 0;
+ for (byte = 0; byte < 4; byte++) {
+ unsigned long val;
+ u8 src = (rs2 >> (byte * 8)) & 0xff;
+
+ val = src << 4;
+
+ rd_val |= (val << (byte * 16));
+ }
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FPMERGE_OPF: {
+ rs1 = fps_regval(f, RS1(insn));
+ rs2 = fps_regval(f, RS2(insn));
+
+ rd_val = (((rs2 & 0x000000ff) << 0) |
+ ((rs1 & 0x000000ff) << 8) |
+ ((rs2 & 0x0000ff00) << 8) |
+ ((rs1 & 0x0000ff00) << 16) |
+ ((rs2 & 0x00ff0000) << 16) |
+ ((rs1 & 0x00ff0000) << 24) |
+ ((rs2 & 0xff000000) << 24) |
+ ((rs1 & 0xff000000) << 32));
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+ };
+}
+
+static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+ struct fpustate *f = FPUSTATE;
+ unsigned long rs1, rs2, rd_val;
+
+ switch (opf) {
+ case FMUL8x16_OPF: {
+ unsigned long byte;
+
+ rs1 = fps_regval(f, RS1(insn));
+ rs2 = fpd_regval(f, RS2(insn));
+
+ rd_val = 0;
+ for (byte = 0; byte < 4; byte++) {
+ u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
+ s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
+ u32 prod = src1 * src2;
+ u16 scaled = ((prod & 0x00ffff00) >> 8);
+
+ /* Round up. */
+ if (prod & 0x80)
+ scaled++;
+ rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
+ }
+
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FMUL8x16AU_OPF:
+ case FMUL8x16AL_OPF: {
+ unsigned long byte;
+ s16 src2;
+
+ rs1 = fps_regval(f, RS1(insn));
+ rs2 = fps_regval(f, RS2(insn));
+
+ rd_val = 0;
+ src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0);
+ for (byte = 0; byte < 4; byte++) {
+ u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
+ u32 prod = src1 * src2;
+ u16 scaled = ((prod & 0x00ffff00) >> 8);
+
+ /* Round up. */
+ if (prod & 0x80)
+ scaled++;
+ rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
+ }
+
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FMUL8SUx16_OPF:
+ case FMUL8ULx16_OPF: {
+ unsigned long byte, ushift;
+
+ rs1 = fpd_regval(f, RS1(insn));
+ rs2 = fpd_regval(f, RS2(insn));
+
+ rd_val = 0;
+ ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
+ for (byte = 0; byte < 4; byte++) {
+ u16 src1;
+ s16 src2;
+ u32 prod;
+ u16 scaled;
+
+ src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
+ src2 = ((rs2 >> (16 * byte)) & 0xffff);
+ prod = src1 * src2;
+ scaled = ((prod & 0x00ffff00) >> 8);
+
+ /* Round up. */
+ if (prod & 0x80)
+ scaled++;
+ rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
+ }
+
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+
+ case FMULD8SUx16_OPF:
+ case FMULD8ULx16_OPF: {
+ unsigned long byte, ushift;
+
+ rs1 = fps_regval(f, RS1(insn));
+ rs2 = fps_regval(f, RS2(insn));
+
+ rd_val = 0;
+ ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
+ for (byte = 0; byte < 2; byte++) {
+ u16 src1;
+ s16 src2;
+ u32 prod;
+ u16 scaled;
+
+ src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
+ src2 = ((rs2 >> (16 * byte)) & 0xffff);
+ prod = src1 * src2;
+ scaled = ((prod & 0x00ffff00) >> 8);
+
+ /* Round up. */
+ if (prod & 0x80)
+ scaled++;
+ rd_val |= ((scaled & 0xffffUL) <<
+ ((byte * 32UL) + 7UL));
+ }
+ *fpd_regaddr(f, RD(insn)) = rd_val;
+ break;
+ }
+ };
+}
+
+static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+ struct fpustate *f = FPUSTATE;
+ unsigned long rs1, rs2, rd_val, i;
+
+ rs1 = fpd_regval(f, RS1(insn));
+ rs2 = fpd_regval(f, RS2(insn));
+
+ rd_val = 0;
+
+ switch (opf) {
+ case FCMPGT16_OPF:
+ for (i = 0; i < 4; i++) {
+ s16 a = (rs1 >> (i * 16)) & 0xffff;
+ s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+ if (a > b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPGT32_OPF:
+ for (i = 0; i < 2; i++) {
+ s32 a = (rs1 >> (i * 32)) & 0xffff;
+ s32 b = (rs2 >> (i * 32)) & 0xffff;
+
+ if (a > b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPLE16_OPF:
+ for (i = 0; i < 4; i++) {
+ s16 a = (rs1 >> (i * 16)) & 0xffff;
+ s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+ if (a <= b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPLE32_OPF:
+ for (i = 0; i < 2; i++) {
+ s32 a = (rs1 >> (i * 32)) & 0xffff;
+ s32 b = (rs2 >> (i * 32)) & 0xffff;
+
+ if (a <= b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPNE16_OPF:
+ for (i = 0; i < 4; i++) {
+ s16 a = (rs1 >> (i * 16)) & 0xffff;
+ s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+ if (a != b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPNE32_OPF:
+ for (i = 0; i < 2; i++) {
+ s32 a = (rs1 >> (i * 32)) & 0xffff;
+ s32 b = (rs2 >> (i * 32)) & 0xffff;
+
+ if (a != b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPEQ16_OPF:
+ for (i = 0; i < 4; i++) {
+ s16 a = (rs1 >> (i * 16)) & 0xffff;
+ s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+ if (a == b)
+ rd_val |= 1 << i;
+ }
+ break;
+
+ case FCMPEQ32_OPF:
+ for (i = 0; i < 2; i++) {
+ s32 a = (rs1 >> (i * 32)) & 0xffff;
+ s32 b = (rs2 >> (i * 32)) & 0xffff;
+
+ if (a == b)
+ rd_val |= 1 << i;
+ }
+ break;
+ };
+
+ maybe_flush_windows(0, 0, RD(insn), 0);
+ store_reg(regs, rd_val, RD(insn));
+}
+
+/* Emulate the VIS instructions which are not implemented in
+ * hardware on Niagara.
+ */
+int vis_emul(struct pt_regs *regs, unsigned int insn)
+{
+ unsigned long pc = regs->tpc;
+ unsigned int opf;
+
+ BUG_ON(regs->tstate & TSTATE_PRIV);
+
+ if (test_thread_flag(TIF_32BIT))
+ pc = (u32)pc;
+
+ if (get_user(insn, (u32 __user *) pc))
+ return -EFAULT;
+
+ if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL)
+ return -EINVAL;
+
+ opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
+ switch (opf) {
+ default:
+ return -EINVAL;
+
+ /* Pixel Formatting Instructions. */
+ case FPACK16_OPF:
+ case FPACK32_OPF:
+ case FPACKFIX_OPF:
+ case FEXPAND_OPF:
+ case FPMERGE_OPF:
+ pformat(regs, insn, opf);
+ break;
+
+ /* Partitioned Multiply Instructions */
+ case FMUL8x16_OPF:
+ case FMUL8x16AU_OPF:
+ case FMUL8x16AL_OPF:
+ case FMUL8SUx16_OPF:
+ case FMUL8ULx16_OPF:
+ case FMULD8SUx16_OPF:
+ case FMULD8ULx16_OPF:
+ pmul(regs, insn, opf);
+ break;
+
+ /* Pixel Compare Instructions */
+ case FCMPGT16_OPF:
+ case FCMPGT32_OPF:
+ case FCMPLE16_OPF:
+ case FCMPLE32_OPF:
+ case FCMPNE16_OPF:
+ case FCMPNE32_OPF:
+ case FCMPEQ16_OPF:
+ case FCMPEQ32_OPF:
+ pcmp(regs, insn, opf);
+ break;
+
+ /* Edge Handling Instructions */
+ case EDGE8_OPF:
+ case EDGE8N_OPF:
+ case EDGE8L_OPF:
+ case EDGE8LN_OPF:
+ case EDGE16_OPF:
+ case EDGE16N_OPF:
+ case EDGE16L_OPF:
+ case EDGE16LN_OPF:
+ case EDGE32_OPF:
+ case EDGE32N_OPF:
+ case EDGE32L_OPF:
+ case EDGE32LN_OPF:
+ edge(regs, insn, opf);
+ break;
+
+ /* Pixel Component Distance */
+ case PDIST_OPF:
+ pdist(regs, insn);
+ break;
+
+ /* Three-Dimensional Array Addressing Instructions */
+ case ARRAY8_OPF:
+ case ARRAY16_OPF:
+ case ARRAY32_OPF:
+ array(regs, insn, opf);
+ break;
+
+ /* Byte Mask and Shuffle Instructions */
+ case BMASK_OPF:
+ bmask(regs, insn);
+ break;
+
+ case BSHUFFLE_OPF:
+ bshuffle(regs, insn);
+ break;
+ };
+
+ regs->tpc = regs->tnpc;
+ regs->tnpc += 4;
+ return 0;
+}
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index 467d13a0d5c1..b097379a49a8 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -70,6 +70,22 @@ SECTIONS
.con_initcall.init : { *(.con_initcall.init) }
__con_initcall_end = .;
SECURITY_INIT
+ . = ALIGN(4);
+ __tsb_ldquad_phys_patch = .;
+ .tsb_ldquad_phys_patch : { *(.tsb_ldquad_phys_patch) }
+ __tsb_ldquad_phys_patch_end = .;
+ __tsb_phys_patch = .;
+ .tsb_phys_patch : { *(.tsb_phys_patch) }
+ __tsb_phys_patch_end = .;
+ __cpuid_patch = .;
+ .cpuid_patch : { *(.cpuid_patch) }
+ __cpuid_patch_end = .;
+ __sun4v_1insn_patch = .;
+ .sun4v_1insn_patch : { *(.sun4v_1insn_patch) }
+ __sun4v_1insn_patch_end = .;
+ __sun4v_2insn_patch = .;
+ .sun4v_2insn_patch : { *(.sun4v_2insn_patch) }
+ __sun4v_2insn_patch_end = .;
. = ALIGN(8192);
__initramfs_start = .;
.init.ramfs : { *(.init.ramfs) }
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index 39160926267b..c4aa110a10e5 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -1,8 +1,6 @@
-/* $Id: winfixup.S,v 1.30 2002/02/09 19:49:30 davem Exp $
+/* winfixup.S: Handle cases where user stack pointer is found to be bogus.
*
- * winfixup.S: Handle cases where user stack pointer is found to be bogus.
- *
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 2006 David S. Miller (davem@davemloft.net)
*/
#include <asm/asi.h>
@@ -15,374 +13,144 @@
.text
-set_pcontext:
- sethi %hi(sparc64_kern_pri_context), %l1
- ldx [%l1 + %lo(sparc64_kern_pri_context)], %l1
- mov PRIMARY_CONTEXT, %g1
- stxa %l1, [%g1] ASI_DMMU
- flush %g6
- retl
- nop
+ /* It used to be the case that these register window fault
+ * handlers could run via the save and restore instructions
+ * done by the trap entry and exit code. They now do the
+ * window spill/fill by hand, so that case no longer can occur.
+ */
.align 32
-
- /* Here are the rules, pay attention.
- *
- * The kernel is disallowed from touching user space while
- * the trap level is greater than zero, except for from within
- * the window spill/fill handlers. This must be followed
- * so that we can easily detect the case where we tried to
- * spill/fill with a bogus (or unmapped) user stack pointer.
- *
- * These are layed out in a special way for cache reasons,
- * don't touch...
- */
- .globl fill_fixup, spill_fixup
fill_fixup:
- rdpr %tstate, %g1
- andcc %g1, TSTATE_PRIV, %g0
- or %g4, FAULT_CODE_WINFIXUP, %g4
- be,pt %xcc, window_scheisse_from_user_common
- and %g1, TSTATE_CWP, %g1
-
- /* This is the extremely complex case, but it does happen from
- * time to time if things are just right. Essentially the restore
- * done in rtrap right before going back to user mode, with tl=1
- * and that levels trap stack registers all setup, took a fill trap,
- * the user stack was not mapped in the tlb, and tlb miss occurred,
- * the pte found was not valid, and a simple ref bit watch update
- * could not satisfy the miss, so we got here.
- *
- * We must carefully unwind the state so we get back to tl=0, preserve
- * all the register values we were going to give to the user. Luckily
- * most things are where they need to be, we also have the address
- * which triggered the fault handy as well.
- *
- * Also note that we must preserve %l5 and %l6. If the user was
- * returning from a system call, we must make it look this way
- * after we process the fill fault on the users stack.
- *
- * First, get into the window where the original restore was executed.
- */
-
- rdpr %wstate, %g2 ! Grab user mode wstate.
- wrpr %g1, %cwp ! Get into the right window.
- sll %g2, 3, %g2 ! NORMAL-->OTHER
-
- wrpr %g0, 0x0, %canrestore ! Standard etrap stuff.
- wrpr %g2, 0x0, %wstate ! This must be consistent.
- wrpr %g0, 0x0, %otherwin ! We know this.
- call set_pcontext ! Change contexts...
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ rdpr %tstate, %g1
+ and %g1, TSTATE_CWP, %g1
+ or %g4, FAULT_CODE_WINFIXUP, %g4
+ stb %g4, [%g6 + TI_FAULT_CODE]
+ stx %g5, [%g6 + TI_FAULT_ADDR]
+ wrpr %g1, %cwp
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ call do_sparc64_fault
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap_clr_l6
nop
- rdpr %pstate, %l1 ! Prepare to change globals.
- mov %g6, %o7 ! Get current.
-
- andn %l1, PSTATE_MM, %l1 ! We want to be in RMO
- stb %g4, [%g6 + TI_FAULT_CODE]
- stx %g5, [%g6 + TI_FAULT_ADDR]
- wrpr %g0, 0x0, %tl ! Out of trap levels.
- wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
- mov %o7, %g6
- ldx [%g6 + TI_TASK], %g4
-#ifdef CONFIG_SMP
- mov TSB_REG, %g1
- ldxa [%g1] ASI_IMMU, %g5
-#endif
- /* This is the same as below, except we handle this a bit special
- * since we must preserve %l5 and %l6, see comment above.
- */
- call do_sparc64_fault
- add %sp, PTREGS_OFF, %o0
- ba,pt %xcc, rtrap
- nop ! yes, nop is correct
-
- /* Be very careful about usage of the alternate globals here.
- * You cannot touch %g4/%g5 as that has the fault information
- * should this be from usermode. Also be careful for the case
- * where we get here from the save instruction in etrap.S when
- * coming from either user or kernel (does not matter which, it
- * is the same problem in both cases). Essentially this means
- * do not touch %g7 or %g2 so we handle the two cases fine.
+ /* Be very careful about usage of the trap globals here.
+ * You cannot touch %g5 as that has the fault information.
*/
spill_fixup:
- ldx [%g6 + TI_FLAGS], %g1
- andcc %g1, _TIF_32BIT, %g0
- ldub [%g6 + TI_WSAVED], %g1
-
- sll %g1, 3, %g3
- add %g6, %g3, %g3
- stx %sp, [%g3 + TI_RWIN_SPTRS]
- sll %g1, 7, %g3
- bne,pt %xcc, 1f
- add %g6, %g3, %g3
- stx %l0, [%g3 + TI_REG_WINDOW + 0x00]
- stx %l1, [%g3 + TI_REG_WINDOW + 0x08]
-
- stx %l2, [%g3 + TI_REG_WINDOW + 0x10]
- stx %l3, [%g3 + TI_REG_WINDOW + 0x18]
- stx %l4, [%g3 + TI_REG_WINDOW + 0x20]
- stx %l5, [%g3 + TI_REG_WINDOW + 0x28]
- stx %l6, [%g3 + TI_REG_WINDOW + 0x30]
- stx %l7, [%g3 + TI_REG_WINDOW + 0x38]
- stx %i0, [%g3 + TI_REG_WINDOW + 0x40]
- stx %i1, [%g3 + TI_REG_WINDOW + 0x48]
-
- stx %i2, [%g3 + TI_REG_WINDOW + 0x50]
- stx %i3, [%g3 + TI_REG_WINDOW + 0x58]
- stx %i4, [%g3 + TI_REG_WINDOW + 0x60]
- stx %i5, [%g3 + TI_REG_WINDOW + 0x68]
- stx %i6, [%g3 + TI_REG_WINDOW + 0x70]
- b,pt %xcc, 2f
- stx %i7, [%g3 + TI_REG_WINDOW + 0x78]
-1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00]
-
- stw %l1, [%g3 + TI_REG_WINDOW + 0x04]
- stw %l2, [%g3 + TI_REG_WINDOW + 0x08]
- stw %l3, [%g3 + TI_REG_WINDOW + 0x0c]
- stw %l4, [%g3 + TI_REG_WINDOW + 0x10]
- stw %l5, [%g3 + TI_REG_WINDOW + 0x14]
- stw %l6, [%g3 + TI_REG_WINDOW + 0x18]
- stw %l7, [%g3 + TI_REG_WINDOW + 0x1c]
- stw %i0, [%g3 + TI_REG_WINDOW + 0x20]
-
- stw %i1, [%g3 + TI_REG_WINDOW + 0x24]
- stw %i2, [%g3 + TI_REG_WINDOW + 0x28]
- stw %i3, [%g3 + TI_REG_WINDOW + 0x2c]
- stw %i4, [%g3 + TI_REG_WINDOW + 0x30]
- stw %i5, [%g3 + TI_REG_WINDOW + 0x34]
- stw %i6, [%g3 + TI_REG_WINDOW + 0x38]
- stw %i7, [%g3 + TI_REG_WINDOW + 0x3c]
-2: add %g1, 1, %g1
-
- stb %g1, [%g6 + TI_WSAVED]
- rdpr %tstate, %g1
- andcc %g1, TSTATE_PRIV, %g0
+spill_fixup_mna:
+spill_fixup_dax:
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ ldx [%g6 + TI_FLAGS], %g1
+ andcc %g1, _TIF_32BIT, %g0
+ ldub [%g6 + TI_WSAVED], %g1
+ sll %g1, 3, %g3
+ add %g6, %g3, %g3
+ stx %sp, [%g3 + TI_RWIN_SPTRS]
+ sll %g1, 7, %g3
+ bne,pt %xcc, 1f
+ add %g6, %g3, %g3
+ stx %l0, [%g3 + TI_REG_WINDOW + 0x00]
+ stx %l1, [%g3 + TI_REG_WINDOW + 0x08]
+ stx %l2, [%g3 + TI_REG_WINDOW + 0x10]
+ stx %l3, [%g3 + TI_REG_WINDOW + 0x18]
+ stx %l4, [%g3 + TI_REG_WINDOW + 0x20]
+ stx %l5, [%g3 + TI_REG_WINDOW + 0x28]
+ stx %l6, [%g3 + TI_REG_WINDOW + 0x30]
+ stx %l7, [%g3 + TI_REG_WINDOW + 0x38]
+ stx %i0, [%g3 + TI_REG_WINDOW + 0x40]
+ stx %i1, [%g3 + TI_REG_WINDOW + 0x48]
+ stx %i2, [%g3 + TI_REG_WINDOW + 0x50]
+ stx %i3, [%g3 + TI_REG_WINDOW + 0x58]
+ stx %i4, [%g3 + TI_REG_WINDOW + 0x60]
+ stx %i5, [%g3 + TI_REG_WINDOW + 0x68]
+ stx %i6, [%g3 + TI_REG_WINDOW + 0x70]
+ ba,pt %xcc, 2f
+ stx %i7, [%g3 + TI_REG_WINDOW + 0x78]
+1: stw %l0, [%g3 + TI_REG_WINDOW + 0x00]
+ stw %l1, [%g3 + TI_REG_WINDOW + 0x04]
+ stw %l2, [%g3 + TI_REG_WINDOW + 0x08]
+ stw %l3, [%g3 + TI_REG_WINDOW + 0x0c]
+ stw %l4, [%g3 + TI_REG_WINDOW + 0x10]
+ stw %l5, [%g3 + TI_REG_WINDOW + 0x14]
+ stw %l6, [%g3 + TI_REG_WINDOW + 0x18]
+ stw %l7, [%g3 + TI_REG_WINDOW + 0x1c]
+ stw %i0, [%g3 + TI_REG_WINDOW + 0x20]
+ stw %i1, [%g3 + TI_REG_WINDOW + 0x24]
+ stw %i2, [%g3 + TI_REG_WINDOW + 0x28]
+ stw %i3, [%g3 + TI_REG_WINDOW + 0x2c]
+ stw %i4, [%g3 + TI_REG_WINDOW + 0x30]
+ stw %i5, [%g3 + TI_REG_WINDOW + 0x34]
+ stw %i6, [%g3 + TI_REG_WINDOW + 0x38]
+ stw %i7, [%g3 + TI_REG_WINDOW + 0x3c]
+2: add %g1, 1, %g1
+ stb %g1, [%g6 + TI_WSAVED]
+ rdpr %tstate, %g1
+ andcc %g1, TSTATE_PRIV, %g0
saved
- and %g1, TSTATE_CWP, %g1
- be,pn %xcc, window_scheisse_from_user_common
- mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4
+ be,pn %xcc, 1f
+ and %g1, TSTATE_CWP, %g1
retry
+1: mov FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4
+ stb %g4, [%g6 + TI_FAULT_CODE]
+ stx %g5, [%g6 + TI_FAULT_ADDR]
+ wrpr %g1, %cwp
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ call do_sparc64_fault
+ add %sp, PTREGS_OFF, %o0
+ ba,a,pt %xcc, rtrap_clr_l6
-window_scheisse_from_user_common:
- stb %g4, [%g6 + TI_FAULT_CODE]
- stx %g5, [%g6 + TI_FAULT_ADDR]
- wrpr %g1, %cwp
- ba,pt %xcc, etrap
- rd %pc, %g7
- call do_sparc64_fault
- add %sp, PTREGS_OFF, %o0
- ba,a,pt %xcc, rtrap_clr_l6
-
- .globl winfix_mna, fill_fixup_mna, spill_fixup_mna
winfix_mna:
- andn %g3, 0x7f, %g3
- add %g3, 0x78, %g3
- wrpr %g3, %tnpc
+ andn %g3, 0x7f, %g3
+ add %g3, 0x78, %g3
+ wrpr %g3, %tnpc
done
-fill_fixup_mna:
- rdpr %tstate, %g1
- andcc %g1, TSTATE_PRIV, %g0
- be,pt %xcc, window_mna_from_user_common
- and %g1, TSTATE_CWP, %g1
- /* Please, see fill_fixup commentary about why we must preserve
- * %l5 and %l6 to preserve absolute correct semantics.
- */
- rdpr %wstate, %g2 ! Grab user mode wstate.
- wrpr %g1, %cwp ! Get into the right window.
- sll %g2, 3, %g2 ! NORMAL-->OTHER
- wrpr %g0, 0x0, %canrestore ! Standard etrap stuff.
-
- wrpr %g2, 0x0, %wstate ! This must be consistent.
- wrpr %g0, 0x0, %otherwin ! We know this.
- call set_pcontext ! Change contexts...
+fill_fixup_mna:
+ rdpr %tstate, %g1
+ and %g1, TSTATE_CWP, %g1
+ wrpr %g1, %cwp
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ sethi %hi(tlb_type), %g1
+ lduw [%g1 + %lo(tlb_type)], %g1
+ cmp %g1, 3
+ bne,pt %icc, 1f
+ add %sp, PTREGS_OFF, %o0
+ mov %l4, %o2
+ call sun4v_do_mna
+ mov %l5, %o1
+ ba,a,pt %xcc, rtrap_clr_l6
+1: mov %l4, %o1
+ mov %l5, %o2
+ call mem_address_unaligned
nop
- rdpr %pstate, %l1 ! Prepare to change globals.
- mov %g4, %o2 ! Setup args for
- mov %g5, %o1 ! final call to mem_address_unaligned.
- andn %l1, PSTATE_MM, %l1 ! We want to be in RMO
+ ba,a,pt %xcc, rtrap_clr_l6
- mov %g6, %o7 ! Stash away current.
- wrpr %g0, 0x0, %tl ! Out of trap levels.
- wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
- mov %o7, %g6 ! Get current back.
- ldx [%g6 + TI_TASK], %g4 ! Finish it.
-#ifdef CONFIG_SMP
- mov TSB_REG, %g1
- ldxa [%g1] ASI_IMMU, %g5
-#endif
- call mem_address_unaligned
- add %sp, PTREGS_OFF, %o0
-
- b,pt %xcc, rtrap
- nop ! yes, the nop is correct
-spill_fixup_mna:
- ldx [%g6 + TI_FLAGS], %g1
- andcc %g1, _TIF_32BIT, %g0
- ldub [%g6 + TI_WSAVED], %g1
- sll %g1, 3, %g3
- add %g6, %g3, %g3
- stx %sp, [%g3 + TI_RWIN_SPTRS]
-
- sll %g1, 7, %g3
- bne,pt %xcc, 1f
- add %g6, %g3, %g3
- stx %l0, [%g3 + TI_REG_WINDOW + 0x00]
- stx %l1, [%g3 + TI_REG_WINDOW + 0x08]
- stx %l2, [%g3 + TI_REG_WINDOW + 0x10]
- stx %l3, [%g3 + TI_REG_WINDOW + 0x18]
- stx %l4, [%g3 + TI_REG_WINDOW + 0x20]
-
- stx %l5, [%g3 + TI_REG_WINDOW + 0x28]
- stx %l6, [%g3 + TI_REG_WINDOW + 0x30]
- stx %l7, [%g3 + TI_REG_WINDOW + 0x38]
- stx %i0, [%g3 + TI_REG_WINDOW + 0x40]
- stx %i1, [%g3 + TI_REG_WINDOW + 0x48]
- stx %i2, [%g3 + TI_REG_WINDOW + 0x50]
- stx %i3, [%g3 + TI_REG_WINDOW + 0x58]
- stx %i4, [%g3 + TI_REG_WINDOW + 0x60]
-
- stx %i5, [%g3 + TI_REG_WINDOW + 0x68]
- stx %i6, [%g3 + TI_REG_WINDOW + 0x70]
- stx %i7, [%g3 + TI_REG_WINDOW + 0x78]
- b,pt %xcc, 2f
- add %g1, 1, %g1
-1: std %l0, [%g3 + TI_REG_WINDOW + 0x00]
- std %l2, [%g3 + TI_REG_WINDOW + 0x08]
- std %l4, [%g3 + TI_REG_WINDOW + 0x10]
-
- std %l6, [%g3 + TI_REG_WINDOW + 0x18]
- std %i0, [%g3 + TI_REG_WINDOW + 0x20]
- std %i2, [%g3 + TI_REG_WINDOW + 0x28]
- std %i4, [%g3 + TI_REG_WINDOW + 0x30]
- std %i6, [%g3 + TI_REG_WINDOW + 0x38]
- add %g1, 1, %g1
-2: stb %g1, [%g6 + TI_WSAVED]
- rdpr %tstate, %g1
-
- andcc %g1, TSTATE_PRIV, %g0
- saved
- be,pn %xcc, window_mna_from_user_common
- and %g1, TSTATE_CWP, %g1
- retry
-window_mna_from_user_common:
- wrpr %g1, %cwp
- sethi %hi(109f), %g7
- ba,pt %xcc, etrap
-109: or %g7, %lo(109b), %g7
- mov %l4, %o2
- mov %l5, %o1
- call mem_address_unaligned
- add %sp, PTREGS_OFF, %o0
- ba,pt %xcc, rtrap
- clr %l6
-
- /* These are only needed for 64-bit mode processes which
- * put their stack pointer into the VPTE area and there
- * happens to be a VPTE tlb entry mapped there during
- * a spill/fill trap to that stack frame.
- */
- .globl winfix_dax, fill_fixup_dax, spill_fixup_dax
winfix_dax:
- andn %g3, 0x7f, %g3
- add %g3, 0x74, %g3
- wrpr %g3, %tnpc
+ andn %g3, 0x7f, %g3
+ add %g3, 0x74, %g3
+ wrpr %g3, %tnpc
done
-fill_fixup_dax:
- rdpr %tstate, %g1
- andcc %g1, TSTATE_PRIV, %g0
- be,pt %xcc, window_dax_from_user_common
- and %g1, TSTATE_CWP, %g1
-
- /* Please, see fill_fixup commentary about why we must preserve
- * %l5 and %l6 to preserve absolute correct semantics.
- */
- rdpr %wstate, %g2 ! Grab user mode wstate.
- wrpr %g1, %cwp ! Get into the right window.
- sll %g2, 3, %g2 ! NORMAL-->OTHER
- wrpr %g0, 0x0, %canrestore ! Standard etrap stuff.
- wrpr %g2, 0x0, %wstate ! This must be consistent.
- wrpr %g0, 0x0, %otherwin ! We know this.
- call set_pcontext ! Change contexts...
+fill_fixup_dax:
+ rdpr %tstate, %g1
+ and %g1, TSTATE_CWP, %g1
+ wrpr %g1, %cwp
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ sethi %hi(tlb_type), %g1
+ mov %l4, %o1
+ lduw [%g1 + %lo(tlb_type)], %g1
+ mov %l5, %o2
+ cmp %g1, 3
+ bne,pt %icc, 1f
+ add %sp, PTREGS_OFF, %o0
+ call sun4v_data_access_exception
nop
- rdpr %pstate, %l1 ! Prepare to change globals.
- mov %g4, %o1 ! Setup args for
- mov %g5, %o2 ! final call to spitfire_data_access_exception.
- andn %l1, PSTATE_MM, %l1 ! We want to be in RMO
-
- mov %g6, %o7 ! Stash away current.
- wrpr %g0, 0x0, %tl ! Out of trap levels.
- wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
- mov %o7, %g6 ! Get current back.
- ldx [%g6 + TI_TASK], %g4 ! Finish it.
-#ifdef CONFIG_SMP
- mov TSB_REG, %g1
- ldxa [%g1] ASI_IMMU, %g5
-#endif
- call spitfire_data_access_exception
- add %sp, PTREGS_OFF, %o0
-
- b,pt %xcc, rtrap
- nop ! yes, the nop is correct
-spill_fixup_dax:
- ldx [%g6 + TI_FLAGS], %g1
- andcc %g1, _TIF_32BIT, %g0
- ldub [%g6 + TI_WSAVED], %g1
- sll %g1, 3, %g3
- add %g6, %g3, %g3
- stx %sp, [%g3 + TI_RWIN_SPTRS]
-
- sll %g1, 7, %g3
- bne,pt %xcc, 1f
- add %g6, %g3, %g3
- stx %l0, [%g3 + TI_REG_WINDOW + 0x00]
- stx %l1, [%g3 + TI_REG_WINDOW + 0x08]
- stx %l2, [%g3 + TI_REG_WINDOW + 0x10]
- stx %l3, [%g3 + TI_REG_WINDOW + 0x18]
- stx %l4, [%g3 + TI_REG_WINDOW + 0x20]
-
- stx %l5, [%g3 + TI_REG_WINDOW + 0x28]
- stx %l6, [%g3 + TI_REG_WINDOW + 0x30]
- stx %l7, [%g3 + TI_REG_WINDOW + 0x38]
- stx %i0, [%g3 + TI_REG_WINDOW + 0x40]
- stx %i1, [%g3 + TI_REG_WINDOW + 0x48]
- stx %i2, [%g3 + TI_REG_WINDOW + 0x50]
- stx %i3, [%g3 + TI_REG_WINDOW + 0x58]
- stx %i4, [%g3 + TI_REG_WINDOW + 0x60]
-
- stx %i5, [%g3 + TI_REG_WINDOW + 0x68]
- stx %i6, [%g3 + TI_REG_WINDOW + 0x70]
- stx %i7, [%g3 + TI_REG_WINDOW + 0x78]
- b,pt %xcc, 2f
- add %g1, 1, %g1
-1: std %l0, [%g3 + TI_REG_WINDOW + 0x00]
- std %l2, [%g3 + TI_REG_WINDOW + 0x08]
- std %l4, [%g3 + TI_REG_WINDOW + 0x10]
-
- std %l6, [%g3 + TI_REG_WINDOW + 0x18]
- std %i0, [%g3 + TI_REG_WINDOW + 0x20]
- std %i2, [%g3 + TI_REG_WINDOW + 0x28]
- std %i4, [%g3 + TI_REG_WINDOW + 0x30]
- std %i6, [%g3 + TI_REG_WINDOW + 0x38]
- add %g1, 1, %g1
-2: stb %g1, [%g6 + TI_WSAVED]
- rdpr %tstate, %g1
-
- andcc %g1, TSTATE_PRIV, %g0
- saved
- be,pn %xcc, window_dax_from_user_common
- and %g1, TSTATE_CWP, %g1
- retry
-window_dax_from_user_common:
- wrpr %g1, %cwp
- sethi %hi(109f), %g7
- ba,pt %xcc, etrap
-109: or %g7, %lo(109b), %g7
- mov %l4, %o1
- mov %l5, %o2
- call spitfire_data_access_exception
- add %sp, PTREGS_OFF, %o0
- ba,pt %xcc, rtrap
- clr %l6
+ ba,a,pt %xcc, rtrap_clr_l6
+1: call spitfire_data_access_exception
+ nop
+ ba,a,pt %xcc, rtrap_clr_l6
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index c295806500f7..8812ded19f01 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -11,6 +11,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
VISsave.o atomic.o bitops.o \
U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
+ NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
+ NGpage.o NGbzero.o \
copy_in_user.o user_fixup.o memmove.o \
mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
diff --git a/arch/sparc64/lib/NGbzero.S b/arch/sparc64/lib/NGbzero.S
new file mode 100644
index 000000000000..e86baece5cc8
--- /dev/null
+++ b/arch/sparc64/lib/NGbzero.S
@@ -0,0 +1,163 @@
+/* NGbzero.S: Niagara optimized memset/clear_user.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+#include <asm/asi.h>
+
+#define EX_ST(x,y) \
+98: x,y; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov %o1, %o0; \
+ .section __ex_table; \
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+ .text
+
+ .globl NGmemset
+ .type NGmemset, #function
+NGmemset: /* %o0=buf, %o1=pat, %o2=len */
+ and %o1, 0xff, %o3
+ mov %o2, %o1
+ sllx %o3, 8, %g1
+ or %g1, %o3, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %xcc, 1f
+ or %g1, %o2, %o2
+
+ .globl NGbzero
+ .type NGbzero, #function
+NGbzero:
+ clr %o2
+1: brz,pn %o1, NGbzero_return
+ mov %o0, %o3
+
+ /* %o5: saved %asi, restored at NGbzero_done
+ * %g7: store-init %asi to use
+ * %o4: non-store-init %asi to use
+ */
+ rd %asi, %o5
+ mov ASI_BLK_INIT_QUAD_LDD_P, %g7
+ mov ASI_P, %o4
+ wr %o4, 0x0, %asi
+
+NGbzero_from_clear_user:
+ cmp %o1, 15
+ bl,pn %icc, NGbzero_tiny
+ andcc %o0, 0x7, %g1
+ be,pt %xcc, 2f
+ mov 8, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: EX_ST(stba %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %o0, 1, %o0
+2: cmp %o1, 128
+ bl,pn %icc, NGbzero_medium
+ andcc %o0, (64 - 1), %g1
+ be,pt %xcc, NGbzero_pre_loop
+ mov 64, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %xcc, 1b
+ add %o0, 8, %o0
+
+NGbzero_pre_loop:
+ wr %g7, 0x0, %asi
+ andn %o1, (64 - 1), %g1
+ sub %o1, %g1, %o1
+NGbzero_loop:
+ EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x08] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x10] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x18] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x20] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x28] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x30] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x38] %asi)
+ subcc %g1, 64, %g1
+ bne,pt %xcc, NGbzero_loop
+ add %o0, 64, %o0
+
+ wr %o4, 0x0, %asi
+ brz,pn %o1, NGbzero_done
+NGbzero_medium:
+ andncc %o1, 0x7, %g1
+ be,pn %xcc, 2f
+ sub %o1, %g1, %o1
+1: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %xcc, 1b
+ add %o0, 8, %o0
+2: brz,pt %o1, NGbzero_done
+ nop
+
+NGbzero_tiny:
+1: EX_ST(stba %o2, [%o0 + 0x00] %asi)
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+
+ /* fallthrough */
+
+NGbzero_done:
+ wr %o5, 0x0, %asi
+
+NGbzero_return:
+ retl
+ mov %o3, %o0
+ .size NGbzero, .-NGbzero
+ .size NGmemset, .-NGmemset
+
+ .globl NGclear_user
+ .type NGclear_user, #function
+NGclear_user: /* %o0=buf, %o1=len */
+ rd %asi, %o5
+ brz,pn %o1, NGbzero_done
+ clr %o3
+ cmp %o5, ASI_AIUS
+ bne,pn %icc, NGbzero
+ clr %o2
+ mov ASI_BLK_INIT_QUAD_LDD_AIUS, %g7
+ ba,pt %xcc, NGbzero_from_clear_user
+ mov ASI_AIUS, %o4
+ .size NGclear_user, .-NGclear_user
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl niagara_patch_bzero
+ .type niagara_patch_bzero,#function
+niagara_patch_bzero:
+ NG_DO_PATCH(memset, NGmemset)
+ NG_DO_PATCH(__bzero, NGbzero)
+ NG_DO_PATCH(__clear_user, NGclear_user)
+ NG_DO_PATCH(tsb_init, NGtsb_init)
+ retl
+ nop
+ .size niagara_patch_bzero,.-niagara_patch_bzero
diff --git a/arch/sparc64/lib/NGcopy_from_user.S b/arch/sparc64/lib/NGcopy_from_user.S
new file mode 100644
index 000000000000..2d93456f76dd
--- /dev/null
+++ b/arch/sparc64/lib/NGcopy_from_user.S
@@ -0,0 +1,37 @@
+/* NGcopy_from_user.S: Niagara optimized copy from userspace.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: wr %g0, ASI_AIUS, %asi;\
+ retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME NGcopy_from_user
+#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest
+#define LOAD_TWIN(addr_reg,dest0,dest1) \
+ ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0
+#define EX_RETVAL(x) 0
+
+#ifdef __KERNEL__
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "NGmemcpy.S"
diff --git a/arch/sparc64/lib/NGcopy_to_user.S b/arch/sparc64/lib/NGcopy_to_user.S
new file mode 100644
index 000000000000..34112d5054ef
--- /dev/null
+++ b/arch/sparc64/lib/NGcopy_to_user.S
@@ -0,0 +1,40 @@
+/* NGcopy_to_user.S: Niagara optimized copy to userspace.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: wr %g0, ASI_AIUS, %asi;\
+ retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME NGcopy_to_user
+#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
+#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
+#define EX_RETVAL(x) 0
+
+#ifdef __KERNEL__
+ /* Writing to %asi is _expensive_ so we hardcode it.
+ * Reading %asi to check for KERNEL_DS is comparatively
+ * cheap.
+ */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "NGmemcpy.S"
diff --git a/arch/sparc64/lib/NGmemcpy.S b/arch/sparc64/lib/NGmemcpy.S
new file mode 100644
index 000000000000..8e522b3dc095
--- /dev/null
+++ b/arch/sparc64/lib/NGmemcpy.S
@@ -0,0 +1,368 @@
+/* NGmemcpy.S: Niagara optimized memcpy.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <asm/asi.h>
+#include <asm/thread_info.h>
+#define GLOBAL_SPARE %g7
+#define RESTORE_ASI(TMP) \
+ ldub [%g6 + TI_CURRENT_DS], TMP; \
+ wr TMP, 0x0, %asi;
+#else
+#define GLOBAL_SPARE %g5
+#define RESTORE_ASI(TMP) \
+ wr %g0, ASI_PNF, %asi
+#endif
+
+#ifndef STORE_ASI
+#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x) x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x) x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+#endif
+
+#ifndef LOAD
+#ifndef MEMCPY_DEBUG
+#define LOAD(type,addr,dest) type [addr], dest
+#else
+#define LOAD(type,addr,dest) type##a [addr] 0x80, dest
+#endif
+#endif
+
+#ifndef LOAD_TWIN
+#define LOAD_TWIN(addr_reg,dest0,dest1) \
+ ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr) type src, [addr]
+#endif
+
+#ifndef STORE_INIT
+#define STORE_INIT(src,addr) stxa src, [addr] %asi
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME NGmemcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
+ .align 64
+
+ .globl FUNC_NAME
+ .type FUNC_NAME,#function
+FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ srlx %o2, 31, %g2
+ cmp %g2, 0
+ tne %xcc, 5
+ PREAMBLE
+ mov %o0, GLOBAL_SPARE
+ cmp %o2, 0
+ be,pn %XCC, 85f
+ or %o0, %o1, %o3
+ cmp %o2, 16
+ blu,a,pn %XCC, 80f
+ or %o3, %o2, %o3
+
+ /* 2 blocks (128 bytes) is the minimum we can do the block
+ * copy with. We need to ensure that we'll iterate at least
+ * once in the block copy loop. At worst we'll need to align
+ * the destination to a 64-byte boundary which can chew up
+ * to (64 - 1) bytes from the length before we perform the
+ * block copy loop.
+ */
+ cmp %o2, (2 * 64)
+ blu,pt %XCC, 70f
+ andcc %o3, 0x7, %g0
+
+ /* %o0: dst
+ * %o1: src
+ * %o2: len (known to be >= 128)
+ *
+ * The block copy loops will use %o4/%o5,%g2/%g3 as
+ * temporaries while copying the data.
+ */
+
+ LOAD(prefetch, %o1, #one_read)
+ wr %g0, STORE_ASI, %asi
+
+ /* Align destination on 64-byte boundary. */
+ andcc %o0, (64 - 1), %o4
+ be,pt %XCC, 2f
+ sub %o4, 64, %o4
+ sub %g0, %o4, %o4 ! bytes to align dst
+ sub %o2, %o4, %o2
+1: subcc %o4, 1, %o4
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o0))
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+
+ /* If the source is on a 16-byte boundary we can do
+ * the direct block copy loop. If it is 8-byte aligned
+ * we can do the 16-byte loads offset by -8 bytes and the
+ * init stores offset by one register.
+ *
+ * If the source is not even 8-byte aligned, we need to do
+ * shifting and masking (basically integer faligndata).
+ *
+ * The careful bit with init stores is that if we store
+ * to any part of the cache line we have to store the whole
+ * cacheline else we can end up with corrupt L2 cache line
+ * contents. Since the loop works on 64-bytes of 64-byte
+ * aligned store data at a time, this is easy to ensure.
+ */
+2:
+ andcc %o1, (16 - 1), %o4
+ andn %o2, (64 - 1), %g1 ! block copy loop iterator
+ sub %o2, %g1, %o2 ! final sub-block copy bytes
+ be,pt %XCC, 50f
+ cmp %o4, 8
+ be,a,pt %XCC, 10f
+ sub %o1, 0x8, %o1
+
+ /* Neither 8-byte nor 16-byte aligned, shift and mask. */
+ mov %g1, %o4
+ and %o1, 0x7, %g1
+ sll %g1, 3, %g1
+ mov 64, %o3
+ andn %o1, 0x7, %o1
+ EX_LD(LOAD(ldx, %o1, %g2))
+ sub %o3, %g1, %o3
+ sllx %g2, %g1, %g2
+
+#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\
+ EX_LD(LOAD(ldx, SRC, TMP1)); \
+ srlx TMP1, PRE_SHIFT, TMP2; \
+ or TMP2, PRE_VAL, TMP2; \
+ EX_ST(STORE_INIT(TMP2, DST)); \
+ sllx TMP1, POST_SHIFT, PRE_VAL;
+
+1: add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00)
+ add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08)
+ add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10)
+ add %o1, 0x8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18)
+ add %o1, 32, %o1
+ LOAD(prefetch, %o1, #one_read)
+ sub %o1, 32 - 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20)
+ add %o1, 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28)
+ add %o1, 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30)
+ add %o1, 8, %o1
+ SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38)
+ subcc %o4, 64, %o4
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+
+#undef SWIVEL_ONE_DWORD
+
+ srl %g1, 3, %g1
+ ba,pt %XCC, 60f
+ add %o1, %g1, %o1
+
+10: /* Destination is 64-byte aligned, source was only 8-byte
+ * aligned but it has been subtracted by 8 and we perform
+ * one twin load ahead, then add 8 back into source when
+ * we finish the loop.
+ */
+ EX_LD(LOAD_TWIN(%o1, %o4, %o5))
+1: add %o1, 16, %o1
+ EX_LD(LOAD_TWIN(%o1, %g2, %g3))
+ add %o1, 16 + 32, %o1
+ LOAD(prefetch, %o1, #one_read)
+ sub %o1, 32, %o1
+ EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
+ EX_ST(STORE_INIT(%g2, %o0 + 0x08))
+ EX_LD(LOAD_TWIN(%o1, %o4, %o5))
+ add %o1, 16, %o1
+ EX_ST(STORE_INIT(%g3, %o0 + 0x10))
+ EX_ST(STORE_INIT(%o4, %o0 + 0x18))
+ EX_LD(LOAD_TWIN(%o1, %g2, %g3))
+ add %o1, 16, %o1
+ EX_ST(STORE_INIT(%o5, %o0 + 0x20))
+ EX_ST(STORE_INIT(%g2, %o0 + 0x28))
+ EX_LD(LOAD_TWIN(%o1, %o4, %o5))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x30))
+ EX_ST(STORE_INIT(%o4, %o0 + 0x38))
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+
+ ba,pt %XCC, 60f
+ add %o1, 0x8, %o1
+
+50: /* Destination is 64-byte aligned, and source is 16-byte
+ * aligned.
+ */
+1: EX_LD(LOAD_TWIN(%o1, %o4, %o5))
+ add %o1, 16, %o1
+ EX_LD(LOAD_TWIN(%o1, %g2, %g3))
+ add %o1, 16 + 32, %o1
+ LOAD(prefetch, %o1, #one_read)
+ sub %o1, 32, %o1
+ EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
+ EX_ST(STORE_INIT(%o5, %o0 + 0x08))
+ EX_LD(LOAD_TWIN(%o1, %o4, %o5))
+ add %o1, 16, %o1
+ EX_ST(STORE_INIT(%g2, %o0 + 0x10))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x18))
+ EX_LD(LOAD_TWIN(%o1, %g2, %g3))
+ add %o1, 16, %o1
+ EX_ST(STORE_INIT(%o4, %o0 + 0x20))
+ EX_ST(STORE_INIT(%o5, %o0 + 0x28))
+ EX_ST(STORE_INIT(%g2, %o0 + 0x30))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x38))
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 1b
+ add %o0, 64, %o0
+ /* fall through */
+
+60:
+ /* %o2 contains any final bytes still needed to be copied
+ * over. If anything is left, we copy it one byte at a time.
+ */
+ RESTORE_ASI(%o3)
+ brz,pt %o2, 85f
+ sub %o0, %o1, %o3
+ ba,a,pt %XCC, 90f
+
+ .align 64
+70: /* 16 < len <= 64 */
+ bne,pn %XCC, 75f
+ sub %o0, %o1, %o3
+
+72:
+ andn %o2, 0xf, %o4
+ and %o2, 0xf, %o2
+1: subcc %o4, 0x10, %o4
+ EX_LD(LOAD(ldx, %o1, %o5))
+ add %o1, 0x08, %o1
+ EX_LD(LOAD(ldx, %o1, %g1))
+ sub %o1, 0x08, %o1
+ EX_ST(STORE(stx, %o5, %o1 + %o3))
+ add %o1, 0x8, %o1
+ EX_ST(STORE(stx, %g1, %o1 + %o3))
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+73: andcc %o2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x8, %o2
+ EX_LD(LOAD(ldx, %o1, %o5))
+ EX_ST(STORE(stx, %o5, %o1 + %o3))
+ add %o1, 0x8, %o1
+1: andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+ EX_LD(LOAD(lduw, %o1, %o5))
+ EX_ST(STORE(stw, %o5, %o1 + %o3))
+ add %o1, 0x4, %o1
+1: cmp %o2, 0
+ be,pt %XCC, 85f
+ nop
+ ba,pt %xcc, 90f
+ nop
+
+75:
+ andcc %o0, 0x7, %g1
+ sub %g1, 0x8, %g1
+ be,pn %icc, 2f
+ sub %g0, %g1, %g1
+ sub %o2, %g1, %o2
+
+1: subcc %g1, 1, %g1
+ EX_LD(LOAD(ldub, %o1, %o5))
+ EX_ST(STORE(stb, %o5, %o1 + %o3))
+ bgu,pt %icc, 1b
+ add %o1, 1, %o1
+
+2: add %o1, %o3, %o0
+ andcc %o1, 0x7, %g1
+ bne,pt %icc, 8f
+ sll %g1, 3, %g1
+
+ cmp %o2, 16
+ bgeu,pt %icc, 72b
+ nop
+ ba,a,pt %xcc, 73b
+
+8: mov 64, %o3
+ andn %o1, 0x7, %o1
+ EX_LD(LOAD(ldx, %o1, %g2))
+ sub %o3, %g1, %o3
+ andn %o2, 0x7, %o4
+ sllx %g2, %g1, %g2
+1: add %o1, 0x8, %o1
+ EX_LD(LOAD(ldx, %o1, %g3))
+ subcc %o4, 0x8, %o4
+ srlx %g3, %o3, %o5
+ or %o5, %g2, %o5
+ EX_ST(STORE(stx, %o5, %o0))
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+
+ srl %g1, 3, %g1
+ andcc %o2, 0x7, %o2
+ be,pn %icc, 85f
+ add %o1, %g1, %o1
+ ba,pt %xcc, 90f
+ sub %o0, %o1, %o3
+
+ .align 64
+80: /* 0 < len <= 16 */
+ andcc %o3, 0x3, %g0
+ bne,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+1:
+ subcc %o2, 4, %o2
+ EX_LD(LOAD(lduw, %o1, %g1))
+ EX_ST(STORE(stw, %g1, %o1 + %o3))
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+85: retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .align 32
+90:
+ subcc %o2, 1, %o2
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o1 + %o3))
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .size FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S
new file mode 100644
index 000000000000..7d7c3bb8dcbf
--- /dev/null
+++ b/arch/sparc64/lib/NGpage.S
@@ -0,0 +1,96 @@
+/* NGpage.S: Niagara optimize clear and copy page.
+ *
+ * Copyright (C) 2006 (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+ .text
+ .align 32
+
+ /* This is heavily simplified from the sun4u variants
+ * because Niagara does not have any D-cache aliasing issues
+ * and also we don't need to use the FPU in order to implement
+ * an optimal page copy/clear.
+ */
+
+NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
+ prefetch [%o1 + 0x00], #one_read
+ mov 8, %g1
+ mov 16, %g2
+ mov 24, %g3
+ set PAGE_SIZE, %g7
+
+1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
+ ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
+ prefetch [%o1 + 0x40], #one_read
+ add %o1, 32, %o1
+ stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+ ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+ ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
+ add %o1, 32, %o1
+ add %o0, 32, %o0
+ stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+ subcc %g7, 64, %g7
+ bne,pt %xcc, 1b
+ add %o0, 32, %o0
+ retl
+ nop
+
+NGclear_page: /* %o0=dest */
+NGclear_user_page: /* %o0=dest, %o1=vaddr */
+ mov 8, %g1
+ mov 16, %g2
+ mov 24, %g3
+ set PAGE_SIZE, %g7
+
+1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+ add %o0, 32, %o0
+ stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+ stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+ subcc %g7, 64, %g7
+ bne,pt %xcc, 1b
+ add %o0, 32, %o0
+ retl
+ nop
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl niagara_patch_pageops
+ .type niagara_patch_pageops,#function
+niagara_patch_pageops:
+ NG_DO_PATCH(copy_user_page, NGcopy_user_page)
+ NG_DO_PATCH(_clear_page, NGclear_page)
+ NG_DO_PATCH(clear_user_page, NGclear_user_page)
+ retl
+ nop
+ .size niagara_patch_pageops,.-niagara_patch_pageops
diff --git a/arch/sparc64/lib/NGpatch.S b/arch/sparc64/lib/NGpatch.S
new file mode 100644
index 000000000000..3b0674fc3366
--- /dev/null
+++ b/arch/sparc64/lib/NGpatch.S
@@ -0,0 +1,33 @@
+/* NGpatch.S: Patch Ultra-I routines with Niagara variant.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl niagara_patch_copyops
+ .type niagara_patch_copyops,#function
+niagara_patch_copyops:
+ NG_DO_PATCH(memcpy, NGmemcpy)
+ NG_DO_PATCH(___copy_from_user, NGcopy_from_user)
+ NG_DO_PATCH(___copy_to_user, NGcopy_to_user)
+ retl
+ nop
+ .size niagara_patch_copyops,.-niagara_patch_copyops
diff --git a/arch/sparc64/lib/U3patch.S b/arch/sparc64/lib/U3patch.S
index e2b6c5e4b95a..ecc302619a6e 100644
--- a/arch/sparc64/lib/U3patch.S
+++ b/arch/sparc64/lib/U3patch.S
@@ -12,7 +12,8 @@
or %g2, %lo(OLD), %g2; \
sub %g1, %g2, %g1; \
sethi %hi(BRANCH_ALWAYS), %g3; \
- srl %g1, 2, %g1; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
or %g3, %lo(BRANCH_ALWAYS), %g3; \
or %g3, %g1, %g3; \
stw %g3, [%g2]; \
diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S
index 1d2abcfa4e52..c7bbae8c590f 100644
--- a/arch/sparc64/lib/bzero.S
+++ b/arch/sparc64/lib/bzero.S
@@ -98,12 +98,12 @@ __bzero_done:
.text; \
.align 4;
- .globl __bzero_noasi
- .type __bzero_noasi, #function
-__bzero_noasi: /* %o0=buf, %o1=len */
- brz,pn %o1, __bzero_noasi_done
+ .globl __clear_user
+ .type __clear_user, #function
+__clear_user: /* %o0=buf, %o1=len */
+ brz,pn %o1, __clear_user_done
cmp %o1, 16
- bl,pn %icc, __bzero_noasi_tiny
+ bl,pn %icc, __clear_user_tiny
EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes)
andcc %o0, 0x3, %g0
be,pt %icc, 2f
@@ -145,14 +145,14 @@ __bzero_noasi: /* %o0=buf, %o1=len */
subcc %g1, 8, %g1
bne,pt %icc, 5b
add %o0, 0x8, %o0
-6: brz,pt %o1, __bzero_noasi_done
+6: brz,pt %o1, __clear_user_done
nop
-__bzero_noasi_tiny:
+__clear_user_tiny:
1: EX_ST(stba %g0, [%o0 + 0x00] %asi)
subcc %o1, 1, %o1
bne,pt %icc, 1b
add %o0, 1, %o0
-__bzero_noasi_done:
+__clear_user_done:
retl
clr %o0
- .size __bzero_noasi, .-__bzero_noasi
+ .size __clear_user, .-__clear_user
diff --git a/arch/sparc64/lib/clear_page.S b/arch/sparc64/lib/clear_page.S
index b59884ef051d..77e531f6c2a7 100644
--- a/arch/sparc64/lib/clear_page.S
+++ b/arch/sparc64/lib/clear_page.S
@@ -9,6 +9,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/spitfire.h>
+#include <asm/head.h>
/* What we used to do was lock a TLB entry into a specific
* TLB slot, clear the page with interrupts disabled, then
@@ -22,9 +23,6 @@
* disable preemption during the clear.
*/
-#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS)
-#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W)
-
.text
.globl _clear_page
@@ -43,12 +41,11 @@ clear_user_page: /* %o0=dest, %o1=vaddr */
sethi %hi(PAGE_SIZE), %o4
sllx %g2, 32, %g2
- sethi %uhi(TTE_BITS_TOP), %g3
+ sethi %hi(PAGE_KERNEL_LOCKED), %g3
- sllx %g3, 32, %g3
+ ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
sub %o0, %g2, %g1 ! paddr
- or %g3, TTE_BITS_BOTTOM, %g3
and %o1, %o4, %o0 ! vaddr D-cache alias bit
or %g1, %g3, %g1 ! TTE data
@@ -66,7 +63,8 @@ clear_user_page: /* %o0=dest, %o1=vaddr */
wrpr %o4, PSTATE_IE, %pstate
stxa %o0, [%g3] ASI_DMMU
stxa %g1, [%g0] ASI_DTLB_DATA_IN
- flush %g6
+ sethi %hi(KERNBASE), %g1
+ flush %g1
wrpr %o4, 0x0, %pstate
mov 1, %o4
diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S
index feebb14fd27a..37460666a5c3 100644
--- a/arch/sparc64/lib/copy_page.S
+++ b/arch/sparc64/lib/copy_page.S
@@ -23,8 +23,6 @@
* disable preemption during the clear.
*/
-#define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS)
-#define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W)
#define DCACHE_SIZE (PAGE_SIZE * 2)
#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
@@ -52,13 +50,12 @@ copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
sethi %hi(PAGE_SIZE), %o3
sllx %g2, 32, %g2
- sethi %uhi(TTE_BITS_TOP), %g3
+ sethi %hi(PAGE_KERNEL_LOCKED), %g3
- sllx %g3, 32, %g3
+ ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
sub %o0, %g2, %g1 ! dest paddr
sub %o1, %g2, %g2 ! src paddr
- or %g3, TTE_BITS_BOTTOM, %g3
and %o2, %o3, %o0 ! vaddr D-cache alias bit
or %g1, %g3, %g1 ! dest TTE data
diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c
index e8808727617a..fb27e54a03ee 100644
--- a/arch/sparc64/lib/delay.c
+++ b/arch/sparc64/lib/delay.c
@@ -1,6 +1,6 @@
/* delay.c: Delay loops for sparc64
*
- * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+ * Copyright (C) 2004, 2006 David S. Miller <davem@davemloft.net>
*
* Based heavily upon x86 variant which is:
* Copyright (C) 1993 Linus Torvalds
@@ -8,19 +8,16 @@
*/
#include <linux/delay.h>
+#include <asm/timer.h>
void __delay(unsigned long loops)
{
- __asm__ __volatile__(
-" b,pt %%xcc, 1f\n"
-" cmp %0, 0\n"
-" .align 32\n"
-"1:\n"
-" bne,pt %%xcc, 1b\n"
-" subcc %0, 1, %0\n"
- : "=&r" (loops)
- : "0" (loops)
- : "cc");
+ unsigned long bclock, now;
+
+ bclock = tick_ops->get_tick();
+ do {
+ now = tick_ops->get_tick();
+ } while ((now-bclock) < loops);
}
/* We used to multiply by HZ after shifting down by 32 bits
diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S
index 4cd5d2be1ae1..a79c8888170d 100644
--- a/arch/sparc64/lib/xor.S
+++ b/arch/sparc64/lib/xor.S
@@ -2,9 +2,10 @@
* arch/sparc64/lib/xor.S
*
* High speed xor_block operation for RAID4/5 utilizing the
- * UltraSparc Visual Instruction Set.
+ * UltraSparc Visual Instruction Set and Niagara store-init/twin-load.
*
* Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
*/
#include <asm/visasm.h>
@@ -19,6 +20,8 @@
*/
.text
.align 32
+
+ /* VIS versions. */
.globl xor_vis_2
.type xor_vis_2,#function
xor_vis_2:
@@ -352,3 +355,298 @@ xor_vis_5:
ret
restore
.size xor_vis_5, .-xor_vis_5
+
+ /* Niagara versions. */
+ .globl xor_niagara_2
+ .type xor_niagara_2,#function
+xor_niagara_2: /* %o0=bytes, %o1=dest, %o2=src */
+ save %sp, -192, %sp
+ prefetch [%i1], #n_writes
+ prefetch [%i2], #one_read
+ rd %asi, %g7
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ srlx %i0, 6, %g1
+ mov %i1, %i0
+ mov %i2, %i1
+1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src + 0x00 */
+ ldda [%i1 + 0x10] %asi, %i4 /* %i4/%i5 = src + 0x10 */
+ ldda [%i1 + 0x20] %asi, %g2 /* %g2/%g3 = src + 0x20 */
+ ldda [%i1 + 0x30] %asi, %l0 /* %l0/%l1 = src + 0x30 */
+ prefetch [%i1 + 0x40], #one_read
+ ldda [%i0 + 0x00] %asi, %o0 /* %o0/%o1 = dest + 0x00 */
+ ldda [%i0 + 0x10] %asi, %o2 /* %o2/%o3 = dest + 0x10 */
+ ldda [%i0 + 0x20] %asi, %o4 /* %o4/%o5 = dest + 0x20 */
+ ldda [%i0 + 0x30] %asi, %l2 /* %l2/%l3 = dest + 0x30 */
+ prefetch [%i0 + 0x40], #n_writes
+ xor %o0, %i2, %o0
+ xor %o1, %i3, %o1
+ stxa %o0, [%i0 + 0x00] %asi
+ stxa %o1, [%i0 + 0x08] %asi
+ xor %o2, %i4, %o2
+ xor %o3, %i5, %o3
+ stxa %o2, [%i0 + 0x10] %asi
+ stxa %o3, [%i0 + 0x18] %asi
+ xor %o4, %g2, %o4
+ xor %o5, %g3, %o5
+ stxa %o4, [%i0 + 0x20] %asi
+ stxa %o5, [%i0 + 0x28] %asi
+ xor %l2, %l0, %l2
+ xor %l3, %l1, %l3
+ stxa %l2, [%i0 + 0x30] %asi
+ stxa %l3, [%i0 + 0x38] %asi
+ add %i0, 0x40, %i0
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %i1, 0x40, %i1
+ membar #Sync
+ wr %g7, 0x0, %asi
+ ret
+ restore
+ .size xor_niagara_2, .-xor_niagara_2
+
+ .globl xor_niagara_3
+ .type xor_niagara_3,#function
+xor_niagara_3: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
+ save %sp, -192, %sp
+ prefetch [%i1], #n_writes
+ prefetch [%i2], #one_read
+ prefetch [%i3], #one_read
+ rd %asi, %g7
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ srlx %i0, 6, %g1
+ mov %i1, %i0
+ mov %i2, %i1
+ mov %i3, %l7
+1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */
+ ldda [%i1 + 0x10] %asi, %i4 /* %i4/%i5 = src1 + 0x10 */
+ ldda [%l7 + 0x00] %asi, %g2 /* %g2/%g3 = src2 + 0x00 */
+ ldda [%l7 + 0x10] %asi, %l0 /* %l0/%l1 = src2 + 0x10 */
+ ldda [%i0 + 0x00] %asi, %o0 /* %o0/%o1 = dest + 0x00 */
+ ldda [%i0 + 0x10] %asi, %o2 /* %o2/%o3 = dest + 0x10 */
+ xor %g2, %i2, %g2
+ xor %g3, %i3, %g3
+ xor %o0, %g2, %o0
+ xor %o1, %g3, %o1
+ stxa %o0, [%i0 + 0x00] %asi
+ stxa %o1, [%i0 + 0x08] %asi
+ ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */
+ ldda [%l7 + 0x20] %asi, %g2 /* %g2/%g3 = src2 + 0x20 */
+ ldda [%i0 + 0x20] %asi, %o0 /* %o0/%o1 = dest + 0x20 */
+ xor %l0, %i4, %l0
+ xor %l1, %i5, %l1
+ xor %o2, %l0, %o2
+ xor %o3, %l1, %o3
+ stxa %o2, [%i0 + 0x10] %asi
+ stxa %o3, [%i0 + 0x18] %asi
+ ldda [%i1 + 0x30] %asi, %i4 /* %i4/%i5 = src1 + 0x30 */
+ ldda [%l7 + 0x30] %asi, %l0 /* %l0/%l1 = src2 + 0x30 */
+ ldda [%i0 + 0x30] %asi, %o2 /* %o2/%o3 = dest + 0x30 */
+ prefetch [%i1 + 0x40], #one_read
+ prefetch [%l7 + 0x40], #one_read
+ prefetch [%i0 + 0x40], #n_writes
+ xor %g2, %i2, %g2
+ xor %g3, %i3, %g3
+ xor %o0, %g2, %o0
+ xor %o1, %g3, %o1
+ stxa %o0, [%i0 + 0x20] %asi
+ stxa %o1, [%i0 + 0x28] %asi
+ xor %l0, %i4, %l0
+ xor %l1, %i5, %l1
+ xor %o2, %l0, %o2
+ xor %o3, %l1, %o3
+ stxa %o2, [%i0 + 0x30] %asi
+ stxa %o3, [%i0 + 0x38] %asi
+ add %i0, 0x40, %i0
+ add %i1, 0x40, %i1
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %l7, 0x40, %l7
+ membar #Sync
+ wr %g7, 0x0, %asi
+ ret
+ restore
+ .size xor_niagara_3, .-xor_niagara_3
+
+ .globl xor_niagara_4
+ .type xor_niagara_4,#function
+xor_niagara_4: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
+ save %sp, -192, %sp
+ prefetch [%i1], #n_writes
+ prefetch [%i2], #one_read
+ prefetch [%i3], #one_read
+ prefetch [%i4], #one_read
+ rd %asi, %g7
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ srlx %i0, 6, %g1
+ mov %i1, %i0
+ mov %i2, %i1
+ mov %i3, %l7
+ mov %i4, %l6
+1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */
+ ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */
+ ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */
+ ldda [%i0 + 0x00] %asi, %l0 /* %l0/%l1 = dest + 0x00 */
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ ldda [%i7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ stxa %l0, [%i0 + 0x00] %asi
+ stxa %l1, [%i0 + 0x08] %asi
+ ldda [%i6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */
+ ldda [%i0 + 0x10] %asi, %l0 /* %l0/%l1 = dest + 0x10 */
+
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ ldda [%i7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ stxa %l0, [%i0 + 0x10] %asi
+ stxa %l1, [%i0 + 0x18] %asi
+ ldda [%i6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */
+ ldda [%i0 + 0x20] %asi, %l0 /* %l0/%l1 = dest + 0x20 */
+
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ ldda [%i7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ stxa %l0, [%i0 + 0x20] %asi
+ stxa %l1, [%i0 + 0x28] %asi
+ ldda [%i6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */
+ ldda [%i0 + 0x30] %asi, %l0 /* %l0/%l1 = dest + 0x30 */
+
+ prefetch [%i1 + 0x40], #one_read
+ prefetch [%l7 + 0x40], #one_read
+ prefetch [%l6 + 0x40], #one_read
+ prefetch [%i0 + 0x40], #n_writes
+
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ stxa %l0, [%i0 + 0x30] %asi
+ stxa %l1, [%i0 + 0x38] %asi
+
+ add %i0, 0x40, %i0
+ add %i1, 0x40, %i1
+ add %l7, 0x40, %l7
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %l6, 0x40, %l6
+ membar #Sync
+ wr %g7, 0x0, %asi
+ ret
+ restore
+ .size xor_niagara_4, .-xor_niagara_4
+
+ .globl xor_niagara_5
+ .type xor_niagara_5,#function
+xor_niagara_5: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
+ save %sp, -192, %sp
+ prefetch [%i1], #n_writes
+ prefetch [%i2], #one_read
+ prefetch [%i3], #one_read
+ prefetch [%i4], #one_read
+ prefetch [%i5], #one_read
+ rd %asi, %g7
+ wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+ srlx %i0, 6, %g1
+ mov %i1, %i0
+ mov %i2, %i1
+ mov %i3, %l7
+ mov %i4, %l6
+ mov %i5, %l5
+1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */
+ ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */
+ ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */
+ ldda [%l5 + 0x00] %asi, %l0 /* %l0/%l1 = src4 + 0x00 */
+ ldda [%i0 + 0x00] %asi, %l2 /* %l2/%l3 = dest + 0x00 */
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ ldda [%l7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ ldda [%l6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */
+ xor %l2, %l0, %l2
+ xor %l3, %l1, %l3
+ stxa %l2, [%i0 + 0x00] %asi
+ stxa %l3, [%i0 + 0x08] %asi
+ ldda [%l5 + 0x10] %asi, %l0 /* %l0/%l1 = src4 + 0x10 */
+ ldda [%i0 + 0x10] %asi, %l2 /* %l2/%l3 = dest + 0x10 */
+
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ ldda [%l7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ ldda [%l6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */
+ xor %l2, %l0, %l2
+ xor %l3, %l1, %l3
+ stxa %l2, [%i0 + 0x10] %asi
+ stxa %l3, [%i0 + 0x18] %asi
+ ldda [%l5 + 0x20] %asi, %l0 /* %l0/%l1 = src4 + 0x20 */
+ ldda [%i0 + 0x20] %asi, %l2 /* %l2/%l3 = dest + 0x20 */
+
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ ldda [%l7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ ldda [%l6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */
+ xor %l2, %l0, %l2
+ xor %l3, %l1, %l3
+ stxa %l2, [%i0 + 0x20] %asi
+ stxa %l3, [%i0 + 0x28] %asi
+ ldda [%l5 + 0x30] %asi, %l0 /* %l0/%l1 = src4 + 0x30 */
+ ldda [%i0 + 0x30] %asi, %l2 /* %l2/%l3 = dest + 0x30 */
+
+ prefetch [%i1 + 0x40], #one_read
+ prefetch [%l7 + 0x40], #one_read
+ prefetch [%l6 + 0x40], #one_read
+ prefetch [%l5 + 0x40], #one_read
+ prefetch [%i0 + 0x40], #n_writes
+
+ xor %i4, %i2, %i4
+ xor %i5, %i3, %i5
+ xor %g2, %i4, %g2
+ xor %g3, %i5, %g3
+ xor %l0, %g2, %l0
+ xor %l1, %g3, %l1
+ xor %l2, %l0, %l2
+ xor %l3, %l1, %l3
+ stxa %l2, [%i0 + 0x30] %asi
+ stxa %l3, [%i0 + 0x38] %asi
+
+ add %i0, 0x40, %i0
+ add %i1, 0x40, %i1
+ add %l7, 0x40, %l7
+ add %l6, 0x40, %l6
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %l5, 0x40, %l5
+ membar #Sync
+ wr %g7, 0x0, %asi
+ ret
+ restore
+ .size xor_niagara_5, .-xor_niagara_5
diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c
index 2ae05cd7b773..6ee496c2864a 100644
--- a/arch/sparc64/math-emu/math.c
+++ b/arch/sparc64/math-emu/math.c
@@ -206,9 +206,29 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
case FSTOQ: TYPE(3,3,1,1,1,0,0); break;
case FDTOQ: TYPE(3,3,1,2,1,0,0); break;
case FQTOI: TYPE(3,1,0,3,1,0,0); break;
+
+ /* We can get either unimplemented or unfinished
+ * for these cases. Pre-Niagara systems generate
+ * unfinished fpop for SUBNORMAL cases, and Niagara
+ * always gives unimplemented fpop for fsqrt{s,d}.
+ */
+ case FSQRTS: {
+ unsigned long x = current_thread_info()->xfsr[0];
+
+ x = (x >> 14) & 0xf;
+ TYPE(x,1,1,1,1,0,0);
+ break;
+ }
+
+ case FSQRTD: {
+ unsigned long x = current_thread_info()->xfsr[0];
+
+ x = (x >> 14) & 0xf;
+ TYPE(x,2,1,2,1,0,0);
+ break;
+ }
+
/* SUBNORMAL - ftt == 2 */
- case FSQRTS: TYPE(2,1,1,1,1,0,0); break;
- case FSQRTD: TYPE(2,2,1,2,1,0,0); break;
case FADDD:
case FSUBD:
case FMULD:
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f48..e415bf942bcd 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror
-obj-y := ultra.o tlb.o fault.o init.o generic.o
+obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 6f0539aa44d0..63b6cc0cd5d5 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -29,6 +29,7 @@
#include <asm/lsu.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
+#include <asm/mmu_context.h>
/*
* To debug kernel to catch accesses to certain virtual/physical addresses.
@@ -91,12 +92,13 @@ static void __kprobes unhandled_fault(unsigned long address,
die_if_kernel("Oops", regs);
}
-static void bad_kernel_pc(struct pt_regs *regs)
+static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
{
unsigned long *ksp;
printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
regs->tpc);
+ printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
__asm__("mov %%sp, %0" : "=r" (ksp));
show_stack(current, ksp);
unhandled_fault(regs->tpc, current, regs);
@@ -137,7 +139,7 @@ static unsigned int get_user_insn(unsigned long tpc)
if (!pte_present(pte))
goto out;
- pa = (pte_val(pte) & _PAGE_PADDR);
+ pa = (pte_pfn(pte) << PAGE_SHIFT);
pa += (tpc & ~PAGE_MASK);
/* Use phys bypass so we don't pollute dtlb/dcache. */
@@ -257,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
struct vm_area_struct *vma;
unsigned int insn = 0;
int si_code, fault_code;
- unsigned long address;
+ unsigned long address, mm_rss;
fault_code = get_thread_fault_code();
@@ -280,7 +282,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
(tpc >= MODULES_VADDR && tpc < MODULES_END)) {
/* Valid, no problems... */
} else {
- bad_kernel_pc(regs);
+ bad_kernel_pc(regs, address);
return;
}
}
@@ -406,6 +408,11 @@ good_area:
}
up_read(&mm->mmap_sem);
+
+ mm_rss = get_mm_rss(mm);
+ if (unlikely(mm_rss >= mm->context.tsb_rss_limit))
+ tsb_grow(mm, mm_rss);
+
return;
/*
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index 580b63da836b..5fc5c579e35e 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -15,15 +15,6 @@
#include <asm/page.h>
#include <asm/tlbflush.h>
-static inline pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space)
-{
- pte_t pte;
- pte_val(pte) = (((page) | pgprot_val(prot) | _PAGE_E) &
- ~(unsigned long)_PAGE_CACHE);
- pte_val(pte) |= (((unsigned long)space) << 32);
- return pte;
-}
-
/* Remap IO memory, the same way as remap_pfn_range(), but use
* the obio memory space.
*
@@ -48,24 +39,29 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
pte_t entry;
unsigned long curend = address + PAGE_SIZE;
- entry = mk_pte_io(offset, prot, space);
+ entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
if (!(address & 0xffff)) {
- if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) {
- entry = mk_pte_io(offset,
- __pgprot(pgprot_val (prot) | _PAGE_SZ4MB),
- space);
+ if (PAGE_SIZE < (4 * 1024 * 1024) &&
+ !(address & 0x3fffff) &&
+ !(offset & 0x3ffffe) &&
+ end >= address + 0x400000) {
+ entry = mk_pte_io(offset, prot, space,
+ 4 * 1024 * 1024);
curend = address + 0x400000;
offset += 0x400000;
- } else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) {
- entry = mk_pte_io(offset,
- __pgprot(pgprot_val (prot) | _PAGE_SZ512K),
- space);
+ } else if (PAGE_SIZE < (512 * 1024) &&
+ !(address & 0x7ffff) &&
+ !(offset & 0x7fffe) &&
+ end >= address + 0x80000) {
+ entry = mk_pte_io(offset, prot, space,
+ 512 * 1024 * 1024);
curend = address + 0x80000;
offset += 0x80000;
- } else if (!(offset & 0xfffe) && end >= address + 0x10000) {
- entry = mk_pte_io(offset,
- __pgprot(pgprot_val (prot) | _PAGE_SZ64K),
- space);
+ } else if (PAGE_SIZE < (64 * 1024) &&
+ !(offset & 0xfffe) &&
+ end >= address + 0x10000) {
+ entry = mk_pte_io(offset, prot, space,
+ 64 * 1024);
curend = address + 0x10000;
offset += 0x10000;
} else
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 625cbb336a23..a7a24869d045 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -1,7 +1,7 @@
/*
* SPARC64 Huge TLB page support.
*
- * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
*/
#include <linux/config.h>
@@ -22,6 +22,175 @@
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
+/* Slightly simplified from the non-hugepage variant because by
+ * definition we don't have to worry about any page coloring stuff
+ */
+#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
+#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct * vma;
+ unsigned long task_size = TASK_SIZE;
+ unsigned long start_addr;
+
+ if (test_thread_flag(TIF_32BIT))
+ task_size = STACK_TOP32;
+ if (unlikely(len >= VA_EXCLUDE_START))
+ return -ENOMEM;
+
+ if (len > mm->cached_hole_size) {
+ start_addr = addr = mm->free_area_cache;
+ } else {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ }
+
+ task_size -= len;
+
+full_search:
+ addr = ALIGN(addr, HPAGE_SIZE);
+
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ if (addr < VA_EXCLUDE_START &&
+ (addr + len) >= VA_EXCLUDE_START) {
+ addr = VA_EXCLUDE_END;
+ vma = find_vma(mm, VA_EXCLUDE_END);
+ }
+ if (unlikely(task_size < addr)) {
+ if (start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+ if (likely(!vma || addr + len <= vma->vm_start)) {
+ /*
+ * Remember the place where we stopped the search:
+ */
+ mm->free_area_cache = addr + len;
+ return addr;
+ }
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+ }
+}
+
+static unsigned long
+hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len,
+ const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+
+ /* This should only ever run for 32-bit processes. */
+ BUG_ON(!test_thread_flag(TIF_32BIT));
+
+ /* check if free_area_cache is useful for us */
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache & HPAGE_MASK;
+
+ /* make sure it can fit in the remaining address space */
+ if (likely(addr > len)) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start) {
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+ }
+
+ if (unlikely(mm->mmap_base < len))
+ goto bottomup;
+
+ addr = (mm->mmap_base-len) & HPAGE_MASK;
+
+ do {
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (likely(!vma || addr+len <= vma->vm_start)) {
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr);
+ }
+
+ /* remember the largest hole we saw so far */
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = (vma->vm_start-len) & HPAGE_MASK;
+ } while (likely(len < vma->vm_start));
+
+bottomup:
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ mm->cached_hole_size = ~0UL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+ /*
+ * Restore the topdown base:
+ */
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+
+ return addr;
+}
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long task_size = TASK_SIZE;
+
+ if (test_thread_flag(TIF_32BIT))
+ task_size = STACK_TOP32;
+
+ if (len & ~HPAGE_MASK)
+ return -EINVAL;
+ if (len > task_size)
+ return -ENOMEM;
+
+ if (addr) {
+ addr = ALIGN(addr, HPAGE_SIZE);
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+ return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
+ else
+ return hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
@@ -48,12 +217,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pmd_t *pmd;
pte_t *pte = NULL;
+ addr &= HPAGE_MASK;
+
pgd = pgd_offset(mm, addr);
- if (pgd) {
+ if (!pgd_none(*pgd)) {
pud = pud_offset(pgd, addr);
- if (pud) {
+ if (!pud_none(*pud)) {
pmd = pmd_offset(pud, addr);
- if (pmd)
+ if (!pmd_none(*pmd))
pte = pte_offset_map(pmd, addr);
}
}
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee8..c2b556106fc1 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -6,6 +6,7 @@
*/
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
@@ -39,9 +40,27 @@
#include <asm/tlb.h>
#include <asm/spitfire.h>
#include <asm/sections.h>
+#include <asm/tsb.h>
+#include <asm/hypervisor.h>
extern void device_scan(void);
+#define MAX_PHYS_ADDRESS (1UL << 42UL)
+#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
+#define KPTE_BITMAP_BYTES \
+ ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
+
+unsigned long kern_linear_pte_xor[2] __read_mostly;
+
+/* A bitmap, one bit for every 256MB of physical memory. If the bit
+ * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
+ * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
+ */
+unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
+
+/* A special kernel TSB for 4MB and 256MB linear mappings. */
+struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
+
#define MAX_BANKS 32
static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
@@ -111,11 +130,9 @@ static void __init read_obp_memory(const char *property,
unsigned long *sparc64_valid_addr_bitmap __read_mostly;
-/* Ugly, but necessary... -DaveM */
-unsigned long phys_base __read_mostly;
+/* Kernel physical address base and size in bytes. */
unsigned long kern_base __read_mostly;
unsigned long kern_size __read_mostly;
-unsigned long pfn_base __read_mostly;
/* get_new_mmu_context() uses "cache + 1". */
DEFINE_SPINLOCK(ctx_alloc_lock);
@@ -141,24 +158,28 @@ unsigned long sparc64_kern_sec_context __read_mostly;
int bigkernel = 0;
-/* XXX Tune this... */
-#define PGT_CACHE_LOW 25
-#define PGT_CACHE_HIGH 50
+kmem_cache_t *pgtable_cache __read_mostly;
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+ clear_page(addr);
+}
+
+extern void tsb_cache_init(void);
-void check_pgt_cache(void)
+void pgtable_cache_init(void)
{
- preempt_disable();
- if (pgtable_cache_size > PGT_CACHE_HIGH) {
- do {
- if (pgd_quicklist)
- free_pgd_slow(get_pgd_fast());
- if (pte_quicklist[0])
- free_pte_slow(pte_alloc_one_fast(NULL, 0));
- if (pte_quicklist[1])
- free_pte_slow(pte_alloc_one_fast(NULL, 1 << (PAGE_SHIFT + 10)));
- } while (pgtable_cache_size > PGT_CACHE_LOW);
+ pgtable_cache = kmem_cache_create("pgtable_cache",
+ PAGE_SIZE, PAGE_SIZE,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
+ zero_ctor,
+ NULL);
+ if (!pgtable_cache) {
+ prom_printf("Could not create pgtable_cache\n");
+ prom_halt();
}
- preempt_enable();
+ tsb_cache_init();
}
#ifdef CONFIG_DEBUG_DCFLUSH
@@ -168,8 +189,9 @@ atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
#endif
#endif
-__inline__ void flush_dcache_page_impl(struct page *page)
+inline void flush_dcache_page_impl(struct page *page)
{
+ BUG_ON(tlb_type == hypervisor);
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes);
#endif
@@ -186,8 +208,8 @@ __inline__ void flush_dcache_page_impl(struct page *page)
}
#define PG_dcache_dirty PG_arch_1
-#define PG_dcache_cpu_shift 24
-#define PG_dcache_cpu_mask (256 - 1)
+#define PG_dcache_cpu_shift 24UL
+#define PG_dcache_cpu_mask (256UL - 1UL)
#if NR_CPUS > 256
#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
@@ -243,32 +265,61 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
: "g1", "g7");
}
+static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
+{
+ unsigned long tsb_addr = (unsigned long) ent;
+
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+ tsb_addr = __pa(tsb_addr);
+
+ __tsb_insert(tsb_addr, tag, pte);
+}
+
+unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
+unsigned long _PAGE_SZBITS __read_mostly;
+
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
- struct page *page;
- unsigned long pfn;
- unsigned long pg_flags;
-
- pfn = pte_pfn(pte);
- if (pfn_valid(pfn) &&
- (page = pfn_to_page(pfn), page_mapping(page)) &&
- ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) {
- int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
- PG_dcache_cpu_mask);
- int this_cpu = get_cpu();
-
- /* This is just to optimize away some function calls
- * in the SMP case.
- */
- if (cpu == this_cpu)
- flush_dcache_page_impl(page);
- else
- smp_flush_dcache_page_impl(page, cpu);
+ struct mm_struct *mm;
+ struct tsb *tsb;
+ unsigned long tag, flags;
+
+ if (tlb_type != hypervisor) {
+ unsigned long pfn = pte_pfn(pte);
+ unsigned long pg_flags;
+ struct page *page;
+
+ if (pfn_valid(pfn) &&
+ (page = pfn_to_page(pfn), page_mapping(page)) &&
+ ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) {
+ int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
+ PG_dcache_cpu_mask);
+ int this_cpu = get_cpu();
+
+ /* This is just to optimize away some function calls
+ * in the SMP case.
+ */
+ if (cpu == this_cpu)
+ flush_dcache_page_impl(page);
+ else
+ smp_flush_dcache_page_impl(page, cpu);
- clear_dcache_dirty_cpu(page, cpu);
+ clear_dcache_dirty_cpu(page, cpu);
- put_cpu();
+ put_cpu();
+ }
}
+
+ mm = vma->vm_mm;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+ tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
+ (mm->context.tsb_nentries - 1UL)];
+ tag = (address >> 22UL);
+ tsb_insert(tsb, tag, pte_val(pte));
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
}
void flush_dcache_page(struct page *page)
@@ -276,6 +327,9 @@ void flush_dcache_page(struct page *page)
struct address_space *mapping;
int this_cpu;
+ if (tlb_type == hypervisor)
+ return;
+
/* Do not bother with the expensive D-cache flush if it
* is merely the zero page. The 'bigcore' testcase in GDB
* causes this case to run millions of times.
@@ -311,7 +365,7 @@ out:
void __kprobes flush_icache_range(unsigned long start, unsigned long end)
{
- /* Cheetah has coherent I-cache. */
+ /* Cheetah and Hypervisor platform cpus have coherent I-cache. */
if (tlb_type == spitfire) {
unsigned long kaddr;
@@ -320,16 +374,6 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end)
}
}
-unsigned long page_to_pfn(struct page *page)
-{
- return (unsigned long) ((page - mem_map) + pfn_base);
-}
-
-struct page *pfn_to_page(unsigned long pfn)
-{
- return (mem_map + (pfn - pfn_base));
-}
-
void show_mem(void)
{
printk("Mem-info:\n");
@@ -338,7 +382,6 @@ void show_mem(void)
nr_swap_pages << (PAGE_SHIFT-10));
printk("%ld pages of RAM\n", num_physpages);
printk("%d free pages\n", nr_free_pages());
- printk("%d pages in page table cache\n",pgtable_cache_size);
}
void mmu_info(struct seq_file *m)
@@ -349,6 +392,8 @@ void mmu_info(struct seq_file *m)
seq_printf(m, "MMU Type\t: Cheetah+\n");
else if (tlb_type == spitfire)
seq_printf(m, "MMU Type\t: Spitfire\n");
+ else if (tlb_type == hypervisor)
+ seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
else
seq_printf(m, "MMU Type\t: ???\n");
@@ -371,45 +416,13 @@ struct linux_prom_translation {
/* Exported for kernel TLB miss handling in ktlb.S */
struct linux_prom_translation prom_trans[512] __read_mostly;
unsigned int prom_trans_ents __read_mostly;
-unsigned int swapper_pgd_zero __read_mostly;
-
-extern unsigned long prom_boot_page;
-extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
-extern int prom_get_mmu_ihandle(void);
-extern void register_prom_callbacks(void);
/* Exported for SMP bootup purposes. */
unsigned long kern_locked_tte_data;
-/*
- * Translate PROM's mapping we capture at boot time into physical address.
- * The second parameter is only set from prom_callback() invocations.
- */
-unsigned long prom_virt_to_phys(unsigned long promva, int *error)
-{
- int i;
-
- for (i = 0; i < prom_trans_ents; i++) {
- struct linux_prom_translation *p = &prom_trans[i];
-
- if (promva >= p->virt &&
- promva < (p->virt + p->size)) {
- unsigned long base = p->data & _PAGE_PADDR;
-
- if (error)
- *error = 0;
- return base + (promva & (8192 - 1));
- }
- }
- if (error)
- *error = 1;
- return 0UL;
-}
-
/* The obp translations are saved based on 8k pagesize, since obp can
* use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
- * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte
- * scheme (also, see rant in inherit_locked_prom_mappings()).
+ * HI_OBP_ADDRESS range are handled in ktlb.S.
*/
static inline int in_obp_range(unsigned long vaddr)
{
@@ -490,6 +503,36 @@ static void __init read_obp_translations(void)
}
}
+static void __init hypervisor_tlb_lock(unsigned long vaddr,
+ unsigned long pte,
+ unsigned long mmu)
+{
+ register unsigned long func asm("%o5");
+ register unsigned long arg0 asm("%o0");
+ register unsigned long arg1 asm("%o1");
+ register unsigned long arg2 asm("%o2");
+ register unsigned long arg3 asm("%o3");
+
+ func = HV_FAST_MMU_MAP_PERM_ADDR;
+ arg0 = vaddr;
+ arg1 = 0;
+ arg2 = pte;
+ arg3 = mmu;
+ __asm__ __volatile__("ta 0x80"
+ : "=&r" (func), "=&r" (arg0),
+ "=&r" (arg1), "=&r" (arg2),
+ "=&r" (arg3)
+ : "0" (func), "1" (arg0), "2" (arg1),
+ "3" (arg2), "4" (arg3));
+ if (arg0 != 0) {
+ prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: "
+ "errors with %lx\n", vaddr, 0, pte, mmu, arg0);
+ prom_halt();
+ }
+}
+
+static unsigned long kern_large_tte(unsigned long paddr);
+
static void __init remap_kernel(void)
{
unsigned long phys_page, tte_vaddr, tte_data;
@@ -497,25 +540,34 @@ static void __init remap_kernel(void)
tte_vaddr = (unsigned long) KERNBASE;
phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
- tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB |
- _PAGE_CP | _PAGE_CV | _PAGE_P |
- _PAGE_L | _PAGE_W));
+ tte_data = kern_large_tte(phys_page);
kern_locked_tte_data = tte_data;
- /* Now lock us into the TLBs via OBP. */
- prom_dtlb_load(tlb_ent, tte_data, tte_vaddr);
- prom_itlb_load(tlb_ent, tte_data, tte_vaddr);
- if (bigkernel) {
- tlb_ent -= 1;
- prom_dtlb_load(tlb_ent,
- tte_data + 0x400000,
- tte_vaddr + 0x400000);
- prom_itlb_load(tlb_ent,
- tte_data + 0x400000,
- tte_vaddr + 0x400000);
+ /* Now lock us into the TLBs via Hypervisor or OBP. */
+ if (tlb_type == hypervisor) {
+ hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
+ hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
+ if (bigkernel) {
+ tte_vaddr += 0x400000;
+ tte_data += 0x400000;
+ hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
+ hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
+ }
+ } else {
+ prom_dtlb_load(tlb_ent, tte_data, tte_vaddr);
+ prom_itlb_load(tlb_ent, tte_data, tte_vaddr);
+ if (bigkernel) {
+ tlb_ent -= 1;
+ prom_dtlb_load(tlb_ent,
+ tte_data + 0x400000,
+ tte_vaddr + 0x400000);
+ prom_itlb_load(tlb_ent,
+ tte_data + 0x400000,
+ tte_vaddr + 0x400000);
+ }
+ sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
}
- sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
if (tlb_type == cheetah_plus) {
sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
CTX_CHEETAH_PLUS_NUC);
@@ -533,372 +585,14 @@ static void __init inherit_prom_mappings(void)
prom_printf("Remapping the kernel... ");
remap_kernel();
prom_printf("done.\n");
-
- prom_printf("Registering callbacks... ");
- register_prom_callbacks();
- prom_printf("done.\n");
-}
-
-/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
- * upwards as reserved for use by the firmware (I wonder if this
- * will be the same on Cheetah...). We use this virtual address
- * range for the VPTE table mappings of the nucleus so we need
- * to zap them when we enter the PROM. -DaveM
- */
-static void __flush_nucleus_vptes(void)
-{
- unsigned long prom_reserved_base = 0xfffffffc00000000UL;
- int i;
-
- /* Only DTLB must be checked for VPTE entries. */
- if (tlb_type == spitfire) {
- for (i = 0; i < 63; i++) {
- unsigned long tag;
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no cheetah+
- * page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- tag = spitfire_get_dtlb_tag(i);
- if (((tag & ~(PAGE_MASK)) == 0) &&
- ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- spitfire_put_dtlb_data(i, 0x0UL);
- }
- }
- } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- for (i = 0; i < 512; i++) {
- unsigned long tag = cheetah_get_dtlb_tag(i, 2);
-
- if ((tag & ~PAGE_MASK) == 0 &&
- (tag & PAGE_MASK) >= prom_reserved_base) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- cheetah_put_dtlb_data(i, 0x0UL, 2);
- }
-
- if (tlb_type != cheetah_plus)
- continue;
-
- tag = cheetah_get_dtlb_tag(i, 3);
-
- if ((tag & ~PAGE_MASK) == 0 &&
- (tag & PAGE_MASK) >= prom_reserved_base) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- cheetah_put_dtlb_data(i, 0x0UL, 3);
- }
- }
- } else {
- /* Implement me :-) */
- BUG();
- }
}
-static int prom_ditlb_set;
-struct prom_tlb_entry {
- int tlb_ent;
- unsigned long tlb_tag;
- unsigned long tlb_data;
-};
-struct prom_tlb_entry prom_itlb[16], prom_dtlb[16];
-
void prom_world(int enter)
{
- unsigned long pstate;
- int i;
-
if (!enter)
set_fs((mm_segment_t) { get_thread_current_ds() });
- if (!prom_ditlb_set)
- return;
-
- /* Make sure the following runs atomically. */
- __asm__ __volatile__("flushw\n\t"
- "rdpr %%pstate, %0\n\t"
- "wrpr %0, %1, %%pstate"
- : "=r" (pstate)
- : "i" (PSTATE_IE));
-
- if (enter) {
- /* Kick out nucleus VPTEs. */
- __flush_nucleus_vptes();
-
- /* Install PROM world. */
- for (i = 0; i < 16; i++) {
- if (prom_dtlb[i].tlb_ent != -1) {
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
- "i" (ASI_DMMU));
- if (tlb_type == spitfire)
- spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
- prom_dtlb[i].tlb_data);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent,
- prom_dtlb[i].tlb_data);
- }
- if (prom_itlb[i].tlb_ent != -1) {
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : : "r" (prom_itlb[i].tlb_tag),
- "r" (TLB_TAG_ACCESS),
- "i" (ASI_IMMU));
- if (tlb_type == spitfire)
- spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
- prom_itlb[i].tlb_data);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- cheetah_put_litlb_data(prom_itlb[i].tlb_ent,
- prom_itlb[i].tlb_data);
- }
- }
- } else {
- for (i = 0; i < 16; i++) {
- if (prom_dtlb[i].tlb_ent != -1) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- if (tlb_type == spitfire)
- spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
- else
- cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
- }
- if (prom_itlb[i].tlb_ent != -1) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : : "r" (TLB_TAG_ACCESS),
- "i" (ASI_IMMU));
- if (tlb_type == spitfire)
- spitfire_put_itlb_data(prom_itlb[i].tlb_ent, 0x0UL);
- else
- cheetah_put_litlb_data(prom_itlb[i].tlb_ent, 0x0UL);
- }
- }
- }
- __asm__ __volatile__("wrpr %0, 0, %%pstate"
- : : "r" (pstate));
-}
-
-void inherit_locked_prom_mappings(int save_p)
-{
- int i;
- int dtlb_seen = 0;
- int itlb_seen = 0;
-
- /* Fucking losing PROM has more mappings in the TLB, but
- * it (conveniently) fails to mention any of these in the
- * translations property. The only ones that matter are
- * the locked PROM tlb entries, so we impose the following
- * irrecovable rule on the PROM, it is allowed 8 locked
- * entries in the ITLB and 8 in the DTLB.
- *
- * Supposedly the upper 16GB of the address space is
- * reserved for OBP, BUT I WISH THIS WAS DOCUMENTED
- * SOMEWHERE!!!!!!!!!!!!!!!!! Furthermore the entire interface
- * used between the client program and the firmware on sun5
- * systems to coordinate mmu mappings is also COMPLETELY
- * UNDOCUMENTED!!!!!! Thanks S(t)un!
- */
- if (save_p) {
- for (i = 0; i < 16; i++) {
- prom_itlb[i].tlb_ent = -1;
- prom_dtlb[i].tlb_ent = -1;
- }
- }
- if (tlb_type == spitfire) {
- int high = sparc64_highest_unlocked_tlb_ent;
- for (i = 0; i <= high; i++) {
- unsigned long data;
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no cheetah+
- * page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- data = spitfire_get_dtlb_data(i);
- if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
- unsigned long tag;
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no
- * cheetah+ page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- tag = spitfire_get_dtlb_tag(i);
- if (save_p) {
- prom_dtlb[dtlb_seen].tlb_ent = i;
- prom_dtlb[dtlb_seen].tlb_tag = tag;
- prom_dtlb[dtlb_seen].tlb_data = data;
- }
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- spitfire_put_dtlb_data(i, 0x0UL);
-
- dtlb_seen++;
- if (dtlb_seen > 15)
- break;
- }
- }
-
- for (i = 0; i < high; i++) {
- unsigned long data;
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no
- * cheetah+ page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- data = spitfire_get_itlb_data(i);
- if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
- unsigned long tag;
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no
- * cheetah+ page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- tag = spitfire_get_itlb_tag(i);
- if (save_p) {
- prom_itlb[itlb_seen].tlb_ent = i;
- prom_itlb[itlb_seen].tlb_tag = tag;
- prom_itlb[itlb_seen].tlb_data = data;
- }
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
- spitfire_put_itlb_data(i, 0x0UL);
-
- itlb_seen++;
- if (itlb_seen > 15)
- break;
- }
- }
- } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- int high = sparc64_highest_unlocked_tlb_ent;
-
- for (i = 0; i <= high; i++) {
- unsigned long data;
-
- data = cheetah_get_ldtlb_data(i);
- if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
- unsigned long tag;
-
- tag = cheetah_get_ldtlb_tag(i);
- if (save_p) {
- prom_dtlb[dtlb_seen].tlb_ent = i;
- prom_dtlb[dtlb_seen].tlb_tag = tag;
- prom_dtlb[dtlb_seen].tlb_data = data;
- }
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- cheetah_put_ldtlb_data(i, 0x0UL);
-
- dtlb_seen++;
- if (dtlb_seen > 15)
- break;
- }
- }
-
- for (i = 0; i < high; i++) {
- unsigned long data;
-
- data = cheetah_get_litlb_data(i);
- if ((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
- unsigned long tag;
-
- tag = cheetah_get_litlb_tag(i);
- if (save_p) {
- prom_itlb[itlb_seen].tlb_ent = i;
- prom_itlb[itlb_seen].tlb_tag = tag;
- prom_itlb[itlb_seen].tlb_data = data;
- }
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
- cheetah_put_litlb_data(i, 0x0UL);
-
- itlb_seen++;
- if (itlb_seen > 15)
- break;
- }
- }
- } else {
- /* Implement me :-) */
- BUG();
- }
- if (save_p)
- prom_ditlb_set = 1;
-}
-
-/* Give PROM back his world, done during reboots... */
-void prom_reload_locked(void)
-{
- int i;
-
- for (i = 0; i < 16; i++) {
- if (prom_dtlb[i].tlb_ent != -1) {
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
- "i" (ASI_DMMU));
- if (tlb_type == spitfire)
- spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
- prom_dtlb[i].tlb_data);
- else if (tlb_type == cheetah || tlb_type == cheetah_plus)
- cheetah_put_ldtlb_data(prom_dtlb[i].tlb_ent,
- prom_dtlb[i].tlb_data);
- }
-
- if (prom_itlb[i].tlb_ent != -1) {
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : : "r" (prom_itlb[i].tlb_tag),
- "r" (TLB_TAG_ACCESS),
- "i" (ASI_IMMU));
- if (tlb_type == spitfire)
- spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
- prom_itlb[i].tlb_data);
- else
- cheetah_put_litlb_data(prom_itlb[i].tlb_ent,
- prom_itlb[i].tlb_data);
- }
- }
+ __asm__ __volatile__("flushw");
}
#ifdef DCACHE_ALIASING_POSSIBLE
@@ -914,7 +608,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
if (++n >= 512)
break;
}
- } else {
+ } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
start = __pa(start);
end = __pa(end);
for (va = start; va < end; va += 32)
@@ -927,63 +621,6 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
}
#endif /* DCACHE_ALIASING_POSSIBLE */
-/* If not locked, zap it. */
-void __flush_tlb_all(void)
-{
- unsigned long pstate;
- int i;
-
- __asm__ __volatile__("flushw\n\t"
- "rdpr %%pstate, %0\n\t"
- "wrpr %0, %1, %%pstate"
- : "=r" (pstate)
- : "i" (PSTATE_IE));
- if (tlb_type == spitfire) {
- for (i = 0; i < 64; i++) {
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no
- * cheetah+ page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- if (!(spitfire_get_dtlb_data(i) & _PAGE_L)) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- spitfire_put_dtlb_data(i, 0x0UL);
- }
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no
- * cheetah+ page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- if (!(spitfire_get_itlb_data(i) & _PAGE_L)) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
- spitfire_put_itlb_data(i, 0x0UL);
- }
- }
- } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- cheetah_flush_dtlb_all();
- cheetah_flush_itlb_all();
- }
- __asm__ __volatile__("wrpr %0, 0, %%pstate"
- : : "r" (pstate));
-}
-
/* Caller does TLB context flushing on local CPU if necessary.
* The caller also ensures that CTX_VALID(mm->context) is false.
*
@@ -991,17 +628,21 @@ void __flush_tlb_all(void)
* let the user have CTX 0 (nucleus) or we ever use a CTX
* version of zero (and thus NO_CONTEXT would not be caught
* by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
*/
void get_new_mmu_context(struct mm_struct *mm)
{
unsigned long ctx, new_ctx;
unsigned long orig_pgsz_bits;
-
+ unsigned long flags;
+ int new_version;
- spin_lock(&ctx_alloc_lock);
+ spin_lock_irqsave(&ctx_alloc_lock, flags);
orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+ new_version = 0;
if (new_ctx >= (1 << CTX_NR_BITS)) {
new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
if (new_ctx >= ctx) {
@@ -1024,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
mmu_context_bmap[i + 2] = 0;
mmu_context_bmap[i + 3] = 0;
}
+ new_version = 1;
goto out;
}
}
@@ -1032,79 +674,10 @@ void get_new_mmu_context(struct mm_struct *mm)
out:
tlb_context_cache = new_ctx;
mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
- spin_unlock(&ctx_alloc_lock);
-}
-
-#ifndef CONFIG_SMP
-struct pgtable_cache_struct pgt_quicklists;
-#endif
-
-/* OK, we have to color these pages. The page tables are accessed
- * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S
- * code, as well as by PAGE_OFFSET range direct-mapped addresses by
- * other parts of the kernel. By coloring, we make sure that the tlbmiss
- * fast handlers do not get data from old/garbage dcache lines that
- * correspond to an old/stale virtual address (user/kernel) that
- * previously mapped the pagetable page while accessing vpte range
- * addresses. The idea is that if the vpte color and PAGE_OFFSET range
- * color is the same, then when the kernel initializes the pagetable
- * using the later address range, accesses with the first address
- * range will see the newly initialized data rather than the garbage.
- */
-#ifdef DCACHE_ALIASING_POSSIBLE
-#define DC_ALIAS_SHIFT 1
-#else
-#define DC_ALIAS_SHIFT 0
-#endif
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
- struct page *page;
- unsigned long color;
-
- {
- pte_t *ptep = pte_alloc_one_fast(mm, address);
-
- if (ptep)
- return ptep;
- }
+ spin_unlock_irqrestore(&ctx_alloc_lock, flags);
- color = VPTE_COLOR(address);
- page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, DC_ALIAS_SHIFT);
- if (page) {
- unsigned long *to_free;
- unsigned long paddr;
- pte_t *pte;
-
-#ifdef DCACHE_ALIASING_POSSIBLE
- set_page_count(page, 1);
- ClearPageCompound(page);
-
- set_page_count((page + 1), 1);
- ClearPageCompound(page + 1);
-#endif
- paddr = (unsigned long) page_address(page);
- memset((char *)paddr, 0, (PAGE_SIZE << DC_ALIAS_SHIFT));
-
- if (!color) {
- pte = (pte_t *) paddr;
- to_free = (unsigned long *) (paddr + PAGE_SIZE);
- } else {
- pte = (pte_t *) (paddr + PAGE_SIZE);
- to_free = (unsigned long *) paddr;
- }
-
-#ifdef DCACHE_ALIASING_POSSIBLE
- /* Now free the other one up, adjust cache size. */
- preempt_disable();
- *to_free = (unsigned long) pte_quicklist[color ^ 0x1];
- pte_quicklist[color ^ 0x1] = to_free;
- pgtable_cache_size++;
- preempt_enable();
-#endif
-
- return pte;
- }
- return NULL;
+ if (unlikely(new_version))
+ smp_new_mmu_context_version();
}
void sparc_ultra_dump_itlb(void)
@@ -1196,9 +769,78 @@ void sparc_ultra_dump_dtlb(void)
extern unsigned long cmdline_memory_size;
-unsigned long __init bootmem_init(unsigned long *pages_avail)
+/* Find a free area for the bootmem map, avoiding the kernel image
+ * and the initial ramdisk.
+ */
+static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long avoid_start, avoid_end, bootmap_size;
+ int i;
+
+ bootmap_size = ((end_pfn - start_pfn) + 7) / 8;
+ bootmap_size = ALIGN(bootmap_size, sizeof(long));
+
+ avoid_start = avoid_end = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ avoid_start = initrd_start;
+ avoid_end = PAGE_ALIGN(initrd_end);
+#endif
+
+#ifdef CONFIG_DEBUG_BOOTMEM
+ prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n",
+ kern_base, PAGE_ALIGN(kern_base + kern_size),
+ avoid_start, avoid_end);
+#endif
+ for (i = 0; i < pavail_ents; i++) {
+ unsigned long start, end;
+
+ start = pavail[i].phys_addr;
+ end = start + pavail[i].reg_size;
+
+ while (start < end) {
+ if (start >= kern_base &&
+ start < PAGE_ALIGN(kern_base + kern_size)) {
+ start = PAGE_ALIGN(kern_base + kern_size);
+ continue;
+ }
+ if (start >= avoid_start && start < avoid_end) {
+ start = avoid_end;
+ continue;
+ }
+
+ if ((end - start) < bootmap_size)
+ break;
+
+ if (start < kern_base &&
+ (start + bootmap_size) > kern_base) {
+ start = PAGE_ALIGN(kern_base + kern_size);
+ continue;
+ }
+
+ if (start < avoid_start &&
+ (start + bootmap_size) > avoid_start) {
+ start = avoid_end;
+ continue;
+ }
+
+ /* OK, it doesn't overlap anything, use it. */
+#ifdef CONFIG_DEBUG_BOOTMEM
+ prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n",
+ start >> PAGE_SHIFT, start);
+#endif
+ return start >> PAGE_SHIFT;
+ }
+ }
+
+ prom_printf("Cannot find free area for bootmap, aborting.\n");
+ prom_halt();
+}
+
+static unsigned long __init bootmem_init(unsigned long *pages_avail,
+ unsigned long phys_base)
{
- unsigned long bootmap_size, start_pfn, end_pfn;
+ unsigned long bootmap_size, end_pfn;
unsigned long end_of_phys_memory = 0UL;
unsigned long bootmap_pfn, bytes_avail, size;
int i;
@@ -1236,14 +878,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
*pages_avail = bytes_avail >> PAGE_SHIFT;
- /* Start with page aligned address of last symbol in kernel
- * image. The kernel is hard mapped below PAGE_OFFSET in a
- * 4MB locked TLB translation.
- */
- start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT;
-
- bootmap_pfn = start_pfn;
-
end_pfn = end_of_phys_memory >> PAGE_SHIFT;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -1260,23 +894,22 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
"(0x%016lx > 0x%016lx)\ndisabling initrd\n",
initrd_end, end_of_phys_memory);
initrd_start = 0;
- }
- if (initrd_start) {
- if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
- initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
- bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
+ initrd_end = 0;
}
}
#endif
/* Initialize the boot-time allocator. */
max_pfn = max_low_pfn = end_pfn;
- min_low_pfn = pfn_base;
+ min_low_pfn = (phys_base >> PAGE_SHIFT);
+
+ bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
#ifdef CONFIG_DEBUG_BOOTMEM
prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n",
min_low_pfn, bootmap_pfn, max_low_pfn);
#endif
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn);
+ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
+ min_low_pfn, end_pfn);
/* Now register the available physical memory with the
* allocator.
@@ -1324,9 +957,26 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+ for (i = 0; i < pavail_ents; i++) {
+ unsigned long start_pfn, end_pfn;
+
+ start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
+ end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
+#ifdef CONFIG_DEBUG_BOOTMEM
+ prom_printf("memory_present(0, %lx, %lx)\n",
+ start_pfn, end_pfn);
+#endif
+ memory_present(0, start_pfn, end_pfn);
+ }
+
+ sparse_init();
+
return end_pfn;
}
+static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
+static int pall_ents __initdata;
+
#ifdef CONFIG_DEBUG_PAGEALLOC
static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot)
{
@@ -1382,14 +1032,44 @@ static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend,
return alloc_bytes;
}
-static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
-static int pall_ents __initdata;
-
extern unsigned int kvmap_linear_patch[1];
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
+static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+{
+ const unsigned long shift_256MB = 28;
+ const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
+ const unsigned long size_256MB = (1UL << shift_256MB);
+
+ while (start < end) {
+ long remains;
+
+ remains = end - start;
+ if (remains < size_256MB)
+ break;
+
+ if (start & mask_256MB) {
+ start = (start + size_256MB) & ~mask_256MB;
+ continue;
+ }
+
+ while (remains >= size_256MB) {
+ unsigned long index = start >> shift_256MB;
+
+ __set_bit(index, kpte_linear_bitmap);
+
+ start += size_256MB;
+ remains -= size_256MB;
+ }
+ }
+}
static void __init kernel_physical_mapping_init(void)
{
- unsigned long i, mem_alloced = 0UL;
+ unsigned long i;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ unsigned long mem_alloced = 0UL;
+#endif
read_obp_memory("reg", &pall[0], &pall_ents);
@@ -1398,10 +1078,16 @@ static void __init kernel_physical_mapping_init(void)
phys_start = pall[i].phys_addr;
phys_end = phys_start + pall[i].reg_size;
+
+ mark_kpte_bitmap(phys_start, phys_end);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
mem_alloced += kernel_map_range(phys_start, phys_end,
PAGE_KERNEL);
+#endif
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
printk("Allocated %ld bytes for kernel page tables.\n",
mem_alloced);
@@ -1409,8 +1095,10 @@ static void __init kernel_physical_mapping_init(void)
flushi(&kvmap_linear_patch[0]);
__flush_tlb_all();
+#endif
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
@@ -1419,6 +1107,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
kernel_map_range(phys_start, phys_end,
(enable ? PAGE_KERNEL : __pgprot(0)));
+ flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+ PAGE_OFFSET + phys_end);
+
/* we should perform an IPI and flush all tlbs,
* but that can deadlock->flush only current cpu.
*/
@@ -1439,18 +1130,150 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
return ~0UL;
}
+static void __init tsb_phys_patch(void)
+{
+ struct tsb_ldquad_phys_patch_entry *pquad;
+ struct tsb_phys_patch_entry *p;
+
+ pquad = &__tsb_ldquad_phys_patch;
+ while (pquad < &__tsb_ldquad_phys_patch_end) {
+ unsigned long addr = pquad->addr;
+
+ if (tlb_type == hypervisor)
+ *(unsigned int *) addr = pquad->sun4v_insn;
+ else
+ *(unsigned int *) addr = pquad->sun4u_insn;
+ wmb();
+ __asm__ __volatile__("flush %0"
+ : /* no outputs */
+ : "r" (addr));
+
+ pquad++;
+ }
+
+ p = &__tsb_phys_patch;
+ while (p < &__tsb_phys_patch_end) {
+ unsigned long addr = p->addr;
+
+ *(unsigned int *) addr = p->insn;
+ wmb();
+ __asm__ __volatile__("flush %0"
+ : /* no outputs */
+ : "r" (addr));
+
+ p++;
+ }
+}
+
+/* Don't mark as init, we give this to the Hypervisor. */
+static struct hv_tsb_descr ktsb_descr[2];
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static void __init sun4v_ktsb_init(void)
+{
+ unsigned long ktsb_pa;
+
+ /* First KTSB for PAGE_SIZE mappings. */
+ ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
+
+ switch (PAGE_SIZE) {
+ case 8 * 1024:
+ default:
+ ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K;
+ ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K;
+ break;
+
+ case 64 * 1024:
+ ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K;
+ ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K;
+ break;
+
+ case 512 * 1024:
+ ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K;
+ ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K;
+ break;
+
+ case 4 * 1024 * 1024:
+ ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB;
+ ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB;
+ break;
+ };
+
+ ktsb_descr[0].assoc = 1;
+ ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES;
+ ktsb_descr[0].ctx_idx = 0;
+ ktsb_descr[0].tsb_base = ktsb_pa;
+ ktsb_descr[0].resv = 0;
+
+ /* Second KTSB for 4MB/256MB mappings. */
+ ktsb_pa = (kern_base +
+ ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
+
+ ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
+ ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
+ HV_PGSZ_MASK_256MB);
+ ktsb_descr[1].assoc = 1;
+ ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
+ ktsb_descr[1].ctx_idx = 0;
+ ktsb_descr[1].tsb_base = ktsb_pa;
+ ktsb_descr[1].resv = 0;
+}
+
+void __cpuinit sun4v_ktsb_register(void)
+{
+ register unsigned long func asm("%o5");
+ register unsigned long arg0 asm("%o0");
+ register unsigned long arg1 asm("%o1");
+ unsigned long pa;
+
+ pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
+
+ func = HV_FAST_MMU_TSB_CTX0;
+ arg0 = 2;
+ arg1 = pa;
+ __asm__ __volatile__("ta %6"
+ : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
+ : "0" (func), "1" (arg0), "2" (arg1),
+ "i" (HV_FAST_TRAP));
+}
+
/* paging_init() sets up the page tables */
extern void cheetah_ecache_flush_init(void);
+extern void sun4v_patch_tlb_handlers(void);
static unsigned long last_valid_pfn;
pgd_t swapper_pg_dir[2048];
+static void sun4u_pgprot_init(void);
+static void sun4v_pgprot_init(void);
+
void __init paging_init(void)
{
- unsigned long end_pfn, pages_avail, shift;
+ unsigned long end_pfn, pages_avail, shift, phys_base;
unsigned long real_end, i;
+ kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
+ kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
+
+ /* Invalidate both kernel TSBs. */
+ memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
+ memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+
+ if (tlb_type == hypervisor)
+ sun4v_pgprot_init();
+ else
+ sun4u_pgprot_init();
+
+ if (tlb_type == cheetah_plus ||
+ tlb_type == hypervisor)
+ tsb_phys_patch();
+
+ if (tlb_type == hypervisor) {
+ sun4v_patch_tlb_handlers();
+ sun4v_ktsb_init();
+ }
+
/* Find available physical memory... */
read_obp_memory("available", &pavail[0], &pavail_ents);
@@ -1458,11 +1281,6 @@ void __init paging_init(void)
for (i = 0; i < pavail_ents; i++)
phys_base = min(phys_base, pavail[i].phys_addr);
- pfn_base = phys_base >> PAGE_SHIFT;
-
- kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
- kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
-
set_bit(0, mmu_context_bmap);
shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
@@ -1486,47 +1304,38 @@ void __init paging_init(void)
pud_set(pud_offset(&swapper_pg_dir[0], 0),
swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
- swapper_pgd_zero = pgd_val(swapper_pg_dir[0]);
-
inherit_prom_mappings();
- /* Ok, we can use our TLB miss and window trap handlers safely.
- * We need to do a quick peek here to see if we are on StarFire
- * or not, so setup_tba can setup the IRQ globals correctly (it
- * needs to get the hard smp processor id correctly).
- */
- {
- extern void setup_tba(int);
- setup_tba(this_is_starfire);
- }
-
- inherit_locked_prom_mappings(1);
+ /* Ok, we can use our TLB miss and window trap handlers safely. */
+ setup_tba();
__flush_tlb_all();
+ if (tlb_type == hypervisor)
+ sun4v_ktsb_register();
+
/* Setup bootmem... */
pages_avail = 0;
- last_valid_pfn = end_pfn = bootmem_init(&pages_avail);
+ last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
+
+ max_mapnr = last_valid_pfn;
-#ifdef CONFIG_DEBUG_PAGEALLOC
kernel_physical_mapping_init();
-#endif
{
unsigned long zones_size[MAX_NR_ZONES];
unsigned long zholes_size[MAX_NR_ZONES];
- unsigned long npages;
int znum;
for (znum = 0; znum < MAX_NR_ZONES; znum++)
zones_size[znum] = zholes_size[znum] = 0;
- npages = end_pfn - pfn_base;
- zones_size[ZONE_DMA] = npages;
- zholes_size[ZONE_DMA] = npages - pages_avail;
+ zones_size[ZONE_DMA] = end_pfn;
+ zholes_size[ZONE_DMA] = end_pfn - pages_avail;
free_area_init_node(0, &contig_page_data, zones_size,
- phys_base >> PAGE_SHIFT, zholes_size);
+ __pa(PAGE_OFFSET) >> PAGE_SHIFT,
+ zholes_size);
}
device_scan();
@@ -1596,7 +1405,6 @@ void __init mem_init(void)
taint_real_pages();
- max_mapnr = last_valid_pfn - pfn_base;
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
#ifdef CONFIG_DEBUG_BOOTMEM
@@ -1676,3 +1484,342 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
}
#endif
+
+#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
+#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
+#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
+#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
+#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
+#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
+
+pgprot_t PAGE_KERNEL __read_mostly;
+EXPORT_SYMBOL(PAGE_KERNEL);
+
+pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
+pgprot_t PAGE_COPY __read_mostly;
+
+pgprot_t PAGE_SHARED __read_mostly;
+EXPORT_SYMBOL(PAGE_SHARED);
+
+pgprot_t PAGE_EXEC __read_mostly;
+unsigned long pg_iobits __read_mostly;
+
+unsigned long _PAGE_IE __read_mostly;
+
+unsigned long _PAGE_E __read_mostly;
+EXPORT_SYMBOL(_PAGE_E);
+
+unsigned long _PAGE_CACHE __read_mostly;
+EXPORT_SYMBOL(_PAGE_CACHE);
+
+static void prot_init_common(unsigned long page_none,
+ unsigned long page_shared,
+ unsigned long page_copy,
+ unsigned long page_readonly,
+ unsigned long page_exec_bit)
+{
+ PAGE_COPY = __pgprot(page_copy);
+ PAGE_SHARED = __pgprot(page_shared);
+
+ protection_map[0x0] = __pgprot(page_none);
+ protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
+ protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
+ protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
+ protection_map[0x4] = __pgprot(page_readonly);
+ protection_map[0x5] = __pgprot(page_readonly);
+ protection_map[0x6] = __pgprot(page_copy);
+ protection_map[0x7] = __pgprot(page_copy);
+ protection_map[0x8] = __pgprot(page_none);
+ protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
+ protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
+ protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
+ protection_map[0xc] = __pgprot(page_readonly);
+ protection_map[0xd] = __pgprot(page_readonly);
+ protection_map[0xe] = __pgprot(page_shared);
+ protection_map[0xf] = __pgprot(page_shared);
+}
+
+static void __init sun4u_pgprot_init(void)
+{
+ unsigned long page_none, page_shared, page_copy, page_readonly;
+ unsigned long page_exec_bit;
+
+ PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
+ _PAGE_CACHE_4U | _PAGE_P_4U |
+ __ACCESS_BITS_4U | __DIRTY_BITS_4U |
+ _PAGE_EXEC_4U);
+ PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
+ _PAGE_CACHE_4U | _PAGE_P_4U |
+ __ACCESS_BITS_4U | __DIRTY_BITS_4U |
+ _PAGE_EXEC_4U | _PAGE_L_4U);
+ PAGE_EXEC = __pgprot(_PAGE_EXEC_4U);
+
+ _PAGE_IE = _PAGE_IE_4U;
+ _PAGE_E = _PAGE_E_4U;
+ _PAGE_CACHE = _PAGE_CACHE_4U;
+
+ pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
+ __ACCESS_BITS_4U | _PAGE_E_4U);
+
+ kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
+ 0xfffff80000000000;
+ kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
+ _PAGE_P_4U | _PAGE_W_4U);
+
+ /* XXX Should use 256MB on Panther. XXX */
+ kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+
+ _PAGE_SZBITS = _PAGE_SZBITS_4U;
+ _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
+ _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
+ _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
+
+
+ page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U;
+ page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
+ __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U);
+ page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
+ __ACCESS_BITS_4U | _PAGE_EXEC_4U);
+ page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
+ __ACCESS_BITS_4U | _PAGE_EXEC_4U);
+
+ page_exec_bit = _PAGE_EXEC_4U;
+
+ prot_init_common(page_none, page_shared, page_copy, page_readonly,
+ page_exec_bit);
+}
+
+static void __init sun4v_pgprot_init(void)
+{
+ unsigned long page_none, page_shared, page_copy, page_readonly;
+ unsigned long page_exec_bit;
+
+ PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
+ _PAGE_CACHE_4V | _PAGE_P_4V |
+ __ACCESS_BITS_4V | __DIRTY_BITS_4V |
+ _PAGE_EXEC_4V);
+ PAGE_KERNEL_LOCKED = PAGE_KERNEL;
+ PAGE_EXEC = __pgprot(_PAGE_EXEC_4V);
+
+ _PAGE_IE = _PAGE_IE_4V;
+ _PAGE_E = _PAGE_E_4V;
+ _PAGE_CACHE = _PAGE_CACHE_4V;
+
+ kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
+ 0xfffff80000000000;
+ kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+
+ kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
+ 0xfffff80000000000;
+ kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+
+ pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
+ __ACCESS_BITS_4V | _PAGE_E_4V);
+
+ _PAGE_SZBITS = _PAGE_SZBITS_4V;
+ _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
+ _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
+ _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
+ _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
+
+ page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
+ page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
+ page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ __ACCESS_BITS_4V | _PAGE_EXEC_4V);
+ page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
+ __ACCESS_BITS_4V | _PAGE_EXEC_4V);
+
+ page_exec_bit = _PAGE_EXEC_4V;
+
+ prot_init_common(page_none, page_shared, page_copy, page_readonly,
+ page_exec_bit);
+}
+
+unsigned long pte_sz_bits(unsigned long sz)
+{
+ if (tlb_type == hypervisor) {
+ switch (sz) {
+ case 8 * 1024:
+ default:
+ return _PAGE_SZ8K_4V;
+ case 64 * 1024:
+ return _PAGE_SZ64K_4V;
+ case 512 * 1024:
+ return _PAGE_SZ512K_4V;
+ case 4 * 1024 * 1024:
+ return _PAGE_SZ4MB_4V;
+ };
+ } else {
+ switch (sz) {
+ case 8 * 1024:
+ default:
+ return _PAGE_SZ8K_4U;
+ case 64 * 1024:
+ return _PAGE_SZ64K_4U;
+ case 512 * 1024:
+ return _PAGE_SZ512K_4U;
+ case 4 * 1024 * 1024:
+ return _PAGE_SZ4MB_4U;
+ };
+ }
+}
+
+pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size)
+{
+ pte_t pte;
+
+ pte_val(pte) = page | pgprot_val(pgprot_noncached(prot));
+ pte_val(pte) |= (((unsigned long)space) << 32);
+ pte_val(pte) |= pte_sz_bits(page_size);
+
+ return pte;
+}
+
+static unsigned long kern_large_tte(unsigned long paddr)
+{
+ unsigned long val;
+
+ val = (_PAGE_VALID | _PAGE_SZ4MB_4U |
+ _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U |
+ _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
+ if (tlb_type == hypervisor)
+ val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
+ _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
+ _PAGE_EXEC_4V | _PAGE_W_4V);
+
+ return val | paddr;
+}
+
+/*
+ * Translate PROM's mapping we capture at boot time into physical address.
+ * The second parameter is only set from prom_callback() invocations.
+ */
+unsigned long prom_virt_to_phys(unsigned long promva, int *error)
+{
+ unsigned long mask;
+ int i;
+
+ mask = _PAGE_PADDR_4U;
+ if (tlb_type == hypervisor)
+ mask = _PAGE_PADDR_4V;
+
+ for (i = 0; i < prom_trans_ents; i++) {
+ struct linux_prom_translation *p = &prom_trans[i];
+
+ if (promva >= p->virt &&
+ promva < (p->virt + p->size)) {
+ unsigned long base = p->data & mask;
+
+ if (error)
+ *error = 0;
+ return base + (promva & (8192 - 1));
+ }
+ }
+ if (error)
+ *error = 1;
+ return 0UL;
+}
+
+/* XXX We should kill off this ugly thing at so me point. XXX */
+unsigned long sun4u_get_pte(unsigned long addr)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ unsigned long mask = _PAGE_PADDR_4U;
+
+ if (tlb_type == hypervisor)
+ mask = _PAGE_PADDR_4V;
+
+ if (addr >= PAGE_OFFSET)
+ return addr & mask;
+
+ if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS))
+ return prom_virt_to_phys(addr, NULL);
+
+ pgdp = pgd_offset_k(addr);
+ pudp = pud_offset(pgdp, addr);
+ pmdp = pmd_offset(pudp, addr);
+ ptep = pte_offset_kernel(pmdp, addr);
+
+ return pte_val(*ptep) & mask;
+}
+
+/* If not locked, zap it. */
+void __flush_tlb_all(void)
+{
+ unsigned long pstate;
+ int i;
+
+ __asm__ __volatile__("flushw\n\t"
+ "rdpr %%pstate, %0\n\t"
+ "wrpr %0, %1, %%pstate"
+ : "=r" (pstate)
+ : "i" (PSTATE_IE));
+ if (tlb_type == spitfire) {
+ for (i = 0; i < 64; i++) {
+ /* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
+ __asm__ __volatile__("stxa %0, [%1] %2\n\t"
+ "flush %%g6"
+ : /* No outputs */
+ : "r" (0),
+ "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+
+ if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) {
+ __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
+ spitfire_put_dtlb_data(i, 0x0UL);
+ }
+
+ /* Spitfire Errata #32 workaround */
+ /* NOTE: Always runs on spitfire, so no
+ * cheetah+ page size encodings.
+ */
+ __asm__ __volatile__("stxa %0, [%1] %2\n\t"
+ "flush %%g6"
+ : /* No outputs */
+ : "r" (0),
+ "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+
+ if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) {
+ __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+ "membar #Sync"
+ : /* no outputs */
+ : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
+ spitfire_put_itlb_data(i, 0x0UL);
+ }
+ }
+ } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+ cheetah_flush_dtlb_all();
+ cheetah_flush_itlb_all();
+ }
+ __asm__ __volatile__("wrpr %0, 0, %%pstate"
+ : : "r" (pstate));
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void online_page(struct page *page)
+{
+ ClearPageReserved(page);
+ set_page_count(page, 0);
+ free_cold_page(page);
+ totalram_pages++;
+ num_physpages++;
+}
+
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662b..a079cf42505e 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
if (mp->tlb_nr) {
+ flush_tsb_user(mp);
+
if (CTX_VALID(mp->mm->context)) {
#ifdef CONFIG_SMP
smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -47,7 +49,8 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
if (pte_exec(orig))
vaddr |= 0x1UL;
- if (pte_dirty(orig)) {
+ if (tlb_type != hypervisor &&
+ pte_dirty(orig)) {
unsigned long paddr, pfn = pte_pfn(orig);
struct address_space *mapping;
struct page *page;
@@ -89,62 +92,3 @@ no_cache_flush:
if (nr >= TLB_BATCH_NR)
flush_tlb_pending();
}
-
-void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
- struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
- unsigned long nr = mp->tlb_nr;
- long s = start, e = end, vpte_base;
-
- if (mp->fullmm)
- return;
-
- /* If start is greater than end, that is a real problem. */
- BUG_ON(start > end);
-
- /* However, straddling the VA space hole is quite normal. */
- s &= PMD_MASK;
- e = (e + PMD_SIZE - 1) & PMD_MASK;
-
- vpte_base = (tlb_type == spitfire ?
- VPTE_BASE_SPITFIRE :
- VPTE_BASE_CHEETAH);
-
- if (unlikely(nr != 0 && mm != mp->mm)) {
- flush_tlb_pending();
- nr = 0;
- }
-
- if (nr == 0)
- mp->mm = mm;
-
- start = vpte_base + (s >> (PAGE_SHIFT - 3));
- end = vpte_base + (e >> (PAGE_SHIFT - 3));
-
- /* If the request straddles the VA space hole, we
- * need to swap start and end. The reason this
- * occurs is that "vpte_base" is the center of
- * the linear page table mapping area. Thus,
- * high addresses with the sign bit set map to
- * addresses below vpte_base and non-sign bit
- * addresses map to addresses above vpte_base.
- */
- if (end < start) {
- unsigned long tmp = start;
-
- start = end;
- end = tmp;
- }
-
- while (start < end) {
- mp->vaddrs[nr] = start;
- mp->tlb_nr = ++nr;
- if (nr >= TLB_BATCH_NR) {
- flush_tlb_pending();
- nr = 0;
- }
- start += PAGE_SIZE;
- }
- if (nr)
- flush_tlb_pending();
-}
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..b2064e2a44d6
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,440 @@
+/* arch/sparc64/mm/tsb.c
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/tsb.h>
+#include <asm/oplib.h>
+
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries)
+{
+ vaddr >>= PAGE_SHIFT;
+ return vaddr & (nentries - 1);
+}
+
+static inline int tag_compare(unsigned long tag, unsigned long vaddr)
+{
+ return (tag == (vaddr >> 22));
+}
+
+/* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+ */
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long v;
+
+ for (v = start; v < end; v += PAGE_SIZE) {
+ unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
+ struct tsb *ent = &swapper_tsb[hash];
+
+ if (tag_compare(ent->tag, v)) {
+ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+ membar_storeload_storestore();
+ }
+ }
+}
+
+void flush_tsb_user(struct mmu_gather *mp)
+{
+ struct mm_struct *mm = mp->mm;
+ unsigned long nentries, base, flags;
+ struct tsb *tsb;
+ int i;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+ tsb = mm->context.tsb;
+ nentries = mm->context.tsb_nentries;
+
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+ base = __pa(tsb);
+ else
+ base = (unsigned long) tsb;
+
+ for (i = 0; i < mp->tlb_nr; i++) {
+ unsigned long v = mp->vaddrs[i];
+ unsigned long tag, ent, hash;
+
+ v &= ~0x1UL;
+
+ hash = tsb_hash(v, nentries);
+ ent = base + (hash * sizeof(struct tsb));
+ tag = (v >> 22UL);
+
+ tsb_flush(ent, tag);
+ }
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+
+static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
+{
+ unsigned long tsb_reg, base, tsb_paddr;
+ unsigned long page_sz, tte;
+
+ mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb);
+
+ base = TSBMAP_BASE;
+ tte = pgprot_val(PAGE_KERNEL_LOCKED);
+ tsb_paddr = __pa(mm->context.tsb);
+ BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
+
+ /* Use the smallest page size that can map the whole TSB
+ * in one TLB entry.
+ */
+ switch (tsb_bytes) {
+ case 8192 << 0:
+ tsb_reg = 0x0UL;
+#ifdef DCACHE_ALIASING_POSSIBLE
+ base += (tsb_paddr & 8192);
+#endif
+ page_sz = 8192;
+ break;
+
+ case 8192 << 1:
+ tsb_reg = 0x1UL;
+ page_sz = 64 * 1024;
+ break;
+
+ case 8192 << 2:
+ tsb_reg = 0x2UL;
+ page_sz = 64 * 1024;
+ break;
+
+ case 8192 << 3:
+ tsb_reg = 0x3UL;
+ page_sz = 64 * 1024;
+ break;
+
+ case 8192 << 4:
+ tsb_reg = 0x4UL;
+ page_sz = 512 * 1024;
+ break;
+
+ case 8192 << 5:
+ tsb_reg = 0x5UL;
+ page_sz = 512 * 1024;
+ break;
+
+ case 8192 << 6:
+ tsb_reg = 0x6UL;
+ page_sz = 512 * 1024;
+ break;
+
+ case 8192 << 7:
+ tsb_reg = 0x7UL;
+ page_sz = 4 * 1024 * 1024;
+ break;
+
+ default:
+ BUG();
+ };
+ tte |= pte_sz_bits(page_sz);
+
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
+ /* Physical mapping, no locked TLB entry for TSB. */
+ tsb_reg |= tsb_paddr;
+
+ mm->context.tsb_reg_val = tsb_reg;
+ mm->context.tsb_map_vaddr = 0;
+ mm->context.tsb_map_pte = 0;
+ } else {
+ tsb_reg |= base;
+ tsb_reg |= (tsb_paddr & (page_sz - 1UL));
+ tte |= (tsb_paddr & ~(page_sz - 1UL));
+
+ mm->context.tsb_reg_val = tsb_reg;
+ mm->context.tsb_map_vaddr = base;
+ mm->context.tsb_map_pte = tte;
+ }
+
+ /* Setup the Hypervisor TSB descriptor. */
+ if (tlb_type == hypervisor) {
+ struct hv_tsb_descr *hp = &mm->context.tsb_descr;
+
+ switch (PAGE_SIZE) {
+ case 8192:
+ default:
+ hp->pgsz_idx = HV_PGSZ_IDX_8K;
+ break;
+
+ case 64 * 1024:
+ hp->pgsz_idx = HV_PGSZ_IDX_64K;
+ break;
+
+ case 512 * 1024:
+ hp->pgsz_idx = HV_PGSZ_IDX_512K;
+ break;
+
+ case 4 * 1024 * 1024:
+ hp->pgsz_idx = HV_PGSZ_IDX_4MB;
+ break;
+ };
+ hp->assoc = 1;
+ hp->num_ttes = tsb_bytes / 16;
+ hp->ctx_idx = 0;
+ switch (PAGE_SIZE) {
+ case 8192:
+ default:
+ hp->pgsz_mask = HV_PGSZ_MASK_8K;
+ break;
+
+ case 64 * 1024:
+ hp->pgsz_mask = HV_PGSZ_MASK_64K;
+ break;
+
+ case 512 * 1024:
+ hp->pgsz_mask = HV_PGSZ_MASK_512K;
+ break;
+
+ case 4 * 1024 * 1024:
+ hp->pgsz_mask = HV_PGSZ_MASK_4MB;
+ break;
+ };
+ hp->tsb_base = tsb_paddr;
+ hp->resv = 0;
+ }
+}
+
+static kmem_cache_t *tsb_caches[8] __read_mostly;
+
+static const char *tsb_cache_names[8] = {
+ "tsb_8KB",
+ "tsb_16KB",
+ "tsb_32KB",
+ "tsb_64KB",
+ "tsb_128KB",
+ "tsb_256KB",
+ "tsb_512KB",
+ "tsb_1MB",
+};
+
+void __init tsb_cache_init(void)
+{
+ unsigned long i;
+
+ for (i = 0; i < 8; i++) {
+ unsigned long size = 8192 << i;
+ const char *name = tsb_cache_names[i];
+
+ tsb_caches[i] = kmem_cache_create(name,
+ size, size,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!tsb_caches[i]) {
+ prom_printf("Could not create %s cache\n", name);
+ prom_halt();
+ }
+ }
+}
+
+/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
+ * do_sparc64_fault() invokes this routine to try and grow the TSB.
+ *
+ * When we reach the maximum TSB size supported, we stick ~0UL into
+ * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
+ * will not trigger any longer.
+ *
+ * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
+ * of two. The TSB must be aligned to it's size, so f.e. a 512K TSB
+ * must be 512K aligned. It also must be physically contiguous, so we
+ * cannot use vmalloc().
+ *
+ * The idea here is to grow the TSB when the RSS of the process approaches
+ * the number of entries that the current TSB can hold at once. Currently,
+ * we trigger when the RSS hits 3/4 of the TSB capacity.
+ */
+void tsb_grow(struct mm_struct *mm, unsigned long rss)
+{
+ unsigned long max_tsb_size = 1 * 1024 * 1024;
+ unsigned long new_size, old_size, flags;
+ struct tsb *old_tsb, *new_tsb;
+ unsigned long new_cache_index, old_cache_index;
+ unsigned long new_rss_limit;
+ gfp_t gfp_flags;
+
+ if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
+ max_tsb_size = (PAGE_SIZE << MAX_ORDER);
+
+ new_cache_index = 0;
+ for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
+ unsigned long n_entries = new_size / sizeof(struct tsb);
+
+ n_entries = (n_entries * 3) / 4;
+ if (n_entries > rss)
+ break;
+
+ new_cache_index++;
+ }
+
+ if (new_size == max_tsb_size)
+ new_rss_limit = ~0UL;
+ else
+ new_rss_limit = ((new_size / sizeof(struct tsb)) * 3) / 4;
+
+retry_tsb_alloc:
+ gfp_flags = GFP_KERNEL;
+ if (new_size > (PAGE_SIZE * 2))
+ gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
+
+ new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags);
+ if (unlikely(!new_tsb)) {
+ /* Not being able to fork due to a high-order TSB
+ * allocation failure is very bad behavior. Just back
+ * down to a 0-order allocation and force no TSB
+ * growing for this address space.
+ */
+ if (mm->context.tsb == NULL && new_cache_index > 0) {
+ new_cache_index = 0;
+ new_size = 8192;
+ new_rss_limit = ~0UL;
+ goto retry_tsb_alloc;
+ }
+
+ /* If we failed on a TSB grow, we are under serious
+ * memory pressure so don't try to grow any more.
+ */
+ if (mm->context.tsb != NULL)
+ mm->context.tsb_rss_limit = ~0UL;
+ return;
+ }
+
+ /* Mark all tags as invalid. */
+ tsb_init(new_tsb, new_size);
+
+ /* Ok, we are about to commit the changes. If we are
+ * growing an existing TSB the locking is very tricky,
+ * so WATCH OUT!
+ *
+ * We have to hold mm->context.lock while committing to the
+ * new TSB, this synchronizes us with processors in
+ * flush_tsb_user() and switch_mm() for this address space.
+ *
+ * But even with that lock held, processors run asynchronously
+ * accessing the old TSB via TLB miss handling. This is OK
+ * because those actions are just propagating state from the
+ * Linux page tables into the TSB, page table mappings are not
+ * being changed. If a real fault occurs, the processor will
+ * synchronize with us when it hits flush_tsb_user(), this is
+ * also true for the case where vmscan is modifying the page
+ * tables. The only thing we need to be careful with is to
+ * skip any locked TSB entries during copy_tsb().
+ *
+ * When we finish committing to the new TSB, we have to drop
+ * the lock and ask all other cpus running this address space
+ * to run tsb_context_switch() to see the new TSB table.
+ */
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+ old_tsb = mm->context.tsb;
+ old_cache_index = (mm->context.tsb_reg_val & 0x7UL);
+ old_size = mm->context.tsb_nentries * sizeof(struct tsb);
+
+
+ /* Handle multiple threads trying to grow the TSB at the same time.
+ * One will get in here first, and bump the size and the RSS limit.
+ * The others will get in here next and hit this check.
+ */
+ if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+
+ kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
+ return;
+ }
+
+ mm->context.tsb_rss_limit = new_rss_limit;
+
+ if (old_tsb) {
+ extern void copy_tsb(unsigned long old_tsb_base,
+ unsigned long old_tsb_size,
+ unsigned long new_tsb_base,
+ unsigned long new_tsb_size);
+ unsigned long old_tsb_base = (unsigned long) old_tsb;
+ unsigned long new_tsb_base = (unsigned long) new_tsb;
+
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
+ old_tsb_base = __pa(old_tsb_base);
+ new_tsb_base = __pa(new_tsb_base);
+ }
+ copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
+ }
+
+ mm->context.tsb = new_tsb;
+ setup_tsb_params(mm, new_size);
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+
+ /* If old_tsb is NULL, we're being invoked for the first time
+ * from init_new_context().
+ */
+ if (old_tsb) {
+ /* Reload it on the local cpu. */
+ tsb_context_switch(mm);
+
+ /* Now force other processors to do the same. */
+ smp_tsb_sync(mm);
+
+ /* Now it is safe to free the old tsb. */
+ kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
+ }
+}
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ spin_lock_init(&mm->context.lock);
+
+ mm->context.sparc64_ctx_val = 0UL;
+
+ /* copy_mm() copies over the parent's mm_struct before calling
+ * us, so we need to zero out the TSB pointer or else tsb_grow()
+ * will be confused and think there is an older TSB to free up.
+ */
+ mm->context.tsb = NULL;
+
+ /* If this is fork, inherit the parent's TSB size. We would
+ * grow it to that size on the first page fault anyways.
+ */
+ tsb_grow(mm, get_mm_rss(mm));
+
+ if (unlikely(!mm->context.tsb))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+ unsigned long flags, cache_index;
+
+ cache_index = (mm->context.tsb_reg_val & 0x7UL);
+ kmem_cache_free(tsb_caches[cache_index], mm->context.tsb);
+
+ /* We can remove these later, but for now it's useful
+ * to catch any bogus post-destroy_context() references
+ * to the TSB.
+ */
+ mm->context.tsb = NULL;
+ mm->context.tsb_reg_val = 0UL;
+
+ spin_lock_irqsave(&ctx_alloc_lock, flags);
+
+ if (CTX_VALID(mm->context)) {
+ unsigned long nr = CTX_NRBITS(mm->context);
+ mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
+ }
+
+ spin_unlock_irqrestore(&ctx_alloc_lock, flags);
+}
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa116..f8479fad4047 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -15,6 +15,7 @@
#include <asm/head.h>
#include <asm/thread_info.h>
#include <asm/cacheflush.h>
+#include <asm/hypervisor.h>
/* Basically, most of the Spitfire vs. Cheetah madness
* has to do with the fact that Cheetah does not support
@@ -29,16 +30,18 @@
.text
.align 32
.globl __flush_tlb_mm
-__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
+__flush_tlb_mm: /* 18 insns */
+ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
bne,pn %icc, __spitfire_flush_tlb_mm_slow
mov 0x50, %g3
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
+ sethi %hi(KERNBASE), %g3
+ flush %g3
retl
- flush %g6
- nop
+ nop
nop
nop
nop
@@ -51,7 +54,7 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
.align 32
.globl __flush_tlb_pending
-__flush_tlb_pending:
+__flush_tlb_pending: /* 26 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
@@ -72,7 +75,8 @@ __flush_tlb_pending:
brnz,pt %o1, 1b
nop
stxa %g2, [%o4] ASI_DMMU
- flush %g6
+ sethi %hi(KERNBASE), %o4
+ flush %o4
retl
wrpr %g7, 0x0, %pstate
nop
@@ -82,7 +86,8 @@ __flush_tlb_pending:
.align 32
.globl __flush_tlb_kernel_range
-__flush_tlb_kernel_range: /* %o0=start, %o1=end */
+__flush_tlb_kernel_range: /* 16 insns */
+ /* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
sethi %hi(PAGE_SIZE), %o4
@@ -94,8 +99,11 @@ __flush_tlb_kernel_range: /* %o0=start, %o1=end */
membar #Sync
brnz,pt %o3, 1b
sub %o3, %o4, %o3
-2: retl
- flush %g6
+2: sethi %hi(KERNBASE), %o3
+ flush %o3
+ retl
+ nop
+ nop
__spitfire_flush_tlb_mm_slow:
rdpr %pstate, %g1
@@ -105,7 +113,8 @@ __spitfire_flush_tlb_mm_slow:
stxa %g0, [%g3] ASI_IMMU_DEMAP
flush %g6
stxa %g2, [%o1] ASI_DMMU
- flush %g6
+ sethi %hi(KERNBASE), %o1
+ flush %o1
retl
wrpr %g1, 0, %pstate
@@ -181,7 +190,7 @@ __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */
.previous
/* Cheetah specific versions, patched at boot time. */
-__cheetah_flush_tlb_mm: /* 18 insns */
+__cheetah_flush_tlb_mm: /* 19 insns */
rdpr %pstate, %g7
andn %g7, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
@@ -196,12 +205,13 @@ __cheetah_flush_tlb_mm: /* 18 insns */
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
stxa %g2, [%o2] ASI_DMMU
- flush %g6
+ sethi %hi(KERNBASE), %o2
+ flush %o2
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
-__cheetah_flush_tlb_pending: /* 26 insns */
+__cheetah_flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7
sllx %o1, 3, %o1
@@ -225,7 +235,8 @@ __cheetah_flush_tlb_pending: /* 26 insns */
brnz,pt %o1, 1b
nop
stxa %g2, [%o4] ASI_DMMU
- flush %g6
+ sethi %hi(KERNBASE), %o4
+ flush %o4
wrpr %g0, 0, %tl
retl
wrpr %g7, 0x0, %pstate
@@ -245,7 +256,76 @@ __cheetah_flush_dcache_page: /* 11 insns */
nop
#endif /* DCACHE_ALIASING_POSSIBLE */
-cheetah_patch_one:
+ /* Hypervisor specific versions, patched at boot time. */
+__hypervisor_tlb_tl0_error:
+ save %sp, -192, %sp
+ mov %i0, %o0
+ call hypervisor_tlbop_error
+ mov %i1, %o1
+ ret
+ restore
+
+__hypervisor_flush_tlb_mm: /* 10 insns */
+ mov %o0, %o2 /* ARG2: mmu context */
+ mov 0, %o0 /* ARG0: CPU lists unimplemented */
+ mov 0, %o1 /* ARG1: CPU lists unimplemented */
+ mov HV_MMU_ALL, %o3 /* ARG3: flags */
+ mov HV_FAST_MMU_DEMAP_CTX, %o5
+ ta HV_FAST_TRAP
+ brnz,pn %o0, __hypervisor_tlb_tl0_error
+ mov HV_FAST_MMU_DEMAP_CTX, %o1
+ retl
+ nop
+
+__hypervisor_flush_tlb_pending: /* 16 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ sllx %o1, 3, %g1
+ mov %o2, %g2
+ mov %o0, %g3
+1: sub %g1, (1 << 3), %g1
+ ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */
+ mov %g3, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ srlx %o0, PAGE_SHIFT, %o0
+ sllx %o0, PAGE_SHIFT, %o0
+ ta HV_MMU_UNMAP_ADDR_TRAP
+ brnz,pn %o0, __hypervisor_tlb_tl0_error
+ mov HV_MMU_UNMAP_ADDR_TRAP, %o1
+ brnz,pt %g1, 1b
+ nop
+ retl
+ nop
+
+__hypervisor_flush_tlb_kernel_range: /* 16 insns */
+ /* %o0=start, %o1=end */
+ cmp %o0, %o1
+ be,pn %xcc, 2f
+ sethi %hi(PAGE_SIZE), %g3
+ mov %o0, %g1
+ sub %o1, %g1, %g2
+ sub %g2, %g3, %g2
+1: add %g1, %g2, %o0 /* ARG0: virtual address */
+ mov 0, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ ta HV_MMU_UNMAP_ADDR_TRAP
+ brnz,pn %o0, __hypervisor_tlb_tl0_error
+ mov HV_MMU_UNMAP_ADDR_TRAP, %o1
+ brnz,pt %g2, 1b
+ sub %g2, %g3, %g2
+2: retl
+ nop
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+ /* XXX Niagara and friends have an 8K cache, so no aliasing is
+ * XXX possible, but nothing explicit in the Hypervisor API
+ * XXX guarantees this.
+ */
+__hypervisor_flush_dcache_page: /* 2 insns */
+ retl
+ nop
+#endif
+
+tlb_patch_one:
1: lduw [%o1], %g1
stw %g1, [%o0]
flush %o0
@@ -264,22 +344,22 @@ cheetah_patch_cachetlbops:
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
or %o1, %lo(__cheetah_flush_tlb_mm), %o1
- call cheetah_patch_one
- mov 18, %o2
+ call tlb_patch_one
+ mov 19, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__cheetah_flush_tlb_pending), %o1
or %o1, %lo(__cheetah_flush_tlb_pending), %o1
- call cheetah_patch_one
- mov 26, %o2
+ call tlb_patch_one
+ mov 27, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
sethi %hi(__cheetah_flush_dcache_page), %o1
or %o1, %lo(__cheetah_flush_dcache_page), %o1
- call cheetah_patch_one
+ call tlb_patch_one
mov 11, %o2
#endif /* DCACHE_ALIASING_POSSIBLE */
@@ -295,16 +375,14 @@ cheetah_patch_cachetlbops:
* %g1 address arg 1 (tlb page and range flushes)
* %g7 address arg 2 (tlb range flush only)
*
- * %g6 ivector table, don't touch
- * %g2 scratch 1
- * %g3 scratch 2
- * %g4 scratch 3
- *
- * TODO: Make xcall TLB range flushes use the tricks above... -DaveM
+ * %g6 scratch 1
+ * %g2 scratch 2
+ * %g3 scratch 3
+ * %g4 scratch 4
*/
.align 32
.globl xcall_flush_tlb_mm
-xcall_flush_tlb_mm:
+xcall_flush_tlb_mm: /* 21 insns */
mov PRIMARY_CONTEXT, %g2
ldxa [%g2] ASI_DMMU, %g3
srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -316,9 +394,19 @@ xcall_flush_tlb_mm:
stxa %g0, [%g4] ASI_IMMU_DEMAP
stxa %g3, [%g2] ASI_DMMU
retry
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
.globl xcall_flush_tlb_pending
-xcall_flush_tlb_pending:
+xcall_flush_tlb_pending: /* 21 insns */
/* %g5=context, %g1=nr, %g7=vaddrs[] */
sllx %g1, 3, %g1
mov PRIMARY_CONTEXT, %g4
@@ -341,9 +429,10 @@ xcall_flush_tlb_pending:
nop
stxa %g2, [%g4] ASI_DMMU
retry
+ nop
.globl xcall_flush_tlb_kernel_range
-xcall_flush_tlb_kernel_range:
+xcall_flush_tlb_kernel_range: /* 25 insns */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
@@ -360,14 +449,30 @@ xcall_flush_tlb_kernel_range:
retry
nop
nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
/* This runs in a very controlled environment, so we do
* not need to worry about BH races etc.
*/
.globl xcall_sync_tick
xcall_sync_tick:
- rdpr %pstate, %g2
+
+661: rdpr %pstate, %g2
wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
rdpr %pil, %g2
wrpr %g0, 15, %pil
sethi %hi(109f), %g7
@@ -390,8 +495,15 @@ xcall_sync_tick:
*/
.globl xcall_report_regs
xcall_report_regs:
- rdpr %pstate, %g2
+
+661: rdpr %pstate, %g2
wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ nop
+ nop
+ .previous
+
rdpr %pil, %g2
wrpr %g0, 15, %pil
sethi %hi(109f), %g7
@@ -453,62 +565,96 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
nop
nop
- .data
-
-errata32_hwbug:
- .xword 0
-
- .text
-
- /* These two are not performance critical... */
- .globl xcall_flush_tlb_all_spitfire
-xcall_flush_tlb_all_spitfire:
- /* Spitfire Errata #32 workaround. */
- sethi %hi(errata32_hwbug), %g4
- stx %g0, [%g4 + %lo(errata32_hwbug)]
-
- clr %g2
- clr %g3
-1: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4
- and %g4, _PAGE_L, %g5
- brnz,pn %g5, 2f
- mov TLB_TAG_ACCESS, %g7
-
- stxa %g0, [%g7] ASI_DMMU
- membar #Sync
- stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS
+ /* %g5: error
+ * %g6: tlb op
+ */
+__hypervisor_tlb_xcall_error:
+ mov %g5, %g4
+ mov %g6, %g5
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o0
+ call hypervisor_tlbop_error_xcall
+ mov %l5, %o1
+ ba,a,pt %xcc, rtrap_clr_l6
+
+ .globl __hypervisor_xcall_flush_tlb_mm
+__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
+ mov %o0, %g2
+ mov %o1, %g3
+ mov %o2, %g4
+ mov %o3, %g1
+ mov %o5, %g7
+ clr %o0 /* ARG0: CPU lists unimplemented */
+ clr %o1 /* ARG1: CPU lists unimplemented */
+ mov %g5, %o2 /* ARG2: mmu context */
+ mov HV_MMU_ALL, %o3 /* ARG3: flags */
+ mov HV_FAST_MMU_DEMAP_CTX, %o5
+ ta HV_FAST_TRAP
+ mov HV_FAST_MMU_DEMAP_CTX, %g6
+ brnz,pn %o0, __hypervisor_tlb_xcall_error
+ mov %o0, %g5
+ mov %g2, %o0
+ mov %g3, %o1
+ mov %g4, %o2
+ mov %g1, %o3
+ mov %g7, %o5
membar #Sync
+ retry
- /* Spitfire Errata #32 workaround. */
- sethi %hi(errata32_hwbug), %g4
- stx %g0, [%g4 + %lo(errata32_hwbug)]
-
-2: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4
- and %g4, _PAGE_L, %g5
- brnz,pn %g5, 2f
- mov TLB_TAG_ACCESS, %g7
-
- stxa %g0, [%g7] ASI_IMMU
- membar #Sync
- stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS
+ .globl __hypervisor_xcall_flush_tlb_pending
+__hypervisor_xcall_flush_tlb_pending: /* 21 insns */
+ /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */
+ sllx %g1, 3, %g1
+ mov %o0, %g2
+ mov %o1, %g3
+ mov %o2, %g4
+1: sub %g1, (1 << 3), %g1
+ ldx [%g7 + %g1], %o0 /* ARG0: virtual address */
+ mov %g5, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ srlx %o0, PAGE_SHIFT, %o0
+ sllx %o0, PAGE_SHIFT, %o0
+ ta HV_MMU_UNMAP_ADDR_TRAP
+ mov HV_MMU_UNMAP_ADDR_TRAP, %g6
+ brnz,a,pn %o0, __hypervisor_tlb_xcall_error
+ mov %o0, %g5
+ brnz,pt %g1, 1b
+ nop
+ mov %g2, %o0
+ mov %g3, %o1
+ mov %g4, %o2
membar #Sync
-
- /* Spitfire Errata #32 workaround. */
- sethi %hi(errata32_hwbug), %g4
- stx %g0, [%g4 + %lo(errata32_hwbug)]
-
-2: add %g2, 1, %g2
- cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
- ble,pt %icc, 1b
- sll %g2, 3, %g3
- flush %g6
retry
- .globl xcall_flush_tlb_all_cheetah
-xcall_flush_tlb_all_cheetah:
- mov 0x80, %g2
- stxa %g0, [%g2] ASI_DMMU_DEMAP
- stxa %g0, [%g2] ASI_IMMU_DEMAP
+ .globl __hypervisor_xcall_flush_tlb_kernel_range
+__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
+ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
+ sethi %hi(PAGE_SIZE - 1), %g2
+ or %g2, %lo(PAGE_SIZE - 1), %g2
+ andn %g1, %g2, %g1
+ andn %g7, %g2, %g7
+ sub %g7, %g1, %g3
+ add %g2, 1, %g2
+ sub %g3, %g2, %g3
+ mov %o0, %g2
+ mov %o1, %g4
+ mov %o2, %g7
+1: add %g1, %g3, %o0 /* ARG0: virtual address */
+ mov 0, %o1 /* ARG1: mmu context */
+ mov HV_MMU_ALL, %o2 /* ARG2: flags */
+ ta HV_MMU_UNMAP_ADDR_TRAP
+ mov HV_MMU_UNMAP_ADDR_TRAP, %g6
+ brnz,pn %o0, __hypervisor_tlb_xcall_error
+ mov %o0, %g5
+ sethi %hi(PAGE_SIZE), %o2
+ brnz,pt %g3, 1b
+ sub %g3, %o2, %g3
+ mov %g2, %o0
+ mov %g4, %o1
+ mov %g7, %o2
+ membar #Sync
retry
/* These just get rescheduled to PIL vectors. */
@@ -527,4 +673,70 @@ xcall_capture:
wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
retry
+ .globl xcall_new_mmu_context_version
+xcall_new_mmu_context_version:
+ wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
+ retry
+
#endif /* CONFIG_SMP */
+
+
+ .globl hypervisor_patch_cachetlbops
+hypervisor_patch_cachetlbops:
+ save %sp, -128, %sp
+
+ sethi %hi(__flush_tlb_mm), %o0
+ or %o0, %lo(__flush_tlb_mm), %o0
+ sethi %hi(__hypervisor_flush_tlb_mm), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
+ call tlb_patch_one
+ mov 10, %o2
+
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__hypervisor_flush_tlb_pending), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
+ call tlb_patch_one
+ mov 16, %o2
+
+ sethi %hi(__flush_tlb_kernel_range), %o0
+ or %o0, %lo(__flush_tlb_kernel_range), %o0
+ sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
+ or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
+ call tlb_patch_one
+ mov 16, %o2
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+ sethi %hi(__flush_dcache_page), %o0
+ or %o0, %lo(__flush_dcache_page), %o0
+ sethi %hi(__hypervisor_flush_dcache_page), %o1
+ or %o1, %lo(__hypervisor_flush_dcache_page), %o1
+ call tlb_patch_one
+ mov 2, %o2
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+#ifdef CONFIG_SMP
+ sethi %hi(xcall_flush_tlb_mm), %o0
+ or %o0, %lo(xcall_flush_tlb_mm), %o0
+ sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
+ or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
+ call tlb_patch_one
+ mov 21, %o2
+
+ sethi %hi(xcall_flush_tlb_pending), %o0
+ or %o0, %lo(xcall_flush_tlb_pending), %o0
+ sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1
+ or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1
+ call tlb_patch_one
+ mov 21, %o2
+
+ sethi %hi(xcall_flush_tlb_kernel_range), %o0
+ or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
+ sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+ or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+ call tlb_patch_one
+ mov 25, %o2
+#endif /* CONFIG_SMP */
+
+ ret
+ restore
diff --git a/arch/sparc64/prom/cif.S b/arch/sparc64/prom/cif.S
index 29d0ae74aed8..5f27ad779c0c 100644
--- a/arch/sparc64/prom/cif.S
+++ b/arch/sparc64/prom/cif.S
@@ -1,10 +1,12 @@
/* cif.S: PROM entry/exit assembler trampolines.
*
- * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 2005, 2006 David S. Miller <davem@davemloft.net>
*/
#include <asm/pstate.h>
+#include <asm/cpudata.h>
+#include <asm/thread_info.h>
.text
.globl prom_cif_interface
@@ -12,78 +14,16 @@ prom_cif_interface:
sethi %hi(p1275buf), %o0
or %o0, %lo(p1275buf), %o0
ldx [%o0 + 0x010], %o1 ! prom_cif_stack
- save %o1, -0x190, %sp
+ save %o1, -192, %sp
ldx [%i0 + 0x008], %l2 ! prom_cif_handler
- rdpr %pstate, %l4
- wrpr %g0, 0x15, %pstate ! save alternate globals
- stx %g1, [%sp + 2047 + 0x0b0]
- stx %g2, [%sp + 2047 + 0x0b8]
- stx %g3, [%sp + 2047 + 0x0c0]
- stx %g4, [%sp + 2047 + 0x0c8]
- stx %g5, [%sp + 2047 + 0x0d0]
- stx %g6, [%sp + 2047 + 0x0d8]
- stx %g7, [%sp + 2047 + 0x0e0]
- wrpr %g0, 0x814, %pstate ! save interrupt globals
- stx %g1, [%sp + 2047 + 0x0e8]
- stx %g2, [%sp + 2047 + 0x0f0]
- stx %g3, [%sp + 2047 + 0x0f8]
- stx %g4, [%sp + 2047 + 0x100]
- stx %g5, [%sp + 2047 + 0x108]
- stx %g6, [%sp + 2047 + 0x110]
- stx %g7, [%sp + 2047 + 0x118]
- wrpr %g0, 0x14, %pstate ! save normal globals
- stx %g1, [%sp + 2047 + 0x120]
- stx %g2, [%sp + 2047 + 0x128]
- stx %g3, [%sp + 2047 + 0x130]
- stx %g4, [%sp + 2047 + 0x138]
- stx %g5, [%sp + 2047 + 0x140]
- stx %g6, [%sp + 2047 + 0x148]
- stx %g7, [%sp + 2047 + 0x150]
- wrpr %g0, 0x414, %pstate ! save mmu globals
- stx %g1, [%sp + 2047 + 0x158]
- stx %g2, [%sp + 2047 + 0x160]
- stx %g3, [%sp + 2047 + 0x168]
- stx %g4, [%sp + 2047 + 0x170]
- stx %g5, [%sp + 2047 + 0x178]
- stx %g6, [%sp + 2047 + 0x180]
- stx %g7, [%sp + 2047 + 0x188]
- mov %g1, %l0 ! also save to locals, so we can handle
- mov %g2, %l1 ! tlb faults later on, when accessing
- mov %g3, %l3 ! the stack.
- mov %g7, %l5
- wrpr %l4, PSTATE_IE, %pstate ! turn off interrupts
+ mov %g4, %l0
+ mov %g5, %l1
+ mov %g6, %l3
call %l2
add %i0, 0x018, %o0 ! prom_args
- wrpr %g0, 0x414, %pstate ! restore mmu globals
- mov %l0, %g1
- mov %l1, %g2
- mov %l3, %g3
- mov %l5, %g7
- wrpr %g0, 0x14, %pstate ! restore normal globals
- ldx [%sp + 2047 + 0x120], %g1
- ldx [%sp + 2047 + 0x128], %g2
- ldx [%sp + 2047 + 0x130], %g3
- ldx [%sp + 2047 + 0x138], %g4
- ldx [%sp + 2047 + 0x140], %g5
- ldx [%sp + 2047 + 0x148], %g6
- ldx [%sp + 2047 + 0x150], %g7
- wrpr %g0, 0x814, %pstate ! restore interrupt globals
- ldx [%sp + 2047 + 0x0e8], %g1
- ldx [%sp + 2047 + 0x0f0], %g2
- ldx [%sp + 2047 + 0x0f8], %g3
- ldx [%sp + 2047 + 0x100], %g4
- ldx [%sp + 2047 + 0x108], %g5
- ldx [%sp + 2047 + 0x110], %g6
- ldx [%sp + 2047 + 0x118], %g7
- wrpr %g0, 0x15, %pstate ! restore alternate globals
- ldx [%sp + 2047 + 0x0b0], %g1
- ldx [%sp + 2047 + 0x0b8], %g2
- ldx [%sp + 2047 + 0x0c0], %g3
- ldx [%sp + 2047 + 0x0c8], %g4
- ldx [%sp + 2047 + 0x0d0], %g5
- ldx [%sp + 2047 + 0x0d8], %g6
- ldx [%sp + 2047 + 0x0e0], %g7
- wrpr %l4, 0, %pstate ! restore original pstate
+ mov %l0, %g4
+ mov %l1, %g5
+ mov %l3, %g6
ret
restore
@@ -91,135 +31,18 @@ prom_cif_interface:
prom_cif_callback:
sethi %hi(p1275buf), %o1
or %o1, %lo(p1275buf), %o1
- save %sp, -0x270, %sp
- rdpr %pstate, %l4
- wrpr %g0, 0x15, %pstate ! save PROM alternate globals
- stx %g1, [%sp + 2047 + 0x0b0]
- stx %g2, [%sp + 2047 + 0x0b8]
- stx %g3, [%sp + 2047 + 0x0c0]
- stx %g4, [%sp + 2047 + 0x0c8]
- stx %g5, [%sp + 2047 + 0x0d0]
- stx %g6, [%sp + 2047 + 0x0d8]
- stx %g7, [%sp + 2047 + 0x0e0]
- ! restore Linux alternate globals
- ldx [%sp + 2047 + 0x190], %g1
- ldx [%sp + 2047 + 0x198], %g2
- ldx [%sp + 2047 + 0x1a0], %g3
- ldx [%sp + 2047 + 0x1a8], %g4
- ldx [%sp + 2047 + 0x1b0], %g5
- ldx [%sp + 2047 + 0x1b8], %g6
- ldx [%sp + 2047 + 0x1c0], %g7
- wrpr %g0, 0x814, %pstate ! save PROM interrupt globals
- stx %g1, [%sp + 2047 + 0x0e8]
- stx %g2, [%sp + 2047 + 0x0f0]
- stx %g3, [%sp + 2047 + 0x0f8]
- stx %g4, [%sp + 2047 + 0x100]
- stx %g5, [%sp + 2047 + 0x108]
- stx %g6, [%sp + 2047 + 0x110]
- stx %g7, [%sp + 2047 + 0x118]
- ! restore Linux interrupt globals
- ldx [%sp + 2047 + 0x1c8], %g1
- ldx [%sp + 2047 + 0x1d0], %g2
- ldx [%sp + 2047 + 0x1d8], %g3
- ldx [%sp + 2047 + 0x1e0], %g4
- ldx [%sp + 2047 + 0x1e8], %g5
- ldx [%sp + 2047 + 0x1f0], %g6
- ldx [%sp + 2047 + 0x1f8], %g7
- wrpr %g0, 0x14, %pstate ! save PROM normal globals
- stx %g1, [%sp + 2047 + 0x120]
- stx %g2, [%sp + 2047 + 0x128]
- stx %g3, [%sp + 2047 + 0x130]
- stx %g4, [%sp + 2047 + 0x138]
- stx %g5, [%sp + 2047 + 0x140]
- stx %g6, [%sp + 2047 + 0x148]
- stx %g7, [%sp + 2047 + 0x150]
- ! restore Linux normal globals
- ldx [%sp + 2047 + 0x200], %g1
- ldx [%sp + 2047 + 0x208], %g2
- ldx [%sp + 2047 + 0x210], %g3
- ldx [%sp + 2047 + 0x218], %g4
- ldx [%sp + 2047 + 0x220], %g5
- ldx [%sp + 2047 + 0x228], %g6
- ldx [%sp + 2047 + 0x230], %g7
- wrpr %g0, 0x414, %pstate ! save PROM mmu globals
- stx %g1, [%sp + 2047 + 0x158]
- stx %g2, [%sp + 2047 + 0x160]
- stx %g3, [%sp + 2047 + 0x168]
- stx %g4, [%sp + 2047 + 0x170]
- stx %g5, [%sp + 2047 + 0x178]
- stx %g6, [%sp + 2047 + 0x180]
- stx %g7, [%sp + 2047 + 0x188]
- ! restore Linux mmu globals
- ldx [%sp + 2047 + 0x238], %o0
- ldx [%sp + 2047 + 0x240], %o1
- ldx [%sp + 2047 + 0x248], %l2
- ldx [%sp + 2047 + 0x250], %l3
- ldx [%sp + 2047 + 0x258], %l5
- ldx [%sp + 2047 + 0x260], %l6
- ldx [%sp + 2047 + 0x268], %l7
- ! switch to Linux tba
- sethi %hi(sparc64_ttable_tl0), %l1
- rdpr %tba, %l0 ! save PROM tba
- mov %o0, %g1
- mov %o1, %g2
- mov %l2, %g3
- mov %l3, %g4
- mov %l5, %g5
- mov %l6, %g6
- mov %l7, %g7
- wrpr %l1, %tba ! install Linux tba
- wrpr %l4, 0, %pstate ! restore PSTATE
+ save %sp, -192, %sp
+ TRAP_LOAD_THREAD_REG(%g6, %g1)
+ LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %o0)
+ ldx [%g6 + TI_TASK], %g4
call prom_world
- mov %g0, %o0
+ mov 0, %o0
ldx [%i1 + 0x000], %l2
call %l2
mov %i0, %o0
mov %o0, %l1
call prom_world
- or %g0, 1, %o0
- wrpr %g0, 0x14, %pstate ! interrupts off
- ! restore PROM mmu globals
- ldx [%sp + 2047 + 0x158], %o0
- ldx [%sp + 2047 + 0x160], %o1
- ldx [%sp + 2047 + 0x168], %l2
- ldx [%sp + 2047 + 0x170], %l3
- ldx [%sp + 2047 + 0x178], %l5
- ldx [%sp + 2047 + 0x180], %l6
- ldx [%sp + 2047 + 0x188], %l7
- wrpr %g0, 0x414, %pstate ! restore PROM mmu globals
- mov %o0, %g1
- mov %o1, %g2
- mov %l2, %g3
- mov %l3, %g4
- mov %l5, %g5
- mov %l6, %g6
- mov %l7, %g7
- wrpr %l0, %tba ! restore PROM tba
- wrpr %g0, 0x14, %pstate ! restore PROM normal globals
- ldx [%sp + 2047 + 0x120], %g1
- ldx [%sp + 2047 + 0x128], %g2
- ldx [%sp + 2047 + 0x130], %g3
- ldx [%sp + 2047 + 0x138], %g4
- ldx [%sp + 2047 + 0x140], %g5
- ldx [%sp + 2047 + 0x148], %g6
- ldx [%sp + 2047 + 0x150], %g7
- wrpr %g0, 0x814, %pstate ! restore PROM interrupt globals
- ldx [%sp + 2047 + 0x0e8], %g1
- ldx [%sp + 2047 + 0x0f0], %g2
- ldx [%sp + 2047 + 0x0f8], %g3
- ldx [%sp + 2047 + 0x100], %g4
- ldx [%sp + 2047 + 0x108], %g5
- ldx [%sp + 2047 + 0x110], %g6
- ldx [%sp + 2047 + 0x118], %g7
- wrpr %g0, 0x15, %pstate ! restore PROM alternate globals
- ldx [%sp + 2047 + 0x0b0], %g1
- ldx [%sp + 2047 + 0x0b8], %g2
- ldx [%sp + 2047 + 0x0c0], %g3
- ldx [%sp + 2047 + 0x0c8], %g4
- ldx [%sp + 2047 + 0x0d0], %g5
- ldx [%sp + 2047 + 0x0d8], %g6
- ldx [%sp + 2047 + 0x0e0], %g7
- wrpr %l4, 0, %pstate
+ mov 1, %o0
ret
restore %l1, 0, %o0
diff --git a/arch/sparc64/prom/console.c b/arch/sparc64/prom/console.c
index ac6d035dd150..7c25c54cefdc 100644
--- a/arch/sparc64/prom/console.c
+++ b/arch/sparc64/prom/console.c
@@ -102,6 +102,9 @@ prom_query_input_device(void)
if (!strncmp (propb, "rsc", 3))
return PROMDEV_IRSC;
+ if (!strncmp (propb, "virtual-console", 3))
+ return PROMDEV_IVCONS;
+
if (strncmp (propb, "tty", 3) || !propb[3])
return PROMDEV_I_UNK;
@@ -143,6 +146,9 @@ prom_query_output_device(void)
if (!strncmp (propb, "rsc", 3))
return PROMDEV_ORSC;
+ if (!strncmp (propb, "virtual-console", 3))
+ return PROMDEV_OVCONS;
+
if (strncmp (propb, "tty", 3) || !propb[3])
return PROMDEV_O_UNK;
diff --git a/arch/sparc64/prom/init.c b/arch/sparc64/prom/init.c
index f3cc2d8578b2..1c0db842a6f4 100644
--- a/arch/sparc64/prom/init.c
+++ b/arch/sparc64/prom/init.c
@@ -14,11 +14,10 @@
#include <asm/openprom.h>
#include <asm/oplib.h>
-enum prom_major_version prom_vers;
-unsigned int prom_rev, prom_prev;
+/* OBP version string. */
+char prom_version[80];
/* The root node of the prom device tree. */
-int prom_root_node;
int prom_stdin, prom_stdout;
int prom_chosen_node;
@@ -31,68 +30,25 @@ extern void prom_cif_init(void *, void *);
void __init prom_init(void *cif_handler, void *cif_stack)
{
- char buffer[80], *p;
- int ints[3];
int node;
- int i = 0;
- int bufadjust;
-
- prom_vers = PROM_P1275;
prom_cif_init(cif_handler, cif_stack);
- prom_root_node = prom_getsibling(0);
- if((prom_root_node == 0) || (prom_root_node == -1))
- prom_halt();
-
prom_chosen_node = prom_finddevice(prom_chosen_path);
if (!prom_chosen_node || prom_chosen_node == -1)
prom_halt();
- prom_stdin = prom_getint (prom_chosen_node, "stdin");
- prom_stdout = prom_getint (prom_chosen_node, "stdout");
+ prom_stdin = prom_getint(prom_chosen_node, "stdin");
+ prom_stdout = prom_getint(prom_chosen_node, "stdout");
node = prom_finddevice("/openprom");
if (!node || node == -1)
prom_halt();
- prom_getstring (node, "version", buffer, sizeof (buffer));
-
- prom_printf ("\n");
-
- if (strncmp (buffer, "OBP ", 4))
- goto strange_version;
-
- /*
- * Version field is expected to be 'OBP xx.yy.zz date...'
- * However, Sun can't stick to this format very well, so
- * we need to check for 'OBP xx.yy.zz date...' and adjust
- * accordingly. -spot
- */
-
- if (strncmp (buffer, "OBP ", 5))
- bufadjust = 4;
- else
- bufadjust = 5;
-
- p = buffer + bufadjust;
- while (p && isdigit(*p) && i < 3) {
- ints[i++] = simple_strtoul(p, NULL, 0);
- if ((p = strchr(p, '.')) != NULL)
- p++;
- }
- if (i != 3)
- goto strange_version;
-
- prom_rev = ints[1];
- prom_prev = (ints[0] << 16) | (ints[1] << 8) | ints[2];
-
- printk ("PROMLIB: Sun IEEE Boot Prom %s\n", buffer + bufadjust);
+ prom_getstring(node, "version", prom_version, sizeof(prom_version));
- /* Initialization successful. */
- return;
+ prom_printf("\n");
-strange_version:
- prom_printf ("Strange OBP version `%s'.\n", buffer);
- prom_halt ();
+ printk("PROMLIB: Sun IEEE Boot Prom '%s'\n", prom_version);
+ printk("PROMLIB: Root node compatible: %s\n", prom_root_compatible);
}
diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c
index 87f5cfce23bb..577bde8b6647 100644
--- a/arch/sparc64/prom/misc.c
+++ b/arch/sparc64/prom/misc.c
@@ -112,28 +112,20 @@ unsigned char prom_get_idprom(char *idbuf, int num_bytes)
return 0xff;
}
-/* Get the major prom version number. */
-int prom_version(void)
-{
- return PROM_P1275;
-}
-
-/* Get the prom plugin-revision. */
-int prom_getrev(void)
-{
- return prom_rev;
-}
-
-/* Get the prom firmware print revision. */
-int prom_getprev(void)
+/* Install Linux trap table so PROM uses that instead of its own. */
+void prom_set_trap_table(unsigned long tba)
{
- return prom_prev;
+ p1275_cmd("SUNW,set-trap-table",
+ (P1275_ARG(0, P1275_ARG_IN_64B) |
+ P1275_INOUT(1, 0)), tba);
}
-/* Install Linux trap table so PROM uses that instead of its own. */
-void prom_set_trap_table(unsigned long tba)
+void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa)
{
- p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba);
+ p1275_cmd("SUNW,set-trap-table",
+ (P1275_ARG(0, P1275_ARG_IN_64B) |
+ P1275_ARG(1, P1275_ARG_IN_64B) |
+ P1275_INOUT(2, 0)), tba, mmfsa);
}
int prom_get_mmu_ihandle(void)
@@ -303,9 +295,21 @@ int prom_wakeupsystem(void)
}
#ifdef CONFIG_SMP
-void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0)
+void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg)
+{
+ p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, arg);
+}
+
+void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg)
+{
+ p1275_cmd("SUNW,start-cpu-by-cpuid", P1275_INOUT(3, 0),
+ cpuid, pc, arg);
+}
+
+void prom_stopcpu_cpuid(int cpuid)
{
- p1275_cmd("SUNW,start-cpu", P1275_INOUT(3, 0), cpunode, pc, o0);
+ p1275_cmd("SUNW,stop-cpu-by-cpuid", P1275_INOUT(1, 0),
+ cpuid);
}
void prom_stopself(void)
diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
index a5a7c5712028..2b32c489860c 100644
--- a/arch/sparc64/prom/p1275.c
+++ b/arch/sparc64/prom/p1275.c
@@ -30,16 +30,6 @@ extern void prom_world(int);
extern void prom_cif_interface(void);
extern void prom_cif_callback(void);
-static inline unsigned long spitfire_get_primary_context(void)
-{
- unsigned long ctx;
-
- __asm__ __volatile__("ldxa [%1] %2, %0"
- : "=r" (ctx)
- : "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
- return ctx;
-}
-
/*
* This provides SMP safety on the p1275buf. prom_callback() drops this lock
* to allow recursuve acquisition.
@@ -55,7 +45,6 @@ long p1275_cmd(const char *service, long fmt, ...)
long attrs, x;
p = p1275buf.prom_buffer;
- BUG_ON((spitfire_get_primary_context() & CTX_NR_MASK) != 0);
spin_lock_irqsave(&prom_entry_lock, flags);
diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c
index b1ff9e87dcc6..49075abd7cbc 100644
--- a/arch/sparc64/prom/tree.c
+++ b/arch/sparc64/prom/tree.c
@@ -51,7 +51,7 @@ prom_getparent(int node)
__inline__ int
__prom_getsibling(int node)
{
- return p1275_cmd ("peer", P1275_INOUT(1, 1), node);
+ return p1275_cmd(prom_peer_name, P1275_INOUT(1, 1), node);
}
__inline__ int
@@ -59,9 +59,12 @@ prom_getsibling(int node)
{
int sibnode;
- if(node == -1) return 0;
+ if (node == -1)
+ return 0;
sibnode = __prom_getsibling(node);
- if(sibnode == -1) return 0;
+ if (sibnode == -1)
+ return 0;
+
return sibnode;
}
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 3ab4677395f2..5284996780a7 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -90,7 +90,7 @@ static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 o
len = PAGE_ALIGN(len);
if(!(flags & MAP_FIXED))
addr = 0;
- else if (len > 0xf0000000UL || addr > 0xf0000000UL - len)
+ else if (len > STACK_TOP32 || addr > STACK_TOP32 - len)
goto out_putf;
ret_type = flags & _MAP_NEW;
flags &= ~_MAP_NEW;
@@ -102,7 +102,7 @@ static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 o
(unsigned long) prot, (unsigned long) flags, off);
up_write(&current->mm->mmap_sem);
if(!ret_type)
- retval = ((retval < 0xf0000000) ? 0 : retval);
+ retval = ((retval < STACK_TOP32) ? 0 : retval);
out_putf:
if (file)
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index bd49b25fba6b..6c6c5498899f 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9097,6 +9097,10 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp)
tp->phy_id = PHY_ID_INVALID;
tp->led_ctrl = LED_CTRL_MODE_PHY_1;
+ /* Do not even try poking around in here on Sun parts. */
+ if (tp->tg3_flags2 & TG3_FLG2_SUN_570X)
+ return;
+
tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val);
if (val == NIC_SRAM_DATA_SIG_MAGIC) {
u32 nic_cfg, led_cfg;
diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c
index 1c8b612d8234..3e156e005f2e 100644
--- a/drivers/sbus/char/bbc_i2c.c
+++ b/drivers/sbus/char/bbc_i2c.c
@@ -440,7 +440,8 @@ static int __init bbc_i2c_init(void)
struct linux_ebus_device *edev = NULL;
int err, index = 0;
- if (tlb_type != cheetah || !bbc_present())
+ if ((tlb_type != cheetah && tlb_type != cheetah_plus) ||
+ !bbc_present())
return -ENODEV;
for_each_ebus(ebus) {
@@ -486,3 +487,4 @@ static void bbc_i2c_cleanup(void)
module_init(bbc_i2c_init);
module_exit(bbc_i2c_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index b3c561abe3f6..89e5413cc2a3 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -582,6 +582,13 @@ config SERIAL_SUNSAB_CONSOLE
on your Sparc system as the console, you can do so by answering
Y to this option.
+config SERIAL_SUNHV
+ bool "Sun4v Hypervisor Console support"
+ depends on SPARC64
+ help
+ This driver supports the console device found on SUN4V Sparc
+ systems. Say Y if you want to be able to use this device.
+
config SERIAL_IP22_ZILOG
tristate "IP22 Zilog8530 serial support"
depends on SGI_IP22
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index eaf8e01db198..50c221af9e6d 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_SERIAL_PXA) += pxa.o
obj-$(CONFIG_SERIAL_SA1100) += sa1100.o
obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o
obj-$(CONFIG_SERIAL_SUNCORE) += suncore.o
+obj-$(CONFIG_SERIAL_SUNHV) += sunhv.o
obj-$(CONFIG_SERIAL_SUNZILOG) += sunzilog.o
obj-$(CONFIG_SERIAL_IP22_ZILOG) += ip22zilog.o
obj-$(CONFIG_SERIAL_SUNSU) += sunsu.o
diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c
new file mode 100644
index 000000000000..f137804b3133
--- /dev/null
+++ b/drivers/serial/sunhv.c
@@ -0,0 +1,550 @@
+/* sunhv.c: Serial driver for SUN4V hypervisor console.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/major.h>
+#include <linux/circ_buf.h>
+#include <linux/serial.h>
+#include <linux/sysrq.h>
+#include <linux/console.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/hypervisor.h>
+#include <asm/spitfire.h>
+#include <asm/vdev.h>
+#include <asm/oplib.h>
+#include <asm/irq.h>
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/serial_core.h>
+
+#include "suncore.h"
+
+#define CON_BREAK ((long)-1)
+#define CON_HUP ((long)-2)
+
+static inline long hypervisor_con_getchar(long *status)
+{
+ register unsigned long func asm("%o5");
+ register unsigned long arg0 asm("%o0");
+ register unsigned long arg1 asm("%o1");
+
+ func = HV_FAST_CONS_GETCHAR;
+ arg0 = 0;
+ arg1 = 0;
+ __asm__ __volatile__("ta %6"
+ : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
+ : "0" (func), "1" (arg0), "2" (arg1),
+ "i" (HV_FAST_TRAP));
+
+ *status = arg0;
+
+ return (long) arg1;
+}
+
+static inline long hypervisor_con_putchar(long ch)
+{
+ register unsigned long func asm("%o5");
+ register unsigned long arg0 asm("%o0");
+
+ func = HV_FAST_CONS_PUTCHAR;
+ arg0 = ch;
+ __asm__ __volatile__("ta %4"
+ : "=&r" (func), "=&r" (arg0)
+ : "0" (func), "1" (arg0), "i" (HV_FAST_TRAP));
+
+ return (long) arg0;
+}
+
+#define IGNORE_BREAK 0x1
+#define IGNORE_ALL 0x2
+
+static int hung_up = 0;
+
+static struct tty_struct *receive_chars(struct uart_port *port, struct pt_regs *regs)
+{
+ struct tty_struct *tty = NULL;
+ int saw_console_brk = 0;
+ int limit = 10000;
+
+ if (port->info != NULL) /* Unopened serial console */
+ tty = port->info->tty;
+
+ while (limit-- > 0) {
+ long status;
+ long c = hypervisor_con_getchar(&status);
+ unsigned char flag;
+
+ if (status == HV_EWOULDBLOCK)
+ break;
+
+ if (c == CON_BREAK) {
+ if (uart_handle_break(port))
+ continue;
+ saw_console_brk = 1;
+ c = 0;
+ }
+
+ if (c == CON_HUP) {
+ hung_up = 1;
+ uart_handle_dcd_change(port, 0);
+ } else if (hung_up) {
+ hung_up = 0;
+ uart_handle_dcd_change(port, 1);
+ }
+
+ if (tty == NULL) {
+ uart_handle_sysrq_char(port, c, regs);
+ continue;
+ }
+
+ flag = TTY_NORMAL;
+ port->icount.rx++;
+ if (c == CON_BREAK) {
+ port->icount.brk++;
+ if (uart_handle_break(port))
+ continue;
+ flag = TTY_BREAK;
+ }
+
+ if (uart_handle_sysrq_char(port, c, regs))
+ continue;
+
+ if ((port->ignore_status_mask & IGNORE_ALL) ||
+ ((port->ignore_status_mask & IGNORE_BREAK) &&
+ (c == CON_BREAK)))
+ continue;
+
+ tty_insert_flip_char(tty, c, flag);
+ }
+
+ if (saw_console_brk)
+ sun_do_break();
+
+ return tty;
+}
+
+static void transmit_chars(struct uart_port *port)
+{
+ struct circ_buf *xmit;
+
+ if (!port->info)
+ return;
+
+ xmit = &port->info->xmit;
+ if (uart_circ_empty(xmit) || uart_tx_stopped(port))
+ return;
+
+ while (!uart_circ_empty(xmit)) {
+ long status = hypervisor_con_putchar(xmit->buf[xmit->tail]);
+
+ if (status != HV_EOK)
+ break;
+
+ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+ port->icount.tx++;
+ }
+
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+}
+
+static irqreturn_t sunhv_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct uart_port *port = dev_id;
+ struct tty_struct *tty;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ tty = receive_chars(port, regs);
+ transmit_chars(port);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ if (tty)
+ tty_flip_buffer_push(tty);
+
+ return IRQ_HANDLED;
+}
+
+/* port->lock is not held. */
+static unsigned int sunhv_tx_empty(struct uart_port *port)
+{
+ /* Transmitter is always empty for us. If the circ buffer
+ * is non-empty or there is an x_char pending, our caller
+ * will do the right thing and ignore what we return here.
+ */
+ return TIOCSER_TEMT;
+}
+
+/* port->lock held by caller. */
+static void sunhv_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+ return;
+}
+
+/* port->lock is held by caller and interrupts are disabled. */
+static unsigned int sunhv_get_mctrl(struct uart_port *port)
+{
+ return TIOCM_DSR | TIOCM_CAR | TIOCM_CTS;
+}
+
+/* port->lock held by caller. */
+static void sunhv_stop_tx(struct uart_port *port)
+{
+ return;
+}
+
+/* port->lock held by caller. */
+static void sunhv_start_tx(struct uart_port *port)
+{
+ struct circ_buf *xmit = &port->info->xmit;
+
+ while (!uart_circ_empty(xmit)) {
+ long status = hypervisor_con_putchar(xmit->buf[xmit->tail]);
+
+ if (status != HV_EOK)
+ break;
+
+ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+ port->icount.tx++;
+ }
+}
+
+/* port->lock is not held. */
+static void sunhv_send_xchar(struct uart_port *port, char ch)
+{
+ unsigned long flags;
+ int limit = 10000;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ while (limit-- > 0) {
+ long status = hypervisor_con_putchar(ch);
+ if (status == HV_EOK)
+ break;
+ }
+
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/* port->lock held by caller. */
+static void sunhv_stop_rx(struct uart_port *port)
+{
+}
+
+/* port->lock held by caller. */
+static void sunhv_enable_ms(struct uart_port *port)
+{
+}
+
+/* port->lock is not held. */
+static void sunhv_break_ctl(struct uart_port *port, int break_state)
+{
+ if (break_state) {
+ unsigned long flags;
+ int limit = 1000000;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ while (limit-- > 0) {
+ long status = hypervisor_con_putchar(CON_BREAK);
+ if (status == HV_EOK)
+ break;
+ udelay(2);
+ }
+
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+}
+
+/* port->lock is not held. */
+static int sunhv_startup(struct uart_port *port)
+{
+ return 0;
+}
+
+/* port->lock is not held. */
+static void sunhv_shutdown(struct uart_port *port)
+{
+}
+
+/* port->lock is not held. */
+static void sunhv_set_termios(struct uart_port *port, struct termios *termios,
+ struct termios *old)
+{
+ unsigned int baud = uart_get_baud_rate(port, termios, old, 0, 4000000);
+ unsigned int quot = uart_get_divisor(port, baud);
+ unsigned int iflag, cflag;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ iflag = termios->c_iflag;
+ cflag = termios->c_cflag;
+
+ port->ignore_status_mask = 0;
+ if (iflag & IGNBRK)
+ port->ignore_status_mask |= IGNORE_BREAK;
+ if ((cflag & CREAD) == 0)
+ port->ignore_status_mask |= IGNORE_ALL;
+
+ /* XXX */
+ uart_update_timeout(port, cflag,
+ (port->uartclk / (16 * quot)));
+
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *sunhv_type(struct uart_port *port)
+{
+ return "SUN4V HCONS";
+}
+
+static void sunhv_release_port(struct uart_port *port)
+{
+}
+
+static int sunhv_request_port(struct uart_port *port)
+{
+ return 0;
+}
+
+static void sunhv_config_port(struct uart_port *port, int flags)
+{
+}
+
+static int sunhv_verify_port(struct uart_port *port, struct serial_struct *ser)
+{
+ return -EINVAL;
+}
+
+static struct uart_ops sunhv_pops = {
+ .tx_empty = sunhv_tx_empty,
+ .set_mctrl = sunhv_set_mctrl,
+ .get_mctrl = sunhv_get_mctrl,
+ .stop_tx = sunhv_stop_tx,
+ .start_tx = sunhv_start_tx,
+ .send_xchar = sunhv_send_xchar,
+ .stop_rx = sunhv_stop_rx,
+ .enable_ms = sunhv_enable_ms,
+ .break_ctl = sunhv_break_ctl,
+ .startup = sunhv_startup,
+ .shutdown = sunhv_shutdown,
+ .set_termios = sunhv_set_termios,
+ .type = sunhv_type,
+ .release_port = sunhv_release_port,
+ .request_port = sunhv_request_port,
+ .config_port = sunhv_config_port,
+ .verify_port = sunhv_verify_port,
+};
+
+static struct uart_driver sunhv_reg = {
+ .owner = THIS_MODULE,
+ .driver_name = "serial",
+ .devfs_name = "tts/",
+ .dev_name = "ttyS",
+ .major = TTY_MAJOR,
+};
+
+static struct uart_port *sunhv_port;
+
+static inline void sunhv_console_putchar(struct uart_port *port, char c)
+{
+ unsigned long flags;
+ int limit = 1000000;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ while (limit-- > 0) {
+ long status = hypervisor_con_putchar(c);
+ if (status == HV_EOK)
+ break;
+ udelay(2);
+ }
+
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void sunhv_console_write(struct console *con, const char *s, unsigned n)
+{
+ struct uart_port *port = sunhv_port;
+ int i;
+
+ for (i = 0; i < n; i++) {
+ if (*s == '\n')
+ sunhv_console_putchar(port, '\r');
+ sunhv_console_putchar(port, *s++);
+ }
+}
+
+static struct console sunhv_console = {
+ .name = "ttyHV",
+ .write = sunhv_console_write,
+ .device = uart_console_device,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+ .data = &sunhv_reg,
+};
+
+static inline struct console *SUNHV_CONSOLE(void)
+{
+ if (con_is_present())
+ return NULL;
+
+ sunhv_console.index = 0;
+
+ return &sunhv_console;
+}
+
+static int __init hv_console_compatible(char *buf, int len)
+{
+ while (len) {
+ int this_len;
+
+ if (!strcmp(buf, "qcn"))
+ return 1;
+
+ this_len = strlen(buf) + 1;
+
+ buf += this_len;
+ len -= this_len;
+ }
+
+ return 0;
+}
+
+static unsigned int __init get_interrupt(void)
+{
+ const char *cons_str = "console";
+ const char *compat_str = "compatible";
+ int node = prom_getchild(sun4v_vdev_root);
+ char buf[64];
+ int err, len;
+
+ node = prom_searchsiblings(node, cons_str);
+ if (!node)
+ return 0;
+
+ len = prom_getproplen(node, compat_str);
+ if (len == 0 || len == -1)
+ return 0;
+
+ err = prom_getproperty(node, compat_str, buf, 64);
+ if (err == -1)
+ return 0;
+
+ if (!hv_console_compatible(buf, len))
+ return 0;
+
+ /* Ok, the this is the OBP node for the sun4v hypervisor
+ * console device. Decode the interrupt.
+ */
+ return sun4v_vdev_device_interrupt(node);
+}
+
+static int __init sunhv_init(void)
+{
+ struct uart_port *port;
+ int ret;
+
+ if (tlb_type != hypervisor)
+ return -ENODEV;
+
+ port = kmalloc(sizeof(struct uart_port), GFP_KERNEL);
+ if (unlikely(!port))
+ return -ENOMEM;
+
+ memset(port, 0, sizeof(struct uart_port));
+
+ port->line = 0;
+ port->ops = &sunhv_pops;
+ port->type = PORT_SUNHV;
+ port->uartclk = ( 29491200 / 16 ); /* arbitrary */
+
+ /* Set this just to make uart_configure_port() happy. */
+ port->membase = (unsigned char __iomem *) __pa(port);
+
+ port->irq = get_interrupt();
+ if (!port->irq) {
+ kfree(port);
+ return -ENODEV;
+ }
+
+ sunhv_reg.minor = sunserial_current_minor;
+ sunhv_reg.nr = 1;
+
+ ret = uart_register_driver(&sunhv_reg);
+ if (ret < 0) {
+ printk(KERN_ERR "SUNHV: uart_register_driver() failed %d\n",
+ ret);
+ kfree(port);
+
+ return ret;
+ }
+
+ sunhv_reg.tty_driver->name_base = sunhv_reg.minor - 64;
+ sunserial_current_minor += 1;
+
+ sunhv_reg.cons = SUNHV_CONSOLE();
+
+ sunhv_port = port;
+
+ ret = uart_add_one_port(&sunhv_reg, port);
+ if (ret < 0) {
+ printk(KERN_ERR "SUNHV: uart_add_one_port() failed %d\n", ret);
+ sunserial_current_minor -= 1;
+ uart_unregister_driver(&sunhv_reg);
+ kfree(port);
+ sunhv_port = NULL;
+ return -ENODEV;
+ }
+
+ if (request_irq(port->irq, sunhv_interrupt,
+ SA_SHIRQ, "serial(sunhv)", port)) {
+ printk(KERN_ERR "sunhv: Cannot register IRQ\n");
+ uart_remove_one_port(&sunhv_reg, port);
+ sunserial_current_minor -= 1;
+ uart_unregister_driver(&sunhv_reg);
+ kfree(port);
+ sunhv_port = NULL;
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void __exit sunhv_exit(void)
+{
+ struct uart_port *port = sunhv_port;
+
+ BUG_ON(!port);
+
+ free_irq(port->irq, port);
+
+ uart_remove_one_port(&sunhv_reg, port);
+ sunserial_current_minor -= 1;
+
+ uart_unregister_driver(&sunhv_reg);
+
+ kfree(sunhv_port);
+ sunhv_port = NULL;
+}
+
+module_init(sunhv_init);
+module_exit(sunhv_exit);
+
+MODULE_AUTHOR("David S. Miller");
+MODULE_DESCRIPTION("SUN4V Hypervisor console driver")
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index 85664228a0b6..a2fb0c2fb121 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -955,14 +955,13 @@ static struct console sunsab_console = {
.index = -1,
.data = &sunsab_reg,
};
-#define SUNSAB_CONSOLE (&sunsab_console)
-static void __init sunsab_console_init(void)
+static inline struct console *SUNSAB_CONSOLE(void)
{
int i;
if (con_is_present())
- return;
+ return NULL;
for (i = 0; i < num_channels; i++) {
int this_minor = sunsab_reg.minor + i;
@@ -971,13 +970,14 @@ static void __init sunsab_console_init(void)
break;
}
if (i == num_channels)
- return;
+ return NULL;
sunsab_console.index = i;
- register_console(&sunsab_console);
+
+ return &sunsab_console;
}
#else
-#define SUNSAB_CONSOLE (NULL)
+#define SUNSAB_CONSOLE() (NULL)
#define sunsab_console_init() do { } while (0)
#endif
@@ -1124,7 +1124,6 @@ static int __init sunsab_init(void)
sunsab_reg.minor = sunserial_current_minor;
sunsab_reg.nr = num_channels;
- sunsab_reg.cons = SUNSAB_CONSOLE;
ret = uart_register_driver(&sunsab_reg);
if (ret < 0) {
@@ -1143,10 +1142,12 @@ static int __init sunsab_init(void)
return ret;
}
+ sunsab_reg.tty_driver->name_base = sunsab_reg.minor - 64;
+
+ sunsab_reg.cons = SUNSAB_CONSOLE();
+
sunserial_current_minor += num_channels;
- sunsab_console_init();
-
for (i = 0; i < num_channels; i++) {
struct uart_sunsab_port *up = &sunsab_ports[i];
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 4e453fa966ae..46c44b83f57c 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1280,6 +1280,7 @@ static int __init sunsu_kbd_ms_init(struct uart_sunsu_port *up, int channel)
struct serio *serio;
#endif
+ spin_lock_init(&up->port.lock);
up->port.line = channel;
up->port.type = PORT_UNKNOWN;
up->port.uartclk = (SU_BASE_BAUD * 16);
@@ -1464,18 +1465,17 @@ static struct console sunsu_cons = {
.index = -1,
.data = &sunsu_reg,
};
-#define SUNSU_CONSOLE (&sunsu_cons)
/*
* Register console.
*/
-static int __init sunsu_serial_console_init(void)
+static inline struct console *SUNSU_CONSOLE(void)
{
int i;
if (con_is_present())
- return 0;
+ return NULL;
for (i = 0; i < UART_NR; i++) {
int this_minor = sunsu_reg.minor + i;
@@ -1484,16 +1484,16 @@ static int __init sunsu_serial_console_init(void)
break;
}
if (i == UART_NR)
- return 0;
+ return NULL;
if (sunsu_ports[i].port_node == 0)
- return 0;
+ return NULL;
sunsu_cons.index = i;
- register_console(&sunsu_cons);
- return 0;
+
+ return &sunsu_cons;
}
#else
-#define SUNSU_CONSOLE (NULL)
+#define SUNSU_CONSOLE() (NULL)
#define sunsu_serial_console_init() do { } while (0)
#endif
@@ -1510,6 +1510,7 @@ static int __init sunsu_serial_init(void)
up->su_type == SU_PORT_KBD)
continue;
+ spin_lock_init(&up->port.lock);
up->port.flags |= UPF_BOOT_AUTOCONF;
up->port.type = PORT_UNKNOWN;
up->port.uartclk = (SU_BASE_BAUD * 16);
@@ -1523,16 +1524,19 @@ static int __init sunsu_serial_init(void)
}
sunsu_reg.minor = sunserial_current_minor;
- sunserial_current_minor += instance;
sunsu_reg.nr = instance;
- sunsu_reg.cons = SUNSU_CONSOLE;
ret = uart_register_driver(&sunsu_reg);
if (ret < 0)
return ret;
- sunsu_serial_console_init();
+ sunsu_reg.tty_driver->name_base = sunsu_reg.minor - 64;
+
+ sunserial_current_minor += instance;
+
+ sunsu_reg.cons = SUNSU_CONSOLE();
+
for (i = 0; i < UART_NR; i++) {
struct uart_sunsu_port *up = &sunsu_ports[i];
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index 5cc4d4c2935c..10b35c6f287d 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -1390,7 +1390,6 @@ static struct console sunzilog_console = {
.index = -1,
.data = &sunzilog_reg,
};
-#define SUNZILOG_CONSOLE (&sunzilog_console)
static int __init sunzilog_console_init(void)
{
@@ -1413,8 +1412,31 @@ static int __init sunzilog_console_init(void)
register_console(&sunzilog_console);
return 0;
}
+
+static inline struct console *SUNZILOG_CONSOLE(void)
+{
+ int i;
+
+ if (con_is_present())
+ return NULL;
+
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ int this_minor = sunzilog_reg.minor + i;
+
+ if ((this_minor - 64) == (serial_console - 1))
+ break;
+ }
+ if (i == NUM_CHANNELS)
+ return NULL;
+
+ sunzilog_console.index = i;
+ sunzilog_port_table[i].flags |= SUNZILOG_FLAG_IS_CONS;
+
+ return &sunzilog_console;
+}
+
#else
-#define SUNZILOG_CONSOLE (NULL)
+#define SUNZILOG_CONSOLE() (NULL)
#define sunzilog_console_init() do { } while (0)
#endif
@@ -1666,14 +1688,15 @@ static int __init sunzilog_ports_init(void)
}
sunzilog_reg.nr = uart_count;
- sunzilog_reg.cons = SUNZILOG_CONSOLE;
-
sunzilog_reg.minor = sunserial_current_minor;
- sunserial_current_minor += uart_count;
ret = uart_register_driver(&sunzilog_reg);
if (ret == 0) {
- sunzilog_console_init();
+ sunzilog_reg.tty_driver->name_base = sunzilog_reg.minor - 64;
+ sunzilog_reg.cons = SUNZILOG_CONSOLE();
+
+ sunserial_current_minor += uart_count;
+
for (i = 0; i < NUM_CHANNELS; i++) {
struct uart_sunzilog_port *up = &sunzilog_port_table[i];
diff --git a/include/asm-sparc/idprom.h b/include/asm-sparc/idprom.h
index d856e640acd3..59083ed85232 100644
--- a/include/asm-sparc/idprom.h
+++ b/include/asm-sparc/idprom.h
@@ -7,27 +7,19 @@
#ifndef _SPARC_IDPROM_H
#define _SPARC_IDPROM_H
-/* Offset into the EEPROM where the id PROM is located on the 4c */
-#define IDPROM_OFFSET 0x7d8
+#include <linux/types.h>
-/* On sun4m; physical. */
-/* MicroSPARC(-II) does not decode 31rd bit, but it works. */
-#define IDPROM_OFFSET_M 0xfd8
-
-struct idprom
-{
- unsigned char id_format; /* Format identifier (always 0x01) */
- unsigned char id_machtype; /* Machine type */
- unsigned char id_ethaddr[6]; /* Hardware ethernet address */
- long id_date; /* Date of manufacture */
- unsigned int id_sernum:24; /* Unique serial number */
- unsigned char id_cksum; /* Checksum - xor of the data bytes */
- unsigned char reserved[16];
+struct idprom {
+ u8 id_format; /* Format identifier (always 0x01) */
+ u8 id_machtype; /* Machine type */
+ u8 id_ethaddr[6]; /* Hardware ethernet address */
+ s32 id_date; /* Date of manufacture */
+ u32 id_sernum:24; /* Unique serial number */
+ u8 id_cksum; /* Checksum - xor of the data bytes */
+ u8 reserved[16];
};
extern struct idprom *idprom;
extern void idprom_init(void);
-#define IDPROM_SIZE (sizeof(struct idprom))
-
#endif /* !(_SPARC_IDPROM_H) */
diff --git a/include/asm-sparc/oplib.h b/include/asm-sparc/oplib.h
index d0d76b30eb4c..f283f8aaf6a9 100644
--- a/include/asm-sparc/oplib.h
+++ b/include/asm-sparc/oplib.h
@@ -165,6 +165,7 @@ enum prom_input_device {
PROMDEV_ITTYA, /* input from ttya */
PROMDEV_ITTYB, /* input from ttyb */
PROMDEV_IRSC, /* input from rsc */
+ PROMDEV_IVCONS, /* input from virtual-console */
PROMDEV_I_UNK,
};
@@ -177,6 +178,7 @@ enum prom_output_device {
PROMDEV_OTTYA, /* to ttya */
PROMDEV_OTTYB, /* to ttyb */
PROMDEV_ORSC, /* to rsc */
+ PROMDEV_OVCONS, /* to virtual-console */
PROMDEV_O_UNK,
};
diff --git a/include/asm-sparc/uaccess.h b/include/asm-sparc/uaccess.h
index f8f1ec1f06e6..3cf132e1aa25 100644
--- a/include/asm-sparc/uaccess.h
+++ b/include/asm-sparc/uaccess.h
@@ -120,17 +120,6 @@ case 8: __put_user_asm(x,d,addr,__pu_ret); break; \
default: __pu_ret = __put_user_bad(); break; \
} } else { __pu_ret = -EFAULT; } __pu_ret; })
-#define __put_user_check_ret(x,addr,size,retval) ({ \
-register int __foo __asm__ ("l1"); \
-if (__access_ok(addr,size)) { \
-switch (size) { \
-case 1: __put_user_asm_ret(x,b,addr,retval,__foo); break; \
-case 2: __put_user_asm_ret(x,h,addr,retval,__foo); break; \
-case 4: __put_user_asm_ret(x,,addr,retval,__foo); break; \
-case 8: __put_user_asm_ret(x,d,addr,retval,__foo); break; \
-default: if (__put_user_bad()) return retval; break; \
-} } else return retval; })
-
#define __put_user_nocheck(x,addr,size) ({ \
register int __pu_ret; \
switch (size) { \
@@ -141,16 +130,6 @@ case 8: __put_user_asm(x,d,addr,__pu_ret); break; \
default: __pu_ret = __put_user_bad(); break; \
} __pu_ret; })
-#define __put_user_nocheck_ret(x,addr,size,retval) ({ \
-register int __foo __asm__ ("l1"); \
-switch (size) { \
-case 1: __put_user_asm_ret(x,b,addr,retval,__foo); break; \
-case 2: __put_user_asm_ret(x,h,addr,retval,__foo); break; \
-case 4: __put_user_asm_ret(x,,addr,retval,__foo); break; \
-case 8: __put_user_asm_ret(x,d,addr,retval,__foo); break; \
-default: if (__put_user_bad()) return retval; break; \
-} })
-
#define __put_user_asm(x,size,addr,ret) \
__asm__ __volatile__( \
"/* Put user asm, inline. */\n" \
@@ -170,32 +149,6 @@ __asm__ __volatile__( \
: "=&r" (ret) : "r" (x), "m" (*__m(addr)), \
"i" (-EFAULT))
-#define __put_user_asm_ret(x,size,addr,ret,foo) \
-if (__builtin_constant_p(ret) && ret == -EFAULT) \
-__asm__ __volatile__( \
- "/* Put user asm ret, inline. */\n" \
-"1:\t" "st"#size " %1, %2\n\n\t" \
- ".section __ex_table,#alloc\n\t" \
- ".align 4\n\t" \
- ".word 1b, __ret_efault\n\n\t" \
- ".previous\n\n\t" \
- : "=r" (foo) : "r" (x), "m" (*__m(addr))); \
-else \
-__asm__ __volatile( \
- "/* Put user asm ret, inline. */\n" \
-"1:\t" "st"#size " %1, %2\n\n\t" \
- ".section .fixup,#alloc,#execinstr\n\t" \
- ".align 4\n" \
-"3:\n\t" \
- "ret\n\t" \
- " restore %%g0, %3, %%o0\n\t" \
- ".previous\n\n\t" \
- ".section __ex_table,#alloc\n\t" \
- ".align 4\n\t" \
- ".word 1b, 3b\n\n\t" \
- ".previous\n\n\t" \
- : "=r" (foo) : "r" (x), "m" (*__m(addr)), "i" (ret))
-
extern int __put_user_bad(void);
#define __get_user_check(x,addr,size,type) ({ \
diff --git a/include/asm-sparc64/a.out.h b/include/asm-sparc64/a.out.h
index 02af289e3f46..35cb5c9e0c92 100644
--- a/include/asm-sparc64/a.out.h
+++ b/include/asm-sparc64/a.out.h
@@ -95,7 +95,11 @@ struct relocation_info /* used when header.a_machtype == M_SPARC */
#ifdef __KERNEL__
-#define STACK_TOP (test_thread_flag(TIF_32BIT) ? 0xf0000000 : 0x80000000000L)
+#define STACK_TOP32 ((1UL << 32UL) - PAGE_SIZE)
+#define STACK_TOP64 (0x0000080000000000UL - (1UL << 32UL))
+
+#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
+ STACK_TOP32 : STACK_TOP64)
#endif
diff --git a/include/asm-sparc64/asi.h b/include/asm-sparc64/asi.h
index 534855660f2a..662a21107ae6 100644
--- a/include/asm-sparc64/asi.h
+++ b/include/asm-sparc64/asi.h
@@ -25,14 +25,27 @@
/* SpitFire and later extended ASIs. The "(III)" marker designates
* UltraSparc-III and later specific ASIs. The "(CMT)" marker designates
- * Chip Multi Threading specific ASIs.
+ * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific
+ * ASIs, "(4V)" designates SUN4V specific ASIs.
*/
#define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */
#define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */
+#define ASI_BLK_AIUP_4V 0x16 /* (4V) Prim, user, block ld/st */
+#define ASI_BLK_AIUS_4V 0x17 /* (4V) Sec, user, block ld/st */
#define ASI_PHYS_USE_EC_L 0x1c /* PADDR, E-cachable, little endian*/
#define ASI_PHYS_BYPASS_EC_E_L 0x1d /* PADDR, E-bit, little endian */
+#define ASI_BLK_AIUP_L_4V 0x1e /* (4V) Prim, user, block, l-endian*/
+#define ASI_BLK_AIUS_L_4V 0x1f /* (4V) Sec, user, block, l-endian */
+#define ASI_SCRATCHPAD 0x20 /* (4V) Scratch Pad Registers */
+#define ASI_MMU 0x21 /* (4V) MMU Context Registers */
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 /* (NG) init-store, twin load,
+ * secondary, user
+ */
#define ASI_NUCLEUS_QUAD_LDD 0x24 /* Cachable, qword load */
+#define ASI_QUEUE 0x25 /* (4V) Interrupt Queue Registers */
+#define ASI_QUAD_LDD_PHYS_4V 0x26 /* (4V) Physical, qword load */
#define ASI_NUCLEUS_QUAD_LDD_L 0x2c /* Cachable, qword load, l-endian */
+#define ASI_QUAD_LDD_PHYS_L_4V 0x2e /* (4V) Phys, qword load, l-endian */
#define ASI_PCACHE_DATA_STATUS 0x30 /* (III) PCache data stat RAM diag */
#define ASI_PCACHE_DATA 0x31 /* (III) PCache data RAM diag */
#define ASI_PCACHE_TAG 0x32 /* (III) PCache tag RAM diag */
@@ -137,6 +150,9 @@
#define ASI_FL16_SL 0xdb /* Secondary, 1 16-bit, fpu ld/st,L*/
#define ASI_BLK_COMMIT_P 0xe0 /* Primary, blk store commit */
#define ASI_BLK_COMMIT_S 0xe1 /* Secondary, blk store commit */
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* (NG) init-store, twin load,
+ * primary, implicit
+ */
#define ASI_BLK_P 0xf0 /* Primary, blk ld/st */
#define ASI_BLK_S 0xf1 /* Secondary, blk ld/st */
#define ASI_BLK_PL 0xf8 /* Primary, blk ld/st, little */
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index 74de79dca915..c66a81bbc84d 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -1,41 +1,224 @@
/* cpudata.h: Per-cpu parameters.
*
- * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net)
*/
#ifndef _SPARC64_CPUDATA_H
#define _SPARC64_CPUDATA_H
+#include <asm/hypervisor.h>
+#include <asm/asi.h>
+
+#ifndef __ASSEMBLY__
+
#include <linux/percpu.h>
+#include <linux/threads.h>
typedef struct {
/* Dcache line 1 */
unsigned int __softirq_pending; /* must be 1st, see rtrap.S */
unsigned int multiplier;
unsigned int counter;
- unsigned int idle_volume;
+ unsigned int __pad1;
unsigned long clock_tick; /* %tick's per second */
unsigned long udelay_val;
- /* Dcache line 2 */
- unsigned int pgcache_size;
- unsigned int __pad1;
- unsigned long *pte_cache[2];
- unsigned long *pgd_cache;
-
- /* Dcache line 3, rarely used */
+ /* Dcache line 2, rarely used */
unsigned int dcache_size;
unsigned int dcache_line_size;
unsigned int icache_size;
unsigned int icache_line_size;
unsigned int ecache_size;
unsigned int ecache_line_size;
- unsigned int __pad2;
unsigned int __pad3;
+ unsigned int __pad4;
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
#define local_cpu_data() __get_cpu_var(__cpu_data)
+/* Trap handling code needs to get at a few critical values upon
+ * trap entry and to process TSB misses. These cannot be in the
+ * per_cpu() area as we really need to lock them into the TLB and
+ * thus make them part of the main kernel image. As a result we
+ * try to make this as small as possible.
+ *
+ * This is padded out and aligned to 64-bytes to avoid false sharing
+ * on SMP.
+ */
+
+/* If you modify the size of this structure, please update
+ * TRAP_BLOCK_SZ_SHIFT below.
+ */
+struct thread_info;
+struct trap_per_cpu {
+/* D-cache line 1: Basic thread information, cpu and device mondo queues */
+ struct thread_info *thread;
+ unsigned long pgd_paddr;
+ unsigned long cpu_mondo_pa;
+ unsigned long dev_mondo_pa;
+
+/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */
+ unsigned long resum_mondo_pa;
+ unsigned long resum_kernel_buf_pa;
+ unsigned long nonresum_mondo_pa;
+ unsigned long nonresum_kernel_buf_pa;
+
+/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
+ struct hv_fault_status fault_info;
+
+/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */
+ unsigned long cpu_mondo_block_pa;
+ unsigned long cpu_list_pa;
+ unsigned long __pad1[2];
+
+/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */
+ unsigned long __pad2[4];
+} __attribute__((aligned(64)));
+extern struct trap_per_cpu trap_block[NR_CPUS];
+extern void init_cur_cpu_trap(struct thread_info *);
+extern void setup_tba(void);
+
+struct cpuid_patch_entry {
+ unsigned int addr;
+ unsigned int cheetah_safari[4];
+ unsigned int cheetah_jbus[4];
+ unsigned int starfire[4];
+ unsigned int sun4v[4];
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+
+struct sun4v_1insn_patch_entry {
+ unsigned int addr;
+ unsigned int insn;
+};
+extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
+ __sun4v_1insn_patch_end;
+
+struct sun4v_2insn_patch_entry {
+ unsigned int addr;
+ unsigned int insns[2];
+};
+extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
+ __sun4v_2insn_patch_end;
+
+#endif /* !(__ASSEMBLY__) */
+
+#define TRAP_PER_CPU_THREAD 0x00
+#define TRAP_PER_CPU_PGD_PADDR 0x08
+#define TRAP_PER_CPU_CPU_MONDO_PA 0x10
+#define TRAP_PER_CPU_DEV_MONDO_PA 0x18
+#define TRAP_PER_CPU_RESUM_MONDO_PA 0x20
+#define TRAP_PER_CPU_RESUM_KBUF_PA 0x28
+#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30
+#define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38
+#define TRAP_PER_CPU_FAULT_INFO 0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA 0xc8
+
+#define TRAP_BLOCK_SZ_SHIFT 8
+
+#include <asm/scratchpad.h>
+
+#define __GET_CPUID(REG) \
+ /* Spitfire implementation (default). */ \
+661: ldxa [%g0] ASI_UPA_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x1f, REG; \
+ nop; \
+ .section .cpuid_patch, "ax"; \
+ /* Instruction location. */ \
+ .word 661b; \
+ /* Cheetah Safari implementation. */ \
+ ldxa [%g0] ASI_SAFARI_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x3ff, REG; \
+ nop; \
+ /* Cheetah JBUS implementation. */ \
+ ldxa [%g0] ASI_JBUS_CONFIG, REG; \
+ srlx REG, 17, REG; \
+ and REG, 0x1f, REG; \
+ nop; \
+ /* Starfire implementation. */ \
+ sethi %hi(0x1fff40000d0 >> 9), REG; \
+ sllx REG, 9, REG; \
+ or REG, 0xd0, REG; \
+ lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\
+ /* sun4v implementation. */ \
+ mov SCRATCHPAD_CPUID, REG; \
+ ldxa [REG] ASI_SCRATCHPAD, REG; \
+ nop; \
+ nop; \
+ .previous;
+
+#ifdef CONFIG_SMP
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ __GET_CPUID(TMP) \
+ sethi %hi(trap_block), DEST; \
+ sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \
+ or DEST, %lo(trap_block), DEST; \
+ add DEST, TMP, DEST; \
+
+/* Clobbers TMP, current address space PGD phys address into DEST. */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST. */
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \
+ __GET_CPUID(TMP) \
+ sethi %hi(__irq_work), DEST; \
+ sllx TMP, 6, TMP; \
+ or DEST, %lo(__irq_work), DEST; \
+ add DEST, TMP, DEST;
+
+/* Clobbers TMP, loads DEST with current thread info pointer. */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST. REG1, REG2, and REG3 are
+ * clobbered.
+ *
+ * You absolutely cannot use DEST as a temporary in this code. The
+ * reason is that traps can happen during execution, and return from
+ * trap will load the fully resolved DEST per-cpu base. This can corrupt
+ * the calculations done by the macro mid-stream.
+ */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
+ ldub [THR + TI_CPU], REG1; \
+ sethi %hi(__per_cpu_shift), REG3; \
+ sethi %hi(__per_cpu_base), REG2; \
+ ldx [REG3 + %lo(__per_cpu_shift)], REG3; \
+ ldx [REG2 + %lo(__per_cpu_base)], REG2; \
+ sllx REG1, REG3, REG3; \
+ add REG3, REG2, DEST;
+
+#else
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ sethi %hi(trap_block), DEST; \
+ or DEST, %lo(trap_block), DEST; \
+
+/* Uniprocessor versions, we know the cpuid is zero. */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \
+ sethi %hi(__irq_work), DEST; \
+ or DEST, %lo(__irq_work), DEST;
+
+#define TRAP_LOAD_THREAD_REG(DEST, TMP) \
+ TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \
+ ldx [DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* No per-cpu areas on uniprocessor, so no need to load DEST. */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
+
+#endif /* !(CONFIG_SMP) */
+
#endif /* _SPARC64_CPUDATA_H */
diff --git a/include/asm-sparc64/elf.h b/include/asm-sparc64/elf.h
index 69539a8ab833..303d85e2f82e 100644
--- a/include/asm-sparc64/elf.h
+++ b/include/asm-sparc64/elf.h
@@ -10,6 +10,7 @@
#ifdef __KERNEL__
#include <asm/processor.h>
#include <asm/uaccess.h>
+#include <asm/spitfire.h>
#endif
/*
@@ -68,6 +69,7 @@
#define HWCAP_SPARC_MULDIV 8
#define HWCAP_SPARC_V9 16
#define HWCAP_SPARC_ULTRA3 32
+#define HWCAP_SPARC_BLKINIT 64
/*
* These are used to set parameters in the core dumps.
@@ -145,11 +147,21 @@ typedef struct {
instruction set this cpu supports. */
/* On Ultra, we support all of the v8 capabilities. */
-#define ELF_HWCAP ((HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | \
- HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | \
- HWCAP_SPARC_V9) | \
- ((tlb_type == cheetah || tlb_type == cheetah_plus) ? \
- HWCAP_SPARC_ULTRA3 : 0))
+static inline unsigned int sparc64_elf_hwcap(void)
+{
+ unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
+ HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
+ HWCAP_SPARC_V9);
+
+ if (tlb_type == cheetah || tlb_type == cheetah_plus)
+ cap |= HWCAP_SPARC_ULTRA3;
+ else if (tlb_type == hypervisor)
+ cap |= HWCAP_SPARC_BLKINIT;
+
+ return cap;
+}
+
+#define ELF_HWCAP sparc64_elf_hwcap();
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h
index 0abd3a674e8f..67960a751f4d 100644
--- a/include/asm-sparc64/head.h
+++ b/include/asm-sparc64/head.h
@@ -4,12 +4,21 @@
#include <asm/pstate.h>
+ /* wrpr %g0, val, %gl */
+#define SET_GL(val) \
+ .word 0xa1902000 | val
+
+ /* rdpr %gl, %gN */
+#define GET_GL_GLOBAL(N) \
+ .word 0x81540000 | (N << 25)
+
#define KERNBASE 0x400000
#define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ)
#define __CHEETAH_ID 0x003e0014
#define __JALAPENO_ID 0x003e0016
+#define __SERRANO_ID 0x003e0022
#define CHEETAH_MANUF 0x003e
#define CHEETAH_IMPL 0x0014 /* Ultra-III */
@@ -19,6 +28,12 @@
#define PANTHER_IMPL 0x0019 /* Ultra-IV+ */
#define SERRANO_IMPL 0x0022 /* Ultra-IIIi+ */
+#define BRANCH_IF_SUN4V(tmp1,label) \
+ sethi %hi(is_sun4v), %tmp1; \
+ lduw [%tmp1 + %lo(is_sun4v)], %tmp1; \
+ brnz,pn %tmp1, label; \
+ nop
+
#define BRANCH_IF_CHEETAH_BASE(tmp1,tmp2,label) \
rdpr %ver, %tmp1; \
sethi %hi(__CHEETAH_ID), %tmp2; \
diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h
new file mode 100644
index 000000000000..612bf319753f
--- /dev/null
+++ b/include/asm-sparc64/hypervisor.h
@@ -0,0 +1,2128 @@
+#ifndef _SPARC64_HYPERVISOR_H
+#define _SPARC64_HYPERVISOR_H
+
+/* Sun4v hypervisor interfaces and defines.
+ *
+ * Hypervisor calls are made via traps to software traps number 0x80
+ * and above. Registers %o0 to %o5 serve as argument, status, and
+ * return value registers.
+ *
+ * There are two kinds of these traps. First there are the normal
+ * "fast traps" which use software trap 0x80 and encode the function
+ * to invoke by number in register %o5. Argument and return value
+ * handling is as follows:
+ *
+ * -----------------------------------------------
+ * | %o5 | function number | undefined |
+ * | %o0 | argument 0 | return status |
+ * | %o1 | argument 1 | return value 1 |
+ * | %o2 | argument 2 | return value 2 |
+ * | %o3 | argument 3 | return value 3 |
+ * | %o4 | argument 4 | return value 4 |
+ * -----------------------------------------------
+ *
+ * The second type are "hyper-fast traps" which encode the function
+ * number in the software trap number itself. So these use trap
+ * numbers > 0x80. The register usage for hyper-fast traps is as
+ * follows:
+ *
+ * -----------------------------------------------
+ * | %o0 | argument 0 | return status |
+ * | %o1 | argument 1 | return value 1 |
+ * | %o2 | argument 2 | return value 2 |
+ * | %o3 | argument 3 | return value 3 |
+ * | %o4 | argument 4 | return value 4 |
+ * -----------------------------------------------
+ *
+ * Registers providing explicit arguments to the hypervisor calls
+ * are volatile across the call. Upon return their values are
+ * undefined unless explicitly specified as containing a particular
+ * return value by the specific call. The return status is always
+ * returned in register %o0, zero indicates a successful execution of
+ * the hypervisor call and other values indicate an error status as
+ * defined below. So, for example, if a hyper-fast trap takes
+ * arguments 0, 1, and 2, then %o0, %o1, and %o2 are volatile across
+ * the call and %o3, %o4, and %o5 would be preserved.
+ *
+ * If the hypervisor trap is invalid, or the fast trap function number
+ * is invalid, HV_EBADTRAP will be returned in %o0. Also, all 64-bits
+ * of the argument and return values are significant.
+ */
+
+/* Trap numbers. */
+#define HV_FAST_TRAP 0x80
+#define HV_MMU_MAP_ADDR_TRAP 0x83
+#define HV_MMU_UNMAP_ADDR_TRAP 0x84
+#define HV_TTRACE_ADDENTRY_TRAP 0x85
+#define HV_CORE_TRAP 0xff
+
+/* Error codes. */
+#define HV_EOK 0 /* Successful return */
+#define HV_ENOCPU 1 /* Invalid CPU id */
+#define HV_ENORADDR 2 /* Invalid real address */
+#define HV_ENOINTR 3 /* Invalid interrupt id */
+#define HV_EBADPGSZ 4 /* Invalid pagesize encoding */
+#define HV_EBADTSB 5 /* Invalid TSB description */
+#define HV_EINVAL 6 /* Invalid argument */
+#define HV_EBADTRAP 7 /* Invalid function number */
+#define HV_EBADALIGN 8 /* Invalid address alignment */
+#define HV_EWOULDBLOCK 9 /* Cannot complete w/o blocking */
+#define HV_ENOACCESS 10 /* No access to resource */
+#define HV_EIO 11 /* I/O error */
+#define HV_ECPUERROR 12 /* CPU in error state */
+#define HV_ENOTSUPPORTED 13 /* Function not supported */
+#define HV_ENOMAP 14 /* No mapping found */
+#define HV_ETOOMANY 15 /* Too many items specified */
+
+/* mach_exit()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_EXIT
+ * ARG0: exit code
+ * ERRORS: This service does not return.
+ *
+ * Stop all CPUs in the virtual domain and place them into the stopped
+ * state. The 64-bit exit code may be passed to a service entity as
+ * the domain's exit status. On systems without a service entity, the
+ * domain will undergo a reset, and the boot firmware will be
+ * reloaded.
+ *
+ * This function will never return to the guest that invokes it.
+ *
+ * Note: By convention an exit code of zero denotes a successful exit by
+ * the guest code. A non-zero exit code denotes a guest specific
+ * error indication.
+ *
+ */
+#define HV_FAST_MACH_EXIT 0x00
+
+/* Domain services. */
+
+/* mach_desc()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_DESC
+ * ARG0: buffer
+ * ARG1: length
+ * RET0: status
+ * RET1: length
+ * ERRORS: HV_EBADALIGN Buffer is badly aligned
+ * HV_ENORADDR Buffer is to an illegal real address.
+ * HV_EINVAL Buffer length is too small for complete
+ * machine description.
+ *
+ * Copy the most current machine description into the buffer indicated
+ * by the real address in ARG0. The buffer provided must be 16 byte
+ * aligned. Upon success or HV_EINVAL, this service returns the
+ * actual size of the machine description in the RET1 return value.
+ *
+ * Note: A method of determining the appropriate buffer size for the
+ * machine description is to first call this service with a buffer
+ * length of 0 bytes.
+ */
+#define HV_FAST_MACH_DESC 0x01
+
+/* mach_exit()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_SIR
+ * ERRORS: This service does not return.
+ *
+ * Perform a software initiated reset of the virtual machine domain.
+ * All CPUs are captured as soon as possible, all hardware devices are
+ * returned to the entry default state, and the domain is restarted at
+ * the SIR (trap type 0x04) real trap table (RTBA) entry point on one
+ * of the CPUs. The single CPU restarted is selected as determined by
+ * platform specific policy. Memory is preserved across this
+ * operation.
+ */
+#define HV_FAST_MACH_SIR 0x02
+
+/* mach_set_soft_state()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_SET_SOFT_STATE
+ * ARG0: software state
+ * ARG1: software state description pointer
+ * RET0: status
+ * ERRORS: EINVAL software state not valid or software state
+ * description is not NULL terminated
+ * ENORADDR software state description pointer is not a
+ * valid real address
+ * EBADALIGNED software state description is not correctly
+ * aligned
+ *
+ * This allows the guest to report it's soft state to the hypervisor. There
+ * are two primary components to this state. The first part states whether
+ * the guest software is running or not. The second containts optional
+ * details specific to the software.
+ *
+ * The software state argument is defined below in HV_SOFT_STATE_*, and
+ * indicates whether the guest is operating normally or in a transitional
+ * state.
+ *
+ * The software state description argument is a real address of a data buffer
+ * of size 32-bytes aligned on a 32-byte boundary. It is treated as a NULL
+ * terminated 7-bit ASCII string of up to 31 characters not including the
+ * NULL termination.
+ */
+#define HV_FAST_MACH_SET_SOFT_STATE 0x03
+#define HV_SOFT_STATE_NORMAL 0x01
+#define HV_SOFT_STATE_TRANSITION 0x02
+
+/* mach_get_soft_state()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MACH_GET_SOFT_STATE
+ * ARG0: software state description pointer
+ * RET0: status
+ * RET1: software state
+ * ERRORS: ENORADDR software state description pointer is not a
+ * valid real address
+ * EBADALIGNED software state description is not correctly
+ * aligned
+ *
+ * Retrieve the current value of the guest's software state. The rules
+ * for the software state pointer are the same as for mach_set_soft_state()
+ * above.
+ */
+#define HV_FAST_MACH_GET_SOFT_STATE 0x04
+
+/* CPU services.
+ *
+ * CPUs represent devices that can execute software threads. A single
+ * chip that contains multiple cores or strands is represented as
+ * multiple CPUs with unique CPU identifiers. CPUs are exported to
+ * OBP via the machine description (and to the OS via the OBP device
+ * tree). CPUs are always in one of three states: stopped, running,
+ * or error.
+ *
+ * A CPU ID is a pre-assigned 16-bit value that uniquely identifies a
+ * CPU within a logical domain. Operations that are to be performed
+ * on multiple CPUs specify them via a CPU list. A CPU list is an
+ * array in real memory, of which each 16-bit word is a CPU ID. CPU
+ * lists are passed through the API as two arguments. The first is
+ * the number of entries (16-bit words) in the CPU list, and the
+ * second is the (real address) pointer to the CPU ID list.
+ */
+
+/* cpu_start()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_START
+ * ARG0: CPU ID
+ * ARG1: PC
+ * ARG1: RTBA
+ * ARG1: target ARG0
+ * RET0: status
+ * ERRORS: ENOCPU Invalid CPU ID
+ * EINVAL Target CPU ID is not in the stopped state
+ * ENORADDR Invalid PC or RTBA real address
+ * EBADALIGN Unaligned PC or unaligned RTBA
+ * EWOULDBLOCK Starting resources are not available
+ *
+ * Start CPU with given CPU ID with PC in %pc and with a real trap
+ * base address value of RTBA. The indicated CPU must be in the
+ * stopped state. The supplied RTBA must be aligned on a 256 byte
+ * boundary. On successful completion, the specified CPU will be in
+ * the running state and will be supplied with "target ARG0" in %o0
+ * and RTBA in %tba.
+ */
+#define HV_FAST_CPU_START 0x10
+
+/* cpu_stop()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_STOP
+ * ARG0: CPU ID
+ * RET0: status
+ * ERRORS: ENOCPU Invalid CPU ID
+ * EINVAL Target CPU ID is the current cpu
+ * EINVAL Target CPU ID is not in the running state
+ * EWOULDBLOCK Stopping resources are not available
+ * ENOTSUPPORTED Not supported on this platform
+ *
+ * The specified CPU is stopped. The indicated CPU must be in the
+ * running state. On completion, it will be in the stopped state. It
+ * is not legal to stop the current CPU.
+ *
+ * Note: As this service cannot be used to stop the current cpu, this service
+ * may not be used to stop the last running CPU in a domain. To stop
+ * and exit a running domain, a guest must use the mach_exit() service.
+ */
+#define HV_FAST_CPU_STOP 0x11
+
+/* cpu_yield()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_YIELD
+ * RET0: status
+ * ERRORS: No possible error.
+ *
+ * Suspend execution on the current CPU. Execution will resume when
+ * an interrupt (device, %stick_compare, or cross-call) is targeted to
+ * the CPU. On some CPUs, this API may be used by the hypervisor to
+ * save power by disabling hardware strands.
+ */
+#define HV_FAST_CPU_YIELD 0x12
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_yield(void);
+#endif
+
+/* cpu_qconf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_QCONF
+ * ARG0: queue
+ * ARG1: base real address
+ * ARG2: number of entries
+ * RET0: status
+ * ERRORS: ENORADDR Invalid base real address
+ * EINVAL Invalid queue or number of entries is less
+ * than 2 or too large.
+ * EBADALIGN Base real address is not correctly aligned
+ * for size.
+ *
+ * Configure the given queue to be placed at the given base real
+ * address, with the given number of entries. The number of entries
+ * must be a power of 2. The base real address must be aligned
+ * exactly to match the queue size. Each queue entry is 64 bytes
+ * long, so for example a 32 entry queue must be aligned on a 2048
+ * byte real address boundary.
+ *
+ * The specified queue is unconfigured if the number of entries is given
+ * as zero.
+ *
+ * For the current version of this API service, the argument queue is defined
+ * as follows:
+ *
+ * queue description
+ * ----- -------------------------
+ * 0x3c cpu mondo queue
+ * 0x3d device mondo queue
+ * 0x3e resumable error queue
+ * 0x3f non-resumable error queue
+ *
+ * Note: The maximum number of entries for each queue for a specific cpu may
+ * be determined from the machine description.
+ */
+#define HV_FAST_CPU_QCONF 0x14
+#define HV_CPU_QUEUE_CPU_MONDO 0x3c
+#define HV_CPU_QUEUE_DEVICE_MONDO 0x3d
+#define HV_CPU_QUEUE_RES_ERROR 0x3e
+#define HV_CPU_QUEUE_NONRES_ERROR 0x3f
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_qconf(unsigned long type,
+ unsigned long queue_paddr,
+ unsigned long num_queue_entries);
+#endif
+
+/* cpu_qinfo()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_QINFO
+ * ARG0: queue
+ * RET0: status
+ * RET1: base real address
+ * RET1: number of entries
+ * ERRORS: EINVAL Invalid queue
+ *
+ * Return the configuration info for the given queue. The base real
+ * address and number of entries of the defined queue are returned.
+ * The queue argument values are the same as for cpu_qconf() above.
+ *
+ * If the specified queue is a valid queue number, but no queue has
+ * been defined, the number of entries will be set to zero and the
+ * base real address returned is undefined.
+ */
+#define HV_FAST_CPU_QINFO 0x15
+
+/* cpu_mondo_send()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_MONDO_SEND
+ * ARG0-1: CPU list
+ * ARG2: data real address
+ * RET0: status
+ * ERRORS: EBADALIGN Mondo data is not 64-byte aligned or CPU list
+ * is not 2-byte aligned.
+ * ENORADDR Invalid data mondo address, or invalid cpu list
+ * address.
+ * ENOCPU Invalid cpu in CPU list
+ * EWOULDBLOCK Some or all of the listed CPUs did not receive
+ * the mondo
+ * ECPUERROR One or more of the listed CPUs are in error
+ * state, use HV_FAST_CPU_STATE to see which ones
+ * EINVAL CPU list includes caller's CPU ID
+ *
+ * Send a mondo interrupt to the CPUs in the given CPU list with the
+ * 64-bytes at the given data real address. The data must be 64-byte
+ * aligned. The mondo data will be delivered to the cpu_mondo queues
+ * of the recipient CPUs.
+ *
+ * In all cases, error or not, the CPUs in the CPU list to which the
+ * mondo has been successfully delivered will be indicated by having
+ * their entry in CPU list updated with the value 0xffff.
+ */
+#define HV_FAST_CPU_MONDO_SEND 0x42
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa);
+#endif
+
+/* cpu_myid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_MYID
+ * RET0: status
+ * RET1: CPU ID
+ * ERRORS: No errors defined.
+ *
+ * Return the hypervisor ID handle for the current CPU. Use by a
+ * virtual CPU to discover it's own identity.
+ */
+#define HV_FAST_CPU_MYID 0x16
+
+/* cpu_state()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_STATE
+ * ARG0: CPU ID
+ * RET0: status
+ * RET1: state
+ * ERRORS: ENOCPU Invalid CPU ID
+ *
+ * Retrieve the current state of the CPU with the given CPU ID.
+ */
+#define HV_FAST_CPU_STATE 0x17
+#define HV_CPU_STATE_STOPPED 0x01
+#define HV_CPU_STATE_RUNNING 0x02
+#define HV_CPU_STATE_ERROR 0x03
+
+#ifndef __ASSEMBLY__
+extern long sun4v_cpu_state(unsigned long cpuid);
+#endif
+
+/* cpu_set_rtba()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_SET_RTBA
+ * ARG0: RTBA
+ * RET0: status
+ * RET1: previous RTBA
+ * ERRORS: ENORADDR Invalid RTBA real address
+ * EBADALIGN RTBA is incorrectly aligned for a trap table
+ *
+ * Set the real trap base address of the local cpu to the given RTBA.
+ * The supplied RTBA must be aligned on a 256 byte boundary. Upon
+ * success the previous value of the RTBA is returned in RET1.
+ *
+ * Note: This service does not affect %tba
+ */
+#define HV_FAST_CPU_SET_RTBA 0x18
+
+/* cpu_set_rtba()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CPU_GET_RTBA
+ * RET0: status
+ * RET1: previous RTBA
+ * ERRORS: No possible error.
+ *
+ * Returns the current value of RTBA in RET1.
+ */
+#define HV_FAST_CPU_GET_RTBA 0x19
+
+/* MMU services.
+ *
+ * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls.
+ */
+#ifndef __ASSEMBLY__
+struct hv_tsb_descr {
+ unsigned short pgsz_idx;
+ unsigned short assoc;
+ unsigned int num_ttes; /* in TTEs */
+ unsigned int ctx_idx;
+ unsigned int pgsz_mask;
+ unsigned long tsb_base;
+ unsigned long resv;
+};
+#endif
+#define HV_TSB_DESCR_PGSZ_IDX_OFFSET 0x00
+#define HV_TSB_DESCR_ASSOC_OFFSET 0x02
+#define HV_TSB_DESCR_NUM_TTES_OFFSET 0x04
+#define HV_TSB_DESCR_CTX_IDX_OFFSET 0x08
+#define HV_TSB_DESCR_PGSZ_MASK_OFFSET 0x0c
+#define HV_TSB_DESCR_TSB_BASE_OFFSET 0x10
+#define HV_TSB_DESCR_RESV_OFFSET 0x18
+
+/* Page size bitmask. */
+#define HV_PGSZ_MASK_8K (1 << 0)
+#define HV_PGSZ_MASK_64K (1 << 1)
+#define HV_PGSZ_MASK_512K (1 << 2)
+#define HV_PGSZ_MASK_4MB (1 << 3)
+#define HV_PGSZ_MASK_32MB (1 << 4)
+#define HV_PGSZ_MASK_256MB (1 << 5)
+#define HV_PGSZ_MASK_2GB (1 << 6)
+#define HV_PGSZ_MASK_16GB (1 << 7)
+
+/* Page size index. The value given in the TSB descriptor must correspond
+ * to the smallest page size specified in the pgsz_mask page size bitmask.
+ */
+#define HV_PGSZ_IDX_8K 0
+#define HV_PGSZ_IDX_64K 1
+#define HV_PGSZ_IDX_512K 2
+#define HV_PGSZ_IDX_4MB 3
+#define HV_PGSZ_IDX_32MB 4
+#define HV_PGSZ_IDX_256MB 5
+#define HV_PGSZ_IDX_2GB 6
+#define HV_PGSZ_IDX_16GB 7
+
+/* MMU fault status area.
+ *
+ * MMU related faults have their status and fault address information
+ * placed into a memory region made available by privileged code. Each
+ * virtual processor must make a mmu_fault_area_conf() call to tell the
+ * hypervisor where that processor's fault status should be stored.
+ *
+ * The fault status block is a multiple of 64-bytes and must be aligned
+ * on a 64-byte boundary.
+ */
+#ifndef __ASSEMBLY__
+struct hv_fault_status {
+ unsigned long i_fault_type;
+ unsigned long i_fault_addr;
+ unsigned long i_fault_ctx;
+ unsigned long i_reserved[5];
+ unsigned long d_fault_type;
+ unsigned long d_fault_addr;
+ unsigned long d_fault_ctx;
+ unsigned long d_reserved[5];
+};
+#endif
+#define HV_FAULT_I_TYPE_OFFSET 0x00
+#define HV_FAULT_I_ADDR_OFFSET 0x08
+#define HV_FAULT_I_CTX_OFFSET 0x10
+#define HV_FAULT_D_TYPE_OFFSET 0x40
+#define HV_FAULT_D_ADDR_OFFSET 0x48
+#define HV_FAULT_D_CTX_OFFSET 0x50
+
+#define HV_FAULT_TYPE_FAST_MISS 1
+#define HV_FAULT_TYPE_FAST_PROT 2
+#define HV_FAULT_TYPE_MMU_MISS 3
+#define HV_FAULT_TYPE_INV_RA 4
+#define HV_FAULT_TYPE_PRIV_VIOL 5
+#define HV_FAULT_TYPE_PROT_VIOL 6
+#define HV_FAULT_TYPE_NFO 7
+#define HV_FAULT_TYPE_NFO_SEFF 8
+#define HV_FAULT_TYPE_INV_VA 9
+#define HV_FAULT_TYPE_INV_ASI 10
+#define HV_FAULT_TYPE_NC_ATOMIC 11
+#define HV_FAULT_TYPE_PRIV_ACT 12
+#define HV_FAULT_TYPE_RESV1 13
+#define HV_FAULT_TYPE_UNALIGNED 14
+#define HV_FAULT_TYPE_INV_PGSZ 15
+/* Values 16 --> -2 are reserved. */
+#define HV_FAULT_TYPE_MULTIPLE -1
+
+/* Flags argument for mmu_{map,unmap}_addr(), mmu_demap_{page,context,all}(),
+ * and mmu_{map,unmap}_perm_addr().
+ */
+#define HV_MMU_DMMU 0x01
+#define HV_MMU_IMMU 0x02
+#define HV_MMU_ALL (HV_MMU_DMMU | HV_MMU_IMMU)
+
+/* mmu_map_addr()
+ * TRAP: HV_MMU_MAP_ADDR_TRAP
+ * ARG0: virtual address
+ * ARG1: mmu context
+ * ARG2: TTE
+ * ARG3: flags (HV_MMU_{IMMU,DMMU})
+ * ERRORS: EINVAL Invalid virtual address, mmu context, or flags
+ * EBADPGSZ Invalid page size value
+ * ENORADDR Invalid real address in TTE
+ *
+ * Create a non-permanent mapping using the given TTE, virtual
+ * address, and mmu context. The flags argument determines which
+ * (data, or instruction, or both) TLB the mapping gets loaded into.
+ *
+ * The behavior is undefined if the valid bit is clear in the TTE.
+ *
+ * Note: This API call is for privileged code to specify temporary translation
+ * mappings without the need to create and manage a TSB.
+ */
+
+/* mmu_unmap_addr()
+ * TRAP: HV_MMU_UNMAP_ADDR_TRAP
+ * ARG0: virtual address
+ * ARG1: mmu context
+ * ARG2: flags (HV_MMU_{IMMU,DMMU})
+ * ERRORS: EINVAL Invalid virtual address, mmu context, or flags
+ *
+ * Demaps the given virtual address in the given mmu context on this
+ * CPU. This function is intended to be used to demap pages mapped
+ * with mmu_map_addr. This service is equivalent to invoking
+ * mmu_demap_page() with only the current CPU in the CPU list. The
+ * flags argument determines which (data, or instruction, or both) TLB
+ * the mapping gets unmapped from.
+ *
+ * Attempting to perform an unmap operation for a previously defined
+ * permanent mapping will have undefined results.
+ */
+
+/* mmu_tsb_ctx0()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTX0
+ * ARG0: number of TSB descriptions
+ * ARG1: TSB descriptions pointer
+ * RET0: status
+ * ERRORS: ENORADDR Invalid TSB descriptions pointer or
+ * TSB base within a descriptor
+ * EBADALIGN TSB descriptions pointer is not aligned
+ * to an 8-byte boundary, or TSB base
+ * within a descriptor is not aligned for
+ * the given TSB size
+ * EBADPGSZ Invalid page size in a TSB descriptor
+ * EBADTSB Invalid associativity or size in a TSB
+ * descriptor
+ * EINVAL Invalid number of TSB descriptions, or
+ * invalid context index in a TSB
+ * descriptor, or index page size not
+ * equal to smallest page size in page
+ * size bitmask field.
+ *
+ * Configures the TSBs for the current CPU for virtual addresses with
+ * context zero. The TSB descriptions pointer is a pointer to an
+ * array of the given number of TSB descriptions.
+ *
+ * Note: The maximum number of TSBs available to a virtual CPU is given by the
+ * mmu-max-#tsbs property of the cpu's corresponding "cpu" node in the
+ * machine description.
+ */
+#define HV_FAST_MMU_TSB_CTX0 0x20
+
+/* mmu_tsb_ctxnon0()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTXNON0
+ * ARG0: number of TSB descriptions
+ * ARG1: TSB descriptions pointer
+ * RET0: status
+ * ERRORS: Same as for mmu_tsb_ctx0() above.
+ *
+ * Configures the TSBs for the current CPU for virtual addresses with
+ * non-zero contexts. The TSB descriptions pointer is a pointer to an
+ * array of the given number of TSB descriptions.
+ *
+ * Note: A maximum of 16 TSBs may be specified in the TSB description list.
+ */
+#define HV_FAST_MMU_TSB_CTXNON0 0x21
+
+/* mmu_demap_page()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_DEMAP_PAGE
+ * ARG0: reserved, must be zero
+ * ARG1: reserved, must be zero
+ * ARG2: virtual address
+ * ARG3: mmu context
+ * ARG4: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid virutal address, context, or
+ * flags value
+ * ENOTSUPPORTED ARG0 or ARG1 is non-zero
+ *
+ * Demaps any page mapping of the given virtual address in the given
+ * mmu context for the current virtual CPU. Any virtually tagged
+ * caches are guaranteed to be kept consistent. The flags argument
+ * determines which TLB (instruction, or data, or both) participate in
+ * the operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_PAGE 0x22
+
+/* mmu_demap_ctx()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_DEMAP_CTX
+ * ARG0: reserved, must be zero
+ * ARG1: reserved, must be zero
+ * ARG2: mmu context
+ * ARG3: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid context or flags value
+ * ENOTSUPPORTED ARG0 or ARG1 is non-zero
+ *
+ * Demaps all non-permanent virtual page mappings previously specified
+ * for the given context for the current virtual CPU. Any virtual
+ * tagged caches are guaranteed to be kept consistent. The flags
+ * argument determines which TLB (instruction, or data, or both)
+ * participate in the operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_CTX 0x23
+
+/* mmu_demap_all()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_DEMAP_ALL
+ * ARG0: reserved, must be zero
+ * ARG1: reserved, must be zero
+ * ARG2: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid flags value
+ * ENOTSUPPORTED ARG0 or ARG1 is non-zero
+ *
+ * Demaps all non-permanent virtual page mappings previously specified
+ * for the current virtual CPU. Any virtual tagged caches are
+ * guaranteed to be kept consistent. The flags argument determines
+ * which TLB (instruction, or data, or both) participate in the
+ * operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_ALL 0x24
+
+/* mmu_map_perm_addr()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_MAP_PERM_ADDR
+ * ARG0: virtual address
+ * ARG1: reserved, must be zero
+ * ARG2: TTE
+ * ARG3: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid virutal address or flags value
+ * EBADPGSZ Invalid page size value
+ * ENORADDR Invalid real address in TTE
+ * ETOOMANY Too many mappings (max of 8 reached)
+ *
+ * Create a permanent mapping using the given TTE and virtual address
+ * for context 0 on the calling virtual CPU. A maximum of 8 such
+ * permanent mappings may be specified by privileged code. Mappings
+ * may be removed with mmu_unmap_perm_addr().
+ *
+ * The behavior is undefined if a TTE with the valid bit clear is given.
+ *
+ * Note: This call is used to specify address space mappings for which
+ * privileged code does not expect to receive misses. For example,
+ * this mechanism can be used to map kernel nucleus code and data.
+ */
+#define HV_FAST_MMU_MAP_PERM_ADDR 0x25
+
+/* mmu_fault_area_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_FAULT_AREA_CONF
+ * ARG0: real address
+ * RET0: status
+ * RET1: previous mmu fault area real address
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Invalid alignment for fault area
+ *
+ * Configure the MMU fault status area for the calling CPU. A 64-byte
+ * aligned real address specifies where MMU fault status information
+ * is placed. The return value is the previously specified area, or 0
+ * for the first invocation. Specifying a fault area at real address
+ * 0 is not allowed.
+ */
+#define HV_FAST_MMU_FAULT_AREA_CONF 0x26
+
+/* mmu_enable()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_ENABLE
+ * ARG0: enable flag
+ * ARG1: return target address
+ * RET0: status
+ * ERRORS: ENORADDR Invalid real address when disabling
+ * translation.
+ * EBADALIGN The return target address is not
+ * aligned to an instruction.
+ * EINVAL The enable flag request the current
+ * operating mode (e.g. disable if already
+ * disabled)
+ *
+ * Enable or disable virtual address translation for the calling CPU
+ * within the virtual machine domain. If the enable flag is zero,
+ * translation is disabled, any non-zero value will enable
+ * translation.
+ *
+ * When this function returns, the newly selected translation mode
+ * will be active. If the mmu is being enabled, then the return
+ * target address is a virtual address else it is a real address.
+ *
+ * Upon successful completion, control will be returned to the given
+ * return target address (ie. the cpu will jump to that address). On
+ * failure, the previous mmu mode remains and the trap simply returns
+ * as normal with the appropriate error code in RET0.
+ */
+#define HV_FAST_MMU_ENABLE 0x27
+
+/* mmu_unmap_perm_addr()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_UNMAP_PERM_ADDR
+ * ARG0: virtual address
+ * ARG1: reserved, must be zero
+ * ARG2: flags (HV_MMU_{IMMU,DMMU})
+ * RET0: status
+ * ERRORS: EINVAL Invalid virutal address or flags value
+ * ENOMAP Specified mapping was not found
+ *
+ * Demaps any permanent page mapping (established via
+ * mmu_map_perm_addr()) at the given virtual address for context 0 on
+ * the current virtual CPU. Any virtual tagged caches are guaranteed
+ * to be kept consistent.
+ */
+#define HV_FAST_MMU_UNMAP_PERM_ADDR 0x28
+
+/* mmu_tsb_ctx0_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTX0_INFO
+ * ARG0: max TSBs
+ * ARG1: buffer pointer
+ * RET0: status
+ * RET1: number of TSBs
+ * ERRORS: EINVAL Supplied buffer is too small
+ * EBADALIGN The buffer pointer is badly aligned
+ * ENORADDR Invalid real address for buffer pointer
+ *
+ * Return the TSB configuration as previous defined by mmu_tsb_ctx0()
+ * into the provided buffer. The size of the buffer is given in ARG1
+ * in terms of the number of TSB description entries.
+ *
+ * Upon return, RET1 always contains the number of TSB descriptions
+ * previously configured. If zero TSBs were configured, EOK is
+ * returned with RET1 containing 0.
+ */
+#define HV_FAST_MMU_TSB_CTX0_INFO 0x29
+
+/* mmu_tsb_ctxnon0_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_TSB_CTXNON0_INFO
+ * ARG0: max TSBs
+ * ARG1: buffer pointer
+ * RET0: status
+ * RET1: number of TSBs
+ * ERRORS: EINVAL Supplied buffer is too small
+ * EBADALIGN The buffer pointer is badly aligned
+ * ENORADDR Invalid real address for buffer pointer
+ *
+ * Return the TSB configuration as previous defined by
+ * mmu_tsb_ctxnon0() into the provided buffer. The size of the buffer
+ * is given in ARG1 in terms of the number of TSB description entries.
+ *
+ * Upon return, RET1 always contains the number of TSB descriptions
+ * previously configured. If zero TSBs were configured, EOK is
+ * returned with RET1 containing 0.
+ */
+#define HV_FAST_MMU_TSB_CTXNON0_INFO 0x2a
+
+/* mmu_fault_area_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMU_FAULT_AREA_INFO
+ * RET0: status
+ * RET1: fault area real address
+ * ERRORS: No errors defined.
+ *
+ * Return the currently defined MMU fault status area for the current
+ * CPU. The real address of the fault status area is returned in
+ * RET1, or 0 is returned in RET1 if no fault status area is defined.
+ *
+ * Note: mmu_fault_area_conf() may be called with the return value (RET1)
+ * from this service if there is a need to save and restore the fault
+ * area for a cpu.
+ */
+#define HV_FAST_MMU_FAULT_AREA_INFO 0x2b
+
+/* Cache and Memory services. */
+
+/* mem_scrub()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MEM_SCRUB
+ * ARG0: real address
+ * ARG1: length
+ * RET0: status
+ * RET1: length scrubbed
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Start address or length are not correctly
+ * aligned
+ * EINVAL Length is zero
+ *
+ * Zero the memory contents in the range real address to real address
+ * plus length minus 1. Also, valid ECC will be generated for that
+ * memory address range. Scrubbing is started at the given real
+ * address, but may not scrub the entire given length. The actual
+ * length scrubbed will be returned in RET1.
+ *
+ * The real address and length must be aligned on an 8K boundary, or
+ * contain the start address and length from a sun4v error report.
+ *
+ * Note: There are two uses for this function. The first use is to block clear
+ * and initialize memory and the second is to scrub an u ncorrectable
+ * error reported via a resumable or non-resumable trap. The second
+ * use requires the arguments to be equal to the real address and length
+ * provided in a sun4v memory error report.
+ */
+#define HV_FAST_MEM_SCRUB 0x31
+
+/* mem_sync()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MEM_SYNC
+ * ARG0: real address
+ * ARG1: length
+ * RET0: status
+ * RET1: length synced
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Start address or length are not correctly
+ * aligned
+ * EINVAL Length is zero
+ *
+ * Force the next access within the real address to real address plus
+ * length minus 1 to be fetches from main system memory. Less than
+ * the given length may be synced, the actual amount synced is
+ * returned in RET1. The real address and length must be aligned on
+ * an 8K boundary.
+ */
+#define HV_FAST_MEM_SYNC 0x32
+
+/* Time of day services.
+ *
+ * The hypervisor maintains the time of day on a per-domain basis.
+ * Changing the time of day in one domain does not affect the time of
+ * day on any other domain.
+ *
+ * Time is described by a single unsigned 64-bit word which is the
+ * number of seconds since the UNIX Epoch (00:00:00 UTC, January 1,
+ * 1970).
+ */
+
+/* tod_get()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TOD_GET
+ * RET0: status
+ * RET1: TOD
+ * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable
+ * ENOTSUPPORTED If TOD not supported on this platform
+ *
+ * Return the current time of day. May block if TOD access is
+ * temporarily not possible.
+ */
+#define HV_FAST_TOD_GET 0x50
+
+/* tod_set()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TOD_SET
+ * ARG0: TOD
+ * RET0: status
+ * ERRORS: EWOULDBLOCK TOD resource is temporarily unavailable
+ * ENOTSUPPORTED If TOD not supported on this platform
+ *
+ * The current time of day is set to the value specified in ARG0. May
+ * block if TOD access is temporarily not possible.
+ */
+#define HV_FAST_TOD_SET 0x51
+
+/* Console services */
+
+/* con_getchar()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CONS_GETCHAR
+ * RET0: status
+ * RET1: character
+ * ERRORS: EWOULDBLOCK No character available.
+ *
+ * Returns a character from the console device. If no character is
+ * available then an EWOULDBLOCK error is returned. If a character is
+ * available, then the returned status is EOK and the character value
+ * is in RET1.
+ *
+ * A virtual BREAK is represented by the 64-bit value -1.
+ *
+ * A virtual HUP signal is represented by the 64-bit value -2.
+ */
+#define HV_FAST_CONS_GETCHAR 0x60
+
+/* con_putchar()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_CONS_PUTCHAR
+ * ARG0: character
+ * RET0: status
+ * ERRORS: EINVAL Illegal character
+ * EWOULDBLOCK Output buffer currently full, would block
+ *
+ * Send a character to the console device. Only character values
+ * between 0 and 255 may be used. Values outside this range are
+ * invalid except for the 64-bit value -1 which is used to send a
+ * virtual BREAK.
+ */
+#define HV_FAST_CONS_PUTCHAR 0x61
+
+/* Trap trace services.
+ *
+ * The hypervisor provides a trap tracing capability for privileged
+ * code running on each virtual CPU. Privileged code provides a
+ * round-robin trap trace queue within which the hypervisor writes
+ * 64-byte entries detailing hyperprivileged traps taken n behalf of
+ * privileged code. This is provided as a debugging capability for
+ * privileged code.
+ *
+ * The trap trace control structure is 64-bytes long and placed at the
+ * start (offset 0) of the trap trace buffer, and is described as
+ * follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_trap_trace_control {
+ unsigned long head_offset;
+ unsigned long tail_offset;
+ unsigned long __reserved[0x30 / sizeof(unsigned long)];
+};
+#endif
+#define HV_TRAP_TRACE_CTRL_HEAD_OFFSET 0x00
+#define HV_TRAP_TRACE_CTRL_TAIL_OFFSET 0x08
+
+/* The head offset is the offset of the most recently completed entry
+ * in the trap-trace buffer. The tail offset is the offset of the
+ * next entry to be written. The control structure is owned and
+ * modified by the hypervisor. A guest may not modify the control
+ * structure contents. Attempts to do so will result in undefined
+ * behavior for the guest.
+ *
+ * Each trap trace buffer entry is layed out as follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_trap_trace_entry {
+ unsigned char type; /* Hypervisor or guest entry? */
+ unsigned char hpstate; /* Hyper-privileged state */
+ unsigned char tl; /* Trap level */
+ unsigned char gl; /* Global register level */
+ unsigned short tt; /* Trap type */
+ unsigned short tag; /* Extended trap identifier */
+ unsigned long tstate; /* Trap state */
+ unsigned long tick; /* Tick */
+ unsigned long tpc; /* Trap PC */
+ unsigned long f1; /* Entry specific */
+ unsigned long f2; /* Entry specific */
+ unsigned long f3; /* Entry specific */
+ unsigned long f4; /* Entry specific */
+};
+#endif
+#define HV_TRAP_TRACE_ENTRY_TYPE 0x00
+#define HV_TRAP_TRACE_ENTRY_HPSTATE 0x01
+#define HV_TRAP_TRACE_ENTRY_TL 0x02
+#define HV_TRAP_TRACE_ENTRY_GL 0x03
+#define HV_TRAP_TRACE_ENTRY_TT 0x04
+#define HV_TRAP_TRACE_ENTRY_TAG 0x06
+#define HV_TRAP_TRACE_ENTRY_TSTATE 0x08
+#define HV_TRAP_TRACE_ENTRY_TICK 0x10
+#define HV_TRAP_TRACE_ENTRY_TPC 0x18
+#define HV_TRAP_TRACE_ENTRY_F1 0x20
+#define HV_TRAP_TRACE_ENTRY_F2 0x28
+#define HV_TRAP_TRACE_ENTRY_F3 0x30
+#define HV_TRAP_TRACE_ENTRY_F4 0x38
+
+/* The type field is encoded as follows. */
+#define HV_TRAP_TYPE_UNDEF 0x00 /* Entry content undefined */
+#define HV_TRAP_TYPE_HV 0x01 /* Hypervisor trap entry */
+#define HV_TRAP_TYPE_GUEST 0xff /* Added via ttrace_addentry() */
+
+/* ttrace_buf_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_BUF_CONF
+ * ARG0: real address
+ * ARG1: number of entries
+ * RET0: status
+ * RET1: number of entries
+ * ERRORS: ENORADDR Invalid real address
+ * EINVAL Size is too small
+ * EBADALIGN Real address not aligned on 64-byte boundary
+ *
+ * Requests hypervisor trap tracing and declares a virtual CPU's trap
+ * trace buffer to the hypervisor. The real address supplies the real
+ * base address of the trap trace queue and must be 64-byte aligned.
+ * Specifying a value of 0 for the number of entries disables trap
+ * tracing for the calling virtual CPU. The buffer allocated must be
+ * sized for a power of two number of 64-byte trap trace entries plus
+ * an initial 64-byte control structure.
+ *
+ * This may be invoked any number of times so that a virtual CPU may
+ * relocate a trap trace buffer or create "snapshots" of information.
+ *
+ * If the real address is illegal or badly aligned, then trap tracing
+ * is disabled and an error is returned.
+ *
+ * Upon failure with EINVAL, this service call returns in RET1 the
+ * minimum number of buffer entries required. Upon other failures
+ * RET1 is undefined.
+ */
+#define HV_FAST_TTRACE_BUF_CONF 0x90
+
+/* ttrace_buf_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_BUF_INFO
+ * RET0: status
+ * RET1: real address
+ * RET2: size
+ * ERRORS: None defined.
+ *
+ * Returns the size and location of the previously declared trap-trace
+ * buffer. In the event that no buffer was previously defined, or the
+ * buffer is disabled, this call will return a size of zero bytes.
+ */
+#define HV_FAST_TTRACE_BUF_INFO 0x91
+
+/* ttrace_enable()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_ENABLE
+ * ARG0: enable
+ * RET0: status
+ * RET1: previous enable state
+ * ERRORS: EINVAL No trap trace buffer currently defined
+ *
+ * Enable or disable trap tracing, and return the previous enabled
+ * state in RET1. Future systems may define various flags for the
+ * enable argument (ARG0), for the moment a guest should pass
+ * "(uint64_t) -1" to enable, and "(uint64_t) 0" to disable all
+ * tracing - which will ensure future compatability.
+ */
+#define HV_FAST_TTRACE_ENABLE 0x92
+
+/* ttrace_freeze()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_TTRACE_FREEZE
+ * ARG0: freeze
+ * RET0: status
+ * RET1: previous freeze state
+ * ERRORS: EINVAL No trap trace buffer currently defined
+ *
+ * Freeze or unfreeze trap tracing, returning the previous freeze
+ * state in RET1. A guest should pass a non-zero value to freeze and
+ * a zero value to unfreeze all tracing. The returned previous state
+ * is 0 for not frozen and 1 for frozen.
+ */
+#define HV_FAST_TTRACE_FREEZE 0x93
+
+/* ttrace_addentry()
+ * TRAP: HV_TTRACE_ADDENTRY_TRAP
+ * ARG0: tag (16-bits)
+ * ARG1: data word 0
+ * ARG2: data word 1
+ * ARG3: data word 2
+ * ARG4: data word 3
+ * RET0: status
+ * ERRORS: EINVAL No trap trace buffer currently defined
+ *
+ * Add an entry to the trap trace buffer. Upon return only ARG0/RET0
+ * is modified - none of the other registers holding arguments are
+ * volatile across this hypervisor service.
+ */
+
+/* Core dump services.
+ *
+ * Since the hypervisor viraulizes and thus obscures a lot of the
+ * physical machine layout and state, traditional OS crash dumps can
+ * be difficult to diagnose especially when the problem is a
+ * configuration error of some sort.
+ *
+ * The dump services provide an opaque buffer into which the
+ * hypervisor can place it's internal state in order to assist in
+ * debugging such situations. The contents are opaque and extremely
+ * platform and hypervisor implementation specific. The guest, during
+ * a core dump, requests that the hypervisor update any information in
+ * the dump buffer in preparation to being dumped as part of the
+ * domain's memory image.
+ */
+
+/* dump_buf_update()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_DUMP_BUF_UPDATE
+ * ARG0: real address
+ * ARG1: size
+ * RET0: status
+ * RET1: required size of dump buffer
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Real address is not aligned on a 64-byte
+ * boundary
+ * EINVAL Size is non-zero but less than minimum size
+ * required
+ * ENOTSUPPORTED Operation not supported on current logical
+ * domain
+ *
+ * Declare a domain dump buffer to the hypervisor. The real address
+ * provided for the domain dump buffer must be 64-byte aligned. The
+ * size specifies the size of the dump buffer and may be larger than
+ * the minimum size specified in the machine description. The
+ * hypervisor will fill the dump buffer with opaque data.
+ *
+ * Note: A guest may elect to include dump buffer contents as part of a crash
+ * dump to assist with debugging. This function may be called any number
+ * of times so that a guest may relocate a dump buffer, or create
+ * "snapshots" of any dump-buffer information. Each call to
+ * dump_buf_update() atomically declares the new dump buffer to the
+ * hypervisor.
+ *
+ * A specified size of 0 unconfigures the dump buffer. If the real
+ * address is illegal or badly aligned, then any currently active dump
+ * buffer is disabled and an error is returned.
+ *
+ * In the event that the call fails with EINVAL, RET1 contains the
+ * minimum size requires by the hypervisor for a valid dump buffer.
+ */
+#define HV_FAST_DUMP_BUF_UPDATE 0x94
+
+/* dump_buf_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_DUMP_BUF_INFO
+ * RET0: status
+ * RET1: real address of current dump buffer
+ * RET2: size of current dump buffer
+ * ERRORS: No errors defined.
+ *
+ * Return the currently configures dump buffer description. A
+ * returned size of 0 bytes indicates an undefined dump buffer. In
+ * this case the return address in RET1 is undefined.
+ */
+#define HV_FAST_DUMP_BUF_INFO 0x95
+
+/* Device interrupt services.
+ *
+ * Device interrupts are allocated to system bus bridges by the hypervisor,
+ * and described to OBP in the machine description. OBP then describes
+ * these interrupts to the OS via properties in the device tree.
+ *
+ * Terminology:
+ *
+ * cpuid Unique opaque value which represents a target cpu.
+ *
+ * devhandle Device handle. It uniquely identifies a device, and
+ * consistes of the lower 28-bits of the hi-cell of the
+ * first entry of the device's "reg" property in the
+ * OBP device tree.
+ *
+ * devino Device interrupt number. Specifies the relative
+ * interrupt number within the device. The unique
+ * combination of devhandle and devino are used to
+ * identify a specific device interrupt.
+ *
+ * Note: The devino value is the same as the values in the
+ * "interrupts" property or "interrupt-map" property
+ * in the OBP device tree for that device.
+ *
+ * sysino System interrupt number. A 64-bit unsigned interger
+ * representing a unique interrupt within a virtual
+ * machine.
+ *
+ * intr_state A flag representing the interrupt state for a given
+ * sysino. The state values are defined below.
+ *
+ * intr_enabled A flag representing the 'enabled' state for a given
+ * sysino. The enable values are defined below.
+ */
+
+#define HV_INTR_STATE_IDLE 0 /* Nothing pending */
+#define HV_INTR_STATE_RECEIVED 1 /* Interrupt received by hardware */
+#define HV_INTR_STATE_DELIVERED 2 /* Interrupt delivered to queue */
+
+#define HV_INTR_DISABLED 0 /* sysino not enabled */
+#define HV_INTR_ENABLED 1 /* sysino enabled */
+
+/* intr_devino_to_sysino()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_DEVINO2SYSINO
+ * ARG0: devhandle
+ * ARG1: devino
+ * RET0: status
+ * RET1: sysino
+ * ERRORS: EINVAL Invalid devhandle/devino
+ *
+ * Converts a device specific interrupt number of the given
+ * devhandle/devino into a system specific ino (sysino).
+ */
+#define HV_FAST_INTR_DEVINO2SYSINO 0xa0
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_devino_to_sysino(unsigned long devhandle,
+ unsigned long devino);
+#endif
+
+/* intr_getenabled()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_GETENABLED
+ * ARG0: sysino
+ * RET0: status
+ * RET1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ * ERRORS: EINVAL Invalid sysino
+ *
+ * Returns interrupt enabled state in RET1 for the interrupt defined
+ * by the given sysino.
+ */
+#define HV_FAST_INTR_GETENABLED 0xa1
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_getenabled(unsigned long sysino);
+#endif
+
+/* intr_setenabled()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_SETENABLED
+ * ARG0: sysino
+ * ARG1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ * RET0: status
+ * ERRORS: EINVAL Invalid sysino or intr_enabled value
+ *
+ * Set the 'enabled' state of the interrupt sysino.
+ */
+#define HV_FAST_INTR_SETENABLED 0xa2
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_setenabled(unsigned long sysino, unsigned long intr_enabled);
+#endif
+
+/* intr_getstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_GETSTATE
+ * ARG0: sysino
+ * RET0: status
+ * RET1: intr_state (HV_INTR_STATE_*)
+ * ERRORS: EINVAL Invalid sysino
+ *
+ * Returns current state of the interrupt defined by the given sysino.
+ */
+#define HV_FAST_INTR_GETSTATE 0xa3
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_getstate(unsigned long sysino);
+#endif
+
+/* intr_setstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_SETSTATE
+ * ARG0: sysino
+ * ARG1: intr_state (HV_INTR_STATE_*)
+ * RET0: status
+ * ERRORS: EINVAL Invalid sysino or intr_state value
+ *
+ * Sets the current state of the interrupt described by the given sysino
+ * value.
+ *
+ * Note: Setting the state to HV_INTR_STATE_IDLE clears any pending
+ * interrupt for sysino.
+ */
+#define HV_FAST_INTR_SETSTATE 0xa4
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state);
+#endif
+
+/* intr_gettarget()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_GETTARGET
+ * ARG0: sysino
+ * RET0: status
+ * RET1: cpuid
+ * ERRORS: EINVAL Invalid sysino
+ *
+ * Returns CPU that is the current target of the interrupt defined by
+ * the given sysino. The CPU value returned is undefined if the target
+ * has not been set via intr_settarget().
+ */
+#define HV_FAST_INTR_GETTARGET 0xa5
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_gettarget(unsigned long sysino);
+#endif
+
+/* intr_settarget()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_INTR_SETTARGET
+ * ARG0: sysino
+ * ARG1: cpuid
+ * RET0: status
+ * ERRORS: EINVAL Invalid sysino
+ * ENOCPU Invalid cpuid
+ *
+ * Set the target CPU for the interrupt defined by the given sysino.
+ */
+#define HV_FAST_INTR_SETTARGET 0xa6
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid);
+#endif
+
+/* PCI IO services.
+ *
+ * See the terminology descriptions in the device interrupt services
+ * section above as those apply here too. Here are terminology
+ * definitions specific to these PCI IO services:
+ *
+ * tsbnum TSB number. Indentifies which io-tsb is used.
+ * For this version of the specification, tsbnum
+ * must be zero.
+ *
+ * tsbindex TSB index. Identifies which entry in the TSB
+ * is used. The first entry is zero.
+ *
+ * tsbid A 64-bit aligned data structure which contains
+ * a tsbnum and a tsbindex. Bits 63:32 contain the
+ * tsbnum and bits 31:00 contain the tsbindex.
+ *
+ * Use the HV_PCI_TSBID() macro to construct such
+ * values.
+ *
+ * io_attributes IO attributes for IOMMU mappings. One of more
+ * of the attritbute bits are stores in a 64-bit
+ * value. The values are defined below.
+ *
+ * r_addr 64-bit real address
+ *
+ * pci_device PCI device address. A PCI device address identifies
+ * a specific device on a specific PCI bus segment.
+ * A PCI device address ia a 32-bit unsigned integer
+ * with the following format:
+ *
+ * 00000000.bbbbbbbb.dddddfff.00000000
+ *
+ * Use the HV_PCI_DEVICE_BUILD() macro to construct
+ * such values.
+ *
+ * pci_config_offset
+ * PCI configureation space offset. For conventional
+ * PCI a value between 0 and 255. For extended
+ * configuration space, a value between 0 and 4095.
+ *
+ * Note: For PCI configuration space accesses, the offset
+ * must be aligned to the access size.
+ *
+ * error_flag A return value which specifies if the action succeeded
+ * or failed. 0 means no error, non-0 means some error
+ * occurred while performing the service.
+ *
+ * io_sync_direction
+ * Direction definition for pci_dma_sync(), defined
+ * below in HV_PCI_SYNC_*.
+ *
+ * io_page_list A list of io_page_addresses, an io_page_address is
+ * a real address.
+ *
+ * io_page_list_p A pointer to an io_page_list.
+ *
+ * "size based byte swap" - Some functions do size based byte swapping
+ * which allows sw to access pointers and
+ * counters in native form when the processor
+ * operates in a different endianness than the
+ * IO bus. Size-based byte swapping converts a
+ * multi-byte field between big-endian and
+ * little-endian format.
+ */
+
+#define HV_PCI_MAP_ATTR_READ 0x01
+#define HV_PCI_MAP_ATTR_WRITE 0x02
+
+#define HV_PCI_DEVICE_BUILD(b,d,f) \
+ ((((b) & 0xff) << 16) | \
+ (((d) & 0x1f) << 11) | \
+ (((f) & 0x07) << 8))
+
+#define HV_PCI_TSBID(__tsb_num, __tsb_index) \
+ ((((u64)(__tsb_num)) << 32UL) | ((u64)(__tsb_index)))
+
+#define HV_PCI_SYNC_FOR_DEVICE 0x01
+#define HV_PCI_SYNC_FOR_CPU 0x02
+
+/* pci_iommu_map()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_MAP
+ * ARG0: devhandle
+ * ARG1: tsbid
+ * ARG2: #ttes
+ * ARG3: io_attributes
+ * ARG4: io_page_list_p
+ * RET0: status
+ * RET1: #ttes mapped
+ * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex/io_attributes
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Invalid real address
+ *
+ * Create IOMMU mappings in the sun4v device defined by the given
+ * devhandle. The mappings are created in the TSB defined by the
+ * tsbnum component of the given tsbid. The first mapping is created
+ * in the TSB i ndex defined by the tsbindex component of the given tsbid.
+ * The call creates up to #ttes mappings, the first one at tsbnum, tsbindex,
+ * the second at tsbnum, tsbindex + 1, etc.
+ *
+ * All mappings are created with the attributes defined by the io_attributes
+ * argument. The page mapping addresses are described in the io_page_list
+ * defined by the given io_page_list_p, which is a pointer to the io_page_list.
+ * The first entry in the io_page_list is the address for the first iotte, the
+ * 2nd for the 2nd iotte, and so on.
+ *
+ * Each io_page_address in the io_page_list must be appropriately aligned.
+ * #ttes must be greater than zero. For this version of the spec, the tsbnum
+ * component of the given tsbid must be zero.
+ *
+ * Returns the actual number of mappings creates, which may be less than
+ * or equal to the argument #ttes. If the function returns a value which
+ * is less than the #ttes, the caller may continus to call the function with
+ * an updated tsbid, #ttes, io_page_list_p arguments until all pages are
+ * mapped.
+ *
+ * Note: This function does not imply an iotte cache flush. The guest must
+ * demap an entry before re-mapping it.
+ */
+#define HV_FAST_PCI_IOMMU_MAP 0xb0
+
+/* pci_iommu_demap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_DEMAP
+ * ARG0: devhandle
+ * ARG1: tsbid
+ * ARG2: #ttes
+ * RET0: status
+ * RET1: #ttes demapped
+ * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex
+ *
+ * Demap and flush IOMMU mappings in the device defined by the given
+ * devhandle. Demaps up to #ttes entries in the TSB defined by the tsbnum
+ * component of the given tsbid, starting at the TSB index defined by the
+ * tsbindex component of the given tsbid.
+ *
+ * For this version of the spec, the tsbnum of the given tsbid must be zero.
+ * #ttes must be greater than zero.
+ *
+ * Returns the actual number of ttes demapped, which may be less than or equal
+ * to the argument #ttes. If #ttes demapped is less than #ttes, the caller
+ * may continue to call this function with updated tsbid and #ttes arguments
+ * until all pages are demapped.
+ *
+ * Note: Entries do not have to be mapped to be demapped. A demap of an
+ * unmapped page will flush the entry from the tte cache.
+ */
+#define HV_FAST_PCI_IOMMU_DEMAP 0xb1
+
+/* pci_iommu_getmap()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_GETMAP
+ * ARG0: devhandle
+ * ARG1: tsbid
+ * RET0: status
+ * RET1: io_attributes
+ * RET2: real address
+ * ERRORS: EINVAL Invalid devhandle/tsbnum/tsbindex
+ * ENOMAP Mapping is not valid, no translation exists
+ *
+ * Read and return the mapping in the device described by the given devhandle
+ * and tsbid. If successful, the io_attributes shall be returned in RET1
+ * and the page address of the mapping shall be returned in RET2.
+ *
+ * For this version of the spec, the tsbnum component of the given tsbid
+ * must be zero.
+ */
+#define HV_FAST_PCI_IOMMU_GETMAP 0xb2
+
+/* pci_iommu_getbypass()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_IOMMU_GETBYPASS
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: io_attributes
+ * RET0: status
+ * RET1: io_addr
+ * ERRORS: EINVAL Invalid devhandle/io_attributes
+ * ENORADDR Invalid real address
+ * ENOTSUPPORTED Function not supported in this implementation.
+ *
+ * Create a "special" mapping in the device described by the given devhandle,
+ * for the given real address and attributes. Return the IO address in RET1
+ * if successful.
+ */
+#define HV_FAST_PCI_IOMMU_GETBYPASS 0xb3
+
+/* pci_config_get()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_CONFIG_GET
+ * ARG0: devhandle
+ * ARG1: pci_device
+ * ARG2: pci_config_offset
+ * ARG3: size
+ * RET0: status
+ * RET1: error_flag
+ * RET2: data
+ * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size
+ * EBADALIGN pci_config_offset not size aligned
+ * ENOACCESS Access to this offset is not permitted
+ *
+ * Read PCI configuration space for the adapter described by the given
+ * devhandle. Read size (1, 2, or 4) bytes of data from the given
+ * pci_device, at pci_config_offset from the beginning of the device's
+ * configuration space. If there was no error, RET1 is set to zero and
+ * RET2 is set to the data read. Insignificant bits in RET2 are not
+ * guarenteed to have any specific value and therefore must be ignored.
+ *
+ * The data returned in RET2 is size based byte swapped.
+ *
+ * If an error occurs during the read, set RET1 to a non-zero value. The
+ * given pci_config_offset must be 'size' aligned.
+ */
+#define HV_FAST_PCI_CONFIG_GET 0xb4
+
+/* pci_config_put()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_CONFIG_PUT
+ * ARG0: devhandle
+ * ARG1: pci_device
+ * ARG2: pci_config_offset
+ * ARG3: size
+ * ARG4: data
+ * RET0: status
+ * RET1: error_flag
+ * ERRORS: EINVAL Invalid devhandle/pci_device/offset/size
+ * EBADALIGN pci_config_offset not size aligned
+ * ENOACCESS Access to this offset is not permitted
+ *
+ * Write PCI configuration space for the adapter described by the given
+ * devhandle. Write size (1, 2, or 4) bytes of data in a single operation,
+ * at pci_config_offset from the beginning of the device's configuration
+ * space. The data argument contains the data to be written to configuration
+ * space. Prior to writing, the data is size based byte swapped.
+ *
+ * If an error occurs during the write access, do not generate an error
+ * report, do set RET1 to a non-zero value. Otherwise RET1 is zero.
+ * The given pci_config_offset must be 'size' aligned.
+ *
+ * This function is permitted to read from offset zero in the configuration
+ * space described by the given pci_device if necessary to ensure that the
+ * write access to config space completes.
+ */
+#define HV_FAST_PCI_CONFIG_PUT 0xb5
+
+/* pci_peek()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_PEEK
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: size
+ * RET0: status
+ * RET1: error_flag
+ * RET2: data
+ * ERRORS: EINVAL Invalid devhandle or size
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Bad real address
+ * ENOACCESS Guest access prohibited
+ *
+ * Attempt to read the IO address given by the given devhandle, real address,
+ * and size. Size must be 1, 2, 4, or 8. The read is performed as a single
+ * access operation using the given size. If an error occurs when reading
+ * from the given location, do not generate an error report, but return a
+ * non-zero value in RET1. If the read was successful, return zero in RET1
+ * and return the actual data read in RET2. The data returned is size based
+ * byte swapped.
+ *
+ * Non-significant bits in RET2 are not guarenteed to have any specific value
+ * and therefore must be ignored. If RET1 is returned as non-zero, the data
+ * value is not guarenteed to have any specific value and should be ignored.
+ *
+ * The caller must have permission to read from the given devhandle, real
+ * address, which must be an IO address. The argument real address must be a
+ * size aligned address.
+ *
+ * The hypervisor implementation of this function must block access to any
+ * IO address that the guest does not have explicit permission to access.
+ */
+#define HV_FAST_PCI_PEEK 0xb6
+
+/* pci_poke()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_POKE
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: size
+ * ARG3: data
+ * ARG4: pci_device
+ * RET0: status
+ * RET1: error_flag
+ * ERRORS: EINVAL Invalid devhandle, size, or pci_device
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Bad real address
+ * ENOACCESS Guest access prohibited
+ * ENOTSUPPORTED Function is not supported by implementation
+ *
+ * Attempt to write data to the IO address given by the given devhandle,
+ * real address, and size. Size must be 1, 2, 4, or 8. The write is
+ * performed as a single access operation using the given size. Prior to
+ * writing the data is size based swapped.
+ *
+ * If an error occurs when writing to the given location, do not generate an
+ * error report, but return a non-zero value in RET1. If the write was
+ * successful, return zero in RET1.
+ *
+ * pci_device describes the configuration address of the device being
+ * written to. The implementation may safely read from offset 0 with
+ * the configuration space of the device described by devhandle and
+ * pci_device in order to guarantee that the write portion of the operation
+ * completes
+ *
+ * Any error that occurs due to the read shall be reported using the normal
+ * error reporting mechanisms .. the read error is not suppressed.
+ *
+ * The caller must have permission to write to the given devhandle, real
+ * address, which must be an IO address. The argument real address must be a
+ * size aligned address. The caller must have permission to read from
+ * the given devhandle, pci_device cofiguration space offset 0.
+ *
+ * The hypervisor implementation of this function must block access to any
+ * IO address that the guest does not have explicit permission to access.
+ */
+#define HV_FAST_PCI_POKE 0xb7
+
+/* pci_dma_sync()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_DMA_SYNC
+ * ARG0: devhandle
+ * ARG1: real address
+ * ARG2: size
+ * ARG3: io_sync_direction
+ * RET0: status
+ * RET1: #synced
+ * ERRORS: EINVAL Invalid devhandle or io_sync_direction
+ * ENORADDR Bad real address
+ *
+ * Synchronize a memory region described by the given real address and size,
+ * for the device defined by the given devhandle using the direction(s)
+ * defined by the given io_sync_direction. The argument size is the size of
+ * the memory region in bytes.
+ *
+ * Return the actual number of bytes synchronized in the return value #synced,
+ * which may be less than or equal to the argument size. If the return
+ * value #synced is less than size, the caller must continue to call this
+ * function with updated real address and size arguments until the entire
+ * memory region is synchronized.
+ */
+#define HV_FAST_PCI_DMA_SYNC 0xb8
+
+/* PCI MSI services. */
+
+#define HV_MSITYPE_MSI32 0x00
+#define HV_MSITYPE_MSI64 0x01
+
+#define HV_MSIQSTATE_IDLE 0x00
+#define HV_MSIQSTATE_ERROR 0x01
+
+#define HV_MSIQ_INVALID 0x00
+#define HV_MSIQ_VALID 0x01
+
+#define HV_MSISTATE_IDLE 0x00
+#define HV_MSISTATE_DELIVERED 0x01
+
+#define HV_MSIVALID_INVALID 0x00
+#define HV_MSIVALID_VALID 0x01
+
+#define HV_PCIE_MSGTYPE_PME_MSG 0x18
+#define HV_PCIE_MSGTYPE_PME_ACK_MSG 0x1b
+#define HV_PCIE_MSGTYPE_CORR_MSG 0x30
+#define HV_PCIE_MSGTYPE_NONFATAL_MSG 0x31
+#define HV_PCIE_MSGTYPE_FATAL_MSG 0x33
+
+#define HV_MSG_INVALID 0x00
+#define HV_MSG_VALID 0x01
+
+/* pci_msiq_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_CONF
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: real address
+ * ARG3: number of entries
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, msiqid or nentries
+ * EBADALIGN Improperly aligned real address
+ * ENORADDR Bad real address
+ *
+ * Configure the MSI queue given by the devhandle and msiqid arguments,
+ * and to be placed at the given real address and be of the given
+ * number of entries. The real address must be aligned exactly to match
+ * the queue size. Each queue entry is 64-bytes long, so f.e. a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary. The MSI-EQ
+ * Head and Tail are initialized so that the MSI-EQ is 'empty'.
+ *
+ * Implementation Note: Certain implementations have fixed sized queues. In
+ * that case, number of entries must contain the correct
+ * value.
+ */
+#define HV_FAST_PCI_MSIQ_CONF 0xc0
+
+/* pci_msiq_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_INFO
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: real address
+ * RET2: number of entries
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Return the configuration information for the MSI queue described
+ * by the given devhandle and msiqid. The base address of the queue
+ * is returned in ARG1 and the number of entries is returned in ARG2.
+ * If the queue is unconfigured, the real address is undefined and the
+ * number of entries will be returned as zero.
+ */
+#define HV_FAST_PCI_MSIQ_INFO 0xc1
+
+/* pci_msiq_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID)
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the valid state of the MSI-EQ described by the given devhandle and
+ * msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETVALID 0xc2
+
+/* pci_msiq_setvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_SETVALID
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: msiqvalid (HV_MSIQ_VALID or HV_MSIQ_INVALID)
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msiqid or msiqvalid
+ * value or MSI EQ is uninitialized
+ *
+ * Set the valid state of the MSI-EQ described by the given devhandle and
+ * msiqid to the given msiqvalid.
+ */
+#define HV_FAST_PCI_MSIQ_SETVALID 0xc3
+
+/* pci_msiq_getstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETSTATE
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR)
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the state of the MSI-EQ described by the given devhandle and
+ * msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETSTATE 0xc4
+
+/* pci_msiq_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETVALID
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: msiqstate (HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR)
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msiqid or msiqstate
+ * value or MSI EQ is uninitialized
+ *
+ * Set the state of the MSI-EQ described by the given devhandle and
+ * msiqid to the given msiqvalid.
+ */
+#define HV_FAST_PCI_MSIQ_SETSTATE 0xc5
+
+/* pci_msiq_gethead()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETHEAD
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqhead
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the current MSI EQ queue head for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETHEAD 0xc6
+
+/* pci_msiq_sethead()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_SETHEAD
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * ARG2: msiqhead
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msiqid or msiqhead,
+ * or MSI EQ is uninitialized
+ *
+ * Set the current MSI EQ queue head for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_SETHEAD 0xc7
+
+/* pci_msiq_gettail()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSIQ_GETTAIL
+ * ARG0: devhandle
+ * ARG1: msiqid
+ * RET0: status
+ * RET1: msiqtail
+ * ERRORS: EINVAL Invalid devhandle or msiqid
+ *
+ * Get the current MSI EQ queue tail for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETTAIL 0xc8
+
+/* pci_msi_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_GETVALID
+ * ARG0: devhandle
+ * ARG1: msinum
+ * RET0: status
+ * RET1: msivalidstate
+ * ERRORS: EINVAL Invalid devhandle or msinum
+ *
+ * Get the current valid/enabled state for the MSI defined by the
+ * given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_GETVALID 0xc9
+
+/* pci_msi_setvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_SETVALID
+ * ARG0: devhandle
+ * ARG1: msinum
+ * ARG2: msivalidstate
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msinum or msivalidstate
+ *
+ * Set the current valid/enabled state for the MSI defined by the
+ * given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_SETVALID 0xca
+
+/* pci_msi_getmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_GETMSIQ
+ * ARG0: devhandle
+ * ARG1: msinum
+ * RET0: status
+ * RET1: msiqid
+ * ERRORS: EINVAL Invalid devhandle or msinum or MSI is unbound
+ *
+ * Get the MSI EQ that the MSI defined by the given devhandle and
+ * msinum is bound to.
+ */
+#define HV_FAST_PCI_MSI_GETMSIQ 0xcb
+
+/* pci_msi_setmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_SETMSIQ
+ * ARG0: devhandle
+ * ARG1: msinum
+ * ARG2: msitype
+ * ARG3: msiqid
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msinum or msiqid
+ *
+ * Set the MSI EQ that the MSI defined by the given devhandle and
+ * msinum is bound to.
+ */
+#define HV_FAST_PCI_MSI_SETMSIQ 0xcc
+
+/* pci_msi_getstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_GETSTATE
+ * ARG0: devhandle
+ * ARG1: msinum
+ * RET0: status
+ * RET1: msistate
+ * ERRORS: EINVAL Invalid devhandle or msinum
+ *
+ * Get the state of the MSI defined by the given devhandle and msinum.
+ * If not initialized, return HV_MSISTATE_IDLE.
+ */
+#define HV_FAST_PCI_MSI_GETSTATE 0xcd
+
+/* pci_msi_setstate()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSI_SETSTATE
+ * ARG0: devhandle
+ * ARG1: msinum
+ * ARG2: msistate
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msinum or msistate
+ *
+ * Set the state of the MSI defined by the given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_SETSTATE 0xce
+
+/* pci_msg_getmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_GETMSIQ
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * RET0: status
+ * RET1: msiqid
+ * ERRORS: EINVAL Invalid devhandle or msgtype
+ *
+ * Get the MSI EQ of the MSG defined by the given devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_GETMSIQ 0xd0
+
+/* pci_msg_setmsiq()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_SETMSIQ
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * ARG2: msiqid
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle, msgtype, or msiqid
+ *
+ * Set the MSI EQ of the MSG defined by the given devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_SETMSIQ 0xd1
+
+/* pci_msg_getvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_GETVALID
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * RET0: status
+ * RET1: msgvalidstate
+ * ERRORS: EINVAL Invalid devhandle or msgtype
+ *
+ * Get the valid/enabled state of the MSG defined by the given
+ * devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_GETVALID 0xd2
+
+/* pci_msg_setvalid()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_PCI_MSG_SETVALID
+ * ARG0: devhandle
+ * ARG1: msgtype
+ * ARG2: msgvalidstate
+ * RET0: status
+ * ERRORS: EINVAL Invalid devhandle or msgtype or msgvalidstate
+ *
+ * Set the valid/enabled state of the MSG defined by the given
+ * devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_SETVALID 0xd3
+
+/* Performance counter services. */
+
+#define HV_PERF_JBUS_PERF_CTRL_REG 0x00
+#define HV_PERF_JBUS_PERF_CNT_REG 0x01
+#define HV_PERF_DRAM_PERF_CTRL_REG_0 0x02
+#define HV_PERF_DRAM_PERF_CNT_REG_0 0x03
+#define HV_PERF_DRAM_PERF_CTRL_REG_1 0x04
+#define HV_PERF_DRAM_PERF_CNT_REG_1 0x05
+#define HV_PERF_DRAM_PERF_CTRL_REG_2 0x06
+#define HV_PERF_DRAM_PERF_CNT_REG_2 0x07
+#define HV_PERF_DRAM_PERF_CTRL_REG_3 0x08
+#define HV_PERF_DRAM_PERF_CNT_REG_3 0x09
+
+/* get_perfreg()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_GET_PERFREG
+ * ARG0: performance reg number
+ * RET0: status
+ * RET1: performance reg value
+ * ERRORS: EINVAL Invalid performance register number
+ * ENOACCESS No access allowed to performance counters
+ *
+ * Read the value of the given DRAM/JBUS performance counter/control register.
+ */
+#define HV_FAST_GET_PERFREG 0x100
+
+/* set_perfreg()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_SET_PERFREG
+ * ARG0: performance reg number
+ * ARG1: performance reg value
+ * RET0: status
+ * ERRORS: EINVAL Invalid performance register number
+ * ENOACCESS No access allowed to performance counters
+ *
+ * Write the given performance reg value to the given DRAM/JBUS
+ * performance counter/control register.
+ */
+#define HV_FAST_SET_PERFREG 0x101
+
+/* MMU statistics services.
+ *
+ * The hypervisor maintains MMU statistics and privileged code provides
+ * a buffer where these statistics can be collected. It is continually
+ * updated once configured. The layout is as follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_mmu_statistics {
+ unsigned long immu_tsb_hits_ctx0_8k_tte;
+ unsigned long immu_tsb_ticks_ctx0_8k_tte;
+ unsigned long immu_tsb_hits_ctx0_64k_tte;
+ unsigned long immu_tsb_ticks_ctx0_64k_tte;
+ unsigned long __reserved1[2];
+ unsigned long immu_tsb_hits_ctx0_4mb_tte;
+ unsigned long immu_tsb_ticks_ctx0_4mb_tte;
+ unsigned long __reserved2[2];
+ unsigned long immu_tsb_hits_ctx0_256mb_tte;
+ unsigned long immu_tsb_ticks_ctx0_256mb_tte;
+ unsigned long __reserved3[4];
+ unsigned long immu_tsb_hits_ctxnon0_8k_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_8k_tte;
+ unsigned long immu_tsb_hits_ctxnon0_64k_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_64k_tte;
+ unsigned long __reserved4[2];
+ unsigned long immu_tsb_hits_ctxnon0_4mb_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_4mb_tte;
+ unsigned long __reserved5[2];
+ unsigned long immu_tsb_hits_ctxnon0_256mb_tte;
+ unsigned long immu_tsb_ticks_ctxnon0_256mb_tte;
+ unsigned long __reserved6[4];
+ unsigned long dmmu_tsb_hits_ctx0_8k_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_8k_tte;
+ unsigned long dmmu_tsb_hits_ctx0_64k_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_64k_tte;
+ unsigned long __reserved7[2];
+ unsigned long dmmu_tsb_hits_ctx0_4mb_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_4mb_tte;
+ unsigned long __reserved8[2];
+ unsigned long dmmu_tsb_hits_ctx0_256mb_tte;
+ unsigned long dmmu_tsb_ticks_ctx0_256mb_tte;
+ unsigned long __reserved9[4];
+ unsigned long dmmu_tsb_hits_ctxnon0_8k_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_8k_tte;
+ unsigned long dmmu_tsb_hits_ctxnon0_64k_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_64k_tte;
+ unsigned long __reserved10[2];
+ unsigned long dmmu_tsb_hits_ctxnon0_4mb_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_4mb_tte;
+ unsigned long __reserved11[2];
+ unsigned long dmmu_tsb_hits_ctxnon0_256mb_tte;
+ unsigned long dmmu_tsb_ticks_ctxnon0_256mb_tte;
+ unsigned long __reserved12[4];
+};
+#endif
+
+/* mmustat_conf()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMUSTAT_CONF
+ * ARG0: real address
+ * RET0: status
+ * RET1: real address
+ * ERRORS: ENORADDR Invalid real address
+ * EBADALIGN Real address not aligned on 64-byte boundary
+ * EBADTRAP API not supported on this processor
+ *
+ * Enable MMU statistic gathering using the buffer at the given real
+ * address on the current virtual CPU. The new buffer real address
+ * is given in ARG1, and the previously specified buffer real address
+ * is returned in RET1, or is returned as zero for the first invocation.
+ *
+ * If the passed in real address argument is zero, this will disable
+ * MMU statistic collection on the current virtual CPU. If an error is
+ * returned then no statistics are collected.
+ *
+ * The buffer contents should be initialized to all zeros before being
+ * given to the hypervisor or else the statistics will be meaningless.
+ */
+#define HV_FAST_MMUSTAT_CONF 0x102
+
+/* mmustat_info()
+ * TRAP: HV_FAST_TRAP
+ * FUNCTION: HV_FAST_MMUSTAT_INFO
+ * RET0: status
+ * RET1: real address
+ * ERRORS: EBADTRAP API not supported on this processor
+ *
+ * Return the current state and real address of the currently configured
+ * MMU statistics buffer on the current virtual CPU.
+ */
+#define HV_FAST_MMUSTAT_INFO 0x103
+
+/* Function numbers for HV_CORE_TRAP. */
+#define HV_CORE_VER 0x00
+#define HV_CORE_PUTCHAR 0x01
+#define HV_CORE_EXIT 0x02
+
+#endif /* !(_SPARC64_HYPERVISOR_H) */
diff --git a/include/asm-sparc64/idprom.h b/include/asm-sparc64/idprom.h
index 701483c5465d..77fbf987385f 100644
--- a/include/asm-sparc64/idprom.h
+++ b/include/asm-sparc64/idprom.h
@@ -9,15 +9,7 @@
#include <linux/types.h>
-/* Offset into the EEPROM where the id PROM is located on the 4c */
-#define IDPROM_OFFSET 0x7d8
-
-/* On sun4m; physical. */
-/* MicroSPARC(-II) does not decode 31rd bit, but it works. */
-#define IDPROM_OFFSET_M 0xfd8
-
-struct idprom
-{
+struct idprom {
u8 id_format; /* Format identifier (always 0x01) */
u8 id_machtype; /* Machine type */
u8 id_ethaddr[6]; /* Hardware ethernet address */
@@ -30,6 +22,4 @@ struct idprom
extern struct idprom *idprom;
extern void idprom_init(void);
-#define IDPROM_SIZE (sizeof(struct idprom))
-
#endif /* !(_SPARC_IDPROM_H) */
diff --git a/include/asm-sparc64/intr_queue.h b/include/asm-sparc64/intr_queue.h
new file mode 100644
index 000000000000..206077dedc2a
--- /dev/null
+++ b/include/asm-sparc64/intr_queue.h
@@ -0,0 +1,15 @@
+#ifndef _SPARC64_INTR_QUEUE_H
+#define _SPARC64_INTR_QUEUE_H
+
+/* Sun4v interrupt queue registers, accessed via ASI_QUEUE. */
+
+#define INTRQ_CPU_MONDO_HEAD 0x3c0 /* CPU mondo head */
+#define INTRQ_CPU_MONDO_TAIL 0x3c8 /* CPU mondo tail */
+#define INTRQ_DEVICE_MONDO_HEAD 0x3d0 /* Device mondo head */
+#define INTRQ_DEVICE_MONDO_TAIL 0x3d8 /* Device mondo tail */
+#define INTRQ_RESUM_MONDO_HEAD 0x3e0 /* Resumable error mondo head */
+#define INTRQ_RESUM_MONDO_TAIL 0x3e8 /* Resumable error mondo tail */
+#define INTRQ_NONRESUM_MONDO_HEAD 0x3f0 /* Non-resumable error mondo head */
+#define INTRQ_NONRESUM_MONDO_TAIL 0x3f8 /* Non-resumable error mondo head */
+
+#endif /* !(_SPARC64_INTR_QUEUE_H) */
diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h
index 8b70edcb80dc..de33d6e1afb5 100644
--- a/include/asm-sparc64/irq.h
+++ b/include/asm-sparc64/irq.h
@@ -72,8 +72,11 @@ struct ino_bucket {
#define IMAP_VALID 0x80000000 /* IRQ Enabled */
#define IMAP_TID_UPA 0x7c000000 /* UPA TargetID */
#define IMAP_TID_JBUS 0x7c000000 /* JBUS TargetID */
+#define IMAP_TID_SHIFT 26
#define IMAP_AID_SAFARI 0x7c000000 /* Safari AgentID */
+#define IMAP_AID_SHIFT 26
#define IMAP_NID_SAFARI 0x03e00000 /* Safari NodeID */
+#define IMAP_NID_SHIFT 21
#define IMAP_IGN 0x000007c0 /* IRQ Group Number */
#define IMAP_INO 0x0000003f /* IRQ Number */
#define IMAP_INR 0x000007ff /* Full interrupt number*/
@@ -111,6 +114,7 @@ extern void disable_irq(unsigned int);
#define disable_irq_nosync disable_irq
extern void enable_irq(unsigned int);
extern unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap);
+extern unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino, int pil, unsigned char flags);
extern unsigned int sbus_build_irq(void *sbus, unsigned int ino);
static __inline__ void set_softint(unsigned long bits)
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 8627eed6e83d..230ba678d3b0 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -4,20 +4,9 @@
#include <linux/config.h>
#include <asm/page.h>
#include <asm/const.h>
+#include <asm/hypervisor.h>
-/*
- * For the 8k pagesize kernel, use only 10 hw context bits to optimize some
- * shifts in the fast tlbmiss handlers, instead of all 13 bits (specifically
- * for vpte offset calculation). For other pagesizes, this optimization in
- * the tlbhandlers can not be done; but still, all 13 bits can not be used
- * because the tlb handlers use "andcc" instruction which sign extends 13
- * bit arguments.
- */
-#if PAGE_SHIFT == 13
-#define CTX_NR_BITS 10
-#else
-#define CTX_NR_BITS 12
-#endif
+#define CTX_NR_BITS 13
#define TAG_CONTEXT_BITS ((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL))
@@ -90,8 +79,27 @@
#ifndef __ASSEMBLY__
+#define TSB_ENTRY_ALIGNMENT 16
+
+struct tsb {
+ unsigned long tag;
+ unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte);
+extern void tsb_flush(unsigned long ent, unsigned long tag);
+extern void tsb_init(struct tsb *tsb, unsigned long size);
+
typedef struct {
- unsigned long sparc64_ctx_val;
+ spinlock_t lock;
+ unsigned long sparc64_ctx_val;
+ struct tsb *tsb;
+ unsigned long tsb_rss_limit;
+ unsigned long tsb_nentries;
+ unsigned long tsb_reg_val;
+ unsigned long tsb_map_vaddr;
+ unsigned long tsb_map_pte;
+ struct hv_tsb_descr tsb_descr;
} mm_context_t;
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 57ee7b306189..e7974321d052 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -19,96 +19,98 @@ extern unsigned long tlb_context_cache;
extern unsigned long mmu_context_bmap[];
extern void get_new_mmu_context(struct mm_struct *mm);
+#ifdef CONFIG_SMP
+extern void smp_new_mmu_context_version(void);
+#else
+#define smp_new_mmu_context_version() do { } while (0)
+#endif
+
+extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+extern void destroy_context(struct mm_struct *mm);
+
+extern void __tsb_context_switch(unsigned long pgd_pa,
+ unsigned long tsb_reg,
+ unsigned long tsb_vaddr,
+ unsigned long tsb_pte,
+ unsigned long tsb_descr_pa);
+
+static inline void tsb_context_switch(struct mm_struct *mm)
+{
+ __tsb_context_switch(__pa(mm->pgd), mm->context.tsb_reg_val,
+ mm->context.tsb_map_vaddr,
+ mm->context.tsb_map_pte,
+ __pa(&mm->context.tsb_descr));
+}
-/* Initialize a new mmu context. This is invoked when a new
- * address space instance (unique or shared) is instantiated.
- * This just needs to set mm->context to an invalid context.
- */
-#define init_new_context(__tsk, __mm) \
- (((__mm)->context.sparc64_ctx_val = 0UL), 0)
-
-/* Destroy a dead context. This occurs when mmput drops the
- * mm_users count to zero, the mmaps have been released, and
- * all the page tables have been flushed. Our job is to destroy
- * any remaining processor-specific state, and in the sparc64
- * case this just means freeing up the mmu context ID held by
- * this task if valid.
- */
-#define destroy_context(__mm) \
-do { spin_lock(&ctx_alloc_lock); \
- if (CTX_VALID((__mm)->context)) { \
- unsigned long nr = CTX_NRBITS((__mm)->context); \
- mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
- } \
- spin_unlock(&ctx_alloc_lock); \
-} while(0)
-
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64. They are:
- * 1) The physical address of mm->pgd, when full page
- * table walks are necessary, this is where the
- * search begins.
- * 2) A "PGD cache". For 32-bit tasks only pgd[0] is
- * ever used since that maps the entire low 4GB
- * completely. To speed up TLB miss processing we
- * make this value available to the handlers. This
- * decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
- register unsigned long paddr asm("o5"); \
- register unsigned long pgd_cache asm("o4"); \
- paddr = __pa((__mm)->pgd); \
- pgd_cache = 0UL; \
- if (task_thread_info(__tsk)->flags & _TIF_32BIT) \
- pgd_cache = get_pgd_cache((__mm)->pgd); \
- __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \
- "mov %3, %%g4\n\t" \
- "mov %0, %%g7\n\t" \
- "stxa %1, [%%g4] %2\n\t" \
- "membar #Sync\n\t" \
- "wrpr %%g0, 0x096, %%pstate" \
- : /* no outputs */ \
- : "r" (paddr), "r" (pgd_cache),\
- "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss);
+#ifdef CONFIG_SMP
+extern void smp_tsb_sync(struct mm_struct *mm);
+#else
+#define smp_tsb_sync(__mm) do { } while (0)
+#endif
/* Set MMU context in the actual hardware. */
#define load_secondary_context(__mm) \
- __asm__ __volatile__("stxa %0, [%1] %2\n\t" \
- "flush %%g6" \
- : /* No outputs */ \
- : "r" (CTX_HWBITS((__mm)->context)), \
- "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU))
+ __asm__ __volatile__( \
+ "\n661: stxa %0, [%1] %2\n" \
+ " .section .sun4v_1insn_patch, \"ax\"\n" \
+ " .word 661b\n" \
+ " stxa %0, [%1] %3\n" \
+ " .previous\n" \
+ " flush %%g6\n" \
+ : /* No outputs */ \
+ : "r" (CTX_HWBITS((__mm)->context)), \
+ "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU))
extern void __flush_tlb_mm(unsigned long, unsigned long);
-/* Switch the current MM context. */
+/* Switch the current MM context. Interrupts are disabled. */
static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
{
- unsigned long ctx_valid;
+ unsigned long ctx_valid, flags;
int cpu;
- /* Note: page_table_lock is used here to serialize switch_mm
- * and activate_mm, and their calls to get_new_mmu_context.
- * This use of page_table_lock is unrelated to its other uses.
- */
- spin_lock(&mm->page_table_lock);
+ spin_lock_irqsave(&mm->context.lock, flags);
ctx_valid = CTX_VALID(mm->context);
if (!ctx_valid)
get_new_mmu_context(mm);
- spin_unlock(&mm->page_table_lock);
- if (!ctx_valid || (old_mm != mm)) {
- load_secondary_context(mm);
- reload_tlbmiss_state(tsk, mm);
- }
+ /* We have to be extremely careful here or else we will miss
+ * a TSB grow if we switch back and forth between a kernel
+ * thread and an address space which has it's TSB size increased
+ * on another processor.
+ *
+ * It is possible to play some games in order to optimize the
+ * switch, but the safest thing to do is to unconditionally
+ * perform the secondary context load and the TSB context switch.
+ *
+ * For reference the bad case is, for address space "A":
+ *
+ * CPU 0 CPU 1
+ * run address space A
+ * set cpu0's bits in cpu_vm_mask
+ * switch to kernel thread, borrow
+ * address space A via entry_lazy_tlb
+ * run address space A
+ * set cpu1's bit in cpu_vm_mask
+ * flush_tlb_pending()
+ * reset cpu_vm_mask to just cpu1
+ * TSB grow
+ * run address space A
+ * context was valid, so skip
+ * TSB context switch
+ *
+ * At that point cpu0 continues to use a stale TSB, the one from
+ * before the TSB grow performed on cpu1. cpu1 did not cross-call
+ * cpu0 to update it's TSB because at that point the cpu_vm_mask
+ * only had cpu1 set in it.
+ */
+ load_secondary_context(mm);
+ tsb_context_switch(mm);
- /* Even if (mm == old_mm) we _must_ check
- * the cpu_vm_mask. If we do not we could
- * corrupt the TLB state because of how
- * smp_flush_tlb_{page,range,mm} on sparc64
- * and lazy tlb switches work. -DaveM
+ /* Any time a processor runs a context on an address space
+ * for the first time, we must flush that context out of the
+ * local TLB.
*/
cpu = smp_processor_id();
if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) {
@@ -116,6 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
__flush_tlb_mm(CTX_HWBITS(mm->context),
SECONDARY_CONTEXT);
}
+ spin_unlock_irqrestore(&mm->context.lock, flags);
}
#define deactivate_mm(tsk,mm) do { } while (0)
@@ -123,23 +126,20 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
/* Activate a new MM instance for the current task. */
static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
{
+ unsigned long flags;
int cpu;
- /* Note: page_table_lock is used here to serialize switch_mm
- * and activate_mm, and their calls to get_new_mmu_context.
- * This use of page_table_lock is unrelated to its other uses.
- */
- spin_lock(&mm->page_table_lock);
+ spin_lock_irqsave(&mm->context.lock, flags);
if (!CTX_VALID(mm->context))
get_new_mmu_context(mm);
cpu = smp_processor_id();
if (!cpu_isset(cpu, mm->cpu_vm_mask))
cpu_set(cpu, mm->cpu_vm_mask);
- spin_unlock(&mm->page_table_lock);
load_secondary_context(mm);
__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
- reload_tlbmiss_state(current, mm);
+ tsb_context_switch(mm);
+ spin_unlock_irqrestore(&mm->context.lock, flags);
}
#endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/numnodes.h b/include/asm-sparc64/numnodes.h
new file mode 100644
index 000000000000..017e7e74f5e7
--- /dev/null
+++ b/include/asm-sparc64/numnodes.h
@@ -0,0 +1,6 @@
+#ifndef _SPARC64_NUMNODES_H
+#define _SPARC64_NUMNODES_H
+
+#define NODES_SHIFT 0
+
+#endif /* !(_SPARC64_NUMNODES_H) */
diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h
index 3c59b2693fb9..c754676e13ef 100644
--- a/include/asm-sparc64/oplib.h
+++ b/include/asm-sparc64/oplib.h
@@ -12,18 +12,8 @@
#include <linux/config.h>
#include <asm/openprom.h>
-/* Enumeration to describe the prom major version we have detected. */
-enum prom_major_version {
- PROM_V0, /* Original sun4c V0 prom */
- PROM_V2, /* sun4c and early sun4m V2 prom */
- PROM_V3, /* sun4m and later, up to sun4d/sun4e machines V3 */
- PROM_P1275, /* IEEE compliant ISA based Sun PROM, only sun4u */
- PROM_AP1000, /* actually no prom at all */
-};
-
-extern enum prom_major_version prom_vers;
-/* Revision, and firmware revision. */
-extern unsigned int prom_rev, prom_prev;
+/* OBP version string. */
+extern char prom_version[];
/* Root node of the prom device tree, this stays constant after
* initialization is complete.
@@ -39,6 +29,9 @@ extern int prom_stdin, prom_stdout;
extern int prom_chosen_node;
/* Helper values and strings in arch/sparc64/kernel/head.S */
+extern const char prom_peer_name[];
+extern const char prom_compatible_name[];
+extern const char prom_root_compatible[];
extern const char prom_finddev_name[];
extern const char prom_chosen_path[];
extern const char prom_getprop_name[];
@@ -130,15 +123,6 @@ extern void prom_setcallback(callback_func_t func_ptr);
*/
extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size);
-/* Get the prom major version. */
-extern int prom_version(void);
-
-/* Get the prom plugin revision. */
-extern int prom_getrev(void);
-
-/* Get the prom firmware revision. */
-extern int prom_getprev(void);
-
/* Character operations to/from the console.... */
/* Non-blocking get character from console. */
@@ -164,6 +148,7 @@ enum prom_input_device {
PROMDEV_ITTYA, /* input from ttya */
PROMDEV_ITTYB, /* input from ttyb */
PROMDEV_IRSC, /* input from rsc */
+ PROMDEV_IVCONS, /* input from virtual-console */
PROMDEV_I_UNK,
};
@@ -176,6 +161,7 @@ enum prom_output_device {
PROMDEV_OTTYA, /* to ttya */
PROMDEV_OTTYB, /* to ttyb */
PROMDEV_ORSC, /* to rsc */
+ PROMDEV_OVCONS, /* to virtual-console */
PROMDEV_O_UNK,
};
@@ -183,10 +169,18 @@ extern enum prom_output_device prom_query_output_device(void);
/* Multiprocessor operations... */
#ifdef CONFIG_SMP
-/* Start the CPU with the given device tree node, context table, and context
- * at the passed program counter.
+/* Start the CPU with the given device tree node at the passed program
+ * counter with the given arg passed in via register %o0.
+ */
+extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg);
+
+/* Start the CPU with the given cpu ID at the passed program
+ * counter with the given arg passed in via register %o0.
*/
-extern void prom_startcpu(int cpunode, unsigned long pc, unsigned long o0);
+extern void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg);
+
+/* Stop the CPU with the given cpu ID. */
+extern void prom_stopcpu_cpuid(int cpuid);
/* Stop the current CPU. */
extern void prom_stopself(void);
@@ -335,6 +329,7 @@ int cpu_find_by_mid(int mid, int *prom_node);
/* Client interface level routines. */
extern void prom_set_trap_table(unsigned long tba);
+extern void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa);
extern long p1275_cmd(const char *, long, ...);
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index 5426bb28a993..fcb2812265f4 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -104,10 +104,12 @@ typedef unsigned long pgprot_t;
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define ARCH_HAS_SETCLEAR_HUGE_PTE
#define ARCH_HAS_HUGETLB_PREFAULT_HOOK
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
- (_AC(0x0000000070000000,UL)) : (PAGE_OFFSET))
+ (_AC(0x0000000070000000,UL)) : \
+ (_AC(0xfffff80000000000,UL) + (1UL << 32UL)))
#endif /* !(__ASSEMBLY__) */
@@ -124,17 +126,10 @@ typedef unsigned long pgprot_t;
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET))
-/* PFNs are real physical page numbers. However, mem_map only begins to record
- * per-page information starting at pfn_base. This is to handle systems where
- * the first physical page in the machine is at some huge physical address,
- * such as 4GB. This is common on a partitioned E10000, for example.
- */
-extern struct page *pfn_to_page(unsigned long pfn);
-extern unsigned long page_to_pfn(struct page *);
+#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)>>PAGE_SHIFT)
-#define pfn_valid(pfn) (((pfn)-(pfn_base)) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define virt_to_phys __pa
diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h
index dd35a2c7798a..1396f110939a 100644
--- a/include/asm-sparc64/pbm.h
+++ b/include/asm-sparc64/pbm.h
@@ -139,6 +139,9 @@ struct pci_pbm_info {
/* Opaque 32-bit system bus Port ID. */
u32 portid;
+ /* Opaque 32-bit handle used for hypervisor calls. */
+ u32 devhandle;
+
/* Chipset version information. */
int chip_type;
#define PBM_CHIP_TYPE_SABRE 1
diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h
index 89bd71b1c0d8..7c5a589ea437 100644
--- a/include/asm-sparc64/pci.h
+++ b/include/asm-sparc64/pci.h
@@ -41,10 +41,26 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
struct pci_dev;
+struct pci_iommu_ops {
+ void *(*alloc_consistent)(struct pci_dev *, size_t, dma_addr_t *);
+ void (*free_consistent)(struct pci_dev *, size_t, void *, dma_addr_t);
+ dma_addr_t (*map_single)(struct pci_dev *, void *, size_t, int);
+ void (*unmap_single)(struct pci_dev *, dma_addr_t, size_t, int);
+ int (*map_sg)(struct pci_dev *, struct scatterlist *, int, int);
+ void (*unmap_sg)(struct pci_dev *, struct scatterlist *, int, int);
+ void (*dma_sync_single_for_cpu)(struct pci_dev *, dma_addr_t, size_t, int);
+ void (*dma_sync_sg_for_cpu)(struct pci_dev *, struct scatterlist *, int, int);
+};
+
+extern struct pci_iommu_ops *pci_iommu_ops;
+
/* Allocate and map kernel buffer using consistent mode DMA for a device.
* hwdev should be valid struct pci_dev pointer for PCI devices.
*/
-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle);
+static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
+{
+ return pci_iommu_ops->alloc_consistent(hwdev, size, dma_handle);
+}
/* Free and unmap a consistent DMA buffer.
* cpu_addr is what was returned from pci_alloc_consistent,
@@ -54,7 +70,10 @@ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t
* References to the memory and mappings associated with cpu_addr/dma_addr
* past this call are illegal.
*/
-extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle);
+static inline void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
+{
+ return pci_iommu_ops->free_consistent(hwdev, size, vaddr, dma_handle);
+}
/* Map a single buffer of the indicated size for DMA in streaming mode.
* The 32-bit bus address to use is returned.
@@ -62,7 +81,10 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr,
* Once the device is given the dma address, the device owns this memory
* until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
*/
-extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction);
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction)
+{
+ return pci_iommu_ops->map_single(hwdev, ptr, size, direction);
+}
/* Unmap a single streaming mode DMA translation. The dma_addr and size
* must match what was provided for in a previous pci_map_single call. All
@@ -71,7 +93,10 @@ extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
-extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction);
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction)
+{
+ pci_iommu_ops->unmap_single(hwdev, dma_addr, size, direction);
+}
/* No highmem on sparc64, plus we have an IOMMU, so mapping pages is easy. */
#define pci_map_page(dev, page, off, size, dir) \
@@ -107,15 +132,19 @@ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t
* Device ownership issues as mentioned above for pci_map_single are
* the same here.
*/
-extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nents, int direction);
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+ return pci_iommu_ops->map_sg(hwdev, sg, nents, direction);
+}
/* Unmap a set of streaming mode DMA translations.
* Again, cpu read rules concerning calls here are the same as for
* pci_unmap_single() above.
*/
-extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nhwents, int direction);
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction)
+{
+ pci_iommu_ops->unmap_sg(hwdev, sg, nhwents, direction);
+}
/* Make physical memory consistent for a single
* streaming mode DMA translation after a transfer.
@@ -127,8 +156,10 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
* must first perform a pci_dma_sync_for_device, and then the
* device again owns the buffer.
*/
-extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle,
- size_t size, int direction);
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
+{
+ pci_iommu_ops->dma_sync_single_for_cpu(hwdev, dma_handle, size, direction);
+}
static inline void
pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle,
@@ -144,7 +175,10 @@ pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle,
* The same as pci_dma_sync_single_* but for a scatter-gather list,
* same rules and usage.
*/
-extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
+{
+ pci_iommu_ops->dma_sync_sg_for_cpu(hwdev, sg, nelems, direction);
+}
static inline void
pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg,
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index a96067cca963..12e4a273bd43 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <asm/spitfire.h>
#include <asm/cpudata.h>
@@ -13,172 +14,59 @@
#include <asm/page.h>
/* Page table allocation/freeing. */
-#ifdef CONFIG_SMP
-/* Sliiiicck */
-#define pgt_quicklists local_cpu_data()
-#else
-extern struct pgtable_cache_struct {
- unsigned long *pgd_cache;
- unsigned long *pte_cache[2];
- unsigned int pgcache_size;
-} pgt_quicklists;
-#endif
-#define pgd_quicklist (pgt_quicklists.pgd_cache)
-#define pmd_quicklist ((unsigned long *)0)
-#define pte_quicklist (pgt_quicklists.pte_cache)
-#define pgtable_cache_size (pgt_quicklists.pgcache_size)
+extern kmem_cache_t *pgtable_cache;
-static __inline__ void free_pgd_fast(pgd_t *pgd)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- preempt_disable();
- *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
- pgd_quicklist = (unsigned long *) pgd;
- pgtable_cache_size++;
- preempt_enable();
+ return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
}
-static __inline__ pgd_t *get_pgd_fast(void)
+static inline void pgd_free(pgd_t *pgd)
{
- unsigned long *ret;
-
- preempt_disable();
- if((ret = pgd_quicklist) != NULL) {
- pgd_quicklist = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- preempt_enable();
- } else {
- preempt_enable();
- ret = (unsigned long *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
- if(ret)
- memset(ret, 0, PAGE_SIZE);
- }
- return (pgd_t *)ret;
+ kmem_cache_free(pgtable_cache, pgd);
}
-static __inline__ void free_pgd_slow(pgd_t *pgd)
-{
- free_page((unsigned long)pgd);
-}
-
-#ifdef DCACHE_ALIASING_POSSIBLE
-#define VPTE_COLOR(address) (((address) >> (PAGE_SHIFT + 10)) & 1UL)
-#define DCACHE_COLOR(address) (((address) >> PAGE_SHIFT) & 1UL)
-#else
-#define VPTE_COLOR(address) 0
-#define DCACHE_COLOR(address) 0
-#endif
-
#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
-static __inline__ pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- unsigned long *ret;
- int color = 0;
-
- preempt_disable();
- if (pte_quicklist[color] == NULL)
- color = 1;
-
- if((ret = (unsigned long *)pte_quicklist[color]) != NULL) {
- pte_quicklist[color] = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- }
- preempt_enable();
-
- return (pmd_t *)ret;
+ return kmem_cache_alloc(pgtable_cache,
+ GFP_KERNEL|__GFP_REPEAT);
}
-static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline void pmd_free(pmd_t *pmd)
{
- pmd_t *pmd;
-
- pmd = pmd_alloc_one_fast(mm, address);
- if (!pmd) {
- pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
- if (pmd)
- memset(pmd, 0, PAGE_SIZE);
- }
- return pmd;
+ kmem_cache_free(pgtable_cache, pmd);
}
-static __inline__ void free_pmd_fast(pmd_t *pmd)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
{
- unsigned long color = DCACHE_COLOR((unsigned long)pmd);
-
- preempt_disable();
- *(unsigned long *)pmd = (unsigned long) pte_quicklist[color];
- pte_quicklist[color] = (unsigned long *) pmd;
- pgtable_cache_size++;
- preempt_enable();
+ return kmem_cache_alloc(pgtable_cache,
+ GFP_KERNEL|__GFP_REPEAT);
}
-static __inline__ void free_pmd_slow(pmd_t *pmd)
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
{
- free_page((unsigned long)pmd);
+ return virt_to_page(pte_alloc_one_kernel(mm, address));
}
-
-#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
-#define pmd_populate(MM,PMD,PTE_PAGE) \
- pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
-
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- pte_t *pte = pte_alloc_one_kernel(mm, addr);
-
- if (pte)
- return virt_to_page(pte);
-
- return NULL;
-}
-
-static __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
-{
- unsigned long color = VPTE_COLOR(address);
- unsigned long *ret;
-
- preempt_disable();
- if((ret = (unsigned long *)pte_quicklist[color]) != NULL) {
- pte_quicklist[color] = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- }
- preempt_enable();
- return (pte_t *)ret;
-}
-
-static __inline__ void free_pte_fast(pte_t *pte)
-{
- unsigned long color = DCACHE_COLOR((unsigned long)pte);
-
- preempt_disable();
- *(unsigned long *)pte = (unsigned long) pte_quicklist[color];
- pte_quicklist[color] = (unsigned long *) pte;
- pgtable_cache_size++;
- preempt_enable();
-}
-
-static __inline__ void free_pte_slow(pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
+
static inline void pte_free_kernel(pte_t *pte)
{
- free_pte_fast(pte);
+ kmem_cache_free(pgtable_cache, pte);
}
static inline void pte_free(struct page *ptepage)
{
- free_pte_fast(page_address(ptepage));
+ pte_free_kernel(page_address(ptepage));
}
-#define pmd_free(pmd) free_pmd_fast(pmd)
-#define pgd_free(pgd) free_pgd_fast(pgd)
-#define pgd_alloc(mm) get_pgd_fast()
+
+#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
+#define pmd_populate(MM,PMD,PTE_PAGE) \
+ pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
+
+#define check_pgt_cache() do { } while (0)
#endif /* _SPARC64_PGALLOC_H */
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index f0a9b44d3eb5..ed4124edf837 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -25,7 +25,8 @@
#include <asm/const.h>
/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
- * The page copy blockops can use 0x2000000 to 0x10000000.
+ * The page copy blockops can use 0x2000000 to 0x4000000.
+ * The TSB is mapped in the 0x4000000 to 0x6000000 range.
* The PROM resides in an area spanning 0xf0000000 to 0x100000000.
* The vmalloc area spans 0x100000000 to 0x200000000.
* Since modules need to be in the lowest 32-bits of the address space,
@@ -34,6 +35,7 @@
* 0x400000000.
*/
#define TLBTEMP_BASE _AC(0x0000000002000000,UL)
+#define TSBMAP_BASE _AC(0x0000000004000000,UL)
#define MODULES_VADDR _AC(0x0000000010000000,UL)
#define MODULES_LEN _AC(0x00000000e0000000,UL)
#define MODULES_END _AC(0x00000000f0000000,UL)
@@ -88,162 +90,538 @@
#endif /* !(__ASSEMBLY__) */
-/* Spitfire/Cheetah TTE bits. */
-#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */
-#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit up to date*/
-#define _PAGE_SZ4MB _AC(0x6000000000000000,UL) /* 4MB Page */
-#define _PAGE_SZ512K _AC(0x4000000000000000,UL) /* 512K Page */
-#define _PAGE_SZ64K _AC(0x2000000000000000,UL) /* 64K Page */
-#define _PAGE_SZ8K _AC(0x0000000000000000,UL) /* 8K Page */
-#define _PAGE_NFO _AC(0x1000000000000000,UL) /* No Fault Only */
-#define _PAGE_IE _AC(0x0800000000000000,UL) /* Invert Endianness */
-#define _PAGE_SOFT2 _AC(0x07FC000000000000,UL) /* Software bits, set 2 */
-#define _PAGE_RES1 _AC(0x0002000000000000,UL) /* Reserved */
-#define _PAGE_SZ32MB _AC(0x0001000000000000,UL) /* (Panther) 32MB page */
-#define _PAGE_SZ256MB _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
-#define _PAGE_SN _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */
-#define _PAGE_RES2 _AC(0x0000780000000000,UL) /* Reserved */
-#define _PAGE_PADDR_SF _AC(0x000001FFFFFFE000,UL) /* (Spitfire) paddr[40:13]*/
-#define _PAGE_PADDR _AC(0x000007FFFFFFE000,UL) /* (Cheetah) paddr[42:13] */
-#define _PAGE_SOFT _AC(0x0000000000001F80,UL) /* Software bits */
-#define _PAGE_L _AC(0x0000000000000040,UL) /* Locked TTE */
-#define _PAGE_CP _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */
-#define _PAGE_CV _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */
-#define _PAGE_E _AC(0x0000000000000008,UL) /* side-Effect */
-#define _PAGE_P _AC(0x0000000000000004,UL) /* Privileged Page */
-#define _PAGE_W _AC(0x0000000000000002,UL) /* Writable */
-#define _PAGE_G _AC(0x0000000000000001,UL) /* Global */
-
-/* Here are the SpitFire software bits we use in the TTE's.
- *
- * WARNING: If you are going to try and start using some
- * of the soft2 bits, you will need to make
- * modifications to the swap entry implementation.
- * For example, one thing that could happen is that
- * swp_entry_to_pte() would BUG_ON() if you tried
- * to use one of the soft2 bits for _PAGE_FILE.
- *
- * Like other architectures, I have aliased _PAGE_FILE with
- * _PAGE_MODIFIED. This works because _PAGE_FILE is never
- * interpreted that way unless _PAGE_PRESENT is clear.
- */
-#define _PAGE_EXEC _AC(0x0000000000001000,UL) /* Executable SW bit */
-#define _PAGE_MODIFIED _AC(0x0000000000000800,UL) /* Modified (dirty) */
-#define _PAGE_FILE _AC(0x0000000000000800,UL) /* Pagecache page */
-#define _PAGE_ACCESSED _AC(0x0000000000000400,UL) /* Accessed (ref'd) */
-#define _PAGE_READ _AC(0x0000000000000200,UL) /* Readable SW Bit */
-#define _PAGE_WRITE _AC(0x0000000000000100,UL) /* Writable SW Bit */
-#define _PAGE_PRESENT _AC(0x0000000000000080,UL) /* Present */
+/* PTE bits which are the same in SUN4U and SUN4V format. */
+#define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */
+#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
+
+/* SUN4U pte bits... */
+#define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */
+#define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */
+#define _PAGE_SZ64K_4U _AC(0x2000000000000000,UL) /* 64K Page */
+#define _PAGE_SZ8K_4U _AC(0x0000000000000000,UL) /* 8K Page */
+#define _PAGE_NFO_4U _AC(0x1000000000000000,UL) /* No Fault Only */
+#define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */
+#define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */
+#define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */
+#define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */
+#define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
+#define _PAGE_SN_4U _AC(0x0000800000000000,UL) /* (Cheetah) Snoop */
+#define _PAGE_RES2_4U _AC(0x0000780000000000,UL) /* Reserved */
+#define _PAGE_PADDR_4U _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13] */
+#define _PAGE_SOFT_4U _AC(0x0000000000001F80,UL) /* Software bits: */
+#define _PAGE_EXEC_4U _AC(0x0000000000001000,UL) /* Executable SW bit */
+#define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty) */
+#define _PAGE_FILE_4U _AC(0x0000000000000800,UL) /* Pagecache page */
+#define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd) */
+#define _PAGE_READ_4U _AC(0x0000000000000200,UL) /* Readable SW Bit */
+#define _PAGE_WRITE_4U _AC(0x0000000000000100,UL) /* Writable SW Bit */
+#define _PAGE_PRESENT_4U _AC(0x0000000000000080,UL) /* Present */
+#define _PAGE_L_4U _AC(0x0000000000000040,UL) /* Locked TTE */
+#define _PAGE_CP_4U _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */
+#define _PAGE_CV_4U _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */
+#define _PAGE_E_4U _AC(0x0000000000000008,UL) /* side-Effect */
+#define _PAGE_P_4U _AC(0x0000000000000004,UL) /* Privileged Page */
+#define _PAGE_W_4U _AC(0x0000000000000002,UL) /* Writable */
+
+/* SUN4V pte bits... */
+#define _PAGE_NFO_4V _AC(0x4000000000000000,UL) /* No Fault Only */
+#define _PAGE_SOFT2_4V _AC(0x3F00000000000000,UL) /* Software bits, set 2 */
+#define _PAGE_MODIFIED_4V _AC(0x2000000000000000,UL) /* Modified (dirty) */
+#define _PAGE_ACCESSED_4V _AC(0x1000000000000000,UL) /* Accessed (ref'd) */
+#define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */
+#define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */
+#define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */
+#define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */
+#define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */
+#define _PAGE_CP_4V _AC(0x0000000000000400,UL) /* Cacheable in P-Cache */
+#define _PAGE_CV_4V _AC(0x0000000000000200,UL) /* Cacheable in V-Cache */
+#define _PAGE_P_4V _AC(0x0000000000000100,UL) /* Privileged Page */
+#define _PAGE_EXEC_4V _AC(0x0000000000000080,UL) /* Executable Page */
+#define _PAGE_W_4V _AC(0x0000000000000040,UL) /* Writable */
+#define _PAGE_SOFT_4V _AC(0x0000000000000030,UL) /* Software bits */
+#define _PAGE_FILE_4V _AC(0x0000000000000020,UL) /* Pagecache page */
+#define _PAGE_PRESENT_4V _AC(0x0000000000000010,UL) /* Present */
+#define _PAGE_RESV_4V _AC(0x0000000000000008,UL) /* Reserved */
+#define _PAGE_SZ16GB_4V _AC(0x0000000000000007,UL) /* 16GB Page */
+#define _PAGE_SZ2GB_4V _AC(0x0000000000000006,UL) /* 2GB Page */
+#define _PAGE_SZ256MB_4V _AC(0x0000000000000005,UL) /* 256MB Page */
+#define _PAGE_SZ32MB_4V _AC(0x0000000000000004,UL) /* 32MB Page */
+#define _PAGE_SZ4MB_4V _AC(0x0000000000000003,UL) /* 4MB Page */
+#define _PAGE_SZ512K_4V _AC(0x0000000000000002,UL) /* 512K Page */
+#define _PAGE_SZ64K_4V _AC(0x0000000000000001,UL) /* 64K Page */
+#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
#if PAGE_SHIFT == 13
-#define _PAGE_SZBITS _PAGE_SZ8K
+#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
#elif PAGE_SHIFT == 16
-#define _PAGE_SZBITS _PAGE_SZ64K
+#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V
#elif PAGE_SHIFT == 19
-#define _PAGE_SZBITS _PAGE_SZ512K
+#define _PAGE_SZBITS_4U _PAGE_SZ512K_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ512K_4V
#elif PAGE_SHIFT == 22
-#define _PAGE_SZBITS _PAGE_SZ4MB
+#define _PAGE_SZBITS_4U _PAGE_SZ4MB_4U
+#define _PAGE_SZBITS_4V _PAGE_SZ4MB_4V
#else
#error Wrong PAGE_SHIFT specified
#endif
#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#define _PAGE_SZHUGE _PAGE_SZ4MB
+#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
+#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define _PAGE_SZHUGE _PAGE_SZ512K
+#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U
+#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define _PAGE_SZHUGE _PAGE_SZ64K
+#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U
+#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V
#endif
-#define _PAGE_CACHE (_PAGE_CP | _PAGE_CV)
+/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
+#define __P000 __pgprot(0)
+#define __P001 __pgprot(0)
+#define __P010 __pgprot(0)
+#define __P011 __pgprot(0)
+#define __P100 __pgprot(0)
+#define __P101 __pgprot(0)
+#define __P110 __pgprot(0)
+#define __P111 __pgprot(0)
+
+#define __S000 __pgprot(0)
+#define __S001 __pgprot(0)
+#define __S010 __pgprot(0)
+#define __S011 __pgprot(0)
+#define __S100 __pgprot(0)
+#define __S101 __pgprot(0)
+#define __S110 __pgprot(0)
+#define __S111 __pgprot(0)
-#define __DIRTY_BITS (_PAGE_MODIFIED | _PAGE_WRITE | _PAGE_W)
-#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_READ | _PAGE_R)
-#define __PRIV_BITS _PAGE_P
+#ifndef __ASSEMBLY__
-#define PAGE_NONE __pgprot (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_CACHE)
+extern pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long);
-/* Don't set the TTE _PAGE_W bit here, else the dirty bit never gets set. */
-#define PAGE_SHARED __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_WRITE | _PAGE_EXEC)
+extern unsigned long pte_sz_bits(unsigned long size);
-#define PAGE_COPY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_EXEC)
+extern pgprot_t PAGE_KERNEL;
+extern pgprot_t PAGE_KERNEL_LOCKED;
+extern pgprot_t PAGE_COPY;
+extern pgprot_t PAGE_SHARED;
-#define PAGE_READONLY __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_EXEC)
+/* XXX This uglyness is for the atyfb driver's sparc mmap() support. XXX */
+extern unsigned long _PAGE_IE;
+extern unsigned long _PAGE_E;
+extern unsigned long _PAGE_CACHE;
-#define PAGE_KERNEL __pgprot (_PAGE_PRESENT | _PAGE_VALID | _PAGE_CACHE | \
- __PRIV_BITS | \
- __ACCESS_BITS | __DIRTY_BITS | _PAGE_EXEC)
+extern unsigned long pg_iobits;
+extern unsigned long _PAGE_ALL_SZ_BITS;
+extern unsigned long _PAGE_SZBITS;
-#define PAGE_SHARED_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \
- _PAGE_CACHE | \
- __ACCESS_BITS | _PAGE_WRITE)
+extern struct page *mem_map_zero;
+#define ZERO_PAGE(vaddr) (mem_map_zero)
-#define PAGE_COPY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \
- _PAGE_CACHE | __ACCESS_BITS)
+/* PFNs are real physical page numbers. However, mem_map only begins to record
+ * per-page information starting at pfn_base. This is to handle systems where
+ * the first physical page in the machine is at some huge physical address,
+ * such as 4GB. This is common on a partitioned E10000, for example.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ unsigned long sz_bits;
+
+ sz_bits = 0UL;
+ if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
+ __asm__ __volatile__(
+ "\n661: sethi %uhi(%1), %0\n"
+ " sllx %0, 32, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " mov %2, %0\n"
+ " nop\n"
+ " .previous\n"
+ : "=r" (sz_bits)
+ : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V));
+ }
+ return __pte(paddr | sz_bits | pgprot_val(prot));
+}
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-#define PAGE_READONLY_NOEXEC __pgprot (_PAGE_PRESENT | _PAGE_VALID | \
- _PAGE_CACHE | __ACCESS_BITS)
+/* This one can be done with two shifts. */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__(
+ "\n661: sllx %1, %2, %0\n"
+ " srlx %0, %3, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sllx %1, %4, %0\n"
+ " srlx %0, %5, %0\n"
+ " .previous\n"
+ : "=r" (ret)
+ : "r" (pte_val(pte)),
+ "i" (21), "i" (21 + PAGE_SHIFT),
+ "i" (8), "i" (8 + PAGE_SHIFT));
+
+ return ret;
+}
+#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define _PFN_MASK _PAGE_PADDR
+static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
+{
+ unsigned long mask, tmp;
+
+ /* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347)
+ * SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8)
+ *
+ * Even if we use negation tricks the result is still a 6
+ * instruction sequence, so don't try to play fancy and just
+ * do the most straightforward implementation.
+ *
+ * Note: We encode this into 3 sun4v 2-insn patch sequences.
+ */
-#define pg_iobits (_PAGE_VALID | _PAGE_PRESENT | __DIRTY_BITS | \
- __ACCESS_BITS | _PAGE_E)
+ __asm__ __volatile__(
+ "\n661: sethi %%uhi(%2), %1\n"
+ " sethi %%hi(%2), %0\n"
+ "\n662: or %1, %%ulo(%2), %1\n"
+ " or %0, %%lo(%2), %0\n"
+ "\n663: sllx %1, 32, %1\n"
+ " or %0, %1, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%3), %1\n"
+ " sethi %%hi(%3), %0\n"
+ " .word 662b\n"
+ " or %1, %%ulo(%3), %1\n"
+ " or %0, %%lo(%3), %0\n"
+ " .word 663b\n"
+ " sllx %1, 32, %1\n"
+ " or %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (mask), "=r" (tmp)
+ : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
+ _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
+ _PAGE_SZBITS_4U),
+ "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+ _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
+ _PAGE_SZBITS_4V));
+
+ return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
+}
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY_NOEXEC
-#define __P010 PAGE_COPY_NOEXEC
-#define __P011 PAGE_COPY_NOEXEC
-#define __P100 PAGE_READONLY
-#define __P101 PAGE_READONLY
-#define __P110 PAGE_COPY
-#define __P111 PAGE_COPY
+static inline pte_t pgoff_to_pte(unsigned long off)
+{
+ off <<= PAGE_SHIFT;
+
+ __asm__ __volatile__(
+ "\n661: or %0, %2, %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " or %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (off)
+ : "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
+
+ return __pte(off);
+}
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY_NOEXEC
-#define __S010 PAGE_SHARED_NOEXEC
-#define __S011 PAGE_SHARED_NOEXEC
-#define __S100 PAGE_READONLY
-#define __S101 PAGE_READONLY
-#define __S110 PAGE_SHARED
-#define __S111 PAGE_SHARED
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+ unsigned long val = pgprot_val(prot);
+
+ __asm__ __volatile__(
+ "\n661: andn %0, %2, %0\n"
+ " or %0, %3, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " andn %0, %4, %0\n"
+ " or %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (val)
+ : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
+ "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V));
+
+ return __pgprot(val);
+}
+/* Various pieces of code check for platform support by ifdef testing
+ * on "pgprot_noncached". That's broken and should be fixed, but for
+ * now...
+ */
+#define pgprot_noncached pgprot_noncached
-#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: sethi %%uhi(%1), %0\n"
+ " sllx %0, 32, %0\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " mov %2, %0\n"
+ " nop\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+ return __pte(pte_val(pte) | mask);
+}
+#endif
-extern unsigned long phys_base;
-extern unsigned long pfn_base;
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ unsigned long val = pte_val(pte), tmp;
+
+ __asm__ __volatile__(
+ "\n661: or %0, %3, %0\n"
+ " nop\n"
+ "\n662: nop\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sllx %1, 32, %1\n"
+ " .word 662b\n"
+ " or %1, %%lo(%4), %1\n"
+ " or %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (val), "=r" (tmp)
+ : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U),
+ "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V));
+
+ return __pte(val);
+}
-extern struct page *mem_map_zero;
-#define ZERO_PAGE(vaddr) (mem_map_zero)
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ unsigned long val = pte_val(pte), tmp;
+
+ __asm__ __volatile__(
+ "\n661: andn %0, %3, %0\n"
+ " nop\n"
+ "\n662: nop\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sllx %1, 32, %1\n"
+ " .word 662b\n"
+ " or %1, %%lo(%4), %1\n"
+ " andn %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (val), "=r" (tmp)
+ : "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U),
+ "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V));
+
+ return __pte(val);
+}
-/* PFNs are real physical page numbers. However, mem_map only begins to record
- * per-page information starting at pfn_base. This is to handle systems where
- * the first physical page in the machine is at some huge physical address,
- * such as 4GB. This is common on a partitioned E10000, for example.
- */
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ unsigned long val = pte_val(pte), mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V));
+
+ return __pte(val | mask);
+}
-#define pfn_pte(pfn, prot) \
- __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot) | _PAGE_SZBITS)
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ unsigned long val = pte_val(pte), tmp;
+
+ __asm__ __volatile__(
+ "\n661: andn %0, %3, %0\n"
+ " nop\n"
+ "\n662: nop\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%4), %1\n"
+ " sllx %1, 32, %1\n"
+ " .word 662b\n"
+ " or %1, %%lo(%4), %1\n"
+ " andn %0, %1, %0\n"
+ " .previous\n"
+ : "=r" (val), "=r" (tmp)
+ : "0" (val), "i" (_PAGE_WRITE_4U | _PAGE_W_4U),
+ "i" (_PAGE_WRITE_4V | _PAGE_W_4V));
+
+ return __pte(val);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+ mask |= _PAGE_R;
+
+ return __pte(pte_val(pte) & ~mask);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+ mask |= _PAGE_R;
+
+ return __pte(pte_val(pte) | mask);
+}
-#define pte_pfn(x) ((pte_val(x) & _PAGE_PADDR)>>PAGE_SHIFT)
-#define pte_page(x) pfn_to_page(pte_pfn(x))
+static inline unsigned long pte_young(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+ return (pte_val(pte) & mask);
+}
+
+static inline unsigned long pte_dirty(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_MODIFIED_4U), "i" (_PAGE_MODIFIED_4V));
+
+ return (pte_val(pte) & mask);
+}
-static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
+static inline unsigned long pte_write(pte_t pte)
{
- pte_t __pte;
- const unsigned long preserve_mask = (_PFN_MASK |
- _PAGE_MODIFIED | _PAGE_ACCESSED |
- _PAGE_CACHE | _PAGE_E |
- _PAGE_PRESENT | _PAGE_SZBITS);
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V));
+
+ return (pte_val(pte) & mask);
+}
- pte_val(__pte) = (pte_val(orig_pte) & preserve_mask) |
- (pgprot_val(new_prot) & ~preserve_mask);
+static inline unsigned long pte_exec(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: sethi %%hi(%1), %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " mov %2, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_EXEC_4U), "i" (_PAGE_EXEC_4V));
+
+ return (pte_val(pte) & mask);
+}
- return __pte;
+static inline unsigned long pte_read(pte_t pte)
+{
+ unsigned long mask;
+
+ __asm__ __volatile__(
+ "\n661: mov %1, %0\n"
+ " nop\n"
+ " .section .sun4v_2insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " sethi %%uhi(%2), %0\n"
+ " sllx %0, 32, %0\n"
+ " .previous\n"
+ : "=r" (mask)
+ : "i" (_PAGE_READ_4U), "i" (_PAGE_READ_4V));
+
+ return (pte_val(pte) & mask);
}
+
+static inline unsigned long pte_file(pte_t pte)
+{
+ unsigned long val = pte_val(pte);
+
+ __asm__ __volatile__(
+ "\n661: and %0, %2, %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " and %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (val)
+ : "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
+
+ return val;
+}
+
+static inline unsigned long pte_present(pte_t pte)
+{
+ unsigned long val = pte_val(pte);
+
+ __asm__ __volatile__(
+ "\n661: and %0, %2, %0\n"
+ " .section .sun4v_1insn_patch, \"ax\"\n"
+ " .word 661b\n"
+ " and %0, %3, %0\n"
+ " .previous\n"
+ : "=r" (val)
+ : "0" (val), "i" (_PAGE_PRESENT_4U), "i" (_PAGE_PRESENT_4V));
+
+ return val;
+}
+
#define pmd_set(pmdp, ptep) \
(pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL))
#define pud_set(pudp, pmdp) \
@@ -253,8 +631,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page(pud) \
((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
-#define pte_none(pte) (!pte_val(pte))
-#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
#define pmd_present(pmd) (pmd_val(pmd) != 0U)
@@ -264,30 +640,8 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0U)
-/* The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
-#define pte_exec(pte) (pte_val(pte) & _PAGE_EXEC)
-#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED)
-#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
-#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~(_PAGE_WRITE|_PAGE_W)))
-#define pte_rdprotect(pte) \
- (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_READ))
-#define pte_mkclean(pte) \
- (__pte(pte_val(pte) & ~(_PAGE_MODIFIED|_PAGE_W)))
-#define pte_mkold(pte) \
- (__pte(((pte_val(pte)<<1UL)>>1UL) & ~_PAGE_ACCESSED))
-
-/* Permanent address of a page. */
-#define __page_address(page) page_address(page)
-
-/* Be very careful when you change these three, they are delicate. */
-#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R))
-#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_WRITE))
-#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_MODIFIED | _PAGE_W))
-#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_SZHUGE))
+/* Same in both SUN4V and SUN4U. */
+#define pte_none(pte) (!pte_val(pte))
/* to find an entry in a page-table-directory. */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
@@ -296,11 +650,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* extract the pgd cache used for optimizing the tlb miss
- * slow path when executing 32-bit compat processes
- */
-#define get_pgd_cache(pgd) ((unsigned long) pgd_val(*pgd) << 11)
-
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
((pmd_t *) pud_page(*(pudp)) + \
@@ -327,6 +676,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p
/* It is more efficient to let flush_tlb_kernel_range()
* handle init_mm tlb flushes.
+ *
+ * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+ * and SUN4V pte layout, so this inline test is fine.
*/
if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID))
tlb_batch_add(mm, addr, ptep, orig);
@@ -361,42 +713,23 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
/* File offset in PTE support. */
-#define pte_file(pte) (pte_val(pte) & _PAGE_FILE)
+extern unsigned long pte_file(pte_t);
#define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) (__pte(((off) << PAGE_SHIFT) | _PAGE_FILE))
+extern pte_t pgoff_to_pte(unsigned long);
#define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL)
extern unsigned long prom_virt_to_phys(unsigned long, int *);
-static __inline__ unsigned long
-sun4u_get_pte (unsigned long addr)
-{
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- if (addr >= PAGE_OFFSET)
- return addr & _PAGE_PADDR;
- if ((addr >= LOW_OBP_ADDRESS) && (addr < HI_OBP_ADDRESS))
- return prom_virt_to_phys(addr, NULL);
- pgdp = pgd_offset_k(addr);
- pudp = pud_offset(pgdp, addr);
- pmdp = pmd_offset(pudp, addr);
- ptep = pte_offset_kernel(pmdp, addr);
- return pte_val(*ptep) & _PAGE_PADDR;
-}
+extern unsigned long sun4u_get_pte(unsigned long);
-static __inline__ unsigned long
-__get_phys (unsigned long addr)
+static inline unsigned long __get_phys(unsigned long addr)
{
- return sun4u_get_pte (addr);
+ return sun4u_get_pte(addr);
}
-static __inline__ int
-__get_iospace (unsigned long addr)
+static inline int __get_iospace(unsigned long addr)
{
- return ((sun4u_get_pte (addr) & 0xf0000000) >> 28);
+ return ((sun4u_get_pte(addr) & 0xf0000000) >> 28);
}
extern unsigned long *sparc64_valid_addr_bitmap;
@@ -409,11 +742,6 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
unsigned long pfn,
unsigned long size, pgprot_t prot);
-/* Clear virtual and physical cachability, set side-effect bit. */
-#define pgprot_noncached(prot) \
- (__pgprot((pgprot_val(prot) & ~(_PAGE_CP | _PAGE_CV)) | \
- _PAGE_E))
-
/*
* For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
* its high 4 bits. These macros/functions put it there or get it from there.
@@ -424,8 +752,11 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
#include <asm-generic/pgtable.h>
-/* We provide our own get_unmapped_area to cope with VA holes for userland */
+/* We provide our own get_unmapped_area to cope with VA holes and
+ * SHM area cache aliasing for userland.
+ */
#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
/* We provide a special get_unmapped_area for framebuffer mmaps to try and use
* the largest alignment possible such that larget PTEs can be used.
@@ -435,12 +766,9 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
unsigned long);
#define HAVE_ARCH_FB_UNMAPPED_AREA
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-extern void check_pgt_cache(void);
+extern void pgtable_cache_init(void);
+extern void sun4v_register_fault_status(void);
+extern void sun4v_ktsb_register(void);
#endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/pil.h b/include/asm-sparc64/pil.h
index 8f87750c3517..79f827eb3f5d 100644
--- a/include/asm-sparc64/pil.h
+++ b/include/asm-sparc64/pil.h
@@ -16,11 +16,13 @@
#define PIL_SMP_CALL_FUNC 1
#define PIL_SMP_RECEIVE_SIGNAL 2
#define PIL_SMP_CAPTURE 3
+#define PIL_SMP_CTX_NEW_VERSION 4
#ifndef __ASSEMBLY__
#define PIL_RESERVED(PIL) ((PIL) == PIL_SMP_CALL_FUNC || \
(PIL) == PIL_SMP_RECEIVE_SIGNAL || \
- (PIL) == PIL_SMP_CAPTURE)
+ (PIL) == PIL_SMP_CAPTURE || \
+ (PIL) == PIL_SMP_CTX_NEW_VERSION)
#endif
#endif /* !(_SPARC64_PIL_H) */
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index cd8d9b4c8658..c6896b88283e 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -28,6 +28,8 @@
* User lives in his very own context, and cannot reference us. Note
* that TASK_SIZE is a misnomer, it really gives maximum user virtual
* address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
*/
#define VA_BITS 44
#ifndef __ASSEMBLY__
@@ -37,18 +39,6 @@
#endif
#define TASK_SIZE ((unsigned long)-VPTE_SIZE)
-/*
- * The vpte base must be able to hold the entire vpte, half
- * of which lives above, and half below, the base. And it
- * is placed as close to the highest address range as possible.
- */
-#define VPTE_BASE_SPITFIRE (-(VPTE_SIZE/2))
-#if 1
-#define VPTE_BASE_CHEETAH VPTE_BASE_SPITFIRE
-#else
-#define VPTE_BASE_CHEETAH 0xffe0000000000000
-#endif
-
#ifndef __ASSEMBLY__
typedef struct {
@@ -101,7 +91,8 @@ extern unsigned long thread_saved_pc(struct task_struct *);
/* Do necessary setup to start up a newly executed thread. */
#define start_thread(regs, pc, sp) \
do { \
- regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (ASI_PNF << 24); \
+ unsigned long __asi = ASI_PNF; \
+ regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (__asi << 24UL); \
regs->tpc = ((pc & (~3)) - 4); \
regs->tnpc = regs->tpc + 4; \
regs->y = 0; \
@@ -138,10 +129,10 @@ do { \
#define start_thread32(regs, pc, sp) \
do { \
+ unsigned long __asi = ASI_PNF; \
pc &= 0x00000000ffffffffUL; \
sp &= 0x00000000ffffffffUL; \
-\
- regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM); \
+ regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM) | (__asi << 24UL); \
regs->tpc = ((pc & (~3)) - 4); \
regs->tnpc = regs->tpc + 4; \
regs->y = 0; \
@@ -226,6 +217,8 @@ static inline void prefetchw(const void *x)
#define spin_lock_prefetch(x) prefetchw(x)
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
#endif /* !(__ASSEMBLY__) */
#endif /* !(__ASM_SPARC64_PROCESSOR_H) */
diff --git a/include/asm-sparc64/pstate.h b/include/asm-sparc64/pstate.h
index 29fb74aa805d..49a7924a89ab 100644
--- a/include/asm-sparc64/pstate.h
+++ b/include/asm-sparc64/pstate.h
@@ -28,11 +28,12 @@
/* The V9 TSTATE Register (with SpitFire and Linux extensions).
*
- * ---------------------------------------------------------------
- * | Resv | CCR | ASI | %pil | PSTATE | Resv | CWP |
- * ---------------------------------------------------------------
- * 63 40 39 32 31 24 23 20 19 8 7 5 4 0
+ * ---------------------------------------------------------------------
+ * | Resv | GL | CCR | ASI | %pil | PSTATE | Resv | CWP |
+ * ---------------------------------------------------------------------
+ * 63 43 42 40 39 32 31 24 23 20 19 8 7 5 4 0
*/
+#define TSTATE_GL _AC(0x0000070000000000,UL) /* Global reg level */
#define TSTATE_CCR _AC(0x000000ff00000000,UL) /* Condition Codes. */
#define TSTATE_XCC _AC(0x000000f000000000,UL) /* Condition Codes. */
#define TSTATE_XNEG _AC(0x0000008000000000,UL) /* %xcc Negative. */
diff --git a/include/asm-sparc64/scratchpad.h b/include/asm-sparc64/scratchpad.h
new file mode 100644
index 000000000000..5e8b01fb3343
--- /dev/null
+++ b/include/asm-sparc64/scratchpad.h
@@ -0,0 +1,14 @@
+#ifndef _SPARC64_SCRATCHPAD_H
+#define _SPARC64_SCRATCHPAD_H
+
+/* Sun4v scratchpad registers, accessed via ASI_SCRATCHPAD. */
+
+#define SCRATCHPAD_MMU_MISS 0x00 /* Shared with OBP - set by OBP */
+#define SCRATCHPAD_CPUID 0x08 /* Shared with OBP - set by hypervisor */
+#define SCRATCHPAD_UTSBREG1 0x10
+#define SCRATCHPAD_UTSBREG2 0x18
+ /* 0x20 and 0x28, hypervisor only... */
+#define SCRATCHPAD_UNUSED1 0x30
+#define SCRATCHPAD_UNUSED2 0x38 /* Reserved for OBP */
+
+#endif /* !(_SPARC64_SCRATCHPAD_H) */
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 473edb2603ec..89d86ecaab24 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -33,37 +33,13 @@
extern cpumask_t phys_cpu_present_map;
#define cpu_possible_map phys_cpu_present_map
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+
/*
* General functions that each host system must provide.
*/
-static __inline__ int hard_smp_processor_id(void)
-{
- if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- unsigned long cfg, ver;
- __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
- if ((ver >> 32) == 0x003e0016) {
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (cfg)
- : "i" (ASI_JBUS_CONFIG));
- return ((cfg >> 17) & 0x1f);
- } else {
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (cfg)
- : "i" (ASI_SAFARI_CONFIG));
- return ((cfg >> 17) & 0x3ff);
- }
- } else if (this_is_starfire != 0) {
- return starfire_hard_smp_processor_id();
- } else {
- unsigned long upaconfig;
- __asm__ __volatile__("ldxa [%%g0] %1, %0"
- : "=r" (upaconfig)
- : "i" (ASI_UPA_CONFIG));
- return ((upaconfig >> 17) & 0x1f);
- }
-}
-
+extern int hard_smp_processor_id(void);
#define raw_smp_processor_id() (current_thread_info()->cpu)
extern void smp_setup_cpu_possible_map(void);
diff --git a/include/asm-sparc64/sparsemem.h b/include/asm-sparc64/sparsemem.h
new file mode 100644
index 000000000000..ed5c9d8541e2
--- /dev/null
+++ b/include/asm-sparc64/sparsemem.h
@@ -0,0 +1,12 @@
+#ifndef _SPARC64_SPARSEMEM_H
+#define _SPARC64_SPARSEMEM_H
+
+#ifdef __KERNEL__
+
+#define SECTION_SIZE_BITS 26
+#define MAX_PHYSADDR_BITS 42
+#define MAX_PHYSMEM_BITS 42
+
+#endif /* !(__KERNEL__) */
+
+#endif /* !(_SPARC64_SPARSEMEM_H) */
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 962638c9d122..23ad8a7987ad 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -44,6 +44,7 @@ enum ultra_tlb_layout {
spitfire = 0,
cheetah = 1,
cheetah_plus = 2,
+ hypervisor = 3,
};
extern enum ultra_tlb_layout tlb_type;
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index af254e581834..a18ec87a52c1 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -209,9 +209,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
/* so that ASI is only written if it changes, think again. */ \
__asm__ __volatile__("wr %%g0, %0, %%asi" \
: : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+ trap_block[current_thread_info()->cpu].thread = \
+ task_thread_info(next); \
__asm__ __volatile__( \
"mov %%g4, %%g7\n\t" \
- "wrpr %%g0, 0x95, %%pstate\n\t" \
"stx %%i6, [%%sp + 2047 + 0x70]\n\t" \
"stx %%i7, [%%sp + 2047 + 0x78]\n\t" \
"rdpr %%wstate, %%o5\n\t" \
@@ -225,14 +226,10 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
"ldx [%%g6 + %3], %%o6\n\t" \
"ldub [%%g6 + %2], %%o5\n\t" \
"ldub [%%g6 + %4], %%o7\n\t" \
- "mov %%g6, %%l2\n\t" \
"wrpr %%o5, 0x0, %%wstate\n\t" \
"ldx [%%sp + 2047 + 0x70], %%i6\n\t" \
"ldx [%%sp + 2047 + 0x78], %%i7\n\t" \
- "wrpr %%g0, 0x94, %%pstate\n\t" \
- "mov %%l2, %%g6\n\t" \
"ldx [%%g6 + %6], %%g4\n\t" \
- "wrpr %%g0, 0x96, %%pstate\n\t" \
"brz,pt %%o7, 1f\n\t" \
" mov %%g7, %0\n\t" \
"b,a ret_from_syscall\n\t" \
diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h
index ac9d068aab4f..2ebf7f27bf91 100644
--- a/include/asm-sparc64/thread_info.h
+++ b/include/asm-sparc64/thread_info.h
@@ -64,8 +64,6 @@ struct thread_info {
__u64 kernel_cntd0, kernel_cntd1;
__u64 pcr_reg;
- __u64 cee_stuff;
-
struct restart_block restart_block;
struct pt_regs *kern_una_regs;
@@ -104,10 +102,9 @@ struct thread_info {
#define TI_KERN_CNTD0 0x00000480
#define TI_KERN_CNTD1 0x00000488
#define TI_PCR 0x00000490
-#define TI_CEE_STUFF 0x00000498
-#define TI_RESTART_BLOCK 0x000004a0
-#define TI_KUNA_REGS 0x000004c8
-#define TI_KUNA_INSN 0x000004d0
+#define TI_RESTART_BLOCK 0x00000498
+#define TI_KUNA_REGS 0x000004c0
+#define TI_KUNA_INSN 0x000004c8
#define TI_FPREGS 0x00000500
/* We embed this in the uppermost byte of thread_info->flags */
diff --git a/include/asm-sparc64/timex.h b/include/asm-sparc64/timex.h
index 9e8d4175bcb2..2a5e4ebaad80 100644
--- a/include/asm-sparc64/timex.h
+++ b/include/asm-sparc64/timex.h
@@ -14,4 +14,10 @@
typedef unsigned long cycles_t;
#define get_cycles() tick_ops->get_tick()
+#define ARCH_HAS_READ_CURRENT_TIMER 1
+#define read_current_timer(timer_val_p) \
+({ *timer_val_p = tick_ops->get_tick(); \
+ 0; \
+})
+
#endif
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3ef9909ac3ac..9ad5d9c51d42 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -5,6 +5,11 @@
#include <linux/mm.h>
#include <asm/mmu_context.h>
+/* TSB flush operations. */
+struct mmu_gather;
+extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tsb_user(struct mmu_gather *mp);
+
/* TLB flush operations. */
extern void flush_tlb_pending(void);
@@ -14,28 +19,36 @@ extern void flush_tlb_pending(void);
#define flush_tlb_page(vma,addr) flush_tlb_pending()
#define flush_tlb_mm(mm) flush_tlb_pending()
+/* Local cpu only. */
extern void __flush_tlb_all(void);
+
extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
#ifndef CONFIG_SMP
-#define flush_tlb_all() __flush_tlb_all()
#define flush_tlb_kernel_range(start,end) \
- __flush_tlb_kernel_range(start,end)
+do { flush_tsb_kernel_range(start,end); \
+ __flush_tlb_kernel_range(start,end); \
+} while (0)
#else /* CONFIG_SMP */
-extern void smp_flush_tlb_all(void);
extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb_all() smp_flush_tlb_all()
#define flush_tlb_kernel_range(start, end) \
- smp_flush_tlb_kernel_range(start, end)
+do { flush_tsb_kernel_range(start,end); \
+ smp_flush_tlb_kernel_range(start, end); \
+} while (0)
#endif /* ! CONFIG_SMP */
-extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
+static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* We don't use virtual page tables for TLB miss processing
+ * any more. Nowadays we use the TSB.
+ */
+}
#endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
new file mode 100644
index 000000000000..e82612cd9f33
--- /dev/null
+++ b/include/asm-sparc64/tsb.h
@@ -0,0 +1,281 @@
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables. It's a
+ * power-of-2 sized table of TAG/PTE pairs. The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the virtual address and context which
+ * faults.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ * ldxa [%g0] ASI_{D,I}MMU, %g6
+ * sllx %g6, 22, %g6
+ * srlx %g6, 22, %g6
+ * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ * cmp %g4, %g6
+ * bne,pn %xcc, tsb_miss_{d,i}tlb
+ * mov FAULT_CODE_{D,I}TLB, %g3
+ * stxa %g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ * retry
+ *
+ *
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE. The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ * -------------------------------------------------
+ * | - | CONTEXT | - | VADDR bits 63:22 |
+ * -------------------------------------------------
+ * 63 61 60 48 47 42 41 0
+ *
+ * But actually, since we use per-mm TSB's, we zero out the CONTEXT
+ * field.
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries. PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bits for the TSB entry. We
+ * choose to use bit 47 in the tag. Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ */
+
+#define TSB_TAG_LOCK_BIT 47
+#define TSB_TAG_LOCK_HIGH (1 << (TSB_TAG_LOCK_BIT - 32))
+
+#define TSB_TAG_INVALID_BIT 46
+#define TSB_TAG_INVALID_HIGH (1 << (TSB_TAG_INVALID_BIT - 32))
+
+#define TSB_MEMBAR membar #StoreStore
+
+/* Some cpus support physical address quad loads. We want to use
+ * those if possible so we don't need to hard-lock the TSB mapping
+ * into the TLB. We encode some instruction patching in order to
+ * support this.
+ *
+ * The kernel TSB is locked into the TLB by virtue of being in the
+ * kernel image, so we don't play these games for swapper_tsb access.
+ */
+#ifndef __ASSEMBLY__
+struct tsb_ldquad_phys_patch_entry {
+ unsigned int addr;
+ unsigned int sun4u_insn;
+ unsigned int sun4v_insn;
+};
+extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch,
+ __tsb_ldquad_phys_patch_end;
+
+struct tsb_phys_patch_entry {
+ unsigned int addr;
+ unsigned int insn;
+};
+extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
+#endif
+#define TSB_LOAD_QUAD(TSB, REG) \
+661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \
+ .section .tsb_ldquad_phys_patch, "ax"; \
+ .word 661b; \
+ ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \
+ ldda [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \
+ .previous
+
+#define TSB_LOAD_TAG_HIGH(TSB, REG) \
+661: lduwa [TSB] ASI_N, REG; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ lduwa [TSB] ASI_PHYS_USE_EC, REG; \
+ .previous
+
+#define TSB_LOAD_TAG(TSB, REG) \
+661: ldxa [TSB] ASI_N, REG; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ ldxa [TSB] ASI_PHYS_USE_EC, REG; \
+ .previous
+
+#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \
+661: casa [TSB] ASI_N, REG1, REG2; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+ .previous
+
+#define TSB_CAS_TAG(TSB, REG1, REG2) \
+661: casxa [TSB] ASI_N, REG1, REG2; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+ .previous
+
+#define TSB_STORE(ADDR, VAL) \
+661: stxa VAL, [ADDR] ASI_N; \
+ .section .tsb_phys_patch, "ax"; \
+ .word 661b; \
+ stxa VAL, [ADDR] ASI_PHYS_USE_EC; \
+ .previous
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2) \
+99: TSB_LOAD_TAG_HIGH(TSB, REG1); \
+ sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\
+ andcc REG1, REG2, %g0; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \
+ cmp REG1, REG2; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG) \
+ add TSB, 0x8, TSB; \
+ TSB_STORE(TSB, TTE); \
+ sub TSB, 0x8, TSB; \
+ TSB_MEMBAR; \
+ TSB_STORE(TSB, TAG);
+
+#define KTSB_LOAD_QUAD(TSB, REG) \
+ ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG;
+
+#define KTSB_STORE(ADDR, VAL) \
+ stxa VAL, [ADDR] ASI_N;
+
+#define KTSB_LOCK_TAG(TSB, REG1, REG2) \
+99: lduwa [TSB] ASI_N, REG1; \
+ sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\
+ andcc REG1, REG2, %g0; \
+ bne,pn %icc, 99b; \
+ nop; \
+ casa [TSB] ASI_N, REG1, REG2;\
+ cmp REG1, REG2; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_MEMBAR
+
+#define KTSB_WRITE(TSB, TTE, TAG) \
+ add TSB, 0x8, TSB; \
+ stxa TTE, [TSB] ASI_N; \
+ sub TSB, 0x8, TSB; \
+ TSB_MEMBAR; \
+ stxa TAG, [TSB] ASI_N;
+
+ /* Do a kernel page table walk. Leaves physical PTE pointer in
+ * REG1. Jumps to FAIL_LABEL on early page table walk termination.
+ * VADDR will not be clobbered, but REG2 will.
+ */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \
+ sethi %hi(swapper_pg_dir), REG1; \
+ or REG1, %lo(swapper_pg_dir), REG1; \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduw [REG1 + REG2], REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+ /* Do a user page table walk in MMU globals. Leaves physical PTE
+ * pointer in REG1. Jumps to FAIL_LABEL on early page table walk
+ * termination. Physical base of page tables is in PHYS_PGD which
+ * will not be modified.
+ *
+ * VADDR will not be clobbered, but REG1 and REG2 will.
+ */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to. On success
+ * the resulting PTE value will be left in REG1. VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
+ sethi %hi(prom_trans), REG1; \
+ or REG1, %lo(prom_trans), REG1; \
+97: ldx [REG1 + 0x00], REG2; \
+ brz,pn REG2, FAIL_LABEL; \
+ nop; \
+ ldx [REG1 + 0x08], REG3; \
+ add REG2, REG3, REG3; \
+ cmp REG2, VADDR; \
+ bgu,pt %xcc, 98f; \
+ cmp VADDR, REG3; \
+ bgeu,pt %xcc, 98f; \
+ ldx [REG1 + 0x10], REG3; \
+ sub VADDR, REG2, REG2; \
+ ba,pt %xcc, 99f; \
+ add REG3, REG2, REG1; \
+98: ba,pt %xcc, 97b; \
+ add REG1, (3 * 8), REG1; \
+99:
+
+ /* We use a 32K TSB for the whole kernel, this allows to
+ * handle about 16MB of modules and vmalloc mappings without
+ * incurring many hash conflicts.
+ */
+#define KERNEL_TSB_SIZE_BYTES (32 * 1024)
+#define KERNEL_TSB_NENTRIES \
+ (KERNEL_TSB_SIZE_BYTES / 16)
+#define KERNEL_TSB4M_NENTRIES 4096
+
+ /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+ * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
+ * and the found TTE will be left in REG1. REG3 and REG4 must
+ * be an even/odd pair of registers.
+ *
+ * VADDR and TAG will be preserved and not clobbered by this macro.
+ */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+ sethi %hi(swapper_tsb), REG1; \
+ or REG1, %lo(swapper_tsb), REG1; \
+ srlx VADDR, PAGE_SHIFT, REG2; \
+ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+ KTSB_LOAD_QUAD(REG2, REG3); \
+ cmp REG3, TAG; \
+ be,a,pt %xcc, OK_LABEL; \
+ mov REG4, REG1;
+
+ /* This version uses a trick, the TAG is already (VADDR >> 22) so
+ * we can make use of that for the index computation.
+ */
+#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+ sethi %hi(swapper_4m_tsb), REG1; \
+ or REG1, %lo(swapper_4m_tsb), REG1; \
+ and TAG, (KERNEL_TSB_NENTRIES - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+ KTSB_LOAD_QUAD(REG2, REG3); \
+ cmp REG3, TAG; \
+ be,a,pt %xcc, OK_LABEL; \
+ mov REG4, REG1;
+
+#endif /* !(_SPARC64_TSB_H) */
diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h
index 2784f80094c3..2d5e3c464df5 100644
--- a/include/asm-sparc64/ttable.h
+++ b/include/asm-sparc64/ttable.h
@@ -93,7 +93,7 @@
#define SYSCALL_TRAP(routine, systbl) \
sethi %hi(109f), %g7; \
- ba,pt %xcc, scetrap; \
+ ba,pt %xcc, etrap; \
109: or %g7, %lo(109b), %g7; \
sethi %hi(systbl), %l7; \
ba,pt %xcc, routine; \
@@ -109,14 +109,14 @@
nop;nop;nop;
#define TRAP_UTRAP(handler,lvl) \
- ldx [%g6 + TI_UTRAPS], %g1; \
- sethi %hi(109f), %g7; \
- brz,pn %g1, utrap; \
- or %g7, %lo(109f), %g7; \
- ba,pt %xcc, utrap; \
-109: ldx [%g1 + handler*8], %g1; \
- ba,pt %xcc, utrap_ill; \
- mov lvl, %o1;
+ mov handler, %g3; \
+ ba,pt %xcc, utrap_trap; \
+ mov lvl, %g4; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop;
#ifdef CONFIG_SUNOS_EMUL
#define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table)
@@ -136,8 +136,6 @@
#else
#define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall)
#endif
-/* FIXME: Write these actually */
-#define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall)
#define BREAKPOINT_TRAP TRAP(breakpoint_trap)
#define TRAP_IRQ(routine, level) \
@@ -182,6 +180,26 @@
#define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
#endif
+#define SUN4V_ITSB_MISS \
+ ldxa [%g0] ASI_SCRATCHPAD, %g2; \
+ ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4; \
+ ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5; \
+ srlx %g4, 22, %g6; \
+ ba,pt %xcc, sun4v_itsb_miss; \
+ nop; \
+ nop; \
+ nop;
+
+#define SUN4V_DTSB_MISS \
+ ldxa [%g0] ASI_SCRATCHPAD, %g2; \
+ ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4; \
+ ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5; \
+ srlx %g4, 22, %g6; \
+ ba,pt %xcc, sun4v_dtsb_miss; \
+ nop; \
+ nop; \
+ nop;
+
/* Before touching these macros, you owe it to yourself to go and
* see how arch/sparc64/kernel/winfixup.S works... -DaveM
*
@@ -221,6 +239,31 @@
saved; retry; nop; nop; nop; nop; nop; nop; \
nop; nop; nop; nop; nop; nop; nop; nop;
+#define SPILL_0_NORMAL_ETRAP \
+etrap_kernel_spill: \
+ stx %l0, [%sp + STACK_BIAS + 0x00]; \
+ stx %l1, [%sp + STACK_BIAS + 0x08]; \
+ stx %l2, [%sp + STACK_BIAS + 0x10]; \
+ stx %l3, [%sp + STACK_BIAS + 0x18]; \
+ stx %l4, [%sp + STACK_BIAS + 0x20]; \
+ stx %l5, [%sp + STACK_BIAS + 0x28]; \
+ stx %l6, [%sp + STACK_BIAS + 0x30]; \
+ stx %l7, [%sp + STACK_BIAS + 0x38]; \
+ stx %i0, [%sp + STACK_BIAS + 0x40]; \
+ stx %i1, [%sp + STACK_BIAS + 0x48]; \
+ stx %i2, [%sp + STACK_BIAS + 0x50]; \
+ stx %i3, [%sp + STACK_BIAS + 0x58]; \
+ stx %i4, [%sp + STACK_BIAS + 0x60]; \
+ stx %i5, [%sp + STACK_BIAS + 0x68]; \
+ stx %i6, [%sp + STACK_BIAS + 0x70]; \
+ stx %i7, [%sp + STACK_BIAS + 0x78]; \
+ saved; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop;
+
/* Normal 64bit spill */
#define SPILL_1_GENERIC(ASI) \
add %sp, STACK_BIAS + 0x00, %g1; \
@@ -254,6 +297,67 @@
b,a,pt %xcc, spill_fixup_mna; \
b,a,pt %xcc, spill_fixup;
+#define SPILL_1_GENERIC_ETRAP \
+etrap_user_spill_64bit: \
+ stxa %l0, [%sp + STACK_BIAS + 0x00] %asi; \
+ stxa %l1, [%sp + STACK_BIAS + 0x08] %asi; \
+ stxa %l2, [%sp + STACK_BIAS + 0x10] %asi; \
+ stxa %l3, [%sp + STACK_BIAS + 0x18] %asi; \
+ stxa %l4, [%sp + STACK_BIAS + 0x20] %asi; \
+ stxa %l5, [%sp + STACK_BIAS + 0x28] %asi; \
+ stxa %l6, [%sp + STACK_BIAS + 0x30] %asi; \
+ stxa %l7, [%sp + STACK_BIAS + 0x38] %asi; \
+ stxa %i0, [%sp + STACK_BIAS + 0x40] %asi; \
+ stxa %i1, [%sp + STACK_BIAS + 0x48] %asi; \
+ stxa %i2, [%sp + STACK_BIAS + 0x50] %asi; \
+ stxa %i3, [%sp + STACK_BIAS + 0x58] %asi; \
+ stxa %i4, [%sp + STACK_BIAS + 0x60] %asi; \
+ stxa %i5, [%sp + STACK_BIAS + 0x68] %asi; \
+ stxa %i6, [%sp + STACK_BIAS + 0x70] %asi; \
+ stxa %i7, [%sp + STACK_BIAS + 0x78] %asi; \
+ saved; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; \
+ ba,a,pt %xcc, etrap_spill_fixup_64bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_64bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_64bit;
+
+#define SPILL_1_GENERIC_ETRAP_FIXUP \
+etrap_spill_fixup_64bit: \
+ ldub [%g6 + TI_WSAVED], %g1; \
+ sll %g1, 3, %g3; \
+ add %g6, %g3, %g3; \
+ stx %sp, [%g3 + TI_RWIN_SPTRS]; \
+ sll %g1, 7, %g3; \
+ add %g6, %g3, %g3; \
+ stx %l0, [%g3 + TI_REG_WINDOW + 0x00]; \
+ stx %l1, [%g3 + TI_REG_WINDOW + 0x08]; \
+ stx %l2, [%g3 + TI_REG_WINDOW + 0x10]; \
+ stx %l3, [%g3 + TI_REG_WINDOW + 0x18]; \
+ stx %l4, [%g3 + TI_REG_WINDOW + 0x20]; \
+ stx %l5, [%g3 + TI_REG_WINDOW + 0x28]; \
+ stx %l6, [%g3 + TI_REG_WINDOW + 0x30]; \
+ stx %l7, [%g3 + TI_REG_WINDOW + 0x38]; \
+ stx %i0, [%g3 + TI_REG_WINDOW + 0x40]; \
+ stx %i1, [%g3 + TI_REG_WINDOW + 0x48]; \
+ stx %i2, [%g3 + TI_REG_WINDOW + 0x50]; \
+ stx %i3, [%g3 + TI_REG_WINDOW + 0x58]; \
+ stx %i4, [%g3 + TI_REG_WINDOW + 0x60]; \
+ stx %i5, [%g3 + TI_REG_WINDOW + 0x68]; \
+ stx %i6, [%g3 + TI_REG_WINDOW + 0x70]; \
+ stx %i7, [%g3 + TI_REG_WINDOW + 0x78]; \
+ add %g1, 1, %g1; \
+ stb %g1, [%g6 + TI_WSAVED]; \
+ saved; \
+ rdpr %cwp, %g1; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop
+
/* Normal 32bit spill */
#define SPILL_2_GENERIC(ASI) \
srl %sp, 0, %sp; \
@@ -287,6 +391,68 @@
b,a,pt %xcc, spill_fixup_mna; \
b,a,pt %xcc, spill_fixup;
+#define SPILL_2_GENERIC_ETRAP \
+etrap_user_spill_32bit: \
+ srl %sp, 0, %sp; \
+ stwa %l0, [%sp + 0x00] %asi; \
+ stwa %l1, [%sp + 0x04] %asi; \
+ stwa %l2, [%sp + 0x08] %asi; \
+ stwa %l3, [%sp + 0x0c] %asi; \
+ stwa %l4, [%sp + 0x10] %asi; \
+ stwa %l5, [%sp + 0x14] %asi; \
+ stwa %l6, [%sp + 0x18] %asi; \
+ stwa %l7, [%sp + 0x1c] %asi; \
+ stwa %i0, [%sp + 0x20] %asi; \
+ stwa %i1, [%sp + 0x24] %asi; \
+ stwa %i2, [%sp + 0x28] %asi; \
+ stwa %i3, [%sp + 0x2c] %asi; \
+ stwa %i4, [%sp + 0x30] %asi; \
+ stwa %i5, [%sp + 0x34] %asi; \
+ stwa %i6, [%sp + 0x38] %asi; \
+ stwa %i7, [%sp + 0x3c] %asi; \
+ saved; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; \
+ nop; nop; nop; nop; \
+ ba,a,pt %xcc, etrap_spill_fixup_32bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_32bit; \
+ ba,a,pt %xcc, etrap_spill_fixup_32bit;
+
+#define SPILL_2_GENERIC_ETRAP_FIXUP \
+etrap_spill_fixup_32bit: \
+ ldub [%g6 + TI_WSAVED], %g1; \
+ sll %g1, 3, %g3; \
+ add %g6, %g3, %g3; \
+ stx %sp, [%g3 + TI_RWIN_SPTRS]; \
+ sll %g1, 7, %g3; \
+ add %g6, %g3, %g3; \
+ stw %l0, [%g3 + TI_REG_WINDOW + 0x00]; \
+ stw %l1, [%g3 + TI_REG_WINDOW + 0x04]; \
+ stw %l2, [%g3 + TI_REG_WINDOW + 0x08]; \
+ stw %l3, [%g3 + TI_REG_WINDOW + 0x0c]; \
+ stw %l4, [%g3 + TI_REG_WINDOW + 0x10]; \
+ stw %l5, [%g3 + TI_REG_WINDOW + 0x14]; \
+ stw %l6, [%g3 + TI_REG_WINDOW + 0x18]; \
+ stw %l7, [%g3 + TI_REG_WINDOW + 0x1c]; \
+ stw %i0, [%g3 + TI_REG_WINDOW + 0x20]; \
+ stw %i1, [%g3 + TI_REG_WINDOW + 0x24]; \
+ stw %i2, [%g3 + TI_REG_WINDOW + 0x28]; \
+ stw %i3, [%g3 + TI_REG_WINDOW + 0x2c]; \
+ stw %i4, [%g3 + TI_REG_WINDOW + 0x30]; \
+ stw %i5, [%g3 + TI_REG_WINDOW + 0x34]; \
+ stw %i6, [%g3 + TI_REG_WINDOW + 0x38]; \
+ stw %i7, [%g3 + TI_REG_WINDOW + 0x3c]; \
+ add %g1, 1, %g1; \
+ stb %g1, [%g6 + TI_WSAVED]; \
+ saved; \
+ rdpr %cwp, %g1; \
+ sub %g1, 2, %g1; \
+ ba,pt %xcc, etrap_save; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop
+
#define SPILL_1_NORMAL SPILL_1_GENERIC(ASI_AIUP)
#define SPILL_2_NORMAL SPILL_2_GENERIC(ASI_AIUP)
#define SPILL_3_NORMAL SPILL_0_NORMAL
@@ -325,6 +491,35 @@
restored; retry; nop; nop; nop; nop; nop; nop; \
nop; nop; nop; nop; nop; nop; nop; nop;
+#define FILL_0_NORMAL_RTRAP \
+kern_rtt_fill: \
+ rdpr %cwp, %g1; \
+ sub %g1, 1, %g1; \
+ wrpr %g1, %cwp; \
+ ldx [%sp + STACK_BIAS + 0x00], %l0; \
+ ldx [%sp + STACK_BIAS + 0x08], %l1; \
+ ldx [%sp + STACK_BIAS + 0x10], %l2; \
+ ldx [%sp + STACK_BIAS + 0x18], %l3; \
+ ldx [%sp + STACK_BIAS + 0x20], %l4; \
+ ldx [%sp + STACK_BIAS + 0x28], %l5; \
+ ldx [%sp + STACK_BIAS + 0x30], %l6; \
+ ldx [%sp + STACK_BIAS + 0x38], %l7; \
+ ldx [%sp + STACK_BIAS + 0x40], %i0; \
+ ldx [%sp + STACK_BIAS + 0x48], %i1; \
+ ldx [%sp + STACK_BIAS + 0x50], %i2; \
+ ldx [%sp + STACK_BIAS + 0x58], %i3; \
+ ldx [%sp + STACK_BIAS + 0x60], %i4; \
+ ldx [%sp + STACK_BIAS + 0x68], %i5; \
+ ldx [%sp + STACK_BIAS + 0x70], %i6; \
+ ldx [%sp + STACK_BIAS + 0x78], %i7; \
+ restored; \
+ add %g1, 1, %g1; \
+ ba,pt %xcc, kern_rtt_restore; \
+ wrpr %g1, %cwp; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop;
+
+
/* Normal 64bit fill */
#define FILL_1_GENERIC(ASI) \
add %sp, STACK_BIAS + 0x00, %g1; \
@@ -356,6 +551,33 @@
b,a,pt %xcc, fill_fixup_mna; \
b,a,pt %xcc, fill_fixup;
+#define FILL_1_GENERIC_RTRAP \
+user_rtt_fill_64bit: \
+ ldxa [%sp + STACK_BIAS + 0x00] %asi, %l0; \
+ ldxa [%sp + STACK_BIAS + 0x08] %asi, %l1; \
+ ldxa [%sp + STACK_BIAS + 0x10] %asi, %l2; \
+ ldxa [%sp + STACK_BIAS + 0x18] %asi, %l3; \
+ ldxa [%sp + STACK_BIAS + 0x20] %asi, %l4; \
+ ldxa [%sp + STACK_BIAS + 0x28] %asi, %l5; \
+ ldxa [%sp + STACK_BIAS + 0x30] %asi, %l6; \
+ ldxa [%sp + STACK_BIAS + 0x38] %asi, %l7; \
+ ldxa [%sp + STACK_BIAS + 0x40] %asi, %i0; \
+ ldxa [%sp + STACK_BIAS + 0x48] %asi, %i1; \
+ ldxa [%sp + STACK_BIAS + 0x50] %asi, %i2; \
+ ldxa [%sp + STACK_BIAS + 0x58] %asi, %i3; \
+ ldxa [%sp + STACK_BIAS + 0x60] %asi, %i4; \
+ ldxa [%sp + STACK_BIAS + 0x68] %asi, %i5; \
+ ldxa [%sp + STACK_BIAS + 0x70] %asi, %i6; \
+ ldxa [%sp + STACK_BIAS + 0x78] %asi, %i7; \
+ ba,pt %xcc, user_rtt_pre_restore; \
+ restored; \
+ nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
/* Normal 32bit fill */
#define FILL_2_GENERIC(ASI) \
srl %sp, 0, %sp; \
@@ -387,6 +609,34 @@
b,a,pt %xcc, fill_fixup_mna; \
b,a,pt %xcc, fill_fixup;
+#define FILL_2_GENERIC_RTRAP \
+user_rtt_fill_32bit: \
+ srl %sp, 0, %sp; \
+ lduwa [%sp + 0x00] %asi, %l0; \
+ lduwa [%sp + 0x04] %asi, %l1; \
+ lduwa [%sp + 0x08] %asi, %l2; \
+ lduwa [%sp + 0x0c] %asi, %l3; \
+ lduwa [%sp + 0x10] %asi, %l4; \
+ lduwa [%sp + 0x14] %asi, %l5; \
+ lduwa [%sp + 0x18] %asi, %l6; \
+ lduwa [%sp + 0x1c] %asi, %l7; \
+ lduwa [%sp + 0x20] %asi, %i0; \
+ lduwa [%sp + 0x24] %asi, %i1; \
+ lduwa [%sp + 0x28] %asi, %i2; \
+ lduwa [%sp + 0x2c] %asi, %i3; \
+ lduwa [%sp + 0x30] %asi, %i4; \
+ lduwa [%sp + 0x34] %asi, %i5; \
+ lduwa [%sp + 0x38] %asi, %i6; \
+ lduwa [%sp + 0x3c] %asi, %i7; \
+ ba,pt %xcc, user_rtt_pre_restore; \
+ restored; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
#define FILL_1_NORMAL FILL_1_GENERIC(ASI_AIUP)
#define FILL_2_NORMAL FILL_2_GENERIC(ASI_AIUP)
#define FILL_3_NORMAL FILL_0_NORMAL
diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h
index c91d1e38eac6..afe236ba555b 100644
--- a/include/asm-sparc64/uaccess.h
+++ b/include/asm-sparc64/uaccess.h
@@ -114,16 +114,6 @@ case 8: __put_user_asm(data,x,addr,__pu_ret); break; \
default: __pu_ret = __put_user_bad(); break; \
} __pu_ret; })
-#define __put_user_nocheck_ret(data,addr,size,retval) ({ \
-register int __foo __asm__ ("l1"); \
-switch (size) { \
-case 1: __put_user_asm_ret(data,b,addr,retval,__foo); break; \
-case 2: __put_user_asm_ret(data,h,addr,retval,__foo); break; \
-case 4: __put_user_asm_ret(data,w,addr,retval,__foo); break; \
-case 8: __put_user_asm_ret(data,x,addr,retval,__foo); break; \
-default: if (__put_user_bad()) return retval; break; \
-} })
-
#define __put_user_asm(x,size,addr,ret) \
__asm__ __volatile__( \
"/* Put user asm, inline. */\n" \
@@ -143,33 +133,6 @@ __asm__ __volatile__( \
: "=r" (ret) : "r" (x), "r" (__m(addr)), \
"i" (-EFAULT))
-#define __put_user_asm_ret(x,size,addr,ret,foo) \
-if (__builtin_constant_p(ret) && ret == -EFAULT) \
-__asm__ __volatile__( \
- "/* Put user asm ret, inline. */\n" \
-"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".word 1b, __ret_efault\n\n\t" \
- ".previous\n\n\t" \
- : "=r" (foo) : "r" (x), "r" (__m(addr))); \
-else \
-__asm__ __volatile__( \
- "/* Put user asm ret, inline. */\n" \
-"1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \
- ".section .fixup,#alloc,#execinstr\n\t" \
- ".align 4\n" \
-"3:\n\t" \
- "ret\n\t" \
- " restore %%g0, %3, %%o0\n\n\t" \
- ".previous\n\t" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".word 1b, 3b\n\n\t" \
- ".previous\n\n\t" \
- : "=r" (foo) : "r" (x), "r" (__m(addr)), \
- "i" (ret))
-
extern int __put_user_bad(void);
#define __get_user_nocheck(data,addr,size,type) ({ \
@@ -289,14 +252,7 @@ copy_in_user(void __user *to, void __user *from, unsigned long size)
}
#define __copy_in_user copy_in_user
-extern unsigned long __must_check __bzero_noasi(void __user *, unsigned long);
-
-static inline unsigned long __must_check
-__clear_user(void __user *addr, unsigned long size)
-{
-
- return __bzero_noasi(addr, size);
-}
+extern unsigned long __must_check __clear_user(void __user *, unsigned long);
#define clear_user __clear_user
diff --git a/include/asm-sparc64/vdev.h b/include/asm-sparc64/vdev.h
new file mode 100644
index 000000000000..996e6be7b976
--- /dev/null
+++ b/include/asm-sparc64/vdev.h
@@ -0,0 +1,16 @@
+/* vdev.h: SUN4V virtual device interfaces and defines.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#ifndef _SPARC64_VDEV_H
+#define _SPARC64_VDEV_H
+
+#include <linux/types.h>
+
+extern u32 sun4v_vdev_devhandle;
+extern int sun4v_vdev_root;
+
+extern unsigned int sun4v_vdev_device_interrupt(unsigned int);
+
+#endif /* !(_SPARC64_VDEV_H) */
diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h
index 8b3a7e4b6062..8ce3f1813e28 100644
--- a/include/asm-sparc64/xor.h
+++ b/include/asm-sparc64/xor.h
@@ -2,9 +2,11 @@
* include/asm-sparc64/xor.h
*
* High speed xor_block operation for RAID4/5 utilizing the
- * UltraSparc Visual Instruction Set.
+ * UltraSparc Visual Instruction Set and Niagara block-init
+ * twin-load instructions.
*
* Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,8 +18,7 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <asm/pstate.h>
-#include <asm/asi.h>
+#include <asm/spitfire.h>
extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
@@ -37,4 +38,29 @@ static struct xor_block_template xor_block_VIS = {
.do_5 = xor_vis_5,
};
-#define XOR_TRY_TEMPLATES xor_speed(&xor_block_VIS)
+extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
+extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *);
+extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *);
+extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *, unsigned long *);
+
+static struct xor_block_template xor_block_niagara = {
+ .name = "Niagara",
+ .do_2 = xor_niagara_2,
+ .do_3 = xor_niagara_3,
+ .do_4 = xor_niagara_4,
+ .do_5 = xor_niagara_5,
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_VIS); \
+ xor_speed(&xor_block_niagara); \
+ } while (0)
+
+/* For VIS for everything except Niagara. */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 4041122dabfc..57abcea1cb5d 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -127,6 +127,9 @@
/* Hilscher netx */
#define PORT_NETX 71
+/* SUN4V Hypervisor Console */
+#define PORT_SUNHV 72
+
#ifdef __KERNEL__
#include <linux/config.h>