Merge master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6

author: Linus Torvalds <torvalds@g5.osdl.org> 2005-07-07 10:24:51 -0700
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-07-07 10:24:51 -0700
commit: 043d051615aa5da09a7e44f1edbb69798458e067 (patch)
tree: a0bfe7f6ed6efa4e0eb7f6b9891a0dc3f2fafe57
parent: c101f3136cc98a003d0d16be6fab7d0d950581a6 (diff)
parent: 21517a57e838a1fbb7a54a8a77501024e77f83e0 (diff)
download: linux-043d051615aa5da09a7e44f1edbb69798458e067.tar.bz2
31 files changed, 1255 insertions, 544 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index b2e2f6509eb0..e1fb68ddec26 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
 obj-$(CONFIG_IOSAPIC)		+= iosapic.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o domain.o
+obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index cda06f88c66e..542256e98e60 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -640,9 +640,11 @@ acpi_boot_init (void)
 			if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
 				node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
 	}
-	build_cpu_to_node_map();
 # endif
 #endif
+#ifdef CONFIG_ACPI_NUMA
+	build_cpu_to_node_map();
+#endif
 	/* Make boot-up look pretty */
 	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
 	return 0;
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
new file mode 100644
index 000000000000..a68ce6678092
--- /dev/null
+++ b/arch/ia64/kernel/numa.c
@@ -0,0 +1,57 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ia64 kernel NUMA specific stuff
+ *
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ *   Jesse Barnes <jbarnes@sgi.com>
+ */
+#include <linux/config.h>
+#include <linux/topology.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI.
+ */
+void __init build_cpu_to_node_map(void)
+{
+	int cpu, i, node;
+
+	for(node=0; node < MAX_NUMNODES; node++)
+		cpus_clear(node_to_cpu_mask[node]);
+
+	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+		node = -1;
+		for (i = 0; i < NR_CPUS; ++i)
+			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+				node = node_cpuid[i].nid;
+				break;
+			}
+		cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
+		if (node >= 0)
+			cpu_set(cpu, node_to_cpu_mask[node]);
+	}
+}
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index edd9f07860b2..b8a0a7d257a9 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -143,6 +143,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 
 		__copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
 		psr->mfh = 0;	/* drop signal handler's fph contents... */
+		preempt_disable();
 		if (psr->dfh)
 			ia64_drop_fpu(current);
 		else {
@@ -150,6 +151,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 			__ia64_load_fpu(current->thread.fph);
 			ia64_set_local_fpu_owner(current);
 		}
+		preempt_enable();
 	}
 	return err;
 }
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 623b0a546709..7d72c0d872b3 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -525,47 +525,6 @@ smp_build_cpu_map (void)
 	}
 }
 
-#ifdef CONFIG_NUMA
-
-/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
-u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_to_node_map);
-/* which logical CPUs are on which nodes */
-cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
-
-/*
- * Build cpu to node mapping and initialize the per node cpu masks.
- */
-void __init
-build_cpu_to_node_map (void)
-{
-	int cpu, i, node;
-
-	for(node=0; node<MAX_NUMNODES; node++)
-		cpus_clear(node_to_cpu_mask[node]);
-	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
-		/*
-		 * All Itanium NUMA platforms I know use ACPI, so maybe we
-		 * can drop this ifdef completely.                    [EF]
-		 */
-#ifdef CONFIG_ACPI_NUMA
-		node = -1;
-		for (i = 0; i < NR_CPUS; ++i)
-			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
-				node = node_cpuid[i].nid;
-				break;
-			}
-#else
-#		error Fixme: Dunno how to build CPU-to-node map.
-#endif
-		cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
-		if (node >= 0)
-			cpu_set(cpu, node_to_cpu_mask[node]);
-	}
-}
-
-#endif /* CONFIG_NUMA */
-
 /*
  * Cycle through the APs sending Wakeup IPIs to boot each.
  */
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index e7e520d90f03..4440c8343fa4 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -90,14 +90,16 @@ die (const char *str, struct pt_regs *regs, long err)
 		.lock_owner_depth =	0
 	};
 	static int die_counter;
+	int cpu = get_cpu();
 
-	if (die.lock_owner != smp_processor_id()) {
+	if (die.lock_owner != cpu) {
 		console_verbose();
 		spin_lock_irq(&die.lock);
-		die.lock_owner = smp_processor_id();
+		die.lock_owner = cpu;
 		die.lock_owner_depth = 0;
 		bust_spinlocks(1);
 	}
+	put_cpu();
 
 	if (++die.lock_owner_depth < 3) {
 		printk("%s[%d]: %s %ld [%d]\n",
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index f3fd528ead3b..b5c90e548195 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -44,150 +44,7 @@ struct early_node_data {
 };
 
 static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
-
-/**
- * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node
- *
- * This function will move nodes with only CPUs (no memory)
- * to a node with memory which is at the minimum numa_slit distance.
- * Any reassigments will result in the compression of the nodes
- * and renumbering the nid values where appropriate.
- * The static declarations below are to avoid large stack size which
- * makes the code not re-entrant.
- */
-static void __init reassign_cpu_only_nodes(void)
-{
-	struct node_memblk_s *p;
-	int i, j, k, nnode, nid, cpu, cpunid, pxm;
-	u8 cslit, slit;
-	static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata;
-	static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
-	static int node_flip[MAX_NUMNODES] __initdata;
-	static int old_nid_map[NR_CPUS] __initdata;
-
-	for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
-		if (!test_bit(p->nid, (void *) nodes_with_mem)) {
-			set_bit(p->nid, (void *) nodes_with_mem);
-			nnode++;
-		}
-
-	/*
-	 * All nids with memory.
-	 */
-	if (nnode == num_online_nodes())
-		return;
-
-	/*
-	 * Change nids and attempt to migrate CPU-only nodes
-	 * to the best numa_slit (closest neighbor) possible.
-	 * For reassigned CPU nodes a nid can't be arrived at
-	 * until after this loop because the target nid's new
-	 * identity might not have been established yet. So
-	 * new nid values are fabricated above num_online_nodes() and
-	 * mapped back later to their true value.
-	 */
-	/* MCD - This code is a bit complicated, but may be unnecessary now.
-	 * We can now handle much more interesting node-numbering.
-	 * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES
-	 * and that there be no holes in the numbering 0..numnodes
-	 * has become simply 0 <= nid <= MAX_NUMNODES.
-	 */
-	nid = 0;
-	for_each_online_node(i)  {
-		if (test_bit(i, (void *) nodes_with_mem)) {
-			/*
-			 * Save original nid value for numa_slit
-			 * fixup and node_cpuid reassignments.
-			 */
-			node_flip[nid] = i;
-
-			if (i == nid) {
-				nid++;
-				continue;
-			}
-
-			for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
-				if (p->nid == i)
-					p->nid = nid;
-
-			cpunid = nid;
-			nid++;
-		} else
-			cpunid = MAX_NUMNODES;
-
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
-			if (node_cpuid[cpu].nid == i) {
-				/*
-				 * For nodes not being reassigned just
-				 * fix the cpu's nid and reverse pxm map
-				 */
-				if (cpunid < MAX_NUMNODES) {
-					pxm = nid_to_pxm_map[i];
-					pxm_to_nid_map[pxm] =
-					          node_cpuid[cpu].nid = cpunid;
-					continue;
-				}
-
-				/*
-				 * For nodes being reassigned, find best node by
-				 * numa_slit information and then make a temporary
-				 * nid value based on current nid and num_online_nodes().
-				 */
-				slit = 0xff;
-				k = 2*num_online_nodes();
-				for_each_online_node(j) {
-					if (i == j)
-						continue;
-					else if (test_bit(j, (void *) nodes_with_mem)) {
-						cslit = numa_slit[i * num_online_nodes() + j];
-						if (cslit < slit) {
-							k = num_online_nodes() + j;
-							slit = cslit;
-						}
-					}
-				}
-
-				/* save old nid map so we can update the pxm */
-				old_nid_map[cpu] = node_cpuid[cpu].nid;
-				node_cpuid[cpu].nid = k;
-			}
-	}
-
-	/*
-	 * Fixup temporary nid values for CPU-only nodes.
-	 */
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		if (node_cpuid[cpu].nid == (2*num_online_nodes())) {
-			pxm = nid_to_pxm_map[old_nid_map[cpu]];
-			pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
-		} else {
-			for (i = 0; i < nnode; i++) {
-				if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes()))
-					continue;
-
-				pxm = nid_to_pxm_map[old_nid_map[cpu]];
-				pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
-				break;
-			}
-		}
-
-	/*
-	 * Fix numa_slit by compressing from larger
-	 * nid array to reduced nid array.
-	 */
-	for (i = 0; i < nnode; i++)
-		for (j = 0; j < nnode; j++)
-			numa_slit_fix[i * nnode + j] =
-				numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]];
-
-	memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));
-
-	nodes_clear(node_online_map);
-	for (i = 0; i < nnode; i++)
-		node_set_online(i);
-
-	return;
-}
+static nodemask_t memory_less_mask __initdata;
 
 /*
  * To prevent cache aliasing effects, align per-node structures so that they
@@ -233,44 +90,101 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
 }
 
 /**
- * early_nr_phys_cpus_node - return number of physical cpus on a given node
+ * early_nr_cpus_node - return number of cpus on a given node
  * @node: node to check
  *
- * Count the number of physical cpus on @node.  These are cpus that actually
- * exist.  We can't use nr_cpus_node() yet because
+ * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
  * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
- * called yet.
+ * called yet.  Note that node 0 will also count all non-existent cpus.
  */
-static int early_nr_phys_cpus_node(int node)
+static int __init early_nr_cpus_node(int node)
 {
 	int cpu, n = 0;
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		if (node == node_cpuid[cpu].nid)
-			if ((cpu == 0) || node_cpuid[cpu].phys_id)
-				n++;
+			n++;
 
 	return n;
 }
 
+/**
+ * compute_pernodesize - compute size of pernode data
+ * @node: the node id.
+ */
+static unsigned long __init compute_pernodesize(int node)
+{
+	unsigned long pernodesize = 0, cpus;
+
+	cpus = early_nr_cpus_node(node);
+	pernodesize += PERCPU_PAGE_SIZE * cpus;
+	pernodesize += node * L1_CACHE_BYTES;
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
+	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+	pernodesize = PAGE_ALIGN(pernodesize);
+	return pernodesize;
+}
 
 /**
- * early_nr_cpus_node - return number of cpus on a given node
- * @node: node to check
+ * per_cpu_node_setup - setup per-cpu areas on each node
+ * @cpu_data: per-cpu area on this node
+ * @node: node to setup
  *
- * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
- * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
- * called yet.  Note that node 0 will also count all non-existent cpus.
+ * Copy the static per-cpu data into the region we just set aside and then
+ * setup __per_cpu_offset for each CPU on this node.  Return a pointer to
+ * the end of the area.
  */
-static int early_nr_cpus_node(int node)
+static void *per_cpu_node_setup(void *cpu_data, int node)
 {
-	int cpu, n = 0;
+#ifdef CONFIG_SMP
+	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		if (node == node_cpuid[cpu].nid)
-			n++;
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (node == node_cpuid[cpu].nid) {
+			memcpy(__va(cpu_data), __phys_per_cpu_start,
+			       __per_cpu_end - __per_cpu_start);
+			__per_cpu_offset[cpu] = (char*)__va(cpu_data) -
+				__per_cpu_start;
+			cpu_data += PERCPU_PAGE_SIZE;
+		}
+	}
+#endif
+	return cpu_data;
+}
 
-	return n;
+/**
+ * fill_pernode - initialize pernode data.
+ * @node: the node id.
+ * @pernode: physical address of pernode data
+ * @pernodesize: size of the pernode data
+ */
+static void __init fill_pernode(int node, unsigned long pernode,
+	unsigned long pernodesize)
+{
+	void *cpu_data;
+	int cpus = early_nr_cpus_node(node);
+	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+
+	mem_data[node].pernode_addr = pernode;
+	mem_data[node].pernode_size = pernodesize;
+	memset(__va(pernode), 0, pernodesize);
+
+	cpu_data = (void *)pernode;
+	pernode += PERCPU_PAGE_SIZE * cpus;
+	pernode += node * L1_CACHE_BYTES;
+
+	mem_data[node].pgdat = __va(pernode);
+	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+	mem_data[node].node_data = __va(pernode);
+	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+
+	mem_data[node].pgdat->bdata = bdp;
+	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+	cpu_data = per_cpu_node_setup(cpu_data, node);
+
+	return;
 }
 
 /**
@@ -304,9 +218,8 @@ static int early_nr_cpus_node(int node)
 static int __init find_pernode_space(unsigned long start, unsigned long len,
 				     int node)
 {
-	unsigned long epfn, cpu, cpus, phys_cpus;
+	unsigned long epfn;
 	unsigned long pernodesize = 0, pernode, pages, mapsize;
-	void *cpu_data;
 	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
 
 	epfn = (start + len) >> PAGE_SHIFT;
@@ -329,49 +242,12 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 	 * Calculate total size needed, incl. what's necessary
 	 * for good alignment and alias prevention.
 	 */
-	cpus = early_nr_cpus_node(node);
-	phys_cpus = early_nr_phys_cpus_node(node);
-	pernodesize += PERCPU_PAGE_SIZE * cpus;
-	pernodesize += node * L1_CACHE_BYTES;
-	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
-	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
-	pernodesize = PAGE_ALIGN(pernodesize);
+	pernodesize = compute_pernodesize(node);
 	pernode = NODEDATA_ALIGN(start, node);
 
 	/* Is this range big enough for what we want to store here? */
-	if (start + len > (pernode + pernodesize + mapsize)) {
-		mem_data[node].pernode_addr = pernode;
-		mem_data[node].pernode_size = pernodesize;
-		memset(__va(pernode), 0, pernodesize);
-
-		cpu_data = (void *)pernode;
-		pernode += PERCPU_PAGE_SIZE * cpus;
-		pernode += node * L1_CACHE_BYTES;
-
-		mem_data[node].pgdat = __va(pernode);
-		pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
-
-		mem_data[node].node_data = __va(pernode);
-		pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
-
-		mem_data[node].pgdat->bdata = bdp;
-		pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
-
-		/*
-		 * Copy the static per-cpu data into the region we
-		 * just set aside and then setup __per_cpu_offset
-		 * for each CPU on this node.
-		 */
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			if (node == node_cpuid[cpu].nid) {
-				memcpy(__va(cpu_data), __phys_per_cpu_start,
-				       __per_cpu_end - __per_cpu_start);
-				__per_cpu_offset[cpu] = (char*)__va(cpu_data) -
-					__per_cpu_start;
-				cpu_data += PERCPU_PAGE_SIZE;
-			}
-		}
-	}
+	if (start + len > (pernode + pernodesize + mapsize))
+		fill_pernode(node, pernode, pernodesize);
 
 	return 0;
 }
@@ -411,6 +287,9 @@ static void __init reserve_pernode_space(void)
 	for_each_online_node(node) {
 		pg_data_t *pdp = mem_data[node].pgdat;
 
+		if (node_isset(node, memory_less_mask))
+			continue;
+
 		bdp = pdp->bdata;
 
 		/* First the bootmem_map itself */
@@ -436,8 +315,8 @@ static void __init reserve_pernode_space(void)
  */
 static void __init initialize_pernode_data(void)
 {
-	int cpu, node;
 	pg_data_t *pgdat_list[MAX_NUMNODES];
+	int cpu, node;
 
 	for_each_online_node(node)
 		pgdat_list[node] = mem_data[node].pgdat;
@@ -447,12 +326,99 @@ static void __init initialize_pernode_data(void)
 		memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
 		       sizeof(pgdat_list));
 	}
-
+#ifdef CONFIG_SMP
 	/* Set the node_data pointer for each per-cpu struct */
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 		node = node_cpuid[cpu].nid;
 		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
 	}
+#else
+	{
+		struct cpuinfo_ia64 *cpu0_cpu_info;
+		cpu = 0;
+		node = node_cpuid[cpu].nid;
+		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
+			((char *)&per_cpu__cpu_info - __per_cpu_start));
+		cpu0_cpu_info->node_data = mem_data[node].node_data;
+	}
+#endif /* CONFIG_SMP */
+}
+
+/**
+ * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit
+ * 	node but fall back to any other node when __alloc_bootmem_node fails
+ *	for best.
+ * @nid: node id
+ * @pernodesize: size of this node's pernode data
+ * @align: alignment to use for this node's pernode data
+ */
+static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
+	unsigned long align)
+{
+	void *ptr = NULL;
+	u8 best = 0xff;
+	int bestnode = -1, node;
+
+	for_each_online_node(node) {
+		if (node_isset(node, memory_less_mask))
+			continue;
+		else if (node_distance(nid, node) < best) {
+			best = node_distance(nid, node);
+			bestnode = node;
+		}
+	}
+
+	ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat,
+		pernodesize, align, __pa(MAX_DMA_ADDRESS));
+
+	if (!ptr)
+		panic("NO memory for memory less node\n");
+	return ptr;
+}
+
+/**
+ * pgdat_insert - insert the pgdat into global pgdat_list
+ * @pgdat: the pgdat for a node.
+ */
+static void __init pgdat_insert(pg_data_t *pgdat)
+{
+	pg_data_t *prev = NULL, *next;
+
+	for_each_pgdat(next)
+		if (pgdat->node_id < next->node_id)
+			break;
+		else
+			prev = next;
+
+	if (prev) {
+		prev->pgdat_next = pgdat;
+		pgdat->pgdat_next = next;
+	} else {
+		pgdat->pgdat_next = pgdat_list;
+		pgdat_list = pgdat;
+	}
+
+	return;
+}
+
+/**
+ * memory_less_nodes - allocate and initialize CPU only nodes pernode
+ *	information.
+ */
+static void __init memory_less_nodes(void)
+{
+	unsigned long pernodesize;
+	void *pernode;
+	int node;
+
+	for_each_node_mask(node, memory_less_mask) {
+		pernodesize = compute_pernodesize(node);
+		pernode = memory_less_node_alloc(node, pernodesize,
+			(node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024));
+		fill_pernode(node, __pa(pernode), pernodesize);
+	}
+
+	return;
 }
 
 /**
@@ -472,16 +438,19 @@ void __init find_memory(void)
 		node_set_online(0);
 	}
 
+	nodes_or(memory_less_mask, memory_less_mask, node_online_map);
 	min_low_pfn = -1;
 	max_low_pfn = 0;
 
-	if (num_online_nodes() > 1)
-		reassign_cpu_only_nodes();
-
 	/* These actually end up getting called by call_pernode_memory() */
 	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
 	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
 
+	for_each_online_node(node)
+		if (mem_data[node].bootmem_data.node_low_pfn) {
+			node_clear(node, memory_less_mask);
+			mem_data[node].min_pfn = ~0UL;
+		}
 	/*
 	 * Initialize the boot memory maps in reverse order since that's
 	 * what the bootmem allocator expects
@@ -492,17 +461,14 @@ void __init find_memory(void)
 
 		if (!node_online(node))
 			continue;
+		else if (node_isset(node, memory_less_mask))
+			continue;
 
 		bdp = &mem_data[node].bootmem_data;
 		pernode = mem_data[node].pernode_addr;
 		pernodesize = mem_data[node].pernode_size;
 		map = pernode + pernodesize;
 
-		/* Sanity check... */
-		if (!pernode)
-			panic("pernode space for node %d "
-			      "could not be allocated!", node);
-
 		init_bootmem_node(mem_data[node].pgdat,
 				  map>>PAGE_SHIFT,
 				  bdp->node_boot_start>>PAGE_SHIFT,
@@ -512,6 +478,7 @@ void __init find_memory(void)
 	efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
 
 	reserve_pernode_space();
+	memory_less_nodes();
 	initialize_pernode_data();
 
 	max_pfn = max_low_pfn;
@@ -519,6 +486,7 @@ void __init find_memory(void)
 	find_initrd();
 }
 
+#ifdef CONFIG_SMP
 /**
  * per_cpu_init - setup per-cpu variables
  *
@@ -529,15 +497,15 @@ void *per_cpu_init(void)
 {
 	int cpu;
 
-	if (smp_processor_id() == 0) {
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			per_cpu(local_per_cpu_offset, cpu) =
-				__per_cpu_offset[cpu];
-		}
-	}
+	if (smp_processor_id() != 0)
+		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 
 	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 }
+#endif /* CONFIG_SMP */
 
 /**
  * show_mem - give short summary of memory stats
@@ -680,12 +648,13 @@ void __init paging_init(void)
 
 	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
-	/* so min() will work in count_node_pages */
-	for_each_online_node(node)
-		mem_data[node].min_pfn = ~0UL;
-
 	efi_memmap_walk(filter_rsvd_memory, count_node_pages);
 
+	vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page));
+	vmem_map = (struct page *) vmalloc_end;
+	efi_memmap_walk(create_mem_map_page_table, NULL);
+	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
+
 	for_each_online_node(node) {
 		memset(zones_size, 0, sizeof(zones_size));
 		memset(zholes_size, 0, sizeof(zholes_size));
@@ -719,15 +688,6 @@ void __init paging_init(void)
 				 mem_data[node].num_dma_physpages);
 		}
 
-		if (node == 0) {
-			vmalloc_end -=
-				PAGE_ALIGN(max_low_pfn * sizeof(struct page));
-			vmem_map = (struct page *) vmalloc_end;
-
-			efi_memmap_walk(create_mem_map_page_table, NULL);
-			printk("Virtual mem_map starts at 0x%p\n", vmem_map);
-		}
-
 		pfn_offset = mem_data[node].min_pfn;
 
 		NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
@@ -735,5 +695,11 @@ void __init paging_init(void)
 				    pfn_offset, zholes_size);
 	}
 
+	/*
+	 * Make memory less nodes become a member of the known nodes.
+	 */
+	for_each_node_mask(node, memory_less_mask)
+		pgdat_insert(mem_data[node].pgdat);
+
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 4eb2f52b87a1..65f9958db9f0 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -597,7 +597,8 @@ mem_init (void)
 	kclist_add(&kcore_kernel, _stext, _end - _stext);
 
 	for_each_pgdat(pgdat)
-		totalram_pages += free_all_bootmem_node(pgdat);
+		if (pgdat->bdata->node_bootmem_map)
+			totalram_pages += free_all_bootmem_node(pgdat);
 
 	reserved_pages = 0;
 	efi_memmap_walk(count_reserved_pages, &reserved_pages);
diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h
index 868e7ecae84b..580a1c0403a7 100644
--- a/arch/ia64/sn/include/xtalk/hubdev.h
+++ b/arch/ia64/sn/include/xtalk/hubdev.h
@@ -8,6 +8,8 @@
 #ifndef _ASM_IA64_SN_XTALK_HUBDEV_H
 #define _ASM_IA64_SN_XTALK_HUBDEV_H
 
+#include "xtalk/xwidgetdev.h"
+
 #define HUB_WIDGET_ID_MAX 0xf
 #define DEV_PER_WIDGET (2*2*8)
 #define IIO_ITTE_WIDGET_BITS    4       /* size of widget field */
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 783eb4323847..a67f39e448cb 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -9,21 +9,28 @@
 #include <linux/bootmem.h>
 #include <linux/nodemask.h>
 #include <asm/sn/types.h>
-#include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
-#include <asm/sn/pcibus_provider_defs.h>
-#include <asm/sn/pcidev.h>
-#include "pci/pcibr_provider.h"
-#include "xtalk/xwidgetdev.h"
 #include <asm/sn/geo.h>
-#include "xtalk/hubdev.h"
 #include <asm/sn/io.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
 #include <asm/sn/simulator.h>
+#include <asm/sn/sn_sal.h>
 #include <asm/sn/tioca_provider.h>
+#include "xtalk/hubdev.h"
+#include "xtalk/xwidgetdev.h"
 
-char master_baseio_wid;
 nasid_t master_nasid = INVALID_NASID;	/* Partition Master */
 
+static struct list_head sn_sysdata_list;
+
+/* sysdata list struct */
+struct sysdata_el {
+	struct list_head entry;
+	void *sysdata;
+};
+
 struct slab_info {
 	struct hubdev_info hubdev;
 };
@@ -138,23 +145,6 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
 }
 
 /*
- * sn_alloc_pci_sysdata() - This routine allocates a pci controller
- *	which is expected as the pci_dev and pci_bus sysdata by the Linux
- *	PCI infrastructure.
- */
-static inline struct pci_controller *sn_alloc_pci_sysdata(void)
-{
-	struct pci_controller *pci_sysdata;
-
-	pci_sysdata = kmalloc(sizeof(*pci_sysdata), GFP_KERNEL);
-	if (!pci_sysdata)
-		BUG();
-
-	memset(pci_sysdata, 0, sizeof(*pci_sysdata));
-	return pci_sysdata;
-}
-
-/*
  * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for 
  *	each node in the system.
  */
@@ -221,22 +211,34 @@ static void sn_fixup_ionodes(void)
 
 }
 
+void sn_pci_unfixup_slot(struct pci_dev *dev)
+{
+	struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev;
+
+	sn_irq_unfixup(dev);
+	pci_dev_put(host_pci_dev);
+	pci_dev_put(dev);
+}
+
 /*
  * sn_pci_fixup_slot() - This routine sets up a slot's resources
  * consistent with the Linux PCI abstraction layer.  Resources acquired
  * from our PCI provider include PIO maps to BAR space and interrupt
  * objects.
  */
-static void sn_pci_fixup_slot(struct pci_dev *dev)
+void sn_pci_fixup_slot(struct pci_dev *dev)
 {
 	int idx;
 	int segment = 0;
-	uint64_t size;
-	struct sn_irq_info *sn_irq_info;
-	struct pci_dev *host_pci_dev;
 	int status = 0;
 	struct pcibus_bussoft *bs;
+ 	struct pci_bus *host_pci_bus;
+ 	struct pci_dev *host_pci_dev;
+ 	struct sn_irq_info *sn_irq_info;
+ 	unsigned long size;
+ 	unsigned int bus_no, devfn;
 
+	pci_dev_get(dev); /* for the sysdata pointer */
 	dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL);
 	if (SN_PCIDEV_INFO(dev) <= 0)
 		BUG();		/* Cannot afford to run out of memory */
@@ -253,7 +255,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
 				     (u64) __pa(SN_PCIDEV_INFO(dev)),
 				     (u64) __pa(sn_irq_info));
 	if (status)
-		BUG();		/* Cannot get platform pci device information information */
+		BUG(); /* Cannot get platform pci device information */
 
 	/* Copy over PIO Mapped Addresses */
 	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
@@ -275,15 +277,21 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
 			dev->resource[idx].parent = &iomem_resource;
 	}
 
-	/* set up host bus linkages */
-	bs = SN_PCIBUS_BUSSOFT(dev->bus);
-	host_pci_dev =
-	    pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32,
-			  SN_PCIDEV_INFO(dev)->
-			  pdi_slot_host_handle & 0xffffffff);
+	/*
+	 * Using the PROMs values for the PCI host bus, get the Linux
+ 	 * PCI host_pci_dev struct and set up host bus linkages
+ 	 */
+
+ 	bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32;
+ 	devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff;
+ 	host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no);
+ 	host_pci_dev = pci_get_slot(host_pci_bus, devfn);
+
+	SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev;
 	SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info =
-	    SN_PCIDEV_INFO(host_pci_dev);
+	    					SN_PCIDEV_INFO(host_pci_dev);
 	SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev;
+	bs = SN_PCIBUS_BUSSOFT(dev->bus);
 	SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs;
 
 	if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
@@ -297,6 +305,9 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
 		SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info;
 		dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq;
 		sn_irq_fixup(dev, sn_irq_info);
+	} else {
+		SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = NULL;
+		kfree(sn_irq_info);
 	}
 }
 
@@ -304,55 +315,57 @@ static void sn_pci_fixup_slot(struct pci_dev *dev)
  * sn_pci_controller_fixup() - This routine sets up a bus's resources
  * consistent with the Linux PCI abstraction layer.
  */
-static void sn_pci_controller_fixup(int segment, int busnum)
+void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 {
 	int status = 0;
 	int nasid, cnode;
-	struct pci_bus *bus;
 	struct pci_controller *controller;
 	struct pcibus_bussoft *prom_bussoft_ptr;
 	struct hubdev_info *hubdev_info;
 	void *provider_soft;
 	struct sn_pcibus_provider *provider;
 
-	status =
-	    sal_get_pcibus_info((u64) segment, (u64) busnum,
-				(u64) ia64_tpa(&prom_bussoft_ptr));
-	if (status > 0) {
-		return;		/* bus # does not exist */
-	}
-
+ 	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
+ 				     (u64) ia64_tpa(&prom_bussoft_ptr));
+ 	if (status > 0)
+		return;		/*bus # does not exist */
 	prom_bussoft_ptr = __va(prom_bussoft_ptr);
-	controller = sn_alloc_pci_sysdata();
-	/* controller non-zero is BUG'd in sn_alloc_pci_sysdata */
 
-	bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+ 	controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL);
+ 	if (!controller)
+ 		BUG();
+
 	if (bus == NULL) {
-		return;		/* error, or bus already scanned */
+ 		bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+ 		if (bus == NULL)
+ 			return;	/* error, or bus already scanned */
+ 		bus->sysdata = NULL;
 	}
 
+	if (bus->sysdata)
+		goto error_return; /* sysdata already alloc'd */
+
 	/*
 	 * Per-provider fixup.  Copies the contents from prom to local
 	 * area and links SN_PCIBUS_BUSSOFT().
 	 */
 
-	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) {
+	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES)
 		return;		/* unsupported asic type */
-	}
+
+	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
+		goto error_return; /* no further fixup necessary */
 
 	provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
-	if (provider == NULL) {
+	if (provider == NULL)
 		return;		/* no provider registerd for this asic */
-	}
 
 	provider_soft = NULL;
-	if (provider->bus_fixup) {
+	if (provider->bus_fixup)
 		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr);
-	}
 
-	if (provider_soft == NULL) {
+	if (provider_soft == NULL)
 		return;		/* fixup failed or not applicable */
-	}
 
 	/*
 	 * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
@@ -361,12 +374,47 @@ static void sn_pci_controller_fixup(int segment, int busnum)
 
 	bus->sysdata = controller;
 	PCI_CONTROLLER(bus)->platform_data = provider_soft;
-
 	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
 	cnode = nasid_to_cnodeid(nasid);
 	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
 	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
 	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
+
+	return;
+
+error_return:
+
+	kfree(controller);
+	return;
+}
+
+void sn_bus_store_sysdata(struct pci_dev *dev)
+{
+	struct sysdata_el *element;
+
+	element = kcalloc(1, sizeof(struct sysdata_el), GFP_KERNEL);
+	if (!element) {
+		dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__);
+		return;
+	}
+	element->sysdata = dev->sysdata;
+	list_add(&element->entry, &sn_sysdata_list);
+}
+
+void sn_bus_free_sysdata(void)
+{
+	struct sysdata_el *element;
+	struct list_head *list;
+
+sn_sysdata_free_start:
+	list_for_each(list, &sn_sysdata_list) {
+		element = list_entry(list, struct sysdata_el, entry);
+		list_del(&element->entry);
+		kfree(element->sysdata);
+		kfree(element);
+		goto sn_sysdata_free_start;
+	}
+	return;
 }
 
 /*
@@ -403,20 +451,17 @@ static int __init sn_pci_init(void)
 	 */
 	ia64_max_iommu_merge_mask = ~PAGE_MASK;
 	sn_fixup_ionodes();
-	sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL);
-	if (sn_irq <= 0)
-		BUG();		/* Canno afford to run out of memory. */
-	memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS);
-
+	sn_irq_lh_init();
+	INIT_LIST_HEAD(&sn_sysdata_list);
 	sn_init_cpei_timer();
 
 #ifdef CONFIG_PROC_FS
 	register_sn_procfs();
 #endif
 
-	for (i = 0; i < PCI_BUSES_TO_SCAN; i++) {
-		sn_pci_controller_fixup(0, i);
-	}
+	/* busses are not known yet ... */
+	for (i = 0; i < PCI_BUSES_TO_SCAN; i++)
+		sn_pci_controller_fixup(0, i, NULL);
 
 	/*
 	 * Generic Linux PCI Layer has created the pci_bus and pci_dev 
@@ -425,9 +470,8 @@ static int __init sn_pci_init(void)
 	 */
 
 	while ((pci_dev =
-		pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) {
+		pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL)
 		sn_pci_fixup_slot(pci_dev);
-	}
 
 	sn_ioif_inited = 1;	/* sn I/O infrastructure now initialized */
 
@@ -469,3 +513,8 @@ cnodeid_get_geoid(cnodeid_t cnode)
 }
 
 subsys_initcall(sn_pci_init);
+EXPORT_SYMBOL(sn_pci_fixup_slot);
+EXPORT_SYMBOL(sn_pci_unfixup_slot);
+EXPORT_SYMBOL(sn_pci_controller_fixup);
+EXPORT_SYMBOL(sn_bus_store_sysdata);
+EXPORT_SYMBOL(sn_bus_free_sysdata);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 0f4e8138658f..84d276a14ecb 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -9,13 +9,13 @@
  */
 
 #include <linux/irq.h>
-#include <asm/sn/intr.h>
+#include <linux/spinlock.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
-#include "xtalk/xwidgetdev.h"
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
-#include "pci/pcibr_provider.h"
 #include <asm/sn/shub_mmr.h>
 #include <asm/sn/sn_sal.h>
 
@@ -25,7 +25,8 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
 
 extern int sn_force_interrupt_flag;
 extern int sn_ioif_inited;
-struct sn_irq_info **sn_irq;
+static struct list_head **sn_irq_lh;
+static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
 
 static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
 				     u64 sn_irq_info,
@@ -101,7 +102,7 @@ static void sn_end_irq(unsigned int irq)
 		nasid = get_nasid();
 		event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR
 				       (nasid, SH_EVENT_OCCURRED));
-		/* If the UART bit is set here, we may have received an 
+		/* If the UART bit is set here, we may have received an
 		 * interrupt from the UART that the driver missed.  To
 		 * make sure, we IPI ourselves to force us to look again.
 		 */
@@ -115,82 +116,84 @@ static void sn_end_irq(unsigned int irq)
 		force_interrupt(irq);
 }
 
+static void sn_irq_info_free(struct rcu_head *head);
+
 static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct sn_irq_info *sn_irq_info = sn_irq[irq];
-	struct sn_irq_info *tmp_sn_irq_info;
+	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	int cpuid, cpuphys;
-	nasid_t t_nasid;	/* nasid to target */
-	int t_slice;		/* slice to target */
-
-	/* allocate a temp sn_irq_info struct to get new target info */
-	tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL);
-	if (!tmp_sn_irq_info)
-		return;
 
 	cpuid = first_cpu(mask);
 	cpuphys = cpu_physical_id(cpuid);
-	t_nasid = cpuid_to_nasid(cpuid);
-	t_slice = cpuid_to_slice(cpuid);
 
-	while (sn_irq_info) {
-		int status;
-		int local_widget;
-		uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
-		nasid_t local_nasid = NASID_GET(bridge);
+	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
+				 sn_irq_lh[irq], list) {
+		uint64_t bridge;
+		int local_widget, status;
+		nasid_t local_nasid;
+		struct sn_irq_info *new_irq_info;
+
+		new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
+		if (new_irq_info == NULL)
+			break;
+		memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
+
+		bridge = (uint64_t) new_irq_info->irq_bridge;
+		if (!bridge) {
+			kfree(new_irq_info);
+			break; /* irq is not a device interrupt */
+		}
 
-		if (!bridge)
-			break;	/* irq is not a device interrupt */
+		local_nasid = NASID_GET(bridge);
 
 		if (local_nasid & 1)
 			local_widget = TIO_SWIN_WIDGETNUM(bridge);
 		else
 			local_widget = SWIN_WIDGETNUM(bridge);
 
-		/* Free the old PROM sn_irq_info structure */
-		sn_intr_free(local_nasid, local_widget, sn_irq_info);
+		/* Free the old PROM new_irq_info structure */
+		sn_intr_free(local_nasid, local_widget, new_irq_info);
+		/* Update kernels new_irq_info with new target info */
+		unregister_intr_pda(new_irq_info);
 
-		/* allocate a new PROM sn_irq_info struct */
+		/* allocate a new PROM new_irq_info struct */
 		status = sn_intr_alloc(local_nasid, local_widget,
-				       __pa(tmp_sn_irq_info), irq, t_nasid,
-				       t_slice);
-
-		if (status == 0) {
-			/* Update kernels sn_irq_info with new target info */
-			unregister_intr_pda(sn_irq_info);
-			sn_irq_info->irq_cpuid = cpuid;
-			sn_irq_info->irq_nasid = t_nasid;
-			sn_irq_info->irq_slice = t_slice;
-			sn_irq_info->irq_xtalkaddr =
-			    tmp_sn_irq_info->irq_xtalkaddr;
-			sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie;
-			register_intr_pda(sn_irq_info);
-
-			if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) {
-				pcibr_change_devices_irq(sn_irq_info);
-			}
+				       __pa(new_irq_info), irq,
+				       cpuid_to_nasid(cpuid),
+				       cpuid_to_slice(cpuid));
+
+		/* SAL call failed */
+		if (status) {
+			kfree(new_irq_info);
+			break;
+		}
+
+		new_irq_info->irq_cpuid = cpuid;
+		register_intr_pda(new_irq_info);
+
+		if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type))
+			pcibr_change_devices_irq(new_irq_info);
 
-			sn_irq_info = sn_irq_info->irq_next;
+		spin_lock(&sn_irq_info_lock);
+		list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+		spin_unlock(&sn_irq_info_lock);
+		call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
 
 #ifdef CONFIG_SMP
-			set_irq_affinity_info((irq & 0xff), cpuphys, 0);
+		set_irq_affinity_info((irq & 0xff), cpuphys, 0);
 #endif
-		} else {
-			break;	/* snp_affinity failed the intr_alloc */
-		}
 	}
-	kfree(tmp_sn_irq_info);
 }
 
 struct hw_interrupt_type irq_type_sn = {
-	"SN hub",
-	sn_startup_irq,
-	sn_shutdown_irq,
-	sn_enable_irq,
-	sn_disable_irq,
-	sn_ack_irq,
-	sn_end_irq,
-	sn_set_affinity_irq
+	.typename	= "SN hub",
+	.startup	= sn_startup_irq,
+	.shutdown	= sn_shutdown_irq,
+	.enable		= sn_enable_irq,
+	.disable	= sn_disable_irq,
+	.ack		= sn_ack_irq,
+	.end		= sn_end_irq,
+	.set_affinity	= sn_set_affinity_irq
 };
 
 unsigned int sn_local_vector_to_irq(u8 vector)
@@ -231,19 +234,18 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
 	struct sn_irq_info *tmp_irq_info;
 	int i, foundmatch;
 
+	rcu_read_lock();
 	if (pdacpu(cpu)->sn_last_irq == irq) {
 		foundmatch = 0;
-		for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) {
-			tmp_irq_info = sn_irq[i];
-			while (tmp_irq_info) {
+		for (i = pdacpu(cpu)->sn_last_irq - 1;
+		     i && !foundmatch; i--) {
+			list_for_each_entry_rcu(tmp_irq_info,
+						sn_irq_lh[i],
+						list) {
 				if (tmp_irq_info->irq_cpuid == cpu) {
-					foundmatch++;
+					foundmatch = 1;
 					break;
 				}
-				tmp_irq_info = tmp_irq_info->irq_next;
-			}
-			if (foundmatch) {
-				break;
 			}
 		}
 		pdacpu(cpu)->sn_last_irq = i;
@@ -251,60 +253,27 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
 
 	if (pdacpu(cpu)->sn_first_irq == irq) {
 		foundmatch = 0;
-		for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) {
-			tmp_irq_info = sn_irq[i];
-			while (tmp_irq_info) {
+		for (i = pdacpu(cpu)->sn_first_irq + 1;
+		     i < NR_IRQS && !foundmatch; i++) {
+			list_for_each_entry_rcu(tmp_irq_info,
+						sn_irq_lh[i],
+						list) {
 				if (tmp_irq_info->irq_cpuid == cpu) {
-					foundmatch++;
+					foundmatch = 1;
 					break;
 				}
-				tmp_irq_info = tmp_irq_info->irq_next;
-			}
-			if (foundmatch) {
-				break;
 			}
 		}
 		pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
 	}
+	rcu_read_unlock();
 }
 
-struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq,
-				 nasid_t nasid, int slice)
+static void sn_irq_info_free(struct rcu_head *head)
 {
 	struct sn_irq_info *sn_irq_info;
-	int status;
-
-	sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL);
-	if (sn_irq_info == NULL)
-		return NULL;
-
-	memset(sn_irq_info, 0x0, sizeof(*sn_irq_info));
-
-	status =
-	    sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq,
-			  nasid, slice);
-
-	if (status) {
-		kfree(sn_irq_info);
-		return NULL;
-	} else {
-		return sn_irq_info;
-	}
-}
-
-void sn_irq_free(struct sn_irq_info *sn_irq_info)
-{
-	uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
-	nasid_t local_nasid = NASID_GET(bridge);
-	int local_widget;
-
-	if (local_nasid & 1)	/* tio check */
-		local_widget = TIO_SWIN_WIDGETNUM(bridge);
-	else
-		local_widget = SWIN_WIDGETNUM(bridge);
-
-	sn_intr_free(local_nasid, local_widget, sn_irq_info);
 
+	sn_irq_info = container_of(head, struct sn_irq_info, rcu);
 	kfree(sn_irq_info);
 }
 
@@ -314,30 +283,54 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
 	int slice = sn_irq_info->irq_slice;
 	int cpu = nasid_slice_to_cpuid(nasid, slice);
 
+	pci_dev_get(pci_dev);
 	sn_irq_info->irq_cpuid = cpu;
 	sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
 
 	/* link it into the sn_irq[irq] list */
-	sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq];
-	sn_irq[sn_irq_info->irq_irq] = sn_irq_info;
+	spin_lock(&sn_irq_info_lock);
+	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+	spin_unlock(&sn_irq_info_lock);
 
 	(void)register_intr_pda(sn_irq_info);
 }
 
+void sn_irq_unfixup(struct pci_dev *pci_dev)
+{
+	struct sn_irq_info *sn_irq_info;
+
+	/* Only cleanup IRQ stuff if this device has a host bus context */
+	if (!SN_PCIDEV_BUSSOFT(pci_dev))
+		return;
+
+	sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
+	if (!sn_irq_info || !sn_irq_info->irq_irq) {
+		kfree(sn_irq_info);
+		return;
+	}
+
+	unregister_intr_pda(sn_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_del_rcu(&sn_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+	pci_dev_put(pci_dev);
+}
+
 static void force_interrupt(int irq)
 {
 	struct sn_irq_info *sn_irq_info;
 
 	if (!sn_ioif_inited)
 		return;
-	sn_irq_info = sn_irq[irq];
-	while (sn_irq_info) {
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) {
 		if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
-		    (sn_irq_info->irq_bridge != NULL)) {
+		    (sn_irq_info->irq_bridge != NULL))
 			pcibr_force_interrupt(sn_irq_info);
-		}
-		sn_irq_info = sn_irq_info->irq_next;
 	}
+	rcu_read_unlock();
 }
 
 /*
@@ -402,19 +395,41 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
 
 void sn_lb_int_war_check(void)
 {
+	struct sn_irq_info *sn_irq_info;
 	int i;
 
 	if (!sn_ioif_inited || pda->sn_first_irq == 0)
 		return;
+
+	rcu_read_lock();
 	for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
-		struct sn_irq_info *sn_irq_info = sn_irq[i];
-		while (sn_irq_info) {
-			/* Only call for PCI bridges that are fully initialized. */
+		list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) {
+			/*
+			 * Only call for PCI bridges that are fully
+			 * initialized.
+			 */
 			if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
-			    (sn_irq_info->irq_bridge != NULL)) {
+			    (sn_irq_info->irq_bridge != NULL))
 				sn_check_intr(i, sn_irq_info);
-			}
-			sn_irq_info = sn_irq_info->irq_next;
 		}
 	}
+	rcu_read_unlock();
+}
+
+void sn_irq_lh_init(void)
+{
+	int i;
+
+	sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL);
+	if (!sn_irq_lh)
+		panic("SN PCI INIT: Failed to allocate memory for PCI init\n");
+
+	for (i = 0; i < NR_IRQS; i++) {
+		sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+		if (!sn_irq_lh[i])
+			panic("SN PCI INIT: Failed IRQ memory allocation\n");
+
+		INIT_LIST_HEAD(sn_irq_lh[i]);
+	}
+
 }
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 22e10d282c7f..7c7fe441d623 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -270,7 +270,7 @@ void __init sn_setup(char **cmdline_p)
 {
 	long status, ticks_per_sec, drift;
 	int pxm;
-	int major = sn_sal_rev_major(), minor = sn_sal_rev_minor();
+	u32 version = sn_sal_rev();
 	extern void sn_cpu_init(void);
 
 	ia64_sn_plat_set_error_handling_features();
@@ -308,22 +308,21 @@ void __init sn_setup(char **cmdline_p)
 	 * support here so we don't have to listen to failed keyboard probe
 	 * messages.
 	 */
-	if ((major < 2 || (major == 2 && minor <= 9)) &&
-	    acpi_kbd_controller_present) {
+	if (version <= 0x0209 && acpi_kbd_controller_present) {
 		printk(KERN_INFO "Disabling legacy keyboard support as prom "
 		       "is too old and doesn't provide FADT\n");
 		acpi_kbd_controller_present = 0;
 	}
 
-	printk("SGI SAL version %x.%02x\n", major, minor);
+	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 
 	/*
 	 * Confirm the SAL we're running on is recent enough...
 	 */
-	if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR &&
-					   minor < SN_SAL_MIN_MINOR)) {
+	if (version < SN_SAL_MIN_VERSION) {
 		printk(KERN_ERR "This kernel needs SGI SAL version >= "
-		       "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR);
+		       "%x.%02x\n", SN_SAL_MIN_VERSION >> 8,
+		        SN_SAL_MIN_VERSION & 0x00FF);
 		panic("PROM version too old\n");
 	}
 
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 8716f4d5314b..c1cbcd1a1398 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -14,6 +14,7 @@
 #include <linux/proc_fs.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
@@ -481,6 +482,9 @@ static int __init tiocx_init(void)
 	cnodeid_t cnodeid;
 	int found_tiocx_device = 0;
 
+	if (!ia64_platform_is("sn2"))
+		return -ENODEV;
+
 	bus_register(&tiocx_bus_type);
 
 	for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 5da9bdbde7cb..a2f7a88aefbb 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -11,9 +11,10 @@
 
 #include <linux/module.h>
 #include <asm/dma.h>
-#include <asm/sn/sn_sal.h>
+#include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
+#include <asm/sn/sn_sal.h>
 
 #define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
 #define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index 0e47bce85f2d..d1647b863e61 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -8,9 +8,9 @@
 
 #include <linux/types.h>
 #include <asm/sn/sn_sal.h>
+#include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
-#include "pci/pcibr_provider.h"
 
 int pcibr_invalidate_ate = 0;	/* by default don't invalidate ATE on free */
 
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 64af2b2c1787..b058dc2a0b9d 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -8,18 +8,17 @@
 
 #include <linux/types.h>
 #include <linux/pci.h>
-#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
 #include <asm/sn/geo.h>
-#include "xtalk/xwidgetdev.h"
-#include "xtalk/hubdev.h"
+#include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
-#include "pci/tiocp.h"
-#include "pci/pic.h"
-#include "pci/pcibr_provider.h"
-#include "pci/tiocp.h"
+#include <asm/sn/pic.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/tiocp.h>
 #include "tio.h"
-#include <asm/sn/addrs.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
 
 extern int sn_ioif_inited;
 
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 3893999d23d8..9813da56d311 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -6,18 +6,51 @@
  * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
  */
 
-#include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/types.h>
 #include <linux/pci.h>
-#include <asm/sn/sn_sal.h>
-#include "xtalk/xwidgetdev.h"
+#include <asm/sn/addrs.h>
 #include <asm/sn/geo.h>
-#include "xtalk/hubdev.h"
+#include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
-#include "pci/pcibr_provider.h"
-#include <asm/sn/addrs.h>
+#include <asm/sn/sn_sal.h>
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+
+int
+sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp)
+{
+	struct ia64_sal_retval ret_stuff;
+	uint64_t busnum;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
 
+	busnum = soft->pbi_buscommon.bs_persist_busnum;
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, (u64) busnum,
+			(u64) device, (u64) resp, 0, 0, 0, 0);
+
+	return (int)ret_stuff.v0;
+}
+
+int
+sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action,
+		       void *resp)
+{
+	struct ia64_sal_retval ret_stuff;
+	uint64_t busnum;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	busnum = soft->pbi_buscommon.bs_persist_busnum;
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE,
+			(u64) busnum, (u64) device, (u64) action,
+			(u64) resp, 0, 0, 0);
+
+	return (int)ret_stuff.v0;
+}
 
 static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
 {
@@ -188,3 +221,6 @@ pcibr_init_provider(void)
 
 	return 0;
 }
+
+EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable);
+EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
index 865c11c3b50a..21426d02fbe6 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -6,13 +6,13 @@
  * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
  */
 
-#include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
-#include "pci/tiocp.h"
-#include "pci/pic.h"
-#include "pci/pcibr_provider.h"
+#include <asm/sn/pic.h>
+#include <asm/sn/tiocp.h>
 
 union br_ptr {
 	struct tiocp tio;
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 05aa8c2fe9bb..51cc4e63092c 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -589,8 +589,7 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft)
 
 	/* sanity check prom rev */
 
-	if (sn_sal_rev_major() < 4 ||
-	    (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) {
+	if (sn_sal_rev() < 0x0406) {
 		printk
 		    (KERN_ERR "%s:  SGI prom rev 4.06 or greater required "
 		     "for tioca support\n", __FUNCTION__);
diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index 1a4d4ca2a4dc..9c4a39ee89b5 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -187,9 +187,10 @@ config HOTPLUG_PCI_RPA_DLPAR
 
 config HOTPLUG_PCI_SGI
 	tristate "SGI PCI Hotplug Support"
-	depends on HOTPLUG_PCI && IA64_SGI_SN2
+	depends on HOTPLUG_PCI && (IA64_SGI_SN2 || IA64_GENERIC)
 	help
-	  Say Y here if you have an SGI IA64 Altix system.
+	  Say Y here if you want to use the SGI Altix Hotplug
+	  Driver for PCI devices.
 
 	  When in doubt, say N.
 
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index 3e632ff8c717..31a307004b94 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE)		+= pciehp.o
 obj-$(CONFIG_HOTPLUG_PCI_SHPC)		+= shpchp.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA)		+= rpaphp.o
 obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR)	+= rpadlpar_io.o
+obj-$(CONFIG_HOTPLUG_PCI_SGI)		+= sgi_hotplug.o
 
 pci_hotplug-objs	:=	pci_hotplug_core.o
 
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
new file mode 100644
index 000000000000..323041fd41dc
--- /dev/null
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -0,0 +1,611 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This work was based on the 2.4/2.6 kernel development by Dick Reigner.
+ * Work to add BIOS PROM support was completed by Mike Habeck.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/types.h>
+
+#include <asm/sn/addrs.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/types.h>
+
+#include "../pci.h"
+#include "pci_hotplug.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("SGI (prarit@sgi.com, dickie@sgi.com, habeck@sgi.com)");
+MODULE_DESCRIPTION("SGI Altix Hot Plug PCI Controller Driver");
+
+#define PCIIO_ASIC_TYPE_TIOCA     4
+#define PCI_SLOT_ALREADY_UP       2     /* slot already up */
+#define PCI_SLOT_ALREADY_DOWN     3     /* slot already down */
+#define PCI_L1_ERR                7     /* L1 console command error */
+#define PCI_EMPTY_33MHZ          15     /* empty 33 MHz bus */
+#define PCI_L1_QSIZE            128     /* our L1 message buffer size */
+#define SN_MAX_HP_SLOTS		 32	/* max number of hotplug slots */
+#define SGI_HOTPLUG_PROM_REV	0x0420  /* Min. required PROM version */
+
+/* internal list head */
+static struct list_head sn_hp_list;
+
+/* hotplug_slot struct's private pointer */
+struct slot {
+	int device_num;
+	struct pci_bus *pci_bus;
+	/* this struct for glue internal only */
+	struct hotplug_slot *hotplug_slot;
+	struct list_head hp_list;
+};
+
+struct pcibr_slot_enable_resp {
+	int resp_sub_errno;
+	char resp_l1_msg[PCI_L1_QSIZE + 1];
+};
+
+struct pcibr_slot_disable_resp {
+	int resp_sub_errno;
+	char resp_l1_msg[PCI_L1_QSIZE + 1];
+};
+
+enum sn_pci_req_e {
+	PCI_REQ_SLOT_ELIGIBLE,
+	PCI_REQ_SLOT_DISABLE
+};
+
+static int enable_slot(struct hotplug_slot *slot);
+static int disable_slot(struct hotplug_slot *slot);
+static int get_power_status(struct hotplug_slot *slot, u8 *value);
+
+static struct hotplug_slot_ops sn_hotplug_slot_ops = {
+	.owner                  = THIS_MODULE,
+	.enable_slot            = enable_slot,
+	.disable_slot           = disable_slot,
+	.get_power_status       = get_power_status,
+};
+
+static DECLARE_MUTEX(sn_hotplug_sem);
+
+static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device)
+{
+	struct pcibus_info *pcibus_info;
+	int bricktype;
+	int bus_num;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus);
+
+	/* Check to see if this is a valid slot on 'pci_bus' */
+	if (!(pcibus_info->pbi_valid_devices & (1 << device)))
+		return -EPERM;
+
+	bricktype = MODULE_GET_BTYPE(pcibus_info->pbi_moduleid);
+	bus_num = pcibus_info->pbi_buscommon.bs_persist_busnum & 0xf;
+
+	/* Do not allow hotplug operations on base I/O cards */
+	if ((bricktype == L1_BRICKTYPE_IX ||  bricktype == L1_BRICKTYPE_IA) &&
+	    (bus_num == 1 && device != 1))
+		return -EPERM;
+
+	return 1;
+}
+
+static int sn_pci_bus_valid(struct pci_bus *pci_bus)
+{
+	struct pcibus_info *pcibus_info;
+	int asic_type;
+	int bricktype;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus);
+
+	/* Don't register slots hanging off the TIOCA bus */
+	asic_type = pcibus_info->pbi_buscommon.bs_asic_type;
+	if (asic_type == PCIIO_ASIC_TYPE_TIOCA)
+		return -EPERM;
+
+	/* Only register slots in I/O Bricks that support hotplug */
+	bricktype = MODULE_GET_BTYPE(pcibus_info->pbi_moduleid);
+	switch (bricktype) {
+	case L1_BRICKTYPE_IX:
+	case L1_BRICKTYPE_PX:
+	case L1_BRICKTYPE_IA:
+	case L1_BRICKTYPE_PA:
+		return 1;
+		break;
+	default:
+		return -EPERM;
+		break;
+	}
+
+	return -EIO;
+}
+
+static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
+				    struct pci_bus *pci_bus, int device)
+{
+	struct pcibus_info *pcibus_info;
+	struct slot *slot;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus);
+
+	bss_hotplug_slot->private = kcalloc(1, sizeof(struct slot),
+					    GFP_KERNEL);
+	if (!bss_hotplug_slot->private)
+		return -ENOMEM;
+	slot = (struct slot *)bss_hotplug_slot->private;
+
+	bss_hotplug_slot->name = kmalloc(33, GFP_KERNEL);
+	if (!bss_hotplug_slot->name) {
+		kfree(bss_hotplug_slot->private);
+		return -ENOMEM;
+	}
+
+	slot->device_num = device;
+	slot->pci_bus = pci_bus;
+
+	sprintf(bss_hotplug_slot->name, "module_%c%c%c%c%.2d_b_%d_s_%d",
+		'0'+RACK_GET_CLASS(MODULE_GET_RACK(pcibus_info->pbi_moduleid)),
+		'0'+RACK_GET_GROUP(MODULE_GET_RACK(pcibus_info->pbi_moduleid)),
+		'0'+RACK_GET_NUM(MODULE_GET_RACK(pcibus_info->pbi_moduleid)),
+		MODULE_GET_BTCHAR(pcibus_info->pbi_moduleid),
+		MODULE_GET_BPOS(pcibus_info->pbi_moduleid),
+		((int)pcibus_info->pbi_buscommon.bs_persist_busnum) & 0xf,
+		device + 1);
+
+	slot->hotplug_slot = bss_hotplug_slot;
+	list_add(&slot->hp_list, &sn_hp_list);
+
+	return 0;
+}
+
+static struct hotplug_slot * sn_hp_destroy(void)
+{
+	struct slot *slot;
+	struct list_head *list;
+	struct hotplug_slot *bss_hotplug_slot = NULL;
+
+	list_for_each(list, &sn_hp_list) {
+		slot = list_entry(list, struct slot, hp_list);
+		bss_hotplug_slot = slot->hotplug_slot;
+		list_del(&((struct slot *)bss_hotplug_slot->private)->
+			 hp_list);
+		break;
+	}
+	return bss_hotplug_slot;
+}
+
+static void sn_bus_alloc_data(struct pci_dev *dev)
+{
+	struct list_head *node;
+	struct pci_bus *subordinate_bus;
+	struct pci_dev *child;
+
+	sn_pci_fixup_slot(dev);
+
+	/* Recursively sets up the sn_irq_info structs */
+	if (dev->subordinate) {
+		subordinate_bus = dev->subordinate;
+		list_for_each(node, &subordinate_bus->devices) {
+			child = list_entry(node, struct pci_dev, bus_list);
+			sn_bus_alloc_data(child);
+		}
+	}
+}
+
+static void sn_bus_free_data(struct pci_dev *dev)
+{
+	struct list_head *node;
+	struct pci_bus *subordinate_bus;
+	struct pci_dev *child;
+
+	/* Recursively clean up sn_irq_info structs */
+	if (dev->subordinate) {
+		subordinate_bus = dev->subordinate;
+		list_for_each(node, &subordinate_bus->devices) {
+			child = list_entry(node, struct pci_dev, bus_list);
+			sn_bus_free_data(child);
+		}
+	}
+	sn_pci_unfixup_slot(dev);
+}
+
+static u8 sn_power_status_get(struct hotplug_slot *bss_hotplug_slot)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pcibus_info *pcibus_info;
+	u8 retval;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus);
+	retval = pcibus_info->pbi_enabled_devices & (1 << slot->device_num);
+
+	return retval ? 1 : 0;
+}
+
+static void sn_slot_mark_enable(struct hotplug_slot *bss_hotplug_slot,
+				int device_num)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pcibus_info *pcibus_info;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus);
+	pcibus_info->pbi_enabled_devices |= (1 << device_num);
+}
+
+static void sn_slot_mark_disable(struct hotplug_slot *bss_hotplug_slot,
+				 int device_num)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pcibus_info *pcibus_info;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus);
+	pcibus_info->pbi_enabled_devices &= ~(1 << device_num);
+}
+
+static int sn_slot_enable(struct hotplug_slot *bss_hotplug_slot,
+			  int device_num)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pcibus_info *pcibus_info;
+	struct pcibr_slot_enable_resp resp;
+	int rc;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus);
+
+	/*
+	 * Power-on and initialize the slot in the SN
+	 * PCI infrastructure.
+	 */
+	rc = sal_pcibr_slot_enable(pcibus_info, device_num, &resp);
+
+	if (rc == PCI_SLOT_ALREADY_UP) {
+		dev_dbg(slot->pci_bus->self, "is already active\n");
+		return -EPERM;
+	}
+
+	if (rc == PCI_L1_ERR) {
+		dev_dbg(slot->pci_bus->self,
+			"L1 failure %d with message: %s",
+			resp.resp_sub_errno, resp.resp_l1_msg);
+		return -EPERM;
+	}
+
+	if (rc) {
+		dev_dbg(slot->pci_bus->self,
+			"insert failed with error %d sub-error %d\n",
+			rc, resp.resp_sub_errno);
+		return -EIO;
+	}
+
+	sn_slot_mark_enable(bss_hotplug_slot, device_num);
+
+	return 0;
+}
+
+static int sn_slot_disable(struct hotplug_slot *bss_hotplug_slot,
+			   int device_num, int action)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pcibus_info *pcibus_info;
+	struct pcibr_slot_disable_resp resp;
+	int rc;
+
+	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus);
+
+	rc = sal_pcibr_slot_disable(pcibus_info, device_num, action, &resp);
+
+	if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_SLOT_ALREADY_DOWN) {
+		dev_dbg(slot->pci_bus->self, "Slot %s already inactive\n");
+		return -ENODEV;
+	}
+
+	if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_EMPTY_33MHZ) {
+		dev_dbg(slot->pci_bus->self,
+			"Cannot remove last 33MHz card\n");
+		return -EPERM;
+	}
+
+	if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_L1_ERR) {
+		dev_dbg(slot->pci_bus->self,
+			"L1 failure %d with message \n%s\n",
+			resp.resp_sub_errno, resp.resp_l1_msg);
+		return -EPERM;
+	}
+
+	if (action == PCI_REQ_SLOT_ELIGIBLE && rc) {
+		dev_dbg(slot->pci_bus->self,
+			"remove failed with error %d sub-error %d\n",
+			rc, resp.resp_sub_errno);
+		return -EIO;
+	}
+
+	if (action == PCI_REQ_SLOT_ELIGIBLE && !rc)
+		return 0;
+
+	if (action == PCI_REQ_SLOT_DISABLE && !rc) {
+		sn_slot_mark_disable(bss_hotplug_slot, device_num);
+		dev_dbg(slot->pci_bus->self, "remove successful\n");
+		return 0;
+	}
+
+	if (action == PCI_REQ_SLOT_DISABLE && rc) {
+		dev_dbg(slot->pci_bus->self,"remove failed rc = %d\n", rc);
+		return rc;
+	}
+
+	return rc;
+}
+
+static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pci_bus *new_bus = NULL;
+	struct pci_dev *dev;
+	int func, num_funcs;
+	int new_ppb = 0;
+	int rc;
+
+	/* Serialize the Linux PCI infrastructure */
+	down(&sn_hotplug_sem);
+
+	/*
+	 * Power-on and initialize the slot in the SN
+	 * PCI infrastructure.
+	 */
+	rc = sn_slot_enable(bss_hotplug_slot, slot->device_num);
+	if (rc) {
+		up(&sn_hotplug_sem);
+		return rc;
+	}
+
+	num_funcs = pci_scan_slot(slot->pci_bus, PCI_DEVFN(slot->device_num+1,
+							   PCI_FUNC(0)));
+	if (!num_funcs) {
+		dev_dbg(slot->pci_bus->self, "no device in slot\n");
+		up(&sn_hotplug_sem);
+		return -ENODEV;
+	}
+
+	sn_pci_controller_fixup(pci_domain_nr(slot->pci_bus),
+				slot->pci_bus->number,
+				slot->pci_bus);
+	/*
+	 * Map SN resources for all functions on the card
+	 * to the Linux PCI interface and tell the drivers
+	 * about them.
+	 */
+	for (func = 0; func < num_funcs;  func++) {
+		dev = pci_get_slot(slot->pci_bus,
+				   PCI_DEVFN(slot->device_num + 1,
+					     PCI_FUNC(func)));
+
+
+		if (dev) {
+			if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+				unsigned char sec_bus;
+				pci_read_config_byte(dev, PCI_SECONDARY_BUS,
+						     &sec_bus);
+				new_bus = pci_add_new_bus(dev->bus, dev,
+							  sec_bus);
+				pci_scan_child_bus(new_bus);
+				sn_pci_controller_fixup(pci_domain_nr(new_bus),
+							new_bus->number,
+							new_bus);
+				new_ppb = 1;
+			}
+			sn_bus_alloc_data(dev);
+			pci_dev_put(dev);
+		}
+	}
+
+	/* Call the driver for the new device */
+	pci_bus_add_devices(slot->pci_bus);
+	/* Call the drivers for the new devices subordinate to PPB */
+	if (new_ppb)
+		pci_bus_add_devices(new_bus);
+
+	up(&sn_hotplug_sem);
+
+	if (rc == 0)
+		dev_dbg(slot->pci_bus->self,
+			"insert operation successful\n");
+	else
+		dev_dbg(slot->pci_bus->self,
+			"insert operation failed rc = %d\n", rc);
+
+	return rc;
+}
+
+static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
+{
+	struct slot *slot = (struct slot *)bss_hotplug_slot->private;
+	struct pci_dev *dev;
+	int func;
+	int rc;
+
+	/* Acquire update access to the bus */
+	down(&sn_hotplug_sem);
+
+	/* is it okay to bring this slot down? */
+	rc = sn_slot_disable(bss_hotplug_slot, slot->device_num,
+			     PCI_REQ_SLOT_ELIGIBLE);
+	if (rc)
+		goto leaving;
+
+	/* Free the SN resources assigned to the Linux device.*/
+	for (func = 0; func < 8;  func++) {
+		dev = pci_get_slot(slot->pci_bus,
+				   PCI_DEVFN(slot->device_num+1,
+				   	     PCI_FUNC(func)));
+		if (dev) {
+			/*
+			 * Some drivers may use dma accesses during the
+			 * driver remove function. We release the sysdata
+			 * areas after the driver remove functions have
+			 * been called.
+			 */
+			sn_bus_store_sysdata(dev);
+			sn_bus_free_data(dev);
+			pci_remove_bus_device(dev);
+			pci_dev_put(dev);
+		}
+	}
+
+	/* free the collected sysdata pointers */
+	sn_bus_free_sysdata();
+
+	/* Deactivate slot */
+	rc = sn_slot_disable(bss_hotplug_slot, slot->device_num,
+			     PCI_REQ_SLOT_DISABLE);
+ leaving:
+	/* Release the bus lock */
+	up(&sn_hotplug_sem);
+
+	return rc;
+}
+
+static int get_power_status(struct hotplug_slot *bss_hotplug_slot, u8 *value)
+{
+	down(&sn_hotplug_sem);
+	*value = sn_power_status_get(bss_hotplug_slot);
+	up(&sn_hotplug_sem);
+	return 0;
+}
+
+static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot)
+{
+	kfree(bss_hotplug_slot->info);
+	kfree(bss_hotplug_slot->name);
+	kfree(bss_hotplug_slot->private);
+	kfree(bss_hotplug_slot);
+}
+
+static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
+{
+	int device;
+	struct hotplug_slot *bss_hotplug_slot;
+	int rc = 0;
+
+	/*
+	 * Currently only four devices are supported,
+	 * in the future there maybe more -- up to 32.
+	 */
+
+	for (device = 0; device < SN_MAX_HP_SLOTS ; device++) {
+		if (sn_pci_slot_valid(pci_bus, device) != 1)
+			continue;
+
+		bss_hotplug_slot = kcalloc(1,sizeof(struct hotplug_slot),
+					   GFP_KERNEL);
+		if (!bss_hotplug_slot) {
+			rc = -ENOMEM;
+			goto alloc_err;
+		}
+
+		bss_hotplug_slot->info =
+			kcalloc(1,sizeof(struct hotplug_slot_info),
+				GFP_KERNEL);
+		if (!bss_hotplug_slot->info) {
+			rc = -ENOMEM;
+			goto alloc_err;
+		}
+
+		if (sn_hp_slot_private_alloc(bss_hotplug_slot,
+					     pci_bus, device)) {
+			rc = -ENOMEM;
+			goto alloc_err;
+		}
+
+		bss_hotplug_slot->ops = &sn_hotplug_slot_ops;
+		bss_hotplug_slot->release = &sn_release_slot;
+
+		rc = pci_hp_register(bss_hotplug_slot);
+		if (rc)
+			goto register_err;
+	}
+	dev_dbg(pci_bus->self, "Registered bus with hotplug\n");
+	return rc;
+
+register_err:
+	dev_dbg(pci_bus->self, "bus failed to register with err = %d\n",
+	        rc);
+
+alloc_err:
+	if (rc == -ENOMEM)
+		dev_dbg(pci_bus->self, "Memory allocation error\n");
+
+	/* destroy THIS element */
+	if (bss_hotplug_slot)
+		sn_release_slot(bss_hotplug_slot);
+
+	/* destroy anything else on the list */
+	while ((bss_hotplug_slot = sn_hp_destroy()))
+		pci_hp_deregister(bss_hotplug_slot);
+
+	return rc;
+}
+
+static int sn_pci_hotplug_init(void)
+{
+	struct pci_bus *pci_bus = NULL;
+	int rc;
+	int registered = 0;
+
+	INIT_LIST_HEAD(&sn_hp_list);
+
+	if (sn_sal_rev() < SGI_HOTPLUG_PROM_REV) {
+		printk(KERN_ERR "%s: PROM version must be greater than 4.05\n",
+		       __FUNCTION__);
+		return -EPERM;
+	}
+
+	while ((pci_bus = pci_find_next_bus(pci_bus))) {
+		if (!pci_bus->sysdata)
+			continue;
+
+		rc = sn_pci_bus_valid(pci_bus);
+		if (rc != 1) {
+			dev_dbg(pci_bus->self, "not a valid hotplug bus\n");
+			continue;
+		}
+		dev_dbg(pci_bus->self, "valid hotplug bus\n");
+
+		rc = sn_hotplug_slot_register(pci_bus);
+		if (!rc)
+			registered = 1;
+		else {
+			registered = 0;
+			break;
+		}
+	}
+
+	return registered == 1 ? 0 : -ENODEV;
+}
+
+static void sn_pci_hotplug_exit(void)
+{
+	struct hotplug_slot *bss_hotplug_slot;
+
+	while ((bss_hotplug_slot = sn_hp_destroy())) {
+		pci_hp_deregister(bss_hotplug_slot);
+	}
+
+	if (!list_empty(&sn_hp_list))
+		printk(KERN_ERR "%s: internal list is not empty\n", __FILE__);
+}
+
+module_init(sn_pci_hotplug_init);
+module_exit(sn_pci_hotplug_exit);
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index a90a533eba0f..05fa91a31c62 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -379,6 +379,7 @@ exit:
 EXPORT_SYMBOL(pci_dev_present);
 
 EXPORT_SYMBOL(pci_find_bus);
+EXPORT_SYMBOL(pci_find_next_bus);
 EXPORT_SYMBOL(pci_find_device);
 EXPORT_SYMBOL(pci_find_device_reverse);
 EXPORT_SYMBOL(pci_find_slot);
diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h
index 635fdce854a8..ab827d298569 100644
--- a/include/asm-ia64/sn/arch.h
+++ b/include/asm-ia64/sn/arch.h
@@ -11,6 +11,7 @@
 #ifndef _ASM_IA64_SN_ARCH_H
 #define _ASM_IA64_SN_ARCH_H
 
+#include <linux/numa.h>
 #include <asm/types.h>
 #include <asm/percpu.h>
 #include <asm/sn/types.h>
diff --git a/include/asm-ia64/sn/intr.h b/include/asm-ia64/sn/intr.h
index e51471fb0867..e190dd4213d5 100644
--- a/include/asm-ia64/sn/intr.h
+++ b/include/asm-ia64/sn/intr.h
@@ -9,6 +9,8 @@
 #ifndef _ASM_IA64_SN_INTR_H
 #define _ASM_IA64_SN_INTR_H
 
+#include <linux/rcupdate.h>
+
 #define SGI_UART_VECTOR		(0xe9)
 #define SGI_PCIBR_ERROR		(0x33)
 
@@ -33,7 +35,7 @@
 
 // The SN PROM irq struct
 struct sn_irq_info {
-	struct sn_irq_info *irq_next;	/* sharing irq list	     */
+	struct sn_irq_info *irq_next;	/* deprecated DO NOT USE     */
 	short		irq_nasid;	/* Nasid IRQ is assigned to  */
 	int		irq_slice;	/* slice IRQ is assigned to  */
 	int		irq_cpuid;	/* kernel logical cpuid	     */
@@ -47,6 +49,8 @@ struct sn_irq_info {
 	int		irq_cookie;	/* unique cookie 	     */
 	int		irq_flags;	/* flags */
 	int		irq_share_cnt;	/* num devices sharing IRQ   */
+	struct list_head	list;	/* list of sn_irq_info structs */
+	struct rcu_head		rcu;	/* rcu callback list */
 };
 
 extern void sn_send_IPI_phys(int, long, int, int);
diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h
index 1cd291d8badd..f9b8d2164007 100644
--- a/arch/ia64/sn/include/pci/pcibr_provider.h
+++ b/include/asm-ia64/sn/pcibr_provider.h
@@ -8,6 +8,9 @@
 #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
 #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
 
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+
 /* Workarounds */
 #define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */
 
@@ -20,7 +23,7 @@
 #define IS_PIC_SOFT(ps)     (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC)
 
 
-/* 
+/*
  * The different PCI Bridge types supported on the SGI Altix platforms
  */
 #define PCIBR_BRIDGETYPE_UNKNOWN       -1
@@ -100,15 +103,16 @@ struct pcibus_info {
 
 	struct ate_resource     pbi_int_ate_resource;
 	uint64_t                pbi_int_ate_size;
-	
+
 	uint64_t                pbi_dir_xbase;
 	char                    pbi_hub_xid;
 
 	uint64_t                pbi_devreg[8];
-	spinlock_t              pbi_lock;
 
 	uint32_t		pbi_valid_devices;
 	uint32_t		pbi_enabled_devices;
+
+	spinlock_t              pbi_lock;
 };
 
 /*
@@ -148,4 +152,8 @@ extern void 		pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info);
 extern int 		pcibr_ate_alloc(struct pcibus_info *, int);
 extern void 		pcibr_ate_free(struct pcibus_info *, int);
 extern void 		ate_write(struct pcibus_info *, int, int, uint64_t);
+extern int sal_pcibr_slot_enable(struct pcibus_info *soft, int device,
+				 void *resp);
+extern int sal_pcibr_slot_disable(struct pcibus_info *soft, int device,
+				  int action, void *resp);
 #endif
diff --git a/include/asm-ia64/sn/pcidev.h b/include/asm-ia64/sn/pcidev.h
index ed4031d80811..49711d00ad04 100644
--- a/include/asm-ia64/sn/pcidev.h
+++ b/include/asm-ia64/sn/pcidev.h
@@ -10,11 +10,11 @@
 
 #include <linux/pci.h>
 
-extern struct sn_irq_info **sn_irq;
-
 #define SN_PCIDEV_INFO(pci_dev) \
         ((struct pcidev_info *)(pci_dev)->sysdata)
 
+#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \
+	(struct pcibus_info *)((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
 /*
  * Given a pci_bus, return the sn pcibus_bussoft struct.  Note that
  * this only works for root busses, not for busses represented by PPB's.
@@ -23,6 +23,8 @@ extern struct sn_irq_info **sn_irq;
 #define SN_PCIBUS_BUSSOFT(pci_bus) \
         ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
 
+#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \
+	(struct pcibus_info *)((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
 /*
  * Given a struct pci_dev, return the sn pcibus_bussoft struct.  Note
  * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
@@ -50,9 +52,17 @@ struct pcidev_info {
 
 	struct sn_irq_info	*pdi_sn_irq_info;
 	struct sn_pcibus_provider *pdi_provider;	/* sn pci ops */
+	struct pci_dev 		*host_pci_dev;		/* host bus link */
 };
 
 extern void sn_irq_fixup(struct pci_dev *pci_dev,
 			 struct sn_irq_info *sn_irq_info);
-
+extern void sn_irq_unfixup(struct pci_dev *pci_dev);
+extern void sn_pci_controller_fixup(int segment, int busnum,
+ 				    struct pci_bus *bus);
+extern void sn_bus_store_sysdata(struct pci_dev *dev);
+extern void sn_bus_free_sysdata(void);
+extern void sn_pci_fixup_slot(struct pci_dev *dev);
+extern void sn_pci_unfixup_slot(struct pci_dev *dev);
+extern void sn_irq_lh_init(void);
 #endif				/* _ASM_IA64_SN_PCI_PCIDEV_H */
diff --git a/arch/ia64/sn/include/pci/pic.h b/include/asm-ia64/sn/pic.h
index fd18acecb1e6..0de82e6b0893 100644
--- a/arch/ia64/sn/include/pci/pic.h
+++ b/include/asm-ia64/sn/pic.h
@@ -15,7 +15,7 @@
  * PIC handles PCI/X busses.  PCI/X requires that the 'bridge' (i.e. PIC)
  * be designated as 'device 0'.   That is a departure from earlier SGI
  * PCI bridges.  Because of that we use config space 1 to access the
- * config space of the first actual PCI device on the bus. 
+ * config space of the first actual PCI device on the bus.
  * Here's what the PIC manual says:
  *
  *     The current PCI-X bus specification now defines that the parent
@@ -29,14 +29,14 @@
  *     correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc.
  *     PCI-X requires we start a 1, not 0 and currently the PX brick
  *     does associate our:
- * 
+ *
  *         device 0 with configuration space window 1,
- *         device 1 with configuration space window 2, 
+ *         device 1 with configuration space window 2,
  *         device 2 with configuration space window 3,
  *         device 3 with configuration space window 4.
  *
- * The net effect is that all config space access are off-by-one with 
- * relation to other per-slot accesses on the PIC.   
+ * The net effect is that all config space access are off-by-one with
+ * relation to other per-slot accesses on the PIC.
  * Here is a table that shows some of that:
  *
  *                               Internal Slot#
@@ -65,7 +65,7 @@
  *****************************************************************************/
 
 /* NOTE: PIC WAR. PV#854697.  PIC does not allow writes just to [31:0]
- * of a 64-bit register.  When writing PIC registers, always write the 
+ * of a 64-bit register.  When writing PIC registers, always write the
  * entire 64 bits.
  */
 
@@ -164,7 +164,7 @@ struct pic {
 	uint64_t	clear_all;			/* 0x000{438,,,5F8} */
     } p_buf_count[8];
 
-    
+
     /* 0x000600-0x0009FF -- PCI/X registers */
     uint64_t		p_pcix_bus_err_addr;		/* 0x000600 */
     uint64_t		p_pcix_bus_err_attr;		/* 0x000608 */
diff --git a/include/asm-ia64/sn/sn_cpuid.h b/include/asm-ia64/sn/sn_cpuid.h
index 20b300187669..d2c1d34dcce4 100644
--- a/include/asm-ia64/sn/sn_cpuid.h
+++ b/include/asm-ia64/sn/sn_cpuid.h
@@ -81,11 +81,6 @@
  *
  */
 
-#ifndef CONFIG_SMP
-#define cpu_physical_id(cpuid)			((ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff)
-#endif
-
-
 #define get_node_number(addr)			NASID_GET(addr)
 
 /*
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 1455375d2ce4..27976d223186 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -134,43 +134,28 @@
 
 #define SN_SAL_FAKE_PROM			   0x02009999
 
-
 /**
- * sn_sal_rev_major - get the major SGI SAL revision number
- *
- * The SGI PROM stores its version in sal_[ab]_rev_(major|minor).
- * This routine simply extracts the major value from the
- * @ia64_sal_systab structure constructed by ia64_sal_init().
- */
-static inline int
-sn_sal_rev_major(void)
+  * sn_sal_revision - get the SGI SAL revision number
+  *
+  * The SGI PROM stores its version in the sal_[ab]_rev_(major|minor).
+  * This routine simply extracts the major and minor values and
+  * presents them in a u32 format.
+  *
+  * For example, version 4.05 would be represented at 0x0405.
+  */
+static inline u32
+sn_sal_rev(void)
 {
 	struct ia64_sal_systab *systab = efi.sal_systab;
 
-	return (int)systab->sal_b_rev_major;
-}
-
-/**
- * sn_sal_rev_minor - get the minor SGI SAL revision number
- *
- * The SGI PROM stores its version in sal_[ab]_rev_(major|minor).
- * This routine simply extracts the minor value from the
- * @ia64_sal_systab structure constructed by ia64_sal_init().
- */
-static inline int
-sn_sal_rev_minor(void)
-{
-	struct ia64_sal_systab *systab = efi.sal_systab;
-	
-	return (int)systab->sal_b_rev_minor;
+	return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor);
 }
 
 /*
  * Specify the minimum PROM revsion required for this kernel.
  * Note that they're stored in hex format...
  */
-#define SN_SAL_MIN_MAJOR	0x4  /* SN2 kernels need at least PROM 4.0 */
-#define SN_SAL_MIN_MINOR	0x0
+#define SN_SAL_MIN_VERSION	0x0404
 
 /*
  * Returns the master console nasid, if the call fails, return an illegal
diff --git a/arch/ia64/sn/include/pci/tiocp.h b/include/asm-ia64/sn/tiocp.h
index f07c83b2bf6e..5f2489c9d2dd 100644
--- a/arch/ia64/sn/include/pci/tiocp.h
+++ b/include/asm-ia64/sn/tiocp.h
@@ -111,7 +111,7 @@ struct tiocp{
 	uint64_t	clear_all;			/* 0x000{438,,,5F8} */
     } cp_buf_count[8];
 
-    
+
     /* 0x000600-0x0009FF -- PCI/X registers */
     uint64_t		cp_pcix_bus_err_addr;		/* 0x000600 */
     uint64_t		cp_pcix_bus_err_attr;		/* 0x000608 */
author	Linus Torvalds <torvalds@g5.osdl.org>	2005-07-07 10:24:51 -0700
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-07-07 10:24:51 -0700
commit	043d051615aa5da09a7e44f1edbb69798458e067 (patch)
tree	a0bfe7f6ed6efa4e0eb7f6b9891a0dc3f2fafe57
parent	c101f3136cc98a003d0d16be6fab7d0d950581a6 (diff)
parent	21517a57e838a1fbb7a54a8a77501024e77f83e0 (diff)
download	linux-043d051615aa5da09a7e44f1edbb69798458e067.tar.bz2