497 files changed, 25494 insertions, 5470 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 71d05f481727..766abdab94e7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1363,6 +1363,11 @@ running once the system is up. reserve= [KNL,BUGS] Force the kernel to ignore some iomem area + reservetop= [IA-32] + Format: nn[KMG] + Reserves a hole at the top of the kernel virtual + address space. + resume= [SWSUSP] Specify the partition device for software suspend diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index c45daabd3bfe..74563b38ffd9 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -1,7 +1,6 @@ DCCP protocol ============ -Last updated: 10 November 2005 Contents ======== @@ -42,8 +41,11 @@ Socket options DCCP_SOCKOPT_PACKET_SIZE is used for CCID3 to set default packet size for calculations. -DCCP_SOCKOPT_SERVICE sets the service. This is compulsory as per the -specification. If you don't set it you will get EPROTO. +DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of +service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, +the socket will fall back to 0 (which means that no meaningful service code +is present). Connecting sockets set at most one service option; for +listening sockets, multiple service codes can be specified. Notes ===== diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 4117802af0f8..a66bec222b16 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -52,3 +52,18 @@ suspend image will be as small as possible. Reading from this file will display the current image size limit, which is set to 500 MB by default. + +/sys/power/pm_trace controls the code which saves the last PM event point in +the RTC across reboots, so that you can debug a machine that just hangs +during suspend (or more commonly, during resume). Namely, the RTC is only +used to save the last PM event point if this file contains '1'. Initially it +contains '0' which may be changed to '1' by writing a string representing a +nonzero integer into it. + +To use this debugging feature you should attempt to suspend the machine, then +reboot it and run + + dmesg -s 1000000 | grep 'hash matches' + +CAUTION: Using it will cause your machine's real-time (CMOS) clock to be +set to a random invalid time after a resume. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 7cee90223d3a..20d0d797f539 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm: - drop-caches - zone_reclaim_mode - min_unmapped_ratio +- min_slab_ratio - panic_on_oom ============================================================== @@ -138,7 +139,6 @@ This is value ORed together of 1 = Zone reclaim on 2 = Zone reclaim writes dirty pages out 4 = Zone reclaim swaps pages -8 = Also do a global slab reclaim pass zone_reclaim_mode is set during bootup to 1 if it is determined that pages from remote zones will cause a measurable performance reduction. The @@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations. -It may be advisable to allow slab reclaim if the system makes heavy -use of files and builds up large slab caches. 
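(As an aside on the dccp.txt change above: a minimal userspace sketch of
setting a single service code on a connecting socket could look like the
following. The fallback constants are the conventional values from the
kernel headers of this era, and the service code 42 is arbitrary.)

	#include <stdint.h>
	#include <unistd.h>
	#include <arpa/inet.h>
	#include <sys/socket.h>

	#ifndef SOCK_DCCP
	# define SOCK_DCCP		6
	#endif
	#ifndef IPPROTO_DCCP
	# define IPPROTO_DCCP		33
	#endif
	#ifndef SOL_DCCP
	# define SOL_DCCP		269
	#endif
	#ifndef DCCP_SOCKOPT_SERVICE
	# define DCCP_SOCKOPT_SERVICE	2
	#endif

	static int dccp_socket_with_service(void)
	{
		int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
		uint32_t service = htonl(42);	/* arbitrary example code */

		if (fd < 0)
			return -1;
		/* Must happen before connect(); otherwise the socket
		 * falls back to service code 0 as described above. */
		if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
			       &service, sizeof(service)) < 0) {
			close(fd);
			return -1;
		}
		return fd;
	}
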
However, the slab -shrink operation is global, may take a long time and free slabs -in all nodes of the system. - ============================================================= min_unmapped_ratio: This is available only on NUMA kernels. -A percentage of the file backed pages in each zone. Zone reclaim will only +A percentage of the total pages in each zone. Zone reclaim will only occur if more than this percentage of pages are file backed and unmapped. This is to insure that a minimal amount of local pages is still available for file I/O even if the node is overallocated. @@ -182,6 +177,24 @@ The default is 1 percent. ============================================================= +min_slab_ratio: + +This is available only on NUMA kernels. + +A percentage of the total pages in each zone. On Zone reclaim +(fallback from the local zone occurs) slabs will be reclaimed if more +than this percentage of pages in a zone are reclaimable slab pages. +This insures that the slab growth stays under control even in NUMA +systems that rarely perform global reclaim. + +The default is 5 percent. + +Note that slab reclaim is triggered in a per zone / node fashion. +The process of reclaiming slab memory is currently not node specific +and may not be fast. + +============================================================= + panic_on_oom This enables or disables panic on out-of-memory feature. If this is set to 1, diff --git a/MAINTAINERS b/MAINTAINERS index bd446e251d5b..63673e6513b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -443,6 +443,23 @@ W: http://people.redhat.com/sgrubb/audit/ T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git S: Maintained +AVR32 ARCHITECTURE +P: Atmel AVR32 Support Team +M: avr32@atmel.com +P: Haavard Skinnemoen +M: hskinnemoen@atmel.com +W: http://www.atmel.com/products/AVR32/ +W: http://avr32linux.org/ +W: http://avrfreaks.net/ +S: Supported + +AVR32/AT32AP MACHINE SUPPORT +P: Atmel AVR32 Support Team +M: avr32@atmel.com +P: Haavard Skinnemoen +M: hskinnemoen@atmel.com +S: Supported + AX.25 NETWORK LAYER P: Ralf Baechle M: ralf@linux-mips.org @@ -2031,6 +2048,13 @@ L: netfilter@lists.netfilter.org L: netfilter-devel@lists.netfilter.org S: Supported +NETLABEL +P: Paul Moore +M: paul.moore@hp.com +W: http://netlabel.sf.net +L: netdev@vger.kernel.org +S: Supported + NETROM NETWORK LAYER P: Ralf Baechle M: ralf@linux-mips.org diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 213c7850d5fb..2b36afd8e969 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -381,7 +381,7 @@ config ALPHA_EV56 config ALPHA_EV56 prompt "EV56 CPU (speed >= 333MHz)?" - depends on ALPHA_NORITAKE && ALPHA_PRIMO + depends on ALPHA_NORITAKE || ALPHA_PRIMO config ALPHA_EV56 prompt "EV56 CPU (speed >= 400MHz)?" diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 917dad1b74c8..550f4907d613 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -270,7 +270,7 @@ callback_init(void * kernel_end) void paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long dma_pfn, high_pfn; dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 88a999df0ab3..591fc3187c7f 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -177,7 +177,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Free the page table, if there was one. 
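 * (pmd_page_vaddr() used below is the tree-wide rename of the old
 * pmd_page_kernel(); both return the kernel virtual address of the
 * page table that the pmd entry points to.)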
*/ if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) - pte_free_kernel(pmd_page_kernel(pmd)); + pte_free_kernel(pmd_page_vaddr(pmd)); } addr += PGDIR_SIZE; diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig new file mode 100644 index 000000000000..5f1694eea842 --- /dev/null +++ b/arch/avr32/Kconfig @@ -0,0 +1,196 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux Kernel Configuration" + +config AVR32 + bool + default y + # With EMBEDDED=n, we get lots of stuff automatically selected + # that we usually don't need on AVR32. + select EMBEDDED + help + AVR32 is a high-performance 32-bit RISC microprocessor core, + designed for cost-sensitive embedded applications, with particular + emphasis on low power consumption and high code density. + + There is an AVR32 Linux project with a web page at + http://avr32linux.org/. + +config UID16 + bool + +config GENERIC_HARDIRQS + bool + default y + +config HARDIRQS_SW_RESEND + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config GENERIC_TIME + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + +config GENERIC_BUST_SPINLOCK + bool + +config GENERIC_HWEIGHT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +source "init/Kconfig" + +menu "System Type and features" + +config SUBARCH_AVR32B + bool +config MMU + bool +config PERFORMANCE_COUNTERS + bool + +config PLATFORM_AT32AP + bool + select SUBARCH_AVR32B + select MMU + select PERFORMANCE_COUNTERS + +choice + prompt "AVR32 CPU type" + default CPU_AT32AP7000 + +config CPU_AT32AP7000 + bool "AT32AP7000" + select PLATFORM_AT32AP +endchoice + +# +# CPU Daughterboards for ATSTK1000 +config BOARD_ATSTK1002 + bool + +choice + prompt "AVR32 board type" + default BOARD_ATSTK1000 + +config BOARD_ATSTK1000 + bool "ATSTK1000 evaluation board" + select BOARD_ATSTK1002 if CPU_AT32AP7000 +endchoice + +choice + prompt "Boot loader type" + default LOADER_U_BOOT + +config LOADER_U_BOOT + bool "U-Boot (or similar) bootloader" +endchoice + +config LOAD_ADDRESS + hex + default 0x10000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y + +config ENTRY_ADDRESS + hex + default 0x90000000 if LOADER_U_BOOT=y && CPU_AT32AP7000=y + +config PHYS_OFFSET + hex + default 0x10000000 if CPU_AT32AP7000=y + +source "kernel/Kconfig.preempt" + +config HAVE_ARCH_BOOTMEM_NODE + bool + default n + +config ARCH_HAVE_MEMORY_PRESENT + bool + default n + +config NEED_NODE_MEMMAP_SIZE + bool + default n + +config ARCH_FLATMEM_ENABLE + bool + default y + +config ARCH_DISCONTIGMEM_ENABLE + bool + default n + +config ARCH_SPARSEMEM_ENABLE + bool + default n + +source "mm/Kconfig" + +config OWNERSHIP_TRACE + bool "Ownership trace support" + default y + help + Say Y to generate an Ownership Trace message on every context switch, + enabling Nexus-compliant debuggers to keep track of the PID of the + currently executing task. + +# FPU emulation goes here + +source "kernel/Kconfig.hz" + +config CMDLINE + string "Default kernel command line" + default "" + help + If you don't have a boot loader capable of passing a command line string + to the kernel, you may specify one here. As a minimum, you should specify + the memory size and the root device (e.g., mem=8M, root=/dev/nfs). 
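# (A purely illustrative CMDLINE value for the defconfig further down,
#  which enables NFS root and DHCP autoconfiguration, might be
#  "console=ttyS0 root=/dev/nfs ip=dhcp".)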
+ +endmenu + +menu "Bus options" + +config PCI + bool + +source "drivers/pci/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +menu "Executable file formats" +source "fs/Kconfig.binfmt" +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/avr32/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/avr32/Kconfig.debug b/arch/avr32/Kconfig.debug new file mode 100644 index 000000000000..64ace00fe6cb --- /dev/null +++ b/arch/avr32/Kconfig.debug @@ -0,0 +1,19 @@ +menu "Kernel hacking" + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + +source "lib/Kconfig.debug" + +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +endmenu diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile new file mode 100644 index 000000000000..cefc95a73980 --- /dev/null +++ b/arch/avr32/Makefile @@ -0,0 +1,84 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2004-2006 Atmel Corporation. + +# Default target when executing plain make +.PHONY: all +all: uImage vmlinux.elf linux.lst + +KBUILD_DEFCONFIG := atstk1002_defconfig + +CFLAGS += -pipe -fno-builtin -mno-pic +AFLAGS += -mrelax -mno-pic +CFLAGS_MODULE += -mno-relax +LDFLAGS_vmlinux += --relax + +cpuflags-$(CONFIG_CPU_AP7000) += -mcpu=ap7000 + +CFLAGS += $(cpuflags-y) +AFLAGS += $(cpuflags-y) + +CHECKFLAGS += -D__avr32__ + +LIBGCC := $(shell $(CC) $(CFLAGS) -print-libgcc-file-name) + +head-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/head.o +head-y += arch/avr32/kernel/head.o +core-$(CONFIG_PLATFORM_AT32AP) += arch/avr32/mach-at32ap/ +core-$(CONFIG_BOARD_ATSTK1000) += arch/avr32/boards/atstk1000/ +core-$(CONFIG_LOADER_U_BOOT) += arch/avr32/boot/u-boot/ +core-y += arch/avr32/kernel/ +core-y += arch/avr32/mm/ +libs-y += arch/avr32/lib/ #$(LIBGCC) + +archincdir-$(CONFIG_PLATFORM_AT32AP) := arch-at32ap + +include/asm-avr32/.arch: $(wildcard include/config/platform/*.h) include/config/auto.conf + @echo ' SYMLINK include/asm-avr32/arch -> include/asm-avr32/$(archincdir-y)' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p include/asm-avr32 + $(Q)ln -fsn $(srctree)/include/asm-avr32/$(archincdir-y) include/asm-avr32/arch +else + $(Q)ln -fsn $(archincdir-y) include/asm-avr32/arch +endif + @touch $@ + +archprepare: include/asm-avr32/.arch + +BOOT_TARGETS := vmlinux.elf vmlinux.bin uImage uImage.srec + +.PHONY: $(BOOT_TARGETS) install + +boot := arch/$(ARCH)/boot/images + + KBUILD_IMAGE := $(boot)/uImage +vmlinux.elf: KBUILD_IMAGE := $(boot)/vmlinux.elf +vmlinux.cso: KBUILD_IMAGE := $(boot)/vmlinux.cso +uImage.srec: KBUILD_IMAGE := $(boot)/uImage.srec +uImage: KBUILD_IMAGE := $(boot)/uImage + +quiet_cmd_listing = LST $@ + cmd_listing = avr32-linux-objdump $(OBJDUMPFLAGS) -lS $< > $@ +quiet_cmd_disasm = DIS $@ + cmd_disasm = avr32-linux-objdump $(OBJDUMPFLAGS) -d $< > $@ + +vmlinux.elf vmlinux.bin uImage.srec uImage vmlinux.cso: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ + +linux.s: vmlinux + $(call if_changed,disasm) + +linux.lst: vmlinux + $(call 
if_changed,listing) + +define archhelp + @echo '* vmlinux.elf - ELF image with load address 0' + @echo ' vmlinux.cso - PathFinder CSO image' + @echo ' uImage - Create a bootable image for U-Boot' +endef diff --git a/arch/avr32/boards/atstk1000/Makefile b/arch/avr32/boards/atstk1000/Makefile new file mode 100644 index 000000000000..df9499480530 --- /dev/null +++ b/arch/avr32/boards/atstk1000/Makefile @@ -0,0 +1,2 @@ +obj-y += setup.o spi.o flash.o +obj-$(CONFIG_BOARD_ATSTK1002) += atstk1002.o diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c new file mode 100644 index 000000000000..49164e9aadd6 --- /dev/null +++ b/arch/avr32/boards/atstk1000/atstk1002.c @@ -0,0 +1,37 @@ +/* + * ATSTK1002 daughterboard-specific init code + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> + +#include <asm/arch/board.h> + +struct eth_platform_data __initdata eth0_data = { + .valid = 1, + .mii_phy_addr = 0x10, + .is_rmii = 0, + .hw_addr = { 0x6a, 0x87, 0x71, 0x14, 0xcd, 0xcb }, +}; + +extern struct lcdc_platform_data atstk1000_fb0_data; + +static int __init atstk1002_init(void) +{ + at32_add_system_devices(); + + at32_add_device_usart(1); /* /dev/ttyS0 */ + at32_add_device_usart(2); /* /dev/ttyS1 */ + at32_add_device_usart(3); /* /dev/ttyS2 */ + + at32_add_device_eth(0, ð0_data); + at32_add_device_spi(0); + at32_add_device_lcdc(0, &atstk1000_fb0_data); + + return 0; +} +postcore_initcall(atstk1002_init); diff --git a/arch/avr32/boards/atstk1000/flash.c b/arch/avr32/boards/atstk1000/flash.c new file mode 100644 index 000000000000..aac4300cca12 --- /dev/null +++ b/arch/avr32/boards/atstk1000/flash.c @@ -0,0 +1,95 @@ +/* + * ATSTK1000 board-specific flash initialization + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> + +#include <asm/arch/smc.h> + +static struct smc_config flash_config __initdata = { + .ncs_read_setup = 0, + .nrd_setup = 40, + .ncs_write_setup = 0, + .nwe_setup = 10, + + .ncs_read_pulse = 80, + .nrd_pulse = 40, + .ncs_write_pulse = 65, + .nwe_pulse = 55, + + .read_cycle = 120, + .write_cycle = 120, + + .bus_width = 2, + .nrd_controlled = 1, + .nwe_controlled = 1, + .byte_write = 1, +}; + +static struct mtd_partition flash_parts[] = { + { + .name = "u-boot", + .offset = 0x00000000, + .size = 0x00020000, /* 128 KiB */ + .mask_flags = MTD_WRITEABLE, + }, + { + .name = "root", + .offset = 0x00020000, + .size = 0x007d0000, + }, + { + .name = "env", + .offset = 0x007f0000, + .size = 0x00010000, + .mask_flags = MTD_WRITEABLE, + }, +}; + +static struct physmap_flash_data flash_data = { + .width = 2, + .nr_parts = ARRAY_SIZE(flash_parts), + .parts = flash_parts, +}; + +static struct resource flash_resource = { + .start = 0x00000000, + .end = 0x007fffff, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device flash_device = { + .name = "physmap-flash", + .id = 0, + .resource = &flash_resource, + .num_resources = 1, + .dev = { + .platform_data = &flash_data, + }, +}; + +/* This needs to be called after the SMC has been initialized */ +static int __init atstk1000_flash_init(void) +{ + int ret; + + ret = smc_set_configuration(0, &flash_config); + if (ret < 0) { + printk(KERN_ERR "atstk1000: failed to set NOR flash timing\n"); + return ret; + } + + platform_device_register(&flash_device); + + return 0; +} +device_initcall(atstk1000_flash_init); diff --git a/arch/avr32/boards/atstk1000/setup.c b/arch/avr32/boards/atstk1000/setup.c new file mode 100644 index 000000000000..191ab85de9a3 --- /dev/null +++ b/arch/avr32/boards/atstk1000/setup.c @@ -0,0 +1,59 @@ +/* + * ATSTK1000 board-specific setup code. + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/linkage.h> + +#include <asm/setup.h> + +#include <asm/arch/board.h> + +/* Initialized by bootloader-specific startup code. 
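 * (The u-boot startup code in arch/avr32/boot/u-boot/head.S below
 * stores the tag table pointer it receives in r11 into this variable
 * once .bss has been cleared.)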
*/ +struct tag *bootloader_tags __initdata; + +struct lcdc_platform_data __initdata atstk1000_fb0_data; + +asmlinkage void __init board_early_init(void) +{ + extern void sdram_init(void); + +#ifdef CONFIG_LOADER_STANDALONE + sdram_init(); +#endif +} + +void __init board_setup_fbmem(unsigned long fbmem_start, + unsigned long fbmem_size) +{ + if (!fbmem_size) + return; + + if (!fbmem_start) { + void *fbmem; + + fbmem = alloc_bootmem_low_pages(fbmem_size); + fbmem_start = __pa(fbmem); + } else { + pg_data_t *pgdat; + + for_each_online_pgdat(pgdat) { + if (fbmem_start >= pgdat->bdata->node_boot_start + && fbmem_start <= pgdat->bdata->node_low_pfn) + reserve_bootmem_node(pgdat, fbmem_start, + fbmem_size); + } + } + + printk("%luKiB framebuffer memory at address 0x%08lx\n", + fbmem_size >> 10, fbmem_start); + atstk1000_fb0_data.fbmem_start = fbmem_start; + atstk1000_fb0_data.fbmem_size = fbmem_size; +} diff --git a/arch/avr32/boards/atstk1000/spi.c b/arch/avr32/boards/atstk1000/spi.c new file mode 100644 index 000000000000..567726c82c6e --- /dev/null +++ b/arch/avr32/boards/atstk1000/spi.c @@ -0,0 +1,27 @@ +/* + * ATSTK1000 SPI devices + * + * Copyright (C) 2005 Atmel Norway + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/device.h> +#include <linux/spi/spi.h> + +static struct spi_board_info spi_board_info[] __initdata = { + { + .modalias = "ltv350qv", + .max_speed_hz = 16000000, + .bus_num = 0, + .chip_select = 1, + }, +}; + +static int board_init_spi(void) +{ + spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info)); + return 0; +} +arch_initcall(board_init_spi); diff --git a/arch/avr32/boot/images/Makefile b/arch/avr32/boot/images/Makefile new file mode 100644 index 000000000000..ccd74eeecec3 --- /dev/null +++ b/arch/avr32/boot/images/Makefile @@ -0,0 +1,62 @@ +# +# Copyright (C) 2004-2006 Atmel Corporation +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. 
+# + +MKIMAGE := $(srctree)/scripts/mkuboot.sh + +extra-y := vmlinux.bin vmlinux.gz + +OBJCOPYFLAGS_vmlinux.bin := -O binary +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +quiet_cmd_uimage = UIMAGE $@ + cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A avr32 -O linux -T kernel \ + -C gzip -a $(CONFIG_LOAD_ADDRESS) -e $(CONFIG_ENTRY_ADDRESS) \ + -n 'Linux-$(KERNELRELEASE)' -d $< $@ + +targets += uImage uImage.srec +$(obj)/uImage: $(obj)/vmlinux.gz + $(call if_changed,uimage) + @echo ' Image $@ is ready' + +OBJCOPYFLAGS_uImage.srec := -I binary -O srec +$(obj)/uImage.srec: $(obj)/uImage + $(call if_changed,objcopy) + +OBJCOPYFLAGS_vmlinux.elf := --change-section-lma .text-0x80000000 \ + --change-section-lma __ex_table-0x80000000 \ + --change-section-lma .rodata-0x80000000 \ + --change-section-lma .data-0x80000000 \ + --change-section-lma .init-0x80000000 \ + --change-section-lma .bss-0x80000000 \ + --change-section-lma .initrd-0x80000000 \ + --change-section-lma __param-0x80000000 \ + --change-section-lma __ksymtab-0x80000000 \ + --change-section-lma __ksymtab_gpl-0x80000000 \ + --change-section-lma __kcrctab-0x80000000 \ + --change-section-lma __kcrctab_gpl-0x80000000 \ + --change-section-lma __ksymtab_strings-0x80000000 \ + --change-section-lma .got-0x80000000 \ + --set-start 0xa0000000 +$(obj)/vmlinux.elf: vmlinux FORCE + $(call if_changed,objcopy) + +quiet_cmd_sfdwarf = SFDWARF $@ + cmd_sfdwarf = sfdwarf $< TO $@ GNUAVR IW $(SFDWARF_FLAGS) > $(obj)/sfdwarf.log + +$(obj)/vmlinux.cso: $(obj)/vmlinux.elf FORCE + $(call if_changed,sfdwarf) + +install: $(BOOTIMAGE) + sh $(srctree)/install-kernel.sh $< + +# Generated files to be removed upon make clean +clean-files := vmlinux* uImage uImage.srec diff --git a/arch/avr32/boot/u-boot/Makefile b/arch/avr32/boot/u-boot/Makefile new file mode 100644 index 000000000000..125ddc96c275 --- /dev/null +++ b/arch/avr32/boot/u-boot/Makefile @@ -0,0 +1,3 @@ +extra-y := head.o + +obj-y := empty.o diff --git a/arch/avr32/boot/u-boot/empty.S b/arch/avr32/boot/u-boot/empty.S new file mode 100644 index 000000000000..8ac91a5f12f0 --- /dev/null +++ b/arch/avr32/boot/u-boot/empty.S @@ -0,0 +1 @@ +/* Empty file */ diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S new file mode 100644 index 000000000000..4488fa27fe94 --- /dev/null +++ b/arch/avr32/boot/u-boot/head.S @@ -0,0 +1,60 @@ +/* + * Startup code for use with the u-boot bootloader. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <asm/setup.h> + + /* + * The kernel is loaded where we want it to be and all caches + * have just been flushed. We get two parameters from u-boot: + * + * r12 contains a magic number (ATAG_MAGIC) + * r11 points to a tag table providing information about + * the system. + */ + .section .init.text,"ax" + .global _start +_start: + /* Check if the boot loader actually provided a tag table */ + lddpc r0, magic_number + cp.w r12, r0 + brne no_tag_table + + /* Initialize .bss */ + lddpc r2, bss_start_addr + lddpc r3, end_addr + mov r0, 0 + mov r1, 0 +1: st.d r2++, r0 + cp r2, r3 + brlo 1b + + /* + * Save the tag table address for later use. This must be done + * _after_ .bss has been initialized... 
+ */ + lddpc r0, tag_table_addr + st.w r0[0], r11 + + /* Jump to loader-independent setup code */ + rjmp kernel_entry + + .align 2 +magic_number: + .long ATAG_MAGIC +tag_table_addr: + .long bootloader_tags +bss_start_addr: + .long __bss_start +end_addr: + .long _end + +no_tag_table: + sub r12, pc, (. - 2f) + bral panic +2: .asciz "Boot loader didn't provide correct magic number\n" diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig new file mode 100644 index 000000000000..1d22255009fd --- /dev/null +++ b/arch/avr32/configs/atstk1002_defconfig @@ -0,0 +1,754 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.18-rc1 +# Tue Jul 11 12:41:36 2006 +# +CONFIG_AVR32=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +# CONFIG_IKCONFIG is not set +# CONFIG_RELAY is not set +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +# CONFIG_BASE_FULL is not set +# CONFIG_FUTEX is not set +# CONFIG_EPOLL is not set +CONFIG_SHMEM=y +# CONFIG_SLAB is not set +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=1 +CONFIG_SLOB=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set + +# +# Block layer +# +# CONFIG_BLK_DEV_IO_TRACE is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" + +# +# System Type and features +# +CONFIG_SUBARCH_AVR32B=y +CONFIG_MMU=y +CONFIG_PERFORMANCE_COUNTERS=y +CONFIG_PLATFORM_AT32AP=y +CONFIG_CPU_AT32AP7000=y +CONFIG_BOARD_ATSTK1002=y +CONFIG_BOARD_ATSTK1000=y +CONFIG_LOADER_U_BOOT=y +CONFIG_LOAD_ADDRESS=0x10000000 +CONFIG_ENTRY_ADDRESS=0x90000000 +CONFIG_PHYS_OFFSET=0x10000000 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_HAVE_ARCH_BOOTMEM_NODE is not set +# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set +# CONFIG_NEED_NODE_MEMMAP_SIZE is not set +CONFIG_ARCH_FLATMEM_ENABLE=y +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +# CONFIG_ARCH_SPARSEMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +# CONFIG_OWNERSHIP_TRACE is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 
+CONFIG_CMDLINE="" + +# +# Bus options +# + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_NETDEBUG is not set +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set + +# +# DCCP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# I2O device support +# + +# +# Network device support +# +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m + +# +# PHY device support +# +# CONFIG_PHYLIB is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +CONFIG_MACB=y + +# +# Ethernet (1000 Mbit) 
+# + +# +# Ethernet (10000 Mbit) +# + +# +# Token Ring devices +# + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +CONFIG_PPP_ASYNC=m +# CONFIG_PPP_SYNC_TTY is not set +CONFIG_PPP_DEFLATE=m +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set +# CONFIG_PPPOE is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_AT91=y +CONFIG_SERIAL_AT91_CONSOLE=y +# CONFIG_SERIAL_AT91_TTYAT is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# SPI support +# +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +CONFIG_SPI_ATMEL=m +# CONFIG_SPI_BITBANG is not set + +# +# SPI Protocol Masters +# + +# +# Dallas's 1-wire bus +# + +# +# Hardware Monitoring support +# +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FIRMWARE_EDID is not set +CONFIG_FB=m +CONFIG_FB_CFB_FILLRECT=m +CONFIG_FB_CFB_COPYAREA=m +CONFIG_FB_CFB_IMAGEBLIT=m +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set +CONFIG_FB_SIDSA=m +CONFIG_FB_SIDSA_DEFAULT_BPP=24 +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_VIRTUAL is not set + +# +# Logo configuration +# +# CONFIG_LOGO is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +CONFIG_LCD_CLASS_DEVICE=m +CONFIG_LCD_DEVICE=y +CONFIG_LCD_LTV350QV=m + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB_ARCH_HAS_HCD is not set +# CONFIG_USB_ARCH_HAS_OHCI is not set +# CONFIG_USB_ARCH_HAS_EHCI is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# +# InfiniBand support +# + +# +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices 
+# + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_DNOTIFY is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +CONFIG_CONFIGFS_FS=m + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +CONFIG_CIFS=m +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=m +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=m +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=m +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is 
not set +CONFIG_NLS_UTF8=m + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_PRINTK_TIME=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +CONFIG_FRAME_POINTER=y +# CONFIG_UNWIND_INFO is not set +CONFIG_FORCED_INLINING=y +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_KPROBES=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=m +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_DEFLATE=m diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile new file mode 100644 index 000000000000..90e5afff54a2 --- /dev/null +++ b/arch/avr32/kernel/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for the Linux/AVR32 kernel. +# + +extra-y := head.o vmlinux.lds + +obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o +obj-y += syscall_table.o syscall-stubs.o irq.o +obj-y += setup.o traps.o semaphore.o ptrace.o +obj-y += signal.o sys_avr32.o process.o time.o +obj-y += init_task.o switch_to.o cpu.o +obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o +obj-$(CONFIG_KPROBES) += kprobes.o + +USE_STANDARD_AS_RULE := true + +%.lds: %.lds.c FORCE + $(call if_changed_dep,cpp_lds_S) diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c new file mode 100644 index 000000000000..97d865865667 --- /dev/null +++ b/arch/avr32/kernel/asm-offsets.c @@ -0,0 +1,25 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#include <linux/thread_info.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(TI_task, thread_info, task); + OFFSET(TI_exec_domain, thread_info, exec_domain); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_cpu, thread_info, cpu); + OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_restart_block, thread_info, restart_block); +} diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c new file mode 100644 index 000000000000..04f767a272b7 --- /dev/null +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -0,0 +1,55 @@ +/* + * Export AVR32-specific functions for loadable modules. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/module.h> + +#include <asm/checksum.h> +#include <asm/uaccess.h> +#include <asm/delay.h> + +/* + * GCC functions + */ +extern unsigned long long __avr32_lsl64(unsigned long long u, unsigned long b); +extern unsigned long long __avr32_lsr64(unsigned long long u, unsigned long b); +extern unsigned long long __avr32_asr64(unsigned long long u, unsigned long b); +EXPORT_SYMBOL(__avr32_lsl64); +EXPORT_SYMBOL(__avr32_lsr64); +EXPORT_SYMBOL(__avr32_asr64); + +/* + * String functions + */ +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); + +/* + * Userspace access stuff. + */ +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); + +/* Delay loops (lib/delay.S) */ +EXPORT_SYMBOL(__ndelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__const_udelay); + +/* Bit operations (lib/findbit.S) */ +EXPORT_SYMBOL(find_first_zero_bit); +EXPORT_SYMBOL(find_next_zero_bit); +EXPORT_SYMBOL(find_first_bit); +EXPORT_SYMBOL(find_next_bit); +EXPORT_SYMBOL(generic_find_next_zero_le_bit); diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c new file mode 100644 index 000000000000..342452ba2049 --- /dev/null +++ b/arch/avr32/kernel/cpu.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/sysdev.h> +#include <linux/seq_file.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/param.h> +#include <linux/errno.h> + +#include <asm/setup.h> +#include <asm/sysreg.h> + +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +#ifdef CONFIG_PERFORMANCE_COUNTERS + +/* + * XXX: If/when a SMP-capable implementation of AVR32 will ever be + * made, we must make sure that the code executes on the correct CPU. 
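 * (As written, each handler simply reads or writes the system
 * registers of whichever CPU happens to execute the sysfs access.)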
+ */ +static ssize_t show_pc0event(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f); +} +static ssize_t store_pc0event(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf || val > 0x3f) + return -EINVAL; + val = (val << 12) | (sysreg_read(PCCR) & 0xfffc0fff); + sysreg_write(PCCR, val); + return count; +} +static ssize_t show_pc0count(struct sys_device *dev, char *buf) +{ + unsigned long pcnt0; + + pcnt0 = sysreg_read(PCNT0); + return sprintf(buf, "%lu\n", pcnt0); +} +static ssize_t store_pc0count(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCNT0, val); + + return count; +} + +static ssize_t show_pc1event(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f); +} +static ssize_t store_pc1event(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf || val > 0x3f) + return -EINVAL; + val = (val << 18) | (sysreg_read(PCCR) & 0xff03ffff); + sysreg_write(PCCR, val); + return count; +} +static ssize_t show_pc1count(struct sys_device *dev, char *buf) +{ + unsigned long pcnt1; + + pcnt1 = sysreg_read(PCNT1); + return sprintf(buf, "%lu\n", pcnt1); +} +static ssize_t store_pc1count(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCNT1, val); + + return count; +} + +static ssize_t show_pccycles(struct sys_device *dev, char *buf) +{ + unsigned long pccnt; + + pccnt = sysreg_read(PCCNT); + return sprintf(buf, "%lu\n", pccnt); +} +static ssize_t store_pccycles(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + sysreg_write(PCCNT, val); + + return count; +} + +static ssize_t show_pcenable(struct sys_device *dev, char *buf) +{ + unsigned long pccr; + + pccr = sysreg_read(PCCR); + return sprintf(buf, "%c\n", (pccr & 1)?'1':'0'); +} +static ssize_t store_pcenable(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned long pccr, val; + char *endp; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + if (val) + val = 1; + + pccr = sysreg_read(PCCR); + pccr = (pccr & ~1UL) | val; + sysreg_write(PCCR, pccr); + + return count; +} + +static SYSDEV_ATTR(pc0event, 0600, show_pc0event, store_pc0event); +static SYSDEV_ATTR(pc0count, 0600, show_pc0count, store_pc0count); +static SYSDEV_ATTR(pc1event, 0600, show_pc1event, store_pc1event); +static SYSDEV_ATTR(pc1count, 0600, show_pc1count, store_pc1count); +static SYSDEV_ATTR(pccycles, 0600, show_pccycles, store_pccycles); +static SYSDEV_ATTR(pcenable, 0600, show_pcenable, store_pcenable); + +#endif /* CONFIG_PERFORMANCE_COUNTERS */ + +static int __init topology_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu *c = &per_cpu(cpu_devices, cpu); + + register_cpu(c, cpu); + +#ifdef CONFIG_PERFORMANCE_COUNTERS + sysdev_create_file(&c->sysdev, &attr_pc0event); + sysdev_create_file(&c->sysdev, &attr_pc0count); + 
sysdev_create_file(&c->sysdev, &attr_pc1event); + sysdev_create_file(&c->sysdev, &attr_pc1count); + sysdev_create_file(&c->sysdev, &attr_pccycles); + sysdev_create_file(&c->sysdev, &attr_pcenable); +#endif + } + + return 0; +} + +subsys_initcall(topology_init); + +static const char *cpu_names[] = { + "Morgan", + "AP7000", +}; +#define NR_CPU_NAMES ARRAY_SIZE(cpu_names) + +static const char *arch_names[] = { + "AVR32A", + "AVR32B", +}; +#define NR_ARCH_NAMES ARRAY_SIZE(arch_names) + +static const char *mmu_types[] = { + "No MMU", + "ITLB and DTLB", + "Shared TLB", + "MPU" +}; + +void __init setup_processor(void) +{ + unsigned long config0, config1; + unsigned cpu_id, cpu_rev, arch_id, arch_rev, mmu_type; + unsigned tmp; + + config0 = sysreg_read(CONFIG0); /* 0x0000013e; */ + config1 = sysreg_read(CONFIG1); /* 0x01f689a2; */ + cpu_id = config0 >> 24; + cpu_rev = (config0 >> 16) & 0xff; + arch_id = (config0 >> 13) & 0x07; + arch_rev = (config0 >> 10) & 0x07; + mmu_type = (config0 >> 7) & 0x03; + + boot_cpu_data.arch_type = arch_id; + boot_cpu_data.cpu_type = cpu_id; + boot_cpu_data.arch_revision = arch_rev; + boot_cpu_data.cpu_revision = cpu_rev; + boot_cpu_data.tlb_config = mmu_type; + + tmp = (config1 >> 13) & 0x07; + if (tmp) { + boot_cpu_data.icache.ways = 1 << ((config1 >> 10) & 0x07); + boot_cpu_data.icache.sets = 1 << ((config1 >> 16) & 0x0f); + boot_cpu_data.icache.linesz = 1 << (tmp + 1); + } + tmp = (config1 >> 3) & 0x07; + if (tmp) { + boot_cpu_data.dcache.ways = 1 << (config1 & 0x07); + boot_cpu_data.dcache.sets = 1 << ((config1 >> 6) & 0x0f); + boot_cpu_data.dcache.linesz = 1 << (tmp + 1); + } + + if ((cpu_id >= NR_CPU_NAMES) || (arch_id >= NR_ARCH_NAMES)) { + printk ("Unknown CPU configuration (ID %02x, arch %02x), " + "continuing anyway...\n", + cpu_id, arch_id); + return; + } + + printk ("CPU: %s [%02x] revision %d (%s revision %d)\n", + cpu_names[cpu_id], cpu_id, cpu_rev, + arch_names[arch_id], arch_rev); + printk ("CPU: MMU configuration: %s\n", mmu_types[mmu_type]); + printk ("CPU: features:"); + if (config0 & (1 << 6)) + printk(" fpu"); + if (config0 & (1 << 5)) + printk(" java"); + if (config0 & (1 << 4)) + printk(" perfctr"); + if (config0 & (1 << 3)) + printk(" ocd"); + printk("\n"); +} + +#ifdef CONFIG_PROC_FS +static int c_show(struct seq_file *m, void *v) +{ + unsigned int icache_size, dcache_size; + unsigned int cpu = smp_processor_id(); + + icache_size = boot_cpu_data.icache.ways * + boot_cpu_data.icache.sets * + boot_cpu_data.icache.linesz; + dcache_size = boot_cpu_data.dcache.ways * + boot_cpu_data.dcache.sets * + boot_cpu_data.dcache.linesz; + + seq_printf(m, "processor\t: %d\n", cpu); + + if (boot_cpu_data.arch_type < NR_ARCH_NAMES) + seq_printf(m, "cpu family\t: %s revision %d\n", + arch_names[boot_cpu_data.arch_type], + boot_cpu_data.arch_revision); + if (boot_cpu_data.cpu_type < NR_CPU_NAMES) + seq_printf(m, "cpu type\t: %s revision %d\n", + cpu_names[boot_cpu_data.cpu_type], + boot_cpu_data.cpu_revision); + + seq_printf(m, "i-cache\t\t: %dK (%u ways x %u sets x %u)\n", + icache_size >> 10, + boot_cpu_data.icache.ways, + boot_cpu_data.icache.sets, + boot_cpu_data.icache.linesz); + seq_printf(m, "d-cache\t\t: %dK (%u ways x %u sets x %u)\n", + dcache_size >> 10, + boot_cpu_data.dcache.ways, + boot_cpu_data.dcache.sets, + boot_cpu_data.dcache.linesz); + seq_printf(m, "bogomips\t: %lu.%02lu\n", + boot_cpu_data.loops_per_jiffy / (500000/HZ), + (boot_cpu_data.loops_per_jiffy / (5000/HZ)) % 100); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t 
*pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ + +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; +#endif /* CONFIG_PROC_FS */ diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S new file mode 100644 index 000000000000..eeb66792bc37 --- /dev/null +++ b/arch/avr32/kernel/entry-avr32b.S @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This file contains the low-level entry-points into the kernel, that is, + * exception handlers, debug trap handlers, interrupt handlers and the + * system call handler. + */ +#include <linux/errno.h> + +#include <asm/asm.h> +#include <asm/hardirq.h> +#include <asm/irq.h> +#include <asm/ocd.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> +#include <asm/sysreg.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +#ifdef CONFIG_PREEMPT +# define preempt_stop mask_interrupts +#else +# define preempt_stop +# define fault_resume_kernel fault_restore_all +#endif + +#define __MASK(x) ((1 << (x)) - 1) +#define IRQ_MASK ((__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | \ + (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)) + + .section .ex.text,"ax",@progbits + .align 2 +exception_vectors: + bral handle_critical + .align 2 + bral handle_critical + .align 2 + bral do_bus_error_write + .align 2 + bral do_bus_error_read + .align 2 + bral do_nmi_ll + .align 2 + bral handle_address_fault + .align 2 + bral handle_protection_fault + .align 2 + bral handle_debug + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral do_fpe_ll + .align 2 + bral do_illegal_opcode_ll + .align 2 + bral handle_address_fault + .align 2 + bral handle_address_fault + .align 2 + bral handle_protection_fault + .align 2 + bral handle_protection_fault + .align 2 + bral do_dtlb_modified + + /* + * r0 : PGD/PT/PTE + * r1 : Offending address + * r2 : Scratch register + * r3 : Cause (5, 12 or 13) + */ +#define tlbmiss_save pushm r0-r3 +#define tlbmiss_restore popm r0-r3 + + .section .tlbx.ex.text,"ax",@progbits + .global itlb_miss +itlb_miss: + tlbmiss_save + rjmp tlb_miss_common + + .section .tlbr.ex.text,"ax",@progbits +dtlb_miss_read: + tlbmiss_save + rjmp tlb_miss_common + + .section .tlbw.ex.text,"ax",@progbits +dtlb_miss_write: + tlbmiss_save + + .global tlb_miss_common +tlb_miss_common: + mfsr r0, SYSREG_PTBR + mfsr r1, SYSREG_TLBEAR + + /* Is it the vmalloc space? */ + bld r1, 31 + brcs handle_vmalloc_miss + + /* First level lookup */ +pgtbl_lookup: + lsr r2, r1, PGDIR_SHIFT + ld.w r0, r0[r2 << 2] + bld r0, _PAGE_BIT_PRESENT + brcc page_table_not_present + + /* TODO: Check access rights on page table if necessary */ + + /* Translate to virtual address in P1. 
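 * (P1 is the unmapped, cacheable segment at 0x80000000 that mirrors
 * physical memory: masking off the low flag bits yields the physical
 * address of the page table, and setting bit 31 moves it into P1 so
 * it can be dereferenced without causing another TLB miss.)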
*/ + andl r0, 0xf000 + sbr r0, 31 + + /* Second level lookup */ + lsl r1, (32 - PGDIR_SHIFT) + lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT + add r2, r0, r1 << 2 + ld.w r1, r2[0] + bld r1, _PAGE_BIT_PRESENT + brcc page_not_present + + /* Mark the page as accessed */ + sbr r1, _PAGE_BIT_ACCESSED + st.w r2[0], r1 + + /* Drop software flags */ + andl r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff + mtsr SYSREG_TLBELO, r1 + + /* Figure out which entry we want to replace */ + mfsr r0, SYSREG_TLBARLO + clz r2, r0 + brcc 1f + mov r1, -1 /* All entries have been accessed, */ + mtsr SYSREG_TLBARLO, r1 /* so reset TLBAR */ + mov r2, 0 /* and start at 0 */ +1: mfsr r1, SYSREG_MMUCR + lsl r2, 14 + andl r1, 0x3fff, COH + or r1, r2 + mtsr SYSREG_MMUCR, r1 + + tlbw + + tlbmiss_restore + rete + +handle_vmalloc_miss: + /* Simply do the lookup in init's page table */ + mov r0, lo(swapper_pg_dir) + orh r0, hi(swapper_pg_dir) + rjmp pgtbl_lookup + + + /* --- System Call --- */ + + .section .scall.text,"ax",@progbits +system_call: + pushm r12 /* r12_orig */ + stmts --sp, r0-lr + zero_fp + mfsr r0, SYSREG_RAR_SUP + mfsr r1, SYSREG_RSR_SUP + stm --sp, r0-r1 + + /* check for syscall tracing */ + get_thread_info r0 + ld.w r1, r0[TI_flags] + bld r1, TIF_SYSCALL_TRACE + brcs syscall_trace_enter + +syscall_trace_cont: + cp.w r8, NR_syscalls + brhs syscall_badsys + + lddpc lr, syscall_table_addr + ld.w lr, lr[r8 << 2] + mov r8, r5 /* 5th argument (6th is pushed by stub) */ + icall lr + + .global syscall_return +syscall_return: + get_thread_info r0 + mask_interrupts /* make sure we don't miss an interrupt + setting need_resched or sigpending + between sampling and the rets */ + + /* Store the return value so that the correct value is loaded below */ + stdsp sp[REG_R12], r12 + + ld.w r1, r0[TI_flags] + andl r1, _TIF_ALLWORK_MASK, COH + brne syscall_exit_work + +syscall_exit_cont: + popm r8-r9 + mtsr SYSREG_RAR_SUP, r8 + mtsr SYSREG_RSR_SUP, r9 + ldmts sp++, r0-lr + sub sp, -4 /* r12_orig */ + rets + + .align 2 +syscall_table_addr: + .long sys_call_table + +syscall_badsys: + mov r12, -ENOSYS + rjmp syscall_return + + .global ret_from_fork +ret_from_fork: + rcall schedule_tail + + /* check for syscall tracing */ + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_ALLWORK_MASK, COH + brne syscall_exit_work + rjmp syscall_exit_cont + +syscall_trace_enter: + pushm r8-r12 + rcall syscall_trace + popm r8-r12 + rjmp syscall_trace_cont + +syscall_exit_work: + bld r1, TIF_SYSCALL_TRACE + brcc 1f + unmask_interrupts + rcall syscall_trace + mask_interrupts + ld.w r1, r0[TI_flags] + +1: bld r1, TIF_NEED_RESCHED + brcc 2f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 3f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +3: bld r1, TIF_BREAKPOINT + brcc syscall_exit_cont + mfsr r3, SYSREG_TLBEHI + lddsp r2, sp[REG_PC] + andl r3, 0xff, COH + lsl r3, 1 + sbr r3, 30 + sbr r3, 0 + mtdr DBGREG_BWA2A, r2 + mtdr DBGREG_BWC2A, r3 + rjmp syscall_exit_cont + + + /* The slow path of the TLB miss handler */ +page_table_not_present: +page_not_present: + tlbmiss_restore + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_page_fault + rjmp ret_from_exception + + /* This function expects to find offending PC in SYSREG_RAR_EX */ +save_full_context_ex: + mfsr r8, SYSREG_RSR_EX + mov r12, r8 + andh r8, 
(MODE_MASK >> 16), COH + mfsr r11, SYSREG_RAR_EX + brne 2f + +1: pushm r11, r12 /* PC and SR */ + unmask_exceptions + ret r12 + +2: sub r10, sp, -(FRAME_SIZE_FULL - REG_LR) + stdsp sp[4], r10 /* replace saved SP */ + rjmp 1b + + /* Low-level exception handlers */ +handle_critical: + pushm r12 + pushm r0-r12 + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_critical_exception + + /* We should never get here... */ +bad_return: + sub r12, pc, (. - 1f) + bral panic + .align 2 +1: .asciz "Return from critical exception!" + + .align 1 +do_bus_error_write: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mov r11, 1 + rjmp 1f + +do_bus_error_read: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mov r11, 0 +1: mfsr r12, SYSREG_BEAR + mov r10, sp + rcall do_bus_error + rjmp ret_from_exception + + .align 1 +do_nmi_ll: + sub sp, 4 + stmts --sp, r0-lr + /* FIXME: Make sure RAR_NMI and RSR_NMI are pushed instead of *_EX */ + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_nmi + rjmp bad_return + +handle_address_fault: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_address_exception + rjmp ret_from_exception + +handle_protection_fault: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_page_fault + rjmp ret_from_exception + + .align 1 +do_illegal_opcode_ll: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + mfsr r12, SYSREG_ECR + mov r11, sp + rcall do_illegal_opcode + rjmp ret_from_exception + +do_dtlb_modified: + pushm r0-r3 + mfsr r1, SYSREG_TLBEAR + mfsr r0, SYSREG_PTBR + lsr r2, r1, PGDIR_SHIFT + ld.w r0, r0[r2 << 2] + lsl r1, (32 - PGDIR_SHIFT) + lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT + + /* Translate to virtual address in P1 */ + andl r0, 0xf000 + sbr r0, 31 + add r2, r0, r1 << 2 + ld.w r3, r2[0] + sbr r3, _PAGE_BIT_DIRTY + mov r0, r3 + st.w r2[0], r3 + + /* The page table is up-to-date. Update the TLB entry as well */ + andl r0, lo(_PAGE_FLAGS_HARDWARE_MASK) + mtsr SYSREG_TLBELO, r0 + + /* MMUCR[DRP] is updated automatically, so let's go... */ + tlbw + + popm r0-r3 + rete + +do_fpe_ll: + sub sp, 4 + stmts --sp, r0-lr + rcall save_full_context_ex + unmask_interrupts + mov r12, 26 + mov r11, sp + rcall do_fpe + rjmp ret_from_exception + +ret_from_exception: + mask_interrupts + lddsp r4, sp[REG_SR] + andh r4, (MODE_MASK >> 16), COH + brne fault_resume_kernel + + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_WORK_MASK, COH + brne fault_exit_work + +fault_resume_user: + popm r8-r9 + mask_exceptions + mtsr SYSREG_RAR_EX, r8 + mtsr SYSREG_RSR_EX, r9 + ldmts sp++, r0-lr + sub sp, -4 + rete + +fault_resume_kernel: +#ifdef CONFIG_PREEMPT + get_thread_info r0 + ld.w r2, r0[TI_preempt_count] + cp.w r2, 0 + brne 1f + ld.w r1, r0[TI_flags] + bld r1, TIF_NEED_RESCHED + brcc 1f + lddsp r4, sp[REG_SR] + bld r4, SYSREG_GM_OFFSET + brcs 1f + rcall preempt_schedule_irq +1: +#endif + + popm r8-r9 + mask_exceptions + mfsr r1, SYSREG_SR + mtsr SYSREG_RAR_EX, r8 + mtsr SYSREG_RSR_EX, r9 + popm lr + sub sp, -4 /* ignore SP */ + popm r0-r12 + sub sp, -4 /* ignore r12_orig */ + rete + +irq_exit_work: + /* Switch to exception mode so that we can share the same code. 
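+	 * Assuming the usual AVR32 M[2:0] mode encodings (INT0..INT3
+	 * are 010..101, Exception is 110), clearing M0 while setting
+	 * M1 and M2 below rewrites any interrupt-level mode value into
+	 * the exception mode encoding, whichever level we came from.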
*/ + mfsr r8, SYSREG_SR + cbr r8, SYSREG_M0_OFFSET + orh r8, hi(SYSREG_BIT(M1) | SYSREG_BIT(M2)) + mtsr SYSREG_SR, r8 + sub pc, -2 + get_thread_info r0 + ld.w r1, r0[TI_flags] + +fault_exit_work: + bld r1, TIF_NEED_RESCHED + brcc 1f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp fault_exit_work + +1: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 2f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp fault_exit_work + +2: bld r1, TIF_BREAKPOINT + brcc fault_resume_user + mfsr r3, SYSREG_TLBEHI + lddsp r2, sp[REG_PC] + andl r3, 0xff, COH + lsl r3, 1 + sbr r3, 30 + sbr r3, 0 + mtdr DBGREG_BWA2A, r2 + mtdr DBGREG_BWC2A, r3 + rjmp fault_resume_user + + /* If we get a debug trap from privileged context we end up here */ +handle_debug_priv: + /* Fix up LR and SP in regs. r11 contains the mode we came from */ + mfsr r8, SYSREG_SR + mov r9, r8 + andh r8, hi(~MODE_MASK) + or r8, r11 + mtsr SYSREG_SR, r8 + sub pc, -2 + stdsp sp[REG_LR], lr + mtsr SYSREG_SR, r9 + sub pc, -2 + sub r10, sp, -FRAME_SIZE_FULL + stdsp sp[REG_SP], r10 + mov r12, sp + rcall do_debug_priv + + /* Now, put everything back */ + ssrf SR_EM_BIT + popm r10, r11 + mtsr SYSREG_RAR_DBG, r10 + mtsr SYSREG_RSR_DBG, r11 + mfsr r8, SYSREG_SR + mov r9, r8 + andh r8, hi(~MODE_MASK) + andh r11, hi(MODE_MASK) + or r8, r11 + mtsr SYSREG_SR, r8 + sub pc, -2 + popm lr + mtsr SYSREG_SR, r9 + sub pc, -2 + sub sp, -4 /* skip SP */ + popm r0-r12 + sub sp, -4 + retd + + /* + * At this point, everything is masked, that is, interrupts, + * exceptions and debugging traps. We might get called from + * interrupt or exception context in some rare cases, but this + * will be taken care of by do_debug(), so we're not going to + * do a 100% correct context save here. 
+ */ +handle_debug: + sub sp, 4 /* r12_orig */ + stmts --sp, r0-lr + mfsr r10, SYSREG_RAR_DBG + mfsr r11, SYSREG_RSR_DBG + unmask_exceptions + pushm r10,r11 + andh r11, (MODE_MASK >> 16), COH + brne handle_debug_priv + + mov r12, sp + rcall do_debug + + lddsp r10, sp[REG_SR] + andh r10, (MODE_MASK >> 16), COH + breq debug_resume_user + +debug_restore_all: + popm r10,r11 + mask_exceptions + mtsr SYSREG_RSR_DBG, r11 + mtsr SYSREG_RAR_DBG, r10 + ldmts sp++, r0-lr + sub sp, -4 + retd + +debug_resume_user: + get_thread_info r0 + mask_interrupts + + ld.w r1, r0[TI_flags] + andl r1, _TIF_DBGWORK_MASK, COH + breq debug_restore_all + +1: bld r1, TIF_NEED_RESCHED + brcc 2f + unmask_interrupts + rcall schedule + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK + tst r1, r2 + breq 3f + unmask_interrupts + mov r12, sp + mov r11, r0 + rcall do_notify_resume + mask_interrupts + ld.w r1, r0[TI_flags] + rjmp 1b + +3: bld r1, TIF_SINGLE_STEP + brcc debug_restore_all + mfdr r2, DBGREG_DC + sbr r2, DC_SS_BIT + mtdr DBGREG_DC, r2 + rjmp debug_restore_all + + .set rsr_int0, SYSREG_RSR_INT0 + .set rsr_int1, SYSREG_RSR_INT1 + .set rsr_int2, SYSREG_RSR_INT2 + .set rsr_int3, SYSREG_RSR_INT3 + .set rar_int0, SYSREG_RAR_INT0 + .set rar_int1, SYSREG_RAR_INT1 + .set rar_int2, SYSREG_RAR_INT2 + .set rar_int3, SYSREG_RAR_INT3 + + .macro IRQ_LEVEL level + .type irq_level\level, @function +irq_level\level: + sub sp, 4 /* r12_orig */ + stmts --sp,r0-lr + mfsr r8, rar_int\level + mfsr r9, rsr_int\level + pushm r8-r9 + + mov r11, sp + mov r12, \level + + rcall do_IRQ + + lddsp r4, sp[REG_SR] + andh r4, (MODE_MASK >> 16), COH +#ifdef CONFIG_PREEMPT + brne 2f +#else + brne 1f +#endif + + get_thread_info r0 + ld.w r1, r0[TI_flags] + andl r1, _TIF_WORK_MASK, COH + brne irq_exit_work + +1: popm r8-r9 + mtsr rar_int\level, r8 + mtsr rsr_int\level, r9 + ldmts sp++,r0-lr + sub sp, -4 /* ignore r12_orig */ + rete + +#ifdef CONFIG_PREEMPT +2: + get_thread_info r0 + ld.w r2, r0[TI_preempt_count] + cp.w r2, 0 + brne 1b + ld.w r1, r0[TI_flags] + bld r1, TIF_NEED_RESCHED + brcc 1b + lddsp r4, sp[REG_SR] + bld r4, SYSREG_GM_OFFSET + brcs 1b + rcall preempt_schedule_irq + rjmp 1b +#endif + .endm + + .section .irq.text,"ax",@progbits + + .global irq_level0 + .global irq_level1 + .global irq_level2 + .global irq_level3 + IRQ_LEVEL 0 + IRQ_LEVEL 1 + IRQ_LEVEL 2 + IRQ_LEVEL 3 diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S new file mode 100644 index 000000000000..773b7ad87be9 --- /dev/null +++ b/arch/avr32/kernel/head.S @@ -0,0 +1,45 @@ +/* + * Non-board-specific low-level startup code + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> + +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/sysreg.h> + + .section .init.text,"ax" + .global kernel_entry +kernel_entry: + /* Initialize status register */ + lddpc r0, init_sr + mtsr SYSREG_SR, r0 + + /* Set initial stack pointer */ + lddpc sp, stack_addr + sub sp, -THREAD_SIZE + +#ifdef CONFIG_FRAME_POINTER + /* Mark last stack frame */ + mov lr, 0 + mov r7, 0 +#endif + + /* Set up the PIO, SDRAM controller, early printk, etc. 
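+	 * board_early_init() must be supplied by the board support
+	 * code; it is not part of the files in this patch. A
+	 * hypothetical no-op implementation, assuming the boot loader
+	 * has already configured SDRAM, would be just:
+	 *
+	 *	void board_early_init(void) { }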
*/ + rcall board_early_init + + /* Start the show */ + lddpc pc, kernel_start_addr + + .align 2 +init_sr: + .long 0x007f0000 /* Supervisor mode, everything masked */ +stack_addr: + .long init_thread_union +kernel_start_addr: + .long start_kernel diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c new file mode 100644 index 000000000000..effcacf9d1a2 --- /dev/null +++ b/arch/avr32/kernel/init_task.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/init_task.h> +#include <linux/mqueue.h> + +#include <asm/pgtable.h> + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. Must be aligned on an 8192-byte boundary. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c new file mode 100644 index 000000000000..856f3548e664 --- /dev/null +++ b/arch/avr32/kernel/irq.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on arch/i386/kernel/irq.c + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * IRQ's are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/sysdev.h> + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. 
+ */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ %u\n", irq); +} + +#ifdef CONFIG_PROC_FS +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *)v, cpu; + struct irqaction *action; + unsigned long flags; + + if (i == 0) { + seq_puts(p, " "); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%d ", cpu); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto unlock; + + seq_printf(p, "%3d: ", i); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); + unlock: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } + + return 0; +} +#endif diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c new file mode 100644 index 000000000000..6caf9e8d8080 --- /dev/null +++ b/arch/avr32/kernel/kprobes.c @@ -0,0 +1,270 @@ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * Based on arch/ppc64/kernel/kprobes.c + * Copyright (C) IBM Corporation, 2002, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kprobes.h> +#include <linux/ptrace.h> + +#include <asm/cacheflush.h> +#include <asm/kdebug.h> +#include <asm/ocd.h> + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +static unsigned long kprobe_status; +static struct pt_regs jprobe_saved_regs; + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + int ret = 0; + + if ((unsigned long)p->addr & 0x01) { + printk("Attempt to register kprobe at an unaligned address\n"); + ret = -EINVAL; + } + + /* XXX: Might be a good idea to check if p->addr is a valid + * kernel address as well... */ + + if (!ret) { + pr_debug("copy kprobe at %p\n", p->addr); + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + } + + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + pr_debug("arming kprobe at %p\n", p->addr); + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + pr_debug("disarming kprobe at %p\n", p->addr); + *p->addr = p->opcode; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long dc; + + pr_debug("preparing to singlestep over %p (PC=%08lx)\n", + p->addr, regs->pc); + + BUG_ON(!(sysreg_read(SR) & SYSREG_BIT(SR_D))); + + dc = __mfdr(DBGREG_DC); + dc |= DC_SS; + __mtdr(DBGREG_DC, dc); + + /* + * We must run the instruction from its original location + * since it may actually reference PC. + * + * TODO: Do the instruction replacement directly in icache. 
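+	 * (PC-relative instructions such as lddpc or rcall are an
+	 * example: single-stepping them from the out-of-line copy in
+	 * p->ainsn.insn would make them compute the wrong target.)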
+	 */
+	*p->addr = p->opcode;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long dc;
+
+	pr_debug("resuming execution at PC=%08lx\n", regs->pc);
+
+	dc = __mfdr(DBGREG_DC);
+	dc &= ~DC_SS;
+	__mtdr(DBGREG_DC, dc);
+
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long)p->addr,
+			   (unsigned long)p->addr + sizeof(kprobe_opcode_t));
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+	__get_cpu_var(current_kprobe) = p;
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	void *addr = (void *)regs->pc;
+	int ret = 0;
+
+	pr_debug("kprobe_handler: kprobe_running=%d\n",
+		 kprobe_running());
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+
+	/* Check that we're not recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kprobe_status == KPROBE_HIT_SS) {
+				printk("FIXME: kprobe hit while single-stepping!\n");
+				goto no_kprobe;
+			}
+
+			printk("FIXME: kprobe hit while handling another kprobe\n");
+			goto no_kprobe;
+		} else {
+			p = kprobe_running();
+			if (p->break_handler && p->break_handler(p, regs))
+				goto ss_probe;
+		}
+		/* If it's not ours, can't be a delete race (we hold the lock). */
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p)
+		goto no_kprobe;
+
+	kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p);
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	return ret;
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+
+	pr_debug("post_kprobe_handler, cur=%p\n", cur);
+
+	if (!cur)
+		return 0;
+
+	if (cur->post_handler) {
+		kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs);
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+
+	pr_debug("kprobe_fault_handler: trapnr=%d\n", trapnr);
+
+	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+		return 1;
+
+	if (kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(cur, regs);
+		preempt_enable_no_resched();
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
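+ * It is invoked from the die notifier chain and dispatches
+ * breakpoint, single-step and fault events to the handlers above;
+ * events it does not recognise are passed on as NOTIFY_DONE.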
+ */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + pr_debug("kprobe_exceptions_notify: val=%lu, data=%p\n", + val, data); + + switch (val) { + case DIE_BREAKPOINT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_FAULT: + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + + memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* + * TODO: We should probably save some of the stack here as + * well, since gcc may pass arguments on the stack for certain + * functions (lots of arguments, large aggregates, varargs) + */ + + /* setup return addr to the jprobe handler routine */ + regs->pc = (unsigned long)jp->entry; + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile("breakpoint" ::: "memory"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + /* + * FIXME - we should ideally be validating that we got here 'cos + * of the "trap" in jprobe_return() above, before restoring the + * saved regs... + */ + memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); + return 1; +} + +int __init arch_init_kprobes(void) +{ + printk("KPROBES: Enabling monitor mode (MM|DBE)...\n"); + __mtdr(DBGREG_DC, DC_MM | DC_DBE); + + /* TODO: Register kretprobe trampoline */ + return 0; +} diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c new file mode 100644 index 000000000000..dfc32f2817b6 --- /dev/null +++ b/arch/avr32/kernel/module.c @@ -0,0 +1,324 @@ +/* + * AVR32-specific kernel module loader + * + * Copyright (C) 2005-2006 Atmel Corporation + * + * GOT initialization parts are based on the s390 version + * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, + * IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/moduleloader.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc(size); +} + +void module_free(struct module *mod, void *module_region) +{ + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; + + vfree(module_region); + /* FIXME: if module_region == mod->init_region, trim exception + * table entries. 
*/ +} + +static inline int check_rela(Elf32_Rela *rela, struct module *module, + char *strings, Elf32_Sym *symbols) +{ + struct mod_arch_syminfo *info; + + info = module->arch.syminfo + ELF32_R_SYM(rela->r_info); + switch (ELF32_R_TYPE(rela->r_info)) { + case R_AVR32_GOT32: + case R_AVR32_GOT16: + case R_AVR32_GOT8: + case R_AVR32_GOT21S: + case R_AVR32_GOT18SW: /* mcall */ + case R_AVR32_GOT16S: /* ld.w */ + if (rela->r_addend != 0) { + printk(KERN_ERR + "GOT relocation against %s at offset %u with addend\n", + strings + symbols[ELF32_R_SYM(rela->r_info)].st_name, + rela->r_offset); + return -ENOEXEC; + } + if (info->got_offset == -1UL) { + info->got_offset = module->arch.got_size; + module->arch.got_size += sizeof(void *); + } + pr_debug("GOT[%3lu] %s\n", info->got_offset, + strings + symbols[ELF32_R_SYM(rela->r_info)].st_name); + break; + } + + return 0; +} + +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *module) +{ + Elf32_Shdr *symtab; + Elf32_Sym *symbols; + Elf32_Rela *rela; + char *strings; + int nrela, i, j; + int ret; + + /* Find the symbol table */ + symtab = NULL; + for (i = 0; i < hdr->e_shnum; i++) + switch (sechdrs[i].sh_type) { + case SHT_SYMTAB: + symtab = &sechdrs[i]; + break; + } + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", module->name); + return -ENOEXEC; + } + + /* Allocate room for one syminfo structure per symbol. */ + module->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym); + module->arch.syminfo = vmalloc(module->arch.nsyms + * sizeof(struct mod_arch_syminfo)); + if (!module->arch.syminfo) + return -ENOMEM; + + symbols = (void *)hdr + symtab->sh_offset; + strings = (void *)hdr + sechdrs[symtab->sh_link].sh_offset; + for (i = 0; i < module->arch.nsyms; i++) { + if (symbols[i].st_shndx == SHN_UNDEF && + strcmp(strings + symbols[i].st_name, + "_GLOBAL_OFFSET_TABLE_") == 0) + /* "Define" it as absolute. */ + symbols[i].st_shndx = SHN_ABS; + module->arch.syminfo[i].got_offset = -1UL; + module->arch.syminfo[i].got_initialized = 0; + } + + /* Allocate GOT entries for symbols that need it. */ + module->arch.got_size = 0; + for (i = 0; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_RELA) + continue; + nrela = sechdrs[i].sh_size / sizeof(Elf32_Rela); + rela = (void *)hdr + sechdrs[i].sh_offset; + for (j = 0; j < nrela; j++) { + ret = check_rela(rela + j, module, + strings, symbols); + if (ret) + goto out_free_syminfo; + } + } + + /* + * Increase core size to make room for GOT and set start + * offset for GOT. 
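+ *
+ * As a worked example (sizes made up): with core_size 0x1396 and
+ * two GOT entries, core_size is first rounded up to 0x1398,
+ * got_offset becomes 0x1398, and core_size ends up at 0x13a0, so
+ * the GOT occupies the last 8 bytes of the module core.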
+ */ + module->core_size = ALIGN(module->core_size, 4); + module->arch.got_offset = module->core_size; + module->core_size += module->arch.got_size; + + return 0; + +out_free_syminfo: + vfree(module->arch.syminfo); + module->arch.syminfo = NULL; + + return ret; +} + +static inline int reloc_overflow(struct module *module, const char *reloc_name, + Elf32_Addr relocation) +{ + printk(KERN_ERR "module %s: Value %lx does not fit relocation %s\n", + module->name, (unsigned long)relocation, reloc_name); + return -ENOEXEC; +} + +#define get_u16(loc) (*((uint16_t *)loc)) +#define put_u16(loc, val) (*((uint16_t *)loc) = (val)) + +int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relindex, + struct module *module) +{ + Elf32_Shdr *symsec = sechdrs + symindex; + Elf32_Shdr *relsec = sechdrs + relindex; + Elf32_Shdr *dstsec = sechdrs + relsec->sh_info; + Elf32_Rela *rel = (void *)relsec->sh_addr; + unsigned int i; + int ret = 0; + + for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) { + struct mod_arch_syminfo *info; + Elf32_Sym *sym; + Elf32_Addr relocation; + uint32_t *location; + uint32_t value; + + location = (void *)dstsec->sh_addr + rel->r_offset; + sym = (Elf32_Sym *)symsec->sh_addr + ELF32_R_SYM(rel->r_info); + relocation = sym->st_value + rel->r_addend; + + info = module->arch.syminfo + ELF32_R_SYM(rel->r_info); + + /* Initialize GOT entry if necessary */ + switch (ELF32_R_TYPE(rel->r_info)) { + case R_AVR32_GOT32: + case R_AVR32_GOT16: + case R_AVR32_GOT8: + case R_AVR32_GOT21S: + case R_AVR32_GOT18SW: + case R_AVR32_GOT16S: + if (!info->got_initialized) { + Elf32_Addr *gotent; + + gotent = (module->module_core + + module->arch.got_offset + + info->got_offset); + *gotent = relocation; + info->got_initialized = 1; + } + + relocation = info->got_offset; + break; + } + + switch (ELF32_R_TYPE(rel->r_info)) { + case R_AVR32_32: + case R_AVR32_32_CPENT: + *location = relocation; + break; + case R_AVR32_22H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xffe00001) != 0 + && (relocation & 0xffc00001) != 0xffc00000) + return reloc_overflow(module, + "R_AVR32_22H_PCREL", + relocation); + relocation >>= 1; + + value = *location; + value = ((value & 0xe1ef0000) + | (relocation & 0xffff) + | ((relocation & 0x10000) << 4) + | ((relocation & 0x1e0000) << 8)); + *location = value; + break; + case R_AVR32_11H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xfffffc01) != 0 + && (relocation & 0xfffff801) != 0xfffff800) + return reloc_overflow(module, + "R_AVR32_11H_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf00c) + | ((relocation & 0x1fe) << 3) + | ((relocation & 0x600) >> 9)); + put_u16(location, value); + break; + case R_AVR32_9H_PCREL: + relocation -= (Elf32_Addr)location; + if ((relocation & 0xffffff01) != 0 + && (relocation & 0xfffffe01) != 0xfffffe00) + return reloc_overflow(module, + "R_AVR32_9H_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf00f) + | ((relocation & 0x1fe) << 3)); + put_u16(location, value); + break; + case R_AVR32_9UW_PCREL: + relocation -= ((Elf32_Addr)location) & 0xfffffffc; + if ((relocation & 0xfffffc03) != 0) + return reloc_overflow(module, + "R_AVR32_9UW_PCREL", + relocation); + value = get_u16(location); + value = ((value & 0xf80f) + | ((relocation & 0x1fc) << 2)); + put_u16(location, value); + break; + case R_AVR32_GOTPC: + /* + * R6 = PC - (PC - GOT) + * + * At this point, relocation contains the + * value of PC. 
Just subtract the value of + * GOT, and we're done. + */ + pr_debug("GOTPC: PC=0x%lx, got_offset=0x%lx, core=0x%p\n", + relocation, module->arch.got_offset, + module->module_core); + relocation -= ((unsigned long)module->module_core + + module->arch.got_offset); + *location = relocation; + break; + case R_AVR32_GOT18SW: + if ((relocation & 0xfffe0003) != 0 + && (relocation & 0xfffc0003) != 0xffff0000) + return reloc_overflow(module, "R_AVR32_GOT18SW", + relocation); + relocation >>= 2; + /* fall through */ + case R_AVR32_GOT16S: + if ((relocation & 0xffff8000) != 0 + && (relocation & 0xffff0000) != 0xffff0000) + return reloc_overflow(module, "R_AVR32_GOT16S", + relocation); + pr_debug("GOT reloc @ 0x%lx -> %lu\n", + rel->r_offset, relocation); + value = *location; + value = ((value & 0xffff0000) + | (relocation & 0xffff)); + *location = value; + break; + + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", + module->name, ELF32_R_TYPE(rel->r_info)); + return -ENOEXEC; + } + } + + return ret; +} + +int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relindex, + struct module *module) +{ + printk(KERN_ERR "module %s: REL relocations are not supported\n", + module->name); + return -ENOEXEC; +} + +int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *module) +{ + vfree(module->arch.syminfo); + module->arch.syminfo = NULL; + + return 0; +} + +void module_arch_cleanup(struct module *module) +{ + +} diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c new file mode 100644 index 000000000000..317dc50945f2 --- /dev/null +++ b/arch/avr32/kernel/process.c @@ -0,0 +1,276 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/fs.h> +#include <linux/ptrace.h> +#include <linux/reboot.h> +#include <linux/unistd.h> + +#include <asm/sysreg.h> +#include <asm/ocd.h> + +void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +void cpu_idle(void) +{ + /* endless idle loop with no priority at all */ + while (1) { + /* TODO: Enter sleep mode */ + while (!need_resched()) + cpu_relax(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } +} + +void machine_halt(void) +{ +} + +void machine_power_off(void) +{ +} + +void machine_restart(char *cmd) +{ + __mtdr(DBGREG_DC, DC_DBE); + __mtdr(DBGREG_DC, DC_RES); + while (1) ; +} + +/* + * PC is actually discarded when returning from a system call -- the + * return address must be stored in LR. This function will make sure + * LR points to do_exit before starting the thread. + * + * Also, when returning from fork(), r12 is 0, so we must copy the + * argument as well. + * + * r0 : The argument to the main thread function + * r1 : The address of do_exit + * r2 : The address of the main thread function + */ +asmlinkage extern void kernel_thread_helper(void); +__asm__(" .type kernel_thread_helper, @function\n" + "kernel_thread_helper:\n" + " mov r12, r0\n" + " mov lr, r2\n" + " mov pc, r1\n" + " .size kernel_thread_helper, . 
- kernel_thread_helper"); + +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.r0 = (unsigned long)arg; + regs.r1 = (unsigned long)fn; + regs.r2 = (unsigned long)do_exit; + regs.lr = (unsigned long)kernel_thread_helper; + regs.pc = (unsigned long)kernel_thread_helper; + regs.sr = MODE_SUPERVISOR; + + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +/* + * Free current thread data structures etc + */ +void exit_thread(void) +{ + /* nothing to do */ +} + +void flush_thread(void) +{ + /* nothing to do */ +} + +void release_thread(struct task_struct *dead_task) +{ + /* do nothing */ +} + +static const char *cpu_modes[] = { + "Application", "Supervisor", "Interrupt level 0", "Interrupt level 1", + "Interrupt level 2", "Interrupt level 3", "Exception", "NMI" +}; + +void show_regs(struct pt_regs *regs) +{ + unsigned long sp = regs->sp; + unsigned long lr = regs->lr; + unsigned long mode = (regs->sr & MODE_MASK) >> MODE_SHIFT; + + if (!user_mode(regs)) + sp = (unsigned long)regs + FRAME_SIZE_FULL; + + print_symbol("PC is at %s\n", instruction_pointer(regs)); + print_symbol("LR is at %s\n", lr); + printk("pc : [<%08lx>] lr : [<%08lx>] %s\n" + "sp : %08lx r12: %08lx r11: %08lx\n", + instruction_pointer(regs), + lr, print_tainted(), sp, regs->r12, regs->r11); + printk("r10: %08lx r9 : %08lx r8 : %08lx\n", + regs->r10, regs->r9, regs->r8); + printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n", + regs->r7, regs->r6, regs->r5, regs->r4); + printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n", + regs->r3, regs->r2, regs->r1, regs->r0); + printk("Flags: %c%c%c%c%c\n", + regs->sr & SR_Q ? 'Q' : 'q', + regs->sr & SR_V ? 'V' : 'v', + regs->sr & SR_N ? 'N' : 'n', + regs->sr & SR_Z ? 'Z' : 'z', + regs->sr & SR_C ? 'C' : 'c'); + printk("Mode bits: %c%c%c%c%c%c%c%c%c\n", + regs->sr & SR_H ? 'H' : 'h', + regs->sr & SR_R ? 'R' : 'r', + regs->sr & SR_J ? 'J' : 'j', + regs->sr & SR_EM ? 'E' : 'e', + regs->sr & SR_I3M ? '3' : '.', + regs->sr & SR_I2M ? '2' : '.', + regs->sr & SR_I1M ? '1' : '.', + regs->sr & SR_I0M ? '0' : '.', + regs->sr & SR_GM ? 'G' : 'g'); + printk("CPU Mode: %s\n", cpu_modes[mode]); + + show_trace(NULL, (unsigned long *)sp, regs); +} +EXPORT_SYMBOL(show_regs); + +/* Fill in the fpu structure for a core dump. 
This is easy -- we don't have any */ +int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) +{ + /* Not valid */ + return 0; +} + +asmlinkage void ret_from_fork(void); + +int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs; + + childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1; + *childregs = *regs; + + if (user_mode(regs)) + childregs->sp = usp; + else + childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE; + + childregs->r12 = 0; /* Set return value for child */ + + p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM; + p->thread.cpu_context.ksp = (unsigned long)childregs; + p->thread.cpu_context.pc = (unsigned long)ret_from_fork; + + return 0; +} + +/* r12-r8 are dummy parameters to force the compiler to use the stack */ +asmlinkage int sys_fork(struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); +} + +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, + unsigned long parent_tidptr, + unsigned long child_tidptr, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, + (int __user *)parent_tidptr, + (int __user *)child_tidptr); +} + +asmlinkage int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, + 0, NULL, NULL); +} + +asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv, + char __user *__user *uenvp, struct pt_regs *regs) +{ + int error; + char *filename; + + filename = getname(ufilename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + + error = do_execve(filename, uargv, uenvp, regs); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + +out: + return error; +} + + +/* + * This function is supposed to answer the question "who called + * schedule()?" + */ +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long pc; + unsigned long stack_page; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + stack_page = (unsigned long)p->thread_info; + BUG_ON(!stack_page); + + /* + * The stored value of PC is either the address right after + * the call to __switch_to() or ret_from_fork. + */ + pc = thread_saved_pc(p); + if (in_sched_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER + unsigned long fp = p->thread.cpu_context.r7; + BUG_ON(fp < stack_page || fp > (THREAD_SIZE + stack_page)); + pc = *(unsigned long *)fp; +#else + /* + * We depend on the frame size of schedule here, which + * is actually quite ugly. It might be possible to + * determine the frame size automatically at build + * time by doing this: + * - compile sched.c + * - disassemble the resulting sched.o + * - look for 'sub sp,??' shortly after '<schedule>:' + */ + unsigned long sp = p->thread.cpu_context.ksp + 16; + BUG_ON(sp < stack_page || sp > (THREAD_SIZE + stack_page)); + pc = *(unsigned long *)sp; +#endif + } + + return pc; +} diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c new file mode 100644 index 000000000000..3c89e59029ab --- /dev/null +++ b/arch/avr32/kernel/ptrace.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#undef DEBUG +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/ptrace.h> +#include <linux/errno.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/unistd.h> +#include <linux/notifier.h> + +#include <asm/traps.h> +#include <asm/uaccess.h> +#include <asm/ocd.h> +#include <asm/mmu_context.h> +#include <asm/kdebug.h> + +static struct pt_regs *get_user_regs(struct task_struct *tsk) +{ + return (struct pt_regs *)((unsigned long) tsk->thread_info + + THREAD_SIZE - sizeof(struct pt_regs)); +} + +static void ptrace_single_step(struct task_struct *tsk) +{ + pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n", + tsk->pid, tsk->thread.cpu_context.sr); + if (!(tsk->thread.cpu_context.sr & SR_D)) { + /* + * Set a breakpoint at the current pc to force the + * process into debug mode. The syscall/exception + * exit code will set a breakpoint at the return + * address when this flag is set. + */ + pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n"); + set_tsk_thread_flag(tsk, TIF_BREAKPOINT); + } + + /* The monitor code will do the actual step for us */ + set_tsk_thread_flag(tsk, TIF_SINGLE_STEP); +} + +/* + * Called by kernel/ptrace.c when detaching + * + * Make sure any single step bits, etc. are not set + */ +void ptrace_disable(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SINGLE_STEP); +} + +/* + * Handle hitting a breakpoint + */ +static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) +{ + siginfo_t info; + + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = (void __user *)instruction_pointer(regs); + + pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n", + tsk->pid, info.si_addr); + force_sig_info(SIGTRAP, &info, tsk); +} + +/* + * Read the word at offset "offset" into the task's "struct user". We + * actually access the pt_regs struct stored on the kernel stack. + */ +static int ptrace_read_user(struct task_struct *tsk, unsigned long offset, + unsigned long __user *data) +{ + unsigned long *regs; + unsigned long value; + + pr_debug("ptrace_read_user(%p, %#lx, %p)\n", + tsk, offset, data); + + if (offset & 3 || offset >= sizeof(struct user)) { + printk("ptrace_read_user: invalid offset 0x%08lx\n", offset); + return -EIO; + } + + regs = (unsigned long *)get_user_regs(tsk); + + value = 0; + if (offset < sizeof(struct pt_regs)) + value = regs[offset / sizeof(regs[0])]; + + return put_user(value, data); +} + +/* + * Write the word "value" to offset "offset" into the task's "struct + * user". We actually access the pt_regs struct stored on the kernel + * stack. + */ +static int ptrace_write_user(struct task_struct *tsk, unsigned long offset, + unsigned long value) +{ + unsigned long *regs; + + if (offset & 3 || offset >= sizeof(struct user)) { + printk("ptrace_write_user: invalid offset 0x%08lx\n", offset); + return -EIO; + } + + if (offset >= sizeof(struct pt_regs)) + return 0; + + regs = (unsigned long *)get_user_regs(tsk); + regs[offset / sizeof(regs[0])] = value; + + return 0; +} + +static int ptrace_getregs(struct task_struct *tsk, void __user *uregs) +{ + struct pt_regs *regs = get_user_regs(tsk); + + return copy_to_user(uregs, regs, sizeof(*regs)) ? 
-EFAULT : 0; +} + +static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs) +{ + struct pt_regs newregs; + int ret; + + ret = -EFAULT; + if (copy_from_user(&newregs, uregs, sizeof(newregs)) == 0) { + struct pt_regs *regs = get_user_regs(tsk); + + ret = -EINVAL; + if (valid_user_regs(&newregs)) { + *regs = newregs; + ret = 0; + } + } + + return ret; +} + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + unsigned long tmp; + int ret; + + pr_debug("arch_ptrace(%ld, %ld, %#lx, %#lx)\n", + request, child->pid, addr, data); + + pr_debug("ptrace: Enabling monitor mode...\n"); + __mtdr(DBGREG_DC, __mfdr(DBGREG_DC) | DC_MM | DC_DBE); + + switch (request) { + /* Read the word at location addr in the child process */ + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + ret = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + if (ret == sizeof(tmp)) + ret = put_user(tmp, (unsigned long __user *)data); + else + ret = -EIO; + break; + + case PTRACE_PEEKUSR: + ret = ptrace_read_user(child, addr, + (unsigned long __user *)data); + break; + + /* Write the word in data at location addr */ + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + ret = access_process_vm(child, addr, &data, sizeof(data), 1); + if (ret == sizeof(data)) + ret = 0; + else + ret = -EIO; + break; + + case PTRACE_POKEUSR: + ret = ptrace_write_user(child, addr, data); + break; + + /* continue and stop at next (return from) syscall */ + case PTRACE_SYSCALL: + /* restart after signal */ + case PTRACE_CONT: + ret = -EIO; + if (!valid_signal(data)) + break; + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + /* XXX: Are we sure no breakpoints are active here? */ + wake_up_process(child); + ret = 0; + break; + + /* + * Make the child exit. Best I can do is send it a + * SIGKILL. Perhaps it should be put in the status that it + * wants to exit. + */ + case PTRACE_KILL: + ret = 0; + if (child->exit_state == EXIT_ZOMBIE) + break; + child->exit_code = SIGKILL; + wake_up_process(child); + break; + + /* + * execute single instruction. + */ + case PTRACE_SINGLESTEP: + ret = -EIO; + if (!valid_signal(data)) + break; + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + ptrace_single_step(child); + child->exit_code = data; + wake_up_process(child); + ret = 0; + break; + + /* Detach a process that was attached */ + case PTRACE_DETACH: + ret = ptrace_detach(child, data); + break; + + case PTRACE_GETREGS: + ret = ptrace_getregs(child, (void __user *)data); + break; + + case PTRACE_SETREGS: + ret = ptrace_setregs(child, (const void __user *)data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n", ret, __mfdr(DBGREG_DC)); + return ret; +} + +asmlinkage void syscall_trace(void) +{ + pr_debug("syscall_trace called\n"); + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + + pr_debug("syscall_trace: notifying parent\n"); + /* The 0x80 provides a way for the tracing parent to + * distinguish between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it + * will do for normal use. strace only continues with a + * signal if the stopping signal is not SIGTRAP. 
-brl
+	 */
+	if (current->exit_code) {
+		pr_debug("syscall_trace: sending signal %d to PID %u\n",
+			 current->exit_code, current->pid);
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+asmlinkage void do_debug_priv(struct pt_regs *regs)
+{
+	unsigned long dc, ds;
+	unsigned long die_val;
+
+	ds = __mfdr(DBGREG_DS);
+
+	pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+	if (ds & DS_SSS)
+		die_val = DIE_SSTEP;
+	else
+		die_val = DIE_BREAKPOINT;
+
+	if (notify_die(die_val, regs, 0, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	if (likely(ds & DS_SSS)) {
+		extern void itlb_miss(void);
+		extern void tlb_miss_common(void);
+		struct thread_info *ti;
+
+		dc = __mfdr(DBGREG_DC);
+		dc &= ~DC_SS;
+		__mtdr(DBGREG_DC, dc);
+
+		ti = current_thread_info();
+		ti->flags |= _TIF_BREAKPOINT;
+
+		/* The TLB miss handlers don't check thread flags */
+		if ((regs->pc >= (unsigned long)&itlb_miss)
+		    && (regs->pc <= (unsigned long)&tlb_miss_common)) {
+			__mtdr(DBGREG_BWA2A, sysreg_read(RAR_EX));
+			__mtdr(DBGREG_BWC2A, 0x40000001 | (get_asid() << 1));
+		}
+
+		/*
+		 * If we're running in supervisor mode, the breakpoint
+		 * will take us where we want directly, no need to
+		 * single step.
+		 */
+		if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR)
+			ti->flags |= TIF_SINGLE_STEP;
+	} else {
+		panic("Unable to handle debug trap at pc = %08lx\n",
+		      regs->pc);
+	}
+}
+
+/*
+ * Handle breakpoints, single steps and other debuggy things. To keep
+ * things simple initially, we run with interrupts and exceptions
+ * disabled all the time.
+ */
+asmlinkage void do_debug(struct pt_regs *regs)
+{
+	unsigned long dc, ds;
+
+	ds = __mfdr(DBGREG_DS);
+	pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+
+	if (test_thread_flag(TIF_BREAKPOINT)) {
+		pr_debug("TIF_BREAKPOINT set\n");
+		/* We're taking care of it */
+		clear_thread_flag(TIF_BREAKPOINT);
+		__mtdr(DBGREG_BWC2A, 0);
+	}
+
+	if (test_thread_flag(TIF_SINGLE_STEP)) {
+		pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds);
+		if (ds & DS_SSS) {
+			dc = __mfdr(DBGREG_DC);
+			dc &= ~DC_SS;
+			__mtdr(DBGREG_DC, dc);
+
+			clear_thread_flag(TIF_SINGLE_STEP);
+			ptrace_break(current, regs);
+		}
+	} else {
+		/* regular breakpoint */
+		ptrace_break(current, regs);
+	}
+}
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
new file mode 100644
index 000000000000..1e2705a05016
--- /dev/null
+++ b/arch/avr32/kernel/semaphore.c
@@ -0,0 +1,148 @@
+/*
+ * AVR32 semaphore implementation.
+ *
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/i386/kernel/semaphore.c
+ * Copyright (C) 1999 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <asm/semaphore.h>
+#include <asm/atomic.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
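+ *
+ * A worked example of the counting (numbers illustrative): with
+ * count = 1, task A calls down() and decrements count to 0 -- no
+ * contention, A owns the semaphore. Task B then calls down(),
+ * count goes to -1 and B enters __down(): sleepers becomes 1,
+ * atomic_add_return(0, &count) still reads -1, so B sleeps.
+ * When A calls up(), count goes back to 0 and B is woken; this
+ * time atomic_add_return(0, &count) reads 0, sleepers is reset
+ * and B owns the semaphore.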
+ * + * Note that these functions are only called when there is + * contention on the lock, and as such all this is the + * "non-critical" part of the whole semaphore business. The + * critical part is the inline stuff in <asm/semaphore.h> + * where we want to avoid any extra jumps and calls. + */ + +/* + * Logic: + * - only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - when we go from a non-negative count to a negative do we + * (a) synchronize with the "sleeper" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void __up(struct semaphore *sem) +{ + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__up); + +void __sched __down(struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + + tsk->state = TASK_UNINTERRUPTIBLE; + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); + + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock in + * the wait_queue_head. + */ + if (atomic_add_return(sleepers - 1, &sem->count) >= 0) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); + tsk->state = TASK_UNINTERRUPTIBLE; + } + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + tsk->state = TASK_RUNNING; +} +EXPORT_SYMBOL(__down); + +int __sched __down_interruptible(struct semaphore *sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + unsigned long flags; + + tsk->state = TASK_INTERRUPTIBLE; + spin_lock_irqsave(&sem->wait.lock, flags); + add_wait_queue_exclusive_locked(&sem->wait, &wait); + + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into the trylock + * failure case - we won't be sleeping, and we can't + * get the lock as it has contention. Just correct the + * count and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock in + * the wait_queue_head. + */ + if (atomic_add_return(sleepers - 1, &sem->count) >= 0) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + schedule(); + + spin_lock_irqsave(&sem->wait.lock, flags); + tsk->state = TASK_INTERRUPTIBLE; + } + remove_wait_queue_locked(&sem->wait, &wait); + wake_up_locked(&sem->wait); + spin_unlock_irqrestore(&sem->wait.lock, flags); + + tsk->state = TASK_RUNNING; + return retval; +} +EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c new file mode 100644 index 000000000000..5d68f3c6990b --- /dev/null +++ b/arch/avr32/kernel/setup.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/console.h> +#include <linux/ioport.h> +#include <linux/bootmem.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/root_dev.h> +#include <linux/cpu.h> + +#include <asm/sections.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/sysreg.h> + +#include <asm/arch/board.h> +#include <asm/arch/init.h> + +extern int root_mountflags; + +/* + * Bootloader-provided information about physical memory + */ +struct tag_mem_range *mem_phys; +struct tag_mem_range *mem_reserved; +struct tag_mem_range *mem_ramdisk; + +/* + * Initialize loops_per_jiffy as 5000000 (500MIPS). + * Better make it too large than too small... + */ +struct avr32_cpuinfo boot_cpu_data = { + .loops_per_jiffy = 5000000 +}; +EXPORT_SYMBOL(boot_cpu_data); + +static char command_line[COMMAND_LINE_SIZE]; + +/* + * Should be more than enough, but if you have a _really_ complex + * setup, you might need to increase the size of this... + */ +static struct tag_mem_range __initdata mem_range_cache[32]; +static unsigned mem_range_next_free; + +/* + * Standard memory resources + */ +static struct resource mem_res[] = { + { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM + }, + { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM, + }, +}; + +#define kernel_code mem_res[0] +#define kernel_data mem_res[1] + +/* + * Early framebuffer allocation. Works as follows: + * - If fbmem_size is zero, nothing will be allocated or reserved. + * - If fbmem_start is zero when setup_bootmem() is called, + * fbmem_size bytes will be allocated from the bootmem allocator. + * - If fbmem_start is nonzero, an area of size fbmem_size will be + * reserved at the physical address fbmem_start if necessary. If + * the area isn't in a memory region known to the kernel, it will + * be left alone. + * + * Board-specific code may use these variables to set up platform data + * for the framebuffer driver if fbmem_size is nonzero. + */ +static unsigned long __initdata fbmem_start; +static unsigned long __initdata fbmem_size; + +/* + * "fbmem=xxx[kKmM]" allocates the specified amount of boot memory for + * use as framebuffer. + * + * "fbmem=xxx[kKmM]@yyy[kKmM]" defines a memory region of size xxx and + * starting at yyy to be reserved for use as framebuffer. + * + * The kernel won't verify that the memory region starting at yyy + * actually contains usable RAM. 
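+ *
+ * Example usage (values are illustrative):
+ *
+ *	fbmem=600k		allocate 600 KiB from bootmem
+ *	fbmem=600k@0x11000000	reserve 600 KiB starting at
+ *				physical address 0x11000000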
+ */ +static int __init early_parse_fbmem(char *p) +{ + fbmem_size = memparse(p, &p); + if (*p == '@') + fbmem_start = memparse(p, &p); + return 0; +} +early_param("fbmem", early_parse_fbmem); + +static inline void __init resource_init(void) +{ + struct tag_mem_range *region; + + kernel_code.start = __pa(init_mm.start_code); + kernel_code.end = __pa(init_mm.end_code - 1); + kernel_data.start = __pa(init_mm.end_code); + kernel_data.end = __pa(init_mm.brk - 1); + + for (region = mem_phys; region; region = region->next) { + struct resource *res; + unsigned long phys_start, phys_end; + + if (region->size == 0) + continue; + + phys_start = region->addr; + phys_end = phys_start + region->size - 1; + + res = alloc_bootmem_low(sizeof(*res)); + res->name = "System RAM"; + res->start = phys_start; + res->end = phys_end; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + request_resource (&iomem_resource, res); + + if (kernel_code.start >= res->start && + kernel_code.end <= res->end) + request_resource (res, &kernel_code); + if (kernel_data.start >= res->start && + kernel_data.end <= res->end) + request_resource (res, &kernel_data); + } +} + +static int __init parse_tag_core(struct tag *tag) +{ + if (tag->hdr.size > 2) { + if ((tag->u.core.flags & 1) == 0) + root_mountflags &= ~MS_RDONLY; + ROOT_DEV = new_decode_dev(tag->u.core.rootdev); + } + return 0; +} +__tagtable(ATAG_CORE, parse_tag_core); + +static int __init parse_tag_mem_range(struct tag *tag, + struct tag_mem_range **root) +{ + struct tag_mem_range *cur, **pprev; + struct tag_mem_range *new; + + /* + * Ignore zero-sized entries. If we're running standalone, the + * SDRAM code may emit such entries if something goes + * wrong... + */ + if (tag->u.mem_range.size == 0) + return 0; + + /* + * Copy the data so the bootmem init code doesn't need to care + * about it. + */ + if (mem_range_next_free >= + (sizeof(mem_range_cache) / sizeof(mem_range_cache[0]))) + panic("Physical memory map too complex!\n"); + + new = &mem_range_cache[mem_range_next_free++]; + *new = tag->u.mem_range; + + pprev = root; + cur = *root; + while (cur) { + pprev = &cur->next; + cur = cur->next; + } + + *pprev = new; + new->next = NULL; + + return 0; +} + +static int __init parse_tag_mem(struct tag *tag) +{ + return parse_tag_mem_range(tag, &mem_phys); +} +__tagtable(ATAG_MEM, parse_tag_mem); + +static int __init parse_tag_cmdline(struct tag *tag) +{ + strlcpy(saved_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); + return 0; +} +__tagtable(ATAG_CMDLINE, parse_tag_cmdline); + +static int __init parse_tag_rdimg(struct tag *tag) +{ + return parse_tag_mem_range(tag, &mem_ramdisk); +} +__tagtable(ATAG_RDIMG, parse_tag_rdimg); + +static int __init parse_tag_clock(struct tag *tag) +{ + /* + * We'll figure out the clocks by peeking at the system + * manager regs directly. 
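+ * Registering a parser for the tag anyway keeps parse_tags()
+ * from warning about an unrecognised tag.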
+ */ + return 0; +} +__tagtable(ATAG_CLOCK, parse_tag_clock); + +static int __init parse_tag_rsvd_mem(struct tag *tag) +{ + return parse_tag_mem_range(tag, &mem_reserved); +} +__tagtable(ATAG_RSVD_MEM, parse_tag_rsvd_mem); + +static int __init parse_tag_ethernet(struct tag *tag) +{ +#if 0 + const struct platform_device *pdev; + + /* + * We really need a bus type that supports "classes"...this + * will do for now (until we must handle other kinds of + * ethernet controllers) + */ + pdev = platform_get_device("macb", tag->u.ethernet.mac_index); + if (pdev && pdev->dev.platform_data) { + struct eth_platform_data *data = pdev->dev.platform_data; + + data->valid = 1; + data->mii_phy_addr = tag->u.ethernet.mii_phy_addr; + memcpy(data->hw_addr, tag->u.ethernet.hw_address, + sizeof(data->hw_addr)); + } +#endif + return 0; +} +__tagtable(ATAG_ETHERNET, parse_tag_ethernet); + +/* + * Scan the tag table for this tag, and call its parse function. The + * tag table is built by the linker from all the __tagtable + * declarations. + */ +static int __init parse_tag(struct tag *tag) +{ + extern struct tagtable __tagtable_begin, __tagtable_end; + struct tagtable *t; + + for (t = &__tagtable_begin; t < &__tagtable_end; t++) + if (tag->hdr.tag == t->tag) { + t->parse(tag); + break; + } + + return t < &__tagtable_end; +} + +/* + * Parse all tags in the list we got from the boot loader + */ +static void __init parse_tags(struct tag *t) +{ + for (; t->hdr.tag != ATAG_NONE; t = tag_next(t)) + if (!parse_tag(t)) + printk(KERN_WARNING + "Ignoring unrecognised tag 0x%08x\n", + t->hdr.tag); +} + +void __init setup_arch (char **cmdline_p) +{ + struct clk *cpu_clk; + + parse_tags(bootloader_tags); + + setup_processor(); + setup_platform(); + + cpu_clk = clk_get(NULL, "cpu"); + if (IS_ERR(cpu_clk)) { + printk(KERN_WARNING "Warning: Unable to get CPU clock\n"); + } else { + unsigned long cpu_hz = clk_get_rate(cpu_clk); + + /* + * Well, duh, but it's probably a good idea to + * increment the use count. + */ + clk_enable(cpu_clk); + + boot_cpu_data.clk = cpu_clk; + boot_cpu_data.loops_per_jiffy = cpu_hz * 4; + printk("CPU: Running at %lu.%03lu MHz\n", + ((cpu_hz + 500) / 1000) / 1000, + ((cpu_hz + 500) / 1000) % 1000); + } + + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + parse_early_param(); + + setup_bootmem(); + + board_setup_fbmem(fbmem_start, fbmem_size); + +#ifdef CONFIG_VT + conswitchp = &dummy_con; +#endif + + paging_init(); + + resource_init(); +} diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c new file mode 100644 index 000000000000..33096651c24f --- /dev/null +++ b/arch/avr32/kernel/signal.c @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/sh/kernel/signal.c + * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/suspend.h> + +#include <asm/uaccess.h> +#include <asm/ucontext.h> + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + struct pt_regs *regs) +{ + return do_sigaltstack(uss, uoss, regs->sp); +} + +struct rt_sigframe +{ + struct siginfo info; + struct ucontext uc; + unsigned long retcode; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +{ + int err = 0; + +#define COPY(x) err |= __get_user(regs->x, &sc->x) + COPY(sr); + COPY(pc); + COPY(lr); + COPY(sp); + COPY(r12); + COPY(r11); + COPY(r10); + COPY(r9); + COPY(r8); + COPY(r7); + COPY(r6); + COPY(r5); + COPY(r4); + COPY(r3); + COPY(r2); + COPY(r1); + COPY(r0); +#undef COPY + + /* + * Don't allow anyone to pretend they're running in supervisor + * mode or something... + */ + err |= !valid_user_regs(regs); + + return err; +} + + +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + sigset_t set; + + frame = (struct rt_sigframe __user *)regs->sp; + pr_debug("SIG return: frame = %p\n", frame); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(&current->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + pr_debug("Context restored: pc = %08lx, lr = %08lx, sp = %08lx\n", + regs->pc, regs->lr, regs->sp); + + return regs->r12; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +static int +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) +{ + int err = 0; + +#define COPY(x) err |= __put_user(regs->x, &sc->x) + COPY(sr); + COPY(pc); + COPY(lr); + COPY(sp); + COPY(r12); + COPY(r11); + COPY(r10); + COPY(r9); + COPY(r8); + COPY(r7); + COPY(r6); + COPY(r5); + COPY(r4); + COPY(r3); + COPY(r2); + COPY(r1); + COPY(r0); +#undef COPY + + return err; +} + +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize) +{ + unsigned long sp = regs->sp; + + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + return (void __user *)((sp - framesize) & ~3); +} + +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + err = -EFAULT; + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) + goto out; + + /* + * Set up the return code: + * + * mov r8, __NR_rt_sigreturn + * scall + * + * Note: This will blow up since we're using a non-executable + * stack. Better use SA_RESTORER. + */
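+ /*
+  * A hedged reading of the magic constant below: the word packs two
+  * 16-bit compact instructions, "mov r8, imm8" (0x3008, with the
+  * immediate field landing at bits 20..27 of the packed word) and
+  * "scall" (0xd733), so for a given syscall number nr:
+  *
+  *	retcode = 0x3008d733 | (nr << 20);
+  *
+  * which is also why nr has to fit in the signed 8-bit immediate,
+  * as the preprocessor check below enforces.
+  */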
+#if __NR_rt_sigreturn > 127 +# error __NR_rt_sigreturn must be < 128 to fit in a short mov +#endif + err = __put_user(0x3008d733 | (__NR_rt_sigreturn << 20), + &frame->retcode); + + err |= copy_siginfo_to_user(&frame->info, info); + + /* Set up the ucontext */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __put_user((void __user *)current->sas_ss_sp, + &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, + &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (err) + goto out; + + regs->r12 = sig; + regs->r11 = (unsigned long) &frame->info; + regs->r10 = (unsigned long) &frame->uc; + regs->sp = (unsigned long) frame; + if (ka->sa.sa_flags & SA_RESTORER) + regs->lr = (unsigned long)ka->sa.sa_restorer; + else { + printk(KERN_NOTICE "[%s:%d] did not set SA_RESTORER\n", + current->comm, current->pid); + regs->lr = (unsigned long) &frame->retcode; + } + + pr_debug("SIG deliver [%s:%d]: sig=%d sp=0x%lx pc=0x%lx->0x%p lr=0x%lx\n", + current->comm, current->pid, sig, regs->sp, + regs->pc, ka->sa.sa_handler, regs->lr); + + regs->pc = (unsigned long) ka->sa.sa_handler; + +out: + return err; +} + +static inline void restart_syscall(struct pt_regs *regs) +{ + if (regs->r12 == -ERESTART_RESTARTBLOCK) + regs->r8 = __NR_restart_syscall; + else + regs->r12 = regs->r12_orig; + regs->pc -= 2; +} + +static inline void +handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *oldset, struct pt_regs *regs, int syscall) +{ + int ret; + + /* + * Set up the stack frame + */ + ret = setup_rt_frame(sig, ka, info, oldset, regs); + + /* + * Check that the resulting registers are sane + */ + ret |= !valid_user_regs(regs); + + /* + * Block the signal if we were unsuccessful. + */ + if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, + &ka->sa.sa_mask); + sigaddset(&current->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + } + + if (ret == 0) + return; + + force_sigsegv(sig, current); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it + * doesn't want to handle. Thus you cannot kill init even with a + * SIGKILL even by mistake. + */ +int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall) +{ + siginfo_t info; + int signr; + struct k_sigaction ka; + + /* + * We want the common case to go fast, which is why we may in + * certain cases get here from kernel mode. Just return + * without doing anything if so. + */
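+ /*
+  * The -ERESTART* handling further down implements the usual
+  * POSIX-visible semantics.  A userspace sketch of the difference
+  * (handler and SIGUSR1 are illustrative choices):
+  *
+  *	struct sigaction sa = { .sa_handler = handler };
+  *	sa.sa_flags = SA_RESTART;
+  *	sigaction(SIGUSR1, &sa, NULL);
+  *
+  * With SA_RESTART set, a read() interrupted by SIGUSR1 is restarted
+  * transparently (the -ERESTARTSYS case); without it, read() returns
+  * -1 with errno set to EINTR.
+  */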
+ if (!user_mode(regs)) + return 0; + + if (try_to_freeze()) { + signr = 0; + if (!signal_pending(current)) + goto no_signal; + } + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = &current->saved_sigmask; + else if (!oldset) + oldset = &current->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); +no_signal: + if (syscall) { + switch (regs->r12) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + if (signr > 0) { + regs->r12 = -EINTR; + break; + } + /* fall through */ + case -ERESTARTSYS: + if (signr > 0 && !(ka.sa.sa_flags & SA_RESTART)) { + regs->r12 = -EINTR; + break; + } + /* fall through */ + case -ERESTARTNOINTR: + restart_syscall(regs); + } + } + + if (signr == 0) { + /* No signal to deliver -- put the saved sigmask back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + } + return 0; + } + + handle_signal(signr, &ka, &info, oldset, regs, syscall); + return 1; +} + +asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) +{ + int syscall = 0; + + if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR) + syscall = 1; + + if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + do_signal(regs, &current->blocked, syscall); +} diff --git a/arch/avr32/kernel/switch_to.S b/arch/avr32/kernel/switch_to.S new file mode 100644 index 000000000000..a48d046723c5 --- /dev/null +++ b/arch/avr32/kernel/switch_to.S @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/sysreg.h> + + .text + .global __switch_to + .type __switch_to, @function + + /* Switch thread context from "prev" to "next", returning "last" + * r12 : prev + * r11 : &prev->thread + 1 + * r10 : &next->thread + */ +__switch_to: + stm --r11, r0,r1,r2,r3,r4,r5,r6,r7,sp,lr + mfsr r9, SYSREG_SR + st.w --r11, r9 + ld.w r8, r10++ + /* + * schedule() may have been called from a mode with a different + * set of registers. Make sure we don't lose anything here. + */ + pushm r10,r12 + mtsr SYSREG_SR, r8 + frs /* flush the return stack */ + sub pc, -2 /* flush the pipeline */ + popm r10,r12 + ldm r10++, r0,r1,r2,r3,r4,r5,r6,r7,sp,pc + .size __switch_to, . - __switch_to diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c new file mode 100644 index 000000000000..6ec5693da448 --- /dev/null +++ b/arch/avr32/kernel/sys_avr32.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */
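+/*
+ * Note on sys_mmap2() below: the last argument goes straight through
+ * to do_mmap_pgoff(), so it is a page offset, not a byte offset.  A
+ * C library wrapper is expected to do the conversion, roughly (a
+ * sketch assuming 4K pages and no libc wrapper; byte_offset is an
+ * illustrative name):
+ *
+ *	void *p = (void *)syscall(__NR_mmap2, NULL, len, PROT_READ,
+ *				  MAP_SHARED, fd, byte_offset >> 12);
+ */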
+#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mm.h> +#include <linux/unistd.h> + +#include <asm/mman.h> +#include <asm/uaccess.h> + +asmlinkage int sys_pipe(unsigned long __user *filedes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(filedes, fd, sizeof(fd))) + error = -EFAULT; + } + return error; +} + +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset) +{ + int error = -EBADF; + struct file *file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return error; + } + + down_write(&current->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, offset); + up_write(&current->mm->mmap_sem); + + if (file) + fput(file); + return error; +} diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S new file mode 100644 index 000000000000..7589a9b426cb --- /dev/null +++ b/arch/avr32/kernel/syscall-stubs.S @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Stubs for syscalls that require access to pt_regs or that take more + * than five parameters. + */ + +#define ARG6 r3 + + .text + .global __sys_rt_sigsuspend + .type __sys_rt_sigsuspend,@function +__sys_rt_sigsuspend: + mov r10, sp + rjmp sys_rt_sigsuspend + + .global __sys_sigaltstack + .type __sys_sigaltstack,@function +__sys_sigaltstack: + mov r10, sp + rjmp sys_sigaltstack + + .global __sys_rt_sigreturn + .type __sys_rt_sigreturn,@function +__sys_rt_sigreturn: + mov r12, sp + rjmp sys_rt_sigreturn + + .global __sys_fork + .type __sys_fork,@function +__sys_fork: + mov r12, sp + rjmp sys_fork + + .global __sys_clone + .type __sys_clone,@function +__sys_clone: + mov r8, sp + rjmp sys_clone + + .global __sys_vfork + .type __sys_vfork,@function +__sys_vfork: + mov r12, sp + rjmp sys_vfork + + .global __sys_execve + .type __sys_execve,@function +__sys_execve: + mov r9, sp + rjmp sys_execve + + .global __sys_mmap2 + .type __sys_mmap2,@function +__sys_mmap2: + pushm lr + st.w --sp, ARG6 + rcall sys_mmap2 + sub sp, -4 + popm pc + + .global __sys_sendto + .type __sys_sendto,@function +__sys_sendto: + pushm lr + st.w --sp, ARG6 + rcall sys_sendto + sub sp, -4 + popm pc + + .global __sys_recvfrom + .type __sys_recvfrom,@function +__sys_recvfrom: + pushm lr + st.w --sp, ARG6 + rcall sys_recvfrom + sub sp, -4 + popm pc + + .global __sys_pselect6 + .type __sys_pselect6,@function +__sys_pselect6: + pushm lr + st.w --sp, ARG6 + rcall sys_pselect6 + sub sp, -4 + popm pc + + .global __sys_splice + .type __sys_splice,@function +__sys_splice: + pushm lr + st.w --sp, ARG6 + rcall sys_splice + sub sp, -4 + popm pc diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S new file mode 100644 index 000000000000..63b206965d05 --- /dev/null +++ b/arch/avr32/kernel/syscall_table.S @@ -0,0 +1,289 @@ +/* + * AVR32 system call table + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
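+ *
+ * As the stubs in syscall-stubs.S above suggest, entries with a
+ * __sys_ prefix are thin assembly wrappers: they either hand the
+ * current stack pointer to the C handler (for calls that need
+ * pt_regs) or push the sixth syscall argument (register r3, per the
+ * ARG6 define) onto the stack before calling the generic handler.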
+ */ + +#if !defined(CONFIG_NFSD) && !defined(CONFIG_NFSD_MODULE) +#define sys_nfsservctl sys_ni_syscall +#endif + +#if !defined(CONFIG_SYSV_IPC) +# define sys_ipc sys_ni_syscall +#endif + + .section .rodata,"a",@progbits + .type sys_call_table,@object + .global sys_call_table + .align 2 +sys_call_table: + .long sys_restart_syscall + .long sys_exit + .long __sys_fork + .long sys_read + .long sys_write + .long sys_open /* 5 */ + .long sys_close + .long sys_umask + .long sys_creat + .long sys_link + .long sys_unlink /* 10 */ + .long __sys_execve + .long sys_chdir + .long sys_time + .long sys_mknod + .long sys_chmod /* 15 */ + .long sys_chown + .long sys_lchown + .long sys_lseek + .long sys_llseek + .long sys_getpid /* 20 */ + .long sys_mount + .long sys_umount + .long sys_setuid + .long sys_getuid + .long sys_stime /* 25 */ + .long sys_ptrace + .long sys_alarm + .long sys_pause + .long sys_utime + .long sys_newstat /* 30 */ + .long sys_newfstat + .long sys_newlstat + .long sys_access + .long sys_chroot + .long sys_sync /* 35 */ + .long sys_fsync + .long sys_kill + .long sys_rename + .long sys_mkdir + .long sys_rmdir /* 40 */ + .long sys_dup + .long sys_pipe + .long sys_times + .long __sys_clone + .long sys_brk /* 45 */ + .long sys_setgid + .long sys_getgid + .long sys_getcwd + .long sys_geteuid + .long sys_getegid /* 50 */ + .long sys_acct + .long sys_setfsuid + .long sys_setfsgid + .long sys_ioctl + .long sys_fcntl /* 55 */ + .long sys_setpgid + .long sys_mremap + .long sys_setresuid + .long sys_getresuid + .long sys_setreuid /* 60 */ + .long sys_setregid + .long sys_ustat + .long sys_dup2 + .long sys_getppid + .long sys_getpgrp /* 65 */ + .long sys_setsid + .long sys_rt_sigaction + .long __sys_rt_sigreturn + .long sys_rt_sigprocmask + .long sys_rt_sigpending /* 70 */ + .long sys_rt_sigtimedwait + .long sys_rt_sigqueueinfo + .long __sys_rt_sigsuspend + .long sys_sethostname + .long sys_setrlimit /* 75 */ + .long sys_getrlimit + .long sys_getrusage + .long sys_gettimeofday + .long sys_settimeofday + .long sys_getgroups /* 80 */ + .long sys_setgroups + .long sys_select + .long sys_symlink + .long sys_fchdir + .long sys_readlink /* 85 */ + .long sys_pread64 + .long sys_pwrite64 + .long sys_swapon + .long sys_reboot + .long __sys_mmap2 /* 90 */ + .long sys_munmap + .long sys_truncate + .long sys_ftruncate + .long sys_fchmod + .long sys_fchown /* 95 */ + .long sys_getpriority + .long sys_setpriority + .long sys_wait4 + .long sys_statfs + .long sys_fstatfs /* 100 */ + .long sys_vhangup + .long __sys_sigaltstack + .long sys_syslog + .long sys_setitimer + .long sys_getitimer /* 105 */ + .long sys_swapoff + .long sys_sysinfo + .long sys_ipc + .long sys_sendfile + .long sys_setdomainname /* 110 */ + .long sys_newuname + .long sys_adjtimex + .long sys_mprotect + .long __sys_vfork + .long sys_init_module /* 115 */ + .long sys_delete_module + .long sys_quotactl + .long sys_getpgid + .long sys_bdflush + .long sys_sysfs /* 120 */ + .long sys_personality + .long sys_ni_syscall /* reserved for afs_syscall */ + .long sys_getdents + .long sys_flock + .long sys_msync /* 125 */ + .long sys_readv + .long sys_writev + .long sys_getsid + .long sys_fdatasync + .long sys_sysctl /* 130 */ + .long sys_mlock + .long sys_munlock + .long sys_mlockall + .long sys_munlockall + .long sys_sched_setparam /* 135 */ + .long sys_sched_getparam + .long sys_sched_setscheduler + .long sys_sched_getscheduler + .long sys_sched_yield + .long sys_sched_get_priority_max /* 140 */ + .long sys_sched_get_priority_min + .long 
sys_sched_rr_get_interval + .long sys_nanosleep + .long sys_poll + .long sys_nfsservctl /* 145 */ + .long sys_setresgid + .long sys_getresgid + .long sys_prctl + .long sys_socket + .long sys_bind /* 150 */ + .long sys_connect + .long sys_listen + .long sys_accept + .long sys_getsockname + .long sys_getpeername /* 155 */ + .long sys_socketpair + .long sys_send + .long sys_recv + .long __sys_sendto + .long __sys_recvfrom /* 160 */ + .long sys_shutdown + .long sys_setsockopt + .long sys_getsockopt + .long sys_sendmsg + .long sys_recvmsg /* 165 */ + .long sys_truncate64 + .long sys_ftruncate64 + .long sys_stat64 + .long sys_lstat64 + .long sys_fstat64 /* 170 */ + .long sys_pivot_root + .long sys_mincore + .long sys_madvise + .long sys_getdents64 + .long sys_fcntl64 /* 175 */ + .long sys_gettid + .long sys_readahead + .long sys_setxattr + .long sys_lsetxattr + .long sys_fsetxattr /* 180 */ + .long sys_getxattr + .long sys_lgetxattr + .long sys_fgetxattr + .long sys_listxattr + .long sys_llistxattr /* 185 */ + .long sys_flistxattr + .long sys_removexattr + .long sys_lremovexattr + .long sys_fremovexattr + .long sys_tkill /* 190 */ + .long sys_sendfile64 + .long sys_futex + .long sys_sched_setaffinity + .long sys_sched_getaffinity + .long sys_capget /* 195 */ + .long sys_capset + .long sys_io_setup + .long sys_io_destroy + .long sys_io_getevents + .long sys_io_submit /* 200 */ + .long sys_io_cancel + .long sys_fadvise64 + .long sys_exit_group + .long sys_lookup_dcookie + .long sys_epoll_create /* 205 */ + .long sys_epoll_ctl + .long sys_epoll_wait + .long sys_remap_file_pages + .long sys_set_tid_address + .long sys_timer_create /* 210 */ + .long sys_timer_settime + .long sys_timer_gettime + .long sys_timer_getoverrun + .long sys_timer_delete + .long sys_clock_settime /* 215 */ + .long sys_clock_gettime + .long sys_clock_getres + .long sys_clock_nanosleep + .long sys_statfs64 + .long sys_fstatfs64 /* 220 */ + .long sys_tgkill + .long sys_ni_syscall /* reserved for TUX */ + .long sys_utimes + .long sys_fadvise64_64 + .long sys_cacheflush /* 225 */ + .long sys_ni_syscall /* sys_vserver */ + .long sys_mq_open + .long sys_mq_unlink + .long sys_mq_timedsend + .long sys_mq_timedreceive /* 230 */ + .long sys_mq_notify + .long sys_mq_getsetattr + .long sys_kexec_load + .long sys_waitid + .long sys_add_key /* 235 */ + .long sys_request_key + .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get + .long sys_inotify_init /* 240 */ + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_openat + .long sys_mkdirat + .long sys_mknodat /* 245 */ + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 + .long sys_unlinkat + .long sys_renameat /* 250 */ + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat + .long sys_fchmodat + .long sys_faccessat /* 255 */ + .long __sys_pselect6 + .long sys_ppoll + .long sys_unshare + .long sys_set_robust_list + .long sys_get_robust_list /* 260 */ + .long __sys_splice + .long sys_sync_file_range + .long sys_tee + .long sys_vmsplice + .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c new file mode 100644 index 000000000000..b0e6b5855a38 --- /dev/null +++ b/arch/avr32/kernel/time.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on MIPS implementation arch/mips/kernel/time.c + * Copyright 2001 MontaVista Software Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/clocksource.h> +#include <linux/time.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/profile.h> +#include <linux/sysdev.h> + +#include <asm/div64.h> +#include <asm/sysreg.h> +#include <asm/io.h> +#include <asm/sections.h> + +static cycle_t read_cycle_count(void) +{ + return (cycle_t)sysreg_read(COUNT); +} + +static struct clocksource clocksource_avr32 = { + .name = "avr32", + .rating = 350, + .read = read_cycle_count, + .mask = CLOCKSOURCE_MASK(32), + .shift = 16, + .is_continuous = 1, +}; + +/* + * By default we provide the null RTC ops + */ +static unsigned long null_rtc_get_time(void) +{ + return mktime(2004, 1, 1, 0, 0, 0); +} + +static int null_rtc_set_time(unsigned long sec) +{ + return 0; +} + +static unsigned long (*rtc_get_time)(void) = null_rtc_get_time; +static int (*rtc_set_time)(unsigned long) = null_rtc_set_time; + +/* how many counter cycles in a jiffy? */ +static unsigned long cycles_per_jiffy; + +/* cycle counter value at the previous timer interrupt */ +static unsigned int timerhi, timerlo; + +/* the count value for the next timer interrupt */ +static unsigned int expirelo; + +static void avr32_timer_ack(void) +{ + unsigned int count; + + /* Ack this timer interrupt and set the next one */ + expirelo += cycles_per_jiffy; + if (expirelo == 0) { + printk(KERN_DEBUG "expirelo == 0\n"); + sysreg_write(COMPARE, expirelo + 1); + } else { + sysreg_write(COMPARE, expirelo); + } + + /* Check to see if we have missed any timer interrupts */ + count = sysreg_read(COUNT); + if ((count - expirelo) < 0x7fffffff) { + expirelo = count + cycles_per_jiffy; + sysreg_write(COMPARE, expirelo); + } +} + +static unsigned int avr32_hpt_read(void) +{ + return sysreg_read(COUNT); +} + +/* + * Taken from MIPS c0_hpt_timer_init(). + * + * Why is it so complicated, and what is "count"? My assumption is + * that `count' specifies the "reference cycle", i.e. the cycle since + * reset that should mean "zero". The reason COUNT is written twice is + * probably to make sure we don't get any timer interrupts while we + * are messing with the counter. + */ +static void avr32_hpt_init(unsigned int count) +{ + count = sysreg_read(COUNT) - count; + expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy; + sysreg_write(COUNT, expirelo - cycles_per_jiffy); + sysreg_write(COMPARE, expirelo); + sysreg_write(COUNT, count); +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + /* There must be better ways...? */ + return (unsigned long long)jiffies * (1000000000 / HZ); +} + +/* + * local_timer_interrupt() does profiling and process accounting on a + * per-CPU basis. + * + * In UP mode, it is invoked from the (global) timer_interrupt. 
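+ *
+ * For reference, the fixed-point arithmetic in time_init() below
+ * works out as follows (hedged example, assuming a 200 MHz COUNT
+ * clock and HZ=100):
+ *
+ *	mult = clocksource_hz2mult(200000000, 16)
+ *	     ~= (NSEC_PER_SEC << 16) / 200000000 = 327680
+ *	cycles_per_jiffy = ((u64)TICK_NSEC << 16) / mult = 2000000
+ *
+ * i.e. exactly one jiffy's worth (200 MHz / 100 Hz) of cycles
+ * between COMPARE interrupts.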
+ */ +static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + if (current->pid) + profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(regs)); +} + +static irqreturn_t +timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned int count; + + /* ack timer interrupt and try to set next interrupt */ + count = avr32_hpt_read(); + avr32_timer_ack(); + + /* Update timerhi/timerlo for intra-jiffy calibration */ + timerhi += count < timerlo; /* Wrap around */ + timerlo = count; + + /* + * Call the generic timer interrupt handler + */ + write_seqlock(&xtime_lock); + do_timer(regs); + write_sequnlock(&xtime_lock); + + /* + * In UP mode, we call local_timer_interrupt() to do profiling + * and process accounting. + * + * SMP is not supported yet. + */ + local_timer_interrupt(irq, dev_id, regs); + + return IRQ_HANDLED; +} + +static struct irqaction timer_irqaction = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED, + .name = "timer", +}; + +void __init time_init(void) +{ + unsigned long mult, shift, count_hz; + int ret; + + xtime.tv_sec = rtc_get_time(); + xtime.tv_nsec = 0; + + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + + printk("Before time_init: count=%08lx, compare=%08lx\n", + (unsigned long)sysreg_read(COUNT), + (unsigned long)sysreg_read(COMPARE)); + + count_hz = clk_get_rate(boot_cpu_data.clk); + shift = clocksource_avr32.shift; + mult = clocksource_hz2mult(count_hz, shift); + clocksource_avr32.mult = mult; + + printk("Cycle counter: mult=%lu, shift=%lu\n", mult, shift); + + { + u64 tmp; + + tmp = TICK_NSEC; + tmp <<= shift; + tmp += mult / 2; + do_div(tmp, mult); + + cycles_per_jiffy = tmp; + } + + /* This sets up the high precision timer for the first interrupt. */ + avr32_hpt_init(avr32_hpt_read()); + + printk("After time_init: count=%08lx, compare=%08lx\n", + (unsigned long)sysreg_read(COUNT), + (unsigned long)sysreg_read(COMPARE)); + + ret = clocksource_register(&clocksource_avr32); + if (ret) + printk(KERN_ERR + "timer: could not register clocksource: %d\n", ret); + + ret = setup_irq(0, &timer_irqaction); + if (ret) + printk("timer: could not request IRQ 0: %d\n", ret); +} + +static struct sysdev_class timer_class = { + set_kset_name("timer"), +}; + +static struct sys_device timer_device = { + .id = 0, + .cls = &timer_class, +}; + +static int __init init_timer_sysfs(void) +{ + int err = sysdev_class_register(&timer_class); + if (!err) + err = sysdev_register(&timer_device); + return err; +} + +device_initcall(init_timer_sysfs); diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c new file mode 100644 index 000000000000..7e803f4d7a12 --- /dev/null +++ b/arch/avr32/kernel/traps.c @@ -0,0 +1,425 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#undef DEBUG +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/notifier.h> + +#include <asm/traps.h> +#include <asm/sysreg.h> +#include <asm/addrspace.h> +#include <asm/ocd.h> +#include <asm/mmu_context.h> +#include <asm/uaccess.h> + +static void dump_mem(const char *str, unsigned long bottom, unsigned long top) +{ + unsigned long p; + int i; + + printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top); + + for (p = bottom & ~31; p < top; ) { + printk("%04lx: ", p & 0xffff); + + for (i = 0; i < 8; i++, p += 4) { + unsigned int val; + + if (p < bottom || p >= top) + printk(" "); + else { + if (__get_user(val, (unsigned int __user *)p)) { + printk("\n"); + goto out; + } + printk("%08x ", val); + } + } + printk("\n"); + } + +out: + return; +} + +#ifdef CONFIG_FRAME_POINTER +static inline void __show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long __user *fp; + unsigned long __user *last_fp = NULL; + + if (regs) { + fp = (unsigned long __user *)regs->r7; + } else if (tsk == current) { + register unsigned long __user *real_fp __asm__("r7"); + fp = real_fp; + } else { + fp = (unsigned long __user *)tsk->thread.cpu_context.r7; + } + + /* + * Walk the stack until (a) we get an exception, (b) the frame + * pointer becomes zero, or (c) the frame pointer gets stuck + * at the same value. + */ + while (fp && fp != last_fp) { + unsigned long lr, new_fp = 0; + + last_fp = fp; + if (__get_user(lr, fp)) + break; + if (fp && __get_user(new_fp, fp + 1)) + break; + fp = (unsigned long __user *)new_fp; + + printk(" [<%08lx>] ", lr); + print_symbol("%s\n", lr); + } + printk("\n"); +} +#else +static inline void __show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + unsigned long addr; + + while (!kstack_end(sp)) { + addr = *sp++; + if (kernel_text_address(addr)) { + printk(" [<%08lx>] ", addr); + print_symbol("%s\n", addr); + } + } +} +#endif + +void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) +{ + if (regs && + (((regs->sr & MODE_MASK) == MODE_EXCEPTION) || + ((regs->sr & MODE_MASK) == MODE_USER))) + return; + + printk ("Call trace:"); +#ifdef CONFIG_KALLSYMS + printk("\n"); +#endif + + __show_trace(tsk, sp, regs); + printk("\n"); +} + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ + unsigned long stack; + + if (!tsk) + tsk = current; + if (sp == 0) { + if (tsk == current) { + register unsigned long *real_sp __asm__("sp"); + sp = real_sp; + } else { + sp = (unsigned long *)tsk->thread.cpu_context.ksp; + } + } + + stack = (unsigned long)sp; + dump_mem("Stack: ", stack, + THREAD_SIZE + (unsigned long)tsk->thread_info); + show_trace(tsk, sp, NULL); +} + +void dump_stack(void) +{ + show_stack(NULL, NULL); +} +EXPORT_SYMBOL(dump_stack); + +ATOMIC_NOTIFIER_HEAD(avr32_die_chain); + +int register_die_notifier(struct notifier_block *nb) +{ + pr_debug("register_die_notifier: %p\n", nb); + + return atomic_notifier_chain_register(&avr32_die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&avr32_die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + +static DEFINE_SPINLOCK(die_lock); + +void __die(const char *str, struct pt_regs *regs, unsigned long err, + const char *file, const char *func, unsigned long line) +{ + struct task_struct *tsk = current; + static int die_counter; + + console_verbose(); + 
spin_lock_irq(&die_lock); + bust_spinlocks(1); + + printk(KERN_ALERT "%s", str); + if (file && func) + printk(" in %s:%s, line %ld", file, func, line); + printk("[#%d]:\n", ++die_counter); + print_modules(); + show_regs(regs); + printk("Process %s (pid: %d, stack limit = 0x%p)\n", + tsk->comm, tsk->pid, tsk->thread_info + 1); + + if (!user_mode(regs) || in_interrupt()) { + dump_mem("Stack: ", regs->sp, + THREAD_SIZE + (unsigned long)tsk->thread_info); + } + + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +void __die_if_kernel(const char *str, struct pt_regs *regs, unsigned long err, + const char *file, const char *func, unsigned long line) +{ + if (!user_mode(regs)) + __die(str, regs, err, file, func, line); +} + +asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs) +{ +#ifdef CONFIG_SUBARCH_AVR32B + /* + * The exception entry always saves RSR_EX. For NMI, this is + * wrong; it should be RSR_NMI + */ + regs->sr = sysreg_read(RSR_NMI); +#endif + + printk("NMI taken!!!!\n"); + die("NMI", regs, ecr); + BUG(); +} + +asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs) +{ + printk("Unable to handle critical exception %lu at pc = %08lx!\n", + ecr, regs->pc); + die("Oops", regs, ecr); + BUG(); +} + +asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs) +{ + siginfo_t info; + + die_if_kernel("Oops: Address exception in kernel mode", regs, ecr); + +#ifdef DEBUG + if (ecr == ECR_ADDR_ALIGN_X) + pr_debug("Instruction Address Exception at pc = %08lx\n", + regs->pc); + else if (ecr == ECR_ADDR_ALIGN_R) + pr_debug("Data Address Exception (Read) at pc = %08lx\n", + regs->pc); + else if (ecr == ECR_ADDR_ALIGN_W) + pr_debug("Data Address Exception (Write) at pc = %08lx\n", + regs->pc); + else + BUG(); + + show_regs(regs); +#endif + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)regs->pc; + + force_sig_info(SIGBUS, &info, current); +} + +/* This way of handling undefined instructions is stolen from ARM */ +static LIST_HEAD(undef_hook); +static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED; + +void register_undef_hook(struct undef_hook *hook) +{ + spin_lock_irq(&undef_lock); + list_add(&hook->node, &undef_hook); + spin_unlock_irq(&undef_lock); +} + +void unregister_undef_hook(struct undef_hook *hook) +{ + spin_lock_irq(&undef_lock); + list_del(&hook->node); + spin_unlock_irq(&undef_lock); +} + +static int do_cop_absent(u32 insn) +{ + int cop_nr; + u32 cpucr; + if ( (insn & 0xfdf00000) == 0xf1900000 ) + /* LDC0 */ + cop_nr = 0; + else + cop_nr = (insn >> 13) & 0x7; + + /* Try enabling the coprocessor */ + cpucr = sysreg_read(CPUCR); + cpucr |= (1 << (24 + cop_nr)); + sysreg_write(CPUCR, cpucr); + + cpucr = sysreg_read(CPUCR); + if ( !(cpucr & (1 << (24 + cop_nr))) ){ + printk("Coprocessor #%i not found!\n", cop_nr); + return -1; + } + + return 0; +} + +#ifdef CONFIG_BUG +#ifdef CONFIG_DEBUG_BUGVERBOSE +static inline void do_bug_verbose(struct pt_regs *regs, u32 insn) +{ + char *file; + u16 line; + char c; + + if (__get_user(line, (u16 __user *)(regs->pc + 2))) + return; + if (__get_user(file, (char * __user *)(regs->pc + 4)) + || (unsigned long)file < PAGE_OFFSET + || __get_user(c, file)) + file = "<bad filename>"; + + printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); +} +#else +static inline void do_bug_verbose(struct pt_regs *regs, u32 insn) +{ + +} +#endif +#endif + +asmlinkage void do_illegal_opcode(unsigned long ecr, struct pt_regs *regs) +{ + u32 insn; 
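+ /*
+  * The undef_hook list walked below lets other code claim specific
+  * instruction patterns before we fall back to SIGILL.  A sketch of
+  * a client (MY_INSN_MASK, MY_INSN_VALUE and my_insn_fn are
+  * illustrative names only):
+  *
+  *	static int my_insn_fn(struct pt_regs *regs, u32 insn)
+  *	{
+  *		return 0;	// 0 means "handled"
+  *	}
+  *	static struct undef_hook my_hook = {
+  *		.insn_mask	= MY_INSN_MASK,
+  *		.insn_val	= MY_INSN_VALUE,
+  *		.fn		= my_insn_fn,
+  *	};
+  *	register_undef_hook(&my_hook);
+  */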
+ struct undef_hook *hook; + siginfo_t info; + void __user *pc; + + if (!user_mode(regs)) + goto kernel_trap; + + local_irq_enable(); + + pc = (void __user *)instruction_pointer(regs); + if (__get_user(insn, (u32 __user *)pc)) + goto invalid_area; + + if (ecr == ECR_COPROC_ABSENT) { + if (do_cop_absent(insn) == 0) + return; + } + + spin_lock_irq(&undef_lock); + list_for_each_entry(hook, &undef_hook, node) { + if ((insn & hook->insn_mask) == hook->insn_val) { + if (hook->fn(regs, insn) == 0) { + spin_unlock_irq(&undef_lock); + return; + } + } + } + spin_unlock_irq(&undef_lock); + +invalid_area: + +#ifdef DEBUG + printk("Illegal instruction at pc = %08lx\n", regs->pc); + if (regs->pc < TASK_SIZE) { + unsigned long ptbr, pgd, pte, *p; + + ptbr = sysreg_read(PTBR); + p = (unsigned long *)ptbr; + pgd = p[regs->pc >> 22]; + p = (unsigned long *)((pgd & 0x1ffff000) | 0x80000000); + pte = p[(regs->pc >> 12) & 0x3ff]; + printk("page table: 0x%08lx -> 0x%08lx -> 0x%08lx\n", ptbr, pgd, pte); + } +#endif + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_addr = (void __user *)regs->pc; + switch (ecr) { + case ECR_ILLEGAL_OPCODE: + case ECR_UNIMPL_INSTRUCTION: + info.si_code = ILL_ILLOPC; + break; + case ECR_PRIVILEGE_VIOLATION: + info.si_code = ILL_PRVOPC; + break; + case ECR_COPROC_ABSENT: + info.si_code = ILL_COPROC; + break; + default: + BUG(); + } + + force_sig_info(SIGILL, &info, current); + return; + +kernel_trap: +#ifdef CONFIG_BUG + if (__kernel_text_address(instruction_pointer(regs))) { + insn = *(u16 *)instruction_pointer(regs); + if (insn == AVR32_BUG_OPCODE) { + do_bug_verbose(regs, insn); + die("Kernel BUG", regs, 0); + return; + } + } +#endif + + die("Oops: Illegal instruction in kernel code", regs, ecr); +} + +asmlinkage void do_fpe(unsigned long ecr, struct pt_regs *regs) +{ + siginfo_t info; + + printk("Floating-point exception at pc = %08lx\n", regs->pc); + + /* We have no FPU... */ + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_addr = (void __user *)regs->pc; + info.si_code = ILL_COPROC; + + force_sig_info(SIGILL, &info, current); +} + + +void __init trap_init(void) +{ + +} diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c new file mode 100644 index 000000000000..cdd627c6b7dc --- /dev/null +++ b/arch/avr32/kernel/vmlinux.lds.c @@ -0,0 +1,139 @@ +/* + * AVR32 linker script for the Linux kernel + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LOAD_OFFSET 0x00000000 +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_FORMAT("elf32-avr32", "elf32-avr32", "elf32-avr32") +OUTPUT_ARCH(avr32) +ENTRY(_start) + +/* Big endian */ +jiffies = jiffies_64 + 4; + +SECTIONS +{ + . = CONFIG_ENTRY_ADDRESS; + .init : AT(ADDR(.init) - LOAD_OFFSET) { + _stext = .; + __init_begin = .; + _sinittext = .; + *(.text.reset) + *(.init.text) + _einittext = .; + . = ALIGN(4); + __tagtable_begin = .; + *(.taglist) + __tagtable_end = .; + *(.init.data) + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + . 
= ALIGN(4); + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + __security_initcall_start = .; + *(.security_initcall.init) + __security_initcall_end = .; + . = ALIGN(32); + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + . = ALIGN(4096); + __init_end = .; + } + + . = ALIGN(8192); + .text : AT(ADDR(.text) - LOAD_OFFSET) { + _evba = .; + _text = .; + *(.ex.text) + . = 0x50; + *(.tlbx.ex.text) + . = 0x60; + *(.tlbr.ex.text) + . = 0x70; + *(.tlbw.ex.text) + . = 0x100; + *(.scall.text) + *(.irq.text) + *(.text) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) + _etext = .; + } = 0xd703d703 + + . = ALIGN(4); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + RODATA + + . = ALIGN(8192); + + .data : AT(ADDR(.data) - LOAD_OFFSET) { + _data = .; + _sdata = .; + /* + * First, the init task union, aligned to an 8K boundary. + */ + *(.data.init_task) + + /* Then, the cacheline aligned data */ + . = ALIGN(32); + *(.data.cacheline_aligned) + + /* And the rest... */ + *(.data.rel*) + *(.data) + CONSTRUCTORS + + _edata = .; + } + + + . = ALIGN(8); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss) + *(COMMON) + . = ALIGN(8); + __bss_stop = .; + _end = .; + } + + /* When something in the kernel is NOT compiled as a module, the module + * cleanup code and data are put into these segments. Both can then be + * thrown away, as cleanup code is never called unless it's a module. + */ + /DISCARD/ : { + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + } + + DWARF_DEBUG +} diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile new file mode 100644 index 000000000000..09ac43e40522 --- /dev/null +++ b/arch/avr32/lib/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for AVR32-specific library files +# + +lib-y := copy_user.o clear_user.o +lib-y += strncpy_from_user.o strnlen_user.o +lib-y += delay.o memset.o memcpy.o findbit.o +lib-y += csum_partial.o csum_partial_copy_generic.o +lib-y += io-readsw.o io-readsl.o io-writesw.o io-writesl.o +lib-y += __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S new file mode 100644 index 000000000000..368b6bca4c76 --- /dev/null +++ b/arch/avr32/lib/__avr32_asr64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
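+ *
+ * These __avr32_*64 helpers take over libgcc's role for 64-bit
+ * shifts: C code like "long long x; x >>= n;" with a non-constant n
+ * is compiled to an out-of-line helper call on this target, and the
+ * kernel is not linked against libgcc, so it carries its own copies
+ * (see lib/Makefile above).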
+ */ + + /* + * DWtype __avr32_asr64(DWtype u, word_type b) + */ + .text + .global __avr32_asr64 + .type __avr32_asr64,@function +__avr32_asr64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsl r8, r11, r9 + lsr r10, r10, r12 + asr r11, r11, r12 + or r10, r8 + retal r12 + +1: neg r9 + asr r10, r11, r9 + asr r11, 31 + retal r12 diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S new file mode 100644 index 000000000000..f1dbc2b36257 --- /dev/null +++ b/arch/avr32/lib/__avr32_lsl64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_lsl64(DWtype u, word_type b) + */ + .text + .global __avr32_lsl64 + .type __avr32_lsl64,@function +__avr32_lsl64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsr r8, r10, r9 + lsl r10, r10, r12 + lsl r11, r11, r12 + or r11, r8 + retal r12 + +1: neg r9 + lsl r11, r10, r9 + mov r10, 0 + retal r12 diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S new file mode 100644 index 000000000000..e65bb7f0d24c --- /dev/null +++ b/arch/avr32/lib/__avr32_lsr64.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * DWtype __avr32_lsr64(DWtype u, word_type b) + */ + .text + .global __avr32_lsr64 + .type __avr32_lsr64,@function +__avr32_lsr64: + cp.w r12, 0 + reteq r12 + + rsub r9, r12, 32 + brle 1f + + lsl r8, r11, r9 + lsr r11, r11, r12 + lsr r10, r10, r12 + or r10, r8 + retal r12 + +1: neg r9 + lsr r10, r11, r9 + mov r11, 0 + retal r12 diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S new file mode 100644 index 000000000000..d8991b6f8eb7 --- /dev/null +++ b/arch/avr32/lib/clear_user.S @@ -0,0 +1,76 @@ +/* + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/asm.h> + + .text + .align 1 + .global clear_user + .type clear_user, "function" +clear_user: + branch_if_kernel r8, __clear_user + ret_if_privileged r8, r12, r11, r11 + + .global __clear_user + .type __clear_user, "function" +__clear_user: + mov r9, r12 + mov r8, 0 + andl r9, 3, COH + brne 5f + +1: sub r11, 4 + brlt 2f + +10: st.w r12++, r8 + sub r11, 4 + brge 10b + +2: sub r11, -4 + reteq 0 + + /* Unaligned count or address */ + bld r11, 1 + brcc 12f +11: st.h r12++, r8 + sub r11, 2 + reteq 0 +12: st.b r12++, r8 + retal 0 + + /* Unaligned address */ +5: cp.w r11, 4 + brlt 2b + + lsl r9, 2 + add pc, pc, r9 +13: st.b r12++, r8 + sub r11, 1 +14: st.b r12++, r8 + sub r11, 1 +15: st.b r12++, r8 + sub r11, 1 + rjmp 1b + + .size clear_user, . - clear_user + .size __clear_user, . 
- __clear_user + + .section .fixup, "ax" + .align 1 +18: sub r11, -4 +19: retal r11 + + .section __ex_table, "a" + .align 2 + .long 10b, 18b + .long 11b, 19b + .long 12b, 19b + .long 13b, 19b + .long 14b, 19b + .long 15b, 19b diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S new file mode 100644 index 000000000000..ea59c04b07de --- /dev/null +++ b/arch/avr32/lib/copy_user.S @@ -0,0 +1,119 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/asm.h> + + /* + * __kernel_size_t + * __copy_user(void *to, const void *from, __kernel_size_t n) + * + * Returns the number of bytes not copied. Might be off by + * max 3 bytes if we get a fault in the main loop. + * + * The address-space checking functions simply fall through to + * the non-checking version. + */ + .text + .align 1 + .global copy_from_user + .type copy_from_user, @function +copy_from_user: + branch_if_kernel r8, __copy_user + ret_if_privileged r8, r11, r10, r10 + rjmp __copy_user + .size copy_from_user, . - copy_from_user + + .global copy_to_user + .type copy_to_user, @function +copy_to_user: + branch_if_kernel r8, __copy_user + ret_if_privileged r8, r12, r10, r10 + .size copy_to_user, . - copy_to_user + + .global __copy_user + .type __copy_user, @function +__copy_user: + mov r9, r11 + andl r9, 3, COH + brne 6f + + /* At this point, from is word-aligned */ +1: sub r10, 4 + brlt 3f + +2: +10: ld.w r8, r11++ +11: st.w r12++, r8 + sub r10, 4 + brge 2b + +3: sub r10, -4 + reteq 0 + + /* + * Handle unaligned count. Need to be careful with r10 here so + * that we return the correct value even if we get a fault + */ +4: +20: ld.ub r8, r11++ +21: st.b r12++, r8 + sub r10, 1 + reteq 0 +22: ld.ub r8, r11++ +23: st.b r12++, r8 + sub r10, 1 + reteq 0 +24: ld.ub r8, r11++ +25: st.b r12++, r8 + retal 0 + + /* Handle unaligned from-pointer */ +6: cp.w r10, 4 + brlt 4b + rsub r9, r9, 4 + +30: ld.ub r8, r11++ +31: st.b r12++, r8 + sub r10, 1 + sub r9, 1 + breq 1b +32: ld.ub r8, r11++ +33: st.b r12++, r8 + sub r10, 1 + sub r9, 1 + breq 1b +34: ld.ub r8, r11++ +35: st.b r12++, r8 + sub r10, 1 + rjmp 1b + .size __copy_user, . - __copy_user + + .section .fixup,"ax" + .align 1 +19: sub r10, -4 +29: retal r10 + + .section __ex_table,"a" + .align 2 + .long 10b, 19b + .long 11b, 19b + .long 20b, 29b + .long 21b, 29b + .long 22b, 29b + .long 23b, 29b + .long 24b, 29b + .long 25b, 29b + .long 30b, 29b + .long 31b, 29b + .long 32b, 29b + .long 33b, 29b + .long 34b, 29b + .long 35b, 29b diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S new file mode 100644 index 000000000000..6a262b528eb7 --- /dev/null +++ b/arch/avr32/lib/csum_partial.S @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
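+ *
+ * The word loop below leans on "acr" (add carry to register) to fold
+ * the carry of each 32-bit add back into the running sum: the
+ * end-around-carry addition that the RFC 1071 Internet checksum
+ * requires.  One step of the loop in C terms:
+ *
+ *	sum += word;
+ *	if (sum < word)		// unsigned wrap-around?
+ *		sum++;		// end-around carry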
+ */ + + /* + * unsigned int csum_partial(const unsigned char *buff, + * int len, unsigned int sum) + */ + .text + .global csum_partial + .type csum_partial,"function" + .align 1 +csum_partial: + /* checksum complete words, aligned or not */ +3: sub r11, 4 + brlt 5f +4: ld.w r9, r12++ + add r10, r9 + acr r10 + sub r11, 4 + brge 4b + + /* return if we had a whole number of words */ +5: sub r11, -4 + reteq r10 + + /* checksum any remaining bytes at the end */ + mov r9, 0 + mov r8, 0 + cp r11, 2 + brlt 6f + ld.uh r9, r12++ + sub r11, 2 + breq 7f + lsl r9, 16 +6: ld.ub r8, r12++ + lsl r8, 8 +7: or r9, r8 + add r10, r9 + acr r10 + + retal r10 + .size csum_partial, . - csum_partial diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S new file mode 100644 index 000000000000..a3a0f9b8929c --- /dev/null +++ b/arch/avr32/lib/csum_partial_copy_generic.S @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <asm/errno.h> +#include <asm/asm.h> + + /* + * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len + * int sum, int *src_err_ptr, + * int *dst_err_ptr) + * + * Copy src to dst while checksumming, otherwise like csum_partial. + */ + + .macro ld_src size, reg, ptr +9999: ld.\size \reg, \ptr + .section __ex_table, "a" + .long 9999b, fixup_ld_src + .previous + .endm + + .macro st_dst size, ptr, reg +9999: st.\size \ptr, \reg + .section __ex_table, "a" + .long 9999b, fixup_st_dst + .previous + .endm + + .text + .global csum_partial_copy_generic + .type csum_partial_copy_generic,"function" + .align 1 +csum_partial_copy_generic: + pushm r4-r7,lr + + /* The inner loop */ +1: sub r10, 4 + brlt 5f +2: ld_src w, r5, r12++ + st_dst w, r11++, r5 + add r9, r5 + acr r9 + sub r10, 4 + brge 2b + + /* return if we had a whole number of words */ +5: sub r10, -4 + brne 7f + +6: mov r12, r9 + popm r4-r7,pc + + /* handle additional bytes at the tail */ +7: mov r5, 0 + mov r4, 32 +8: ld_src ub, r6, r12++ + st_dst b, r11++, r6 + lsl r5, 8 + sub r4, 8 + bfins r5, r6, 0, 8 + sub r10, 1 + brne 8b + + lsl r5, r5, r4 + add r9, r5 + acr r9 + rjmp 6b + + /* Exception handler */ + .section .fixup,"ax" + .align 1 +fixup_ld_src: + mov r9, -EFAULT + cp.w r8, 0 + breq 1f + st.w r8[0], r9 + +1: /* + * TODO: zero the complete destination - computing the rest + * is too much work + */ + + mov r9, 0 + rjmp 6b + +fixup_st_dst: + mov r9, -EFAULT + lddsp r8, sp[20] + cp.w r8, 0 + breq 1f + st.w r8[0], r9 +1: mov r9, 0 + rjmp 6b + + .previous diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c new file mode 100644 index 000000000000..462c8307b680 --- /dev/null +++ b/arch/avr32/lib/delay.c @@ -0,0 +1,55 @@ +/* + * Precise Delay Loops for avr32 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
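+ *
+ * The scaling used below, in hedged summary: __udelay() multiplies
+ * by 0x10c7 (~2^32 / 10^6), and __const_udelay() then computes
+ *
+ *	loops = ((u64)loops_per_jiffy * HZ * xloops) >> 32
+ *	      ~= usecs * loops_per_jiffy * HZ / 10^6
+ *
+ * so the busy loop in __delay() waits the requested number of
+ * microseconds in COUNT cycles.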
+ */ + +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/types.h> + +#include <asm/delay.h> +#include <asm/processor.h> +#include <asm/sysreg.h> + +int read_current_timer(unsigned long *timer_value) +{ + *timer_value = sysreg_read(COUNT); + return 0; +} + +void __delay(unsigned long loops) +{ + unsigned bclock, now; + + bclock = sysreg_read(COUNT); + do { + now = sysreg_read(COUNT); + } while ((now - bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + unsigned long long loops; + + asm("mulu.d %0, %1, %2" + : "=r"(loops) + : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops)); + __delay(loops >> 32); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S new file mode 100644 index 000000000000..2b4856f4bf7c --- /dev/null +++ b/arch/avr32/lib/findbit.S @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> + + .text + /* + * unsigned long find_first_zero_bit(const unsigned long *addr, + * unsigned long size) + */ +ENTRY(find_first_zero_bit) + cp.w r11, 0 + reteq r11 + mov r9, r11 +1: ld.w r8, r12[0] + com r8 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + + /* + * unsigned long find_next_zero_bit(const unsigned long *addr, + * unsigned long size, + * unsigned long offset) + */ +ENTRY(find_next_zero_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ld.w r8, r12[0] + com r8 + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. offset must be word-aligned */ +1: ld.w r8, r12[0] + com r8 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + + /* Common return path for when a bit is actually found. */ +.L_found: + brev r8 + clz r10, r8 + rsub r9, r11 + add r10, r9 + + /* XXX: If we don't have to return exactly "size" when the bit + is not found, we may drop this "min" thing */ + min r12, r11, r10 + retal r12 + + /* + * unsigned long find_first_bit(const unsigned long *addr, + * unsigned long size) + */ +ENTRY(find_first_bit) + cp.w r11, 0 + reteq r11 + mov r9, r11 +1: ld.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + + /* + * unsigned long find_next_bit(const unsigned long *addr, + * unsigned long size, + * unsigned long offset) + */ +ENTRY(find_next_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ld.w r8, r12[0] + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. 
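+ * (As in the routines above, the shared .L_found tail bit-reverses
+ * the word with "brev" so that "clz" effectively counts from the
+ * least significant bit, giving the conventional numbering where
+ * bit 0 of the first word is bit number 0.)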
offset must be word-aligned */ +1: ld.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 + +ENTRY(generic_find_next_zero_le_bit) + lsr r8, r10, 5 + sub r9, r11, r10 + retle r11 + + lsl r8, 2 + add r12, r8 + andl r10, 31, COH + breq 1f + + /* offset is not word-aligned. Handle the first (32 - r10) bits */ + ldswp.w r8, r12[0] + sub r12, -4 + lsr r8, r8, r10 + brne .L_found + + /* r9 = r9 - (32 - r10) = r9 + r10 - 32 */ + add r9, r10 + sub r9, 32 + retle r11 + + /* Main loop. offset must be word-aligned */ +1: ldswp.w r8, r12[0] + cp.w r8, 0 + brne .L_found + sub r12, -4 + sub r9, 32 + brgt 1b + retal r11 diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S new file mode 100644 index 000000000000..b103511ed6c4 --- /dev/null +++ b/arch/avr32/lib/io-readsl.S @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .global __raw_readsl + .type __raw_readsl,@function +__raw_readsl: + cp.w r10, 0 + reteq r12 + + /* + * If r11 isn't properly aligned, we might get an exception on + * some implementations. But there's not much we can do about it. + */ +1: ld.w r8, r12[0] + sub r10, 1 + st.w r11++, r8 + brne 1b + + retal r12 diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S new file mode 100644 index 000000000000..456be9909027 --- /dev/null +++ b/arch/avr32/lib/io-readsw.S @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +.Lnot_word_aligned: + /* + * Bad alignment will cause a hardware exception, which is as + * good as anything. No need for us to check for proper alignment. + */ + ld.uh r8, r12[0] + sub r10, 1 + st.h r11++, r8 + + /* fall through */ + + .global __raw_readsw + .type __raw_readsw,@function +__raw_readsw: + cp.w r10, 0 + reteq r12 + mov r9, 3 + tst r11, r9 + brne .Lnot_word_aligned + + sub r10, 2 + brlt 2f + +1: ldins.h r8:t, r12[0] + ldins.h r8:b, r12[0] + st.w r11++, r8 + sub r10, 2 + brge 1b + +2: sub r10, -2 + reteq r12 + + ld.uh r8, r12[0] + st.h r11++, r8 + retal r12 diff --git a/arch/avr32/lib/io-writesl.S b/arch/avr32/lib/io-writesl.S new file mode 100644 index 000000000000..22138b3a16e5 --- /dev/null +++ b/arch/avr32/lib/io-writesl.S @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .global __raw_writesl + .type __raw_writesl,@function +__raw_writesl: + cp.w r10, 0 + reteq r12 + +1: ld.w r8, r11++ + sub r10, 1 + st.w r12[0], r8 + brne 1b + + retal r12 diff --git a/arch/avr32/lib/io-writesw.S b/arch/avr32/lib/io-writesw.S new file mode 100644 index 000000000000..8c4a53f1c52a --- /dev/null +++ b/arch/avr32/lib/io-writesw.S @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
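+ *
+ * In the word loop below, "bfextu r9, r8, 16, 16" pulls out the
+ * upper halfword so the two halves of each 32-bit load are written
+ * out high-half first, matching the CPU's big-endian memory order.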
+ */ + +.Lnot_word_aligned: + ld.uh r8, r11++ + sub r10, 1 + st.h r12[0], r8 + + .global __raw_writesw + .type __raw_writesw,@function +__raw_writesw: + cp.w r10, 0 + mov r9, 3 + reteq r12 + tst r11, r9 + brne .Lnot_word_aligned + + sub r10, 2 + brlt 2f + +1: ld.w r8, r11++ + bfextu r9, r8, 16, 16 + st.h r12[0], r9 + st.h r12[0], r8 + sub r10, 2 + brge 1b + +2: sub r10, -2 + reteq r12 + + ld.uh r8, r11++ + st.h r12[0], r8 + retal r12 diff --git a/arch/avr32/lib/libgcc.h b/arch/avr32/lib/libgcc.h new file mode 100644 index 000000000000..5a091b5e3618 --- /dev/null +++ b/arch/avr32/lib/libgcc.h @@ -0,0 +1,33 @@ +/* Definitions for various functions 'borrowed' from gcc-3.4.3 */ + +#define BITS_PER_UNIT 8 + +typedef int QItype __attribute__ ((mode (QI))); +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int HItype __attribute__ ((mode (HI))); +typedef unsigned int UHItype __attribute__ ((mode (HI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +typedef float SFtype __attribute__ ((mode (SF))); +typedef float DFtype __attribute__ ((mode (DF))); +typedef int word_type __attribute__ ((mode (__word__))); + +#define W_TYPE_SIZE (4 * BITS_PER_UNIT) +#define Wtype SItype +#define UWtype USItype +#define HWtype SItype +#define UHWtype USItype +#define DWtype DItype +#define UDWtype UDItype +#define __NW(a,b) __ ## a ## si ## b +#define __NDW(a,b) __ ## a ## di ## b + +struct DWstruct {Wtype high, low;}; + +typedef union +{ + struct DWstruct s; + DWtype ll; +} DWunion; diff --git a/arch/avr32/lib/longlong.h b/arch/avr32/lib/longlong.h new file mode 100644 index 000000000000..cd5e369ac437 --- /dev/null +++ b/arch/avr32/lib/longlong.h @@ -0,0 +1,98 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000 + Free Software Foundation, Inc. + + This definition file is free software; you can redistribute it + and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2, or (at your option) any later version. + + This definition file is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Borrowed from gcc-3.4.3 */ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +#define count_leading_zeros(count, x) ((count) = __builtin_clz(x)) + +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0; \ + UWtype __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. 
we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +#define udiv_qrnnd __udiv_qrnnd_c + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) + +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + \ + __ul = __ll_lowpart (u); \ + __uh = __ll_highpart (u); \ + __vl = __ll_lowpart (v); \ + __vh = __ll_highpart (v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ + } while (0) diff --git a/arch/avr32/lib/memcpy.S b/arch/avr32/lib/memcpy.S new file mode 100644 index 000000000000..0abb26142b64 --- /dev/null +++ b/arch/avr32/lib/memcpy.S @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + /* + * void *memcpy(void *to, const void *from, unsigned long n) + * + * This implementation does word-aligned loads in the main loop, + * possibly sacrificing alignment of stores. + * + * Hopefully, in most cases, both "to" and "from" will be + * word-aligned to begin with. + */ + .text + .global memcpy + .type memcpy, @function +memcpy: + mov r9, r11 + andl r9, 3, COH + brne 1f + + /* At this point, "from" is word-aligned */ +2: sub r10, 4 + mov r9, r12 + brlt 4f + +3: ld.w r8, r11++ + sub r10, 4 + st.w r12++, r8 + brge 3b + +4: neg r10 + reteq r9 + + /* Handle unaligned count */ + lsl r10, 2 + add pc, pc, r10 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + retal r9 + + /* Handle unaligned "from" pointer */ +1: sub r10, 4 + brlt 4b + add r10, r9 + lsl r9, 2 + add pc, pc, r9 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + ld.ub r8, r11++ + st.b r12++, r8 + rjmp 2b diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S new file mode 100644 index 000000000000..40da32c0480c --- /dev/null +++ b/arch/avr32/lib/memset.S @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/arm/lib/memset.S + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
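+ *
+ * A note on the fill pattern, added for clarity: the byte in r11
+ * is replicated into all four byte lanes ("or r11, r11, r11 << 8"
+ * followed by "or r11, r11, r11 << 16" below), so the main loop
+ * can store a whole word per iteration.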
+ * + * ASM optimised string functions + */ +#include <asm/asm.h> + + /* + * r12: void *b + * r11: int c + * r10: size_t len + * + * Returns b in r12 + */ + .text + .global memset + .type memset, @function + .align 5 +memset: + mov r9, r12 + mov r8, r12 + or r11, r11, r11 << 8 + andl r9, 3, COH + brne 1f + +2: or r11, r11, r11 << 16 + sub r10, 4 + brlt 5f + + /* Let's do some real work */ +4: st.w r8++, r11 + sub r10, 4 + brge 4b + + /* + * When we get here, we've got less than 4 bytes to set. r10 + * might be negative. + */ +5: sub r10, -4 + reteq r12 + + /* Fastpath ends here, exactly 32 bytes from memset */ + + /* Handle unaligned count or pointer */ + bld r10, 1 + brcc 6f + st.b r8++, r11 + st.b r8++, r11 + bld r10, 0 + retcc r12 +6: st.b r8++, r11 + retal r12 + + /* Handle unaligned pointer */ +1: sub r10, 4 + brlt 5b + add r10, r9 + lsl r9, 1 + add pc, r9 + st.b r8++, r11 + st.b r8++, r11 + st.b r8++, r11 + rjmp 2b + + .size memset, . - memset diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S new file mode 100644 index 000000000000..72bd50599ec6 --- /dev/null +++ b/arch/avr32/lib/strncpy_from_user.S @@ -0,0 +1,60 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/errno.h> + +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/asm.h> + + /* + * long strncpy_from_user(char *dst, const char *src, long count) + * + * On success, returns the length of the string, not including + * the terminating NUL. + * + * If the string is longer than count, returns count + * + * If userspace access fails, returns -EFAULT + */ + .text + .align 1 + .global strncpy_from_user + .type strncpy_from_user, "function" +strncpy_from_user: + mov r9, -EFAULT + branch_if_kernel r8, __strncpy_from_user + ret_if_privileged r8, r11, r10, r9 + + .global __strncpy_from_user + .type __strncpy_from_user, "function" +__strncpy_from_user: + cp.w r10, 0 + reteq 0 + + mov r9, r10 + +1: ld.ub r8, r11++ + st.b r12++, r8 + cp.w r8, 0 + breq 2f + sub r9, 1 + brne 1b + +2: sub r10, r9 + retal r10 + + .section .fixup, "ax" + .align 1 +3: mov r12, -EFAULT + retal r12 + + .section __ex_table, "a" + .align 2 + .long 1b, 3b diff --git a/arch/avr32/lib/strnlen_user.S b/arch/avr32/lib/strnlen_user.S new file mode 100644 index 000000000000..65ce11afa66a --- /dev/null +++ b/arch/avr32/lib/strnlen_user.S @@ -0,0 +1,67 @@ +/* + * Copy to/from userspace with optional address space checking. + * + * Copyright 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
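+ *
+ * Rough semantics, as implemented below rather than a formal
+ * contract: returns the string length including the terminating
+ * NUL, a value greater than the limit if no NUL is found within
+ * it, and 0 if the userspace access faults.  The adjust_length
+ * path handles buffers that would extend past TASK_SIZE.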
+ */ +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/processor.h> +#include <asm/asm.h> + + .text + .align 1 + .global strnlen_user + .type strnlen_user, "function" +strnlen_user: + branch_if_kernel r8, __strnlen_user + sub r8, r11, 1 + add r8, r12 + retcs 0 + brmi adjust_length /* do a closer inspection */ + + .global __strnlen_user + .type __strnlen_user, "function" +__strnlen_user: + mov r10, r12 + +10: ld.ub r8, r12++ + cp.w r8, 0 + breq 2f + sub r11, 1 + brne 10b + + sub r12, -1 +2: sub r12, r10 + retal r12 + + + .type adjust_length, "function" +adjust_length: + cp.w r12, 0 /* addr must always be < TASK_SIZE */ + retmi 0 + + pushm lr + lddpc lr, _task_size + sub r11, lr, r12 + mov r9, r11 + rcall __strnlen_user + cp.w r12, r9 + brgt 1f + popm pc +1: popm pc, r12=0 + + .align 2 +_task_size: + .long TASK_SIZE + + .section .fixup, "ax" + .align 1 +19: retal 0 + + .section __ex_table, "a" + .align 2 + .long 10b, 19b diff --git a/arch/avr32/mach-at32ap/Makefile b/arch/avr32/mach-at32ap/Makefile new file mode 100644 index 000000000000..f62eb6915510 --- /dev/null +++ b/arch/avr32/mach-at32ap/Makefile @@ -0,0 +1,2 @@ +obj-y += at32ap.o clock.o pio.o intc.o extint.o hsmc.o +obj-$(CONFIG_CPU_AT32AP7000) += at32ap7000.o diff --git a/arch/avr32/mach-at32ap/at32ap.c b/arch/avr32/mach-at32ap/at32ap.c new file mode 100644 index 000000000000..f7cedf5aabea --- /dev/null +++ b/arch/avr32/mach-at32ap/at32ap.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include <asm/arch/init.h> +#include <asm/arch/sm.h> + +struct at32_sm system_manager; + +static int __init at32_sm_init(void) +{ + struct resource *regs; + struct at32_sm *sm = &system_manager; + int ret = -ENXIO; + + regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); + if (!regs) + goto fail; + + spin_lock_init(&sm->lock); + sm->pdev = &at32_sm_device; + + ret = -ENOMEM; + sm->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!sm->regs) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); + return ret; +} + +void __init setup_platform(void) +{ + at32_sm_init(); + at32_clock_init(); + at32_portmux_init(); + + /* FIXME: This doesn't belong here */ + at32_setup_serial_console(1); +} + +static int __init pdc_probe(struct platform_device *pdev) +{ + struct clk *pclk, *hclk; + + pclk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) { + dev_err(&pdev->dev, "no pclk defined\n"); + return PTR_ERR(pclk); + } + hclk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(hclk)) { + dev_err(&pdev->dev, "no hclk defined\n"); + clk_put(pclk); + return PTR_ERR(hclk); + } + + clk_enable(pclk); + clk_enable(hclk); + + dev_info(&pdev->dev, "Atmel Peripheral DMA Controller enabled\n"); + return 0; +} + +static struct platform_driver pdc_driver = { + .probe = pdc_probe, + .driver = { + .name = "pdc", + }, +}; + +static int __init pdc_init(void) +{ + return platform_driver_register(&pdc_driver); +} +arch_initcall(pdc_init); diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c new file mode 100644 index 000000000000..37982b60398e --- /dev/null +++ b/arch/avr32/mach-at32ap/at32ap7000.c @@ -0,0 +1,876 @@ 
+/* + * Copyright (C) 2005-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include <asm/arch/board.h> +#include <asm/arch/portmux.h> +#include <asm/arch/sm.h> + +#include "clock.h" +#include "pio.h" +#include "sm.h" + +#define PBMEM(base) \ + { \ + .start = base, \ + .end = base + 0x3ff, \ + .flags = IORESOURCE_MEM, \ + } +#define IRQ(num) \ + { \ + .start = num, \ + .end = num, \ + .flags = IORESOURCE_IRQ, \ + } +#define NAMED_IRQ(num, _name) \ + { \ + .start = num, \ + .end = num, \ + .name = _name, \ + .flags = IORESOURCE_IRQ, \ + } + +#define DEFINE_DEV(_name, _id) \ +static struct platform_device _name##_id##_device = { \ + .name = #_name, \ + .id = _id, \ + .resource = _name##_id##_resource, \ + .num_resources = ARRAY_SIZE(_name##_id##_resource), \ +} +#define DEFINE_DEV_DATA(_name, _id) \ +static struct platform_device _name##_id##_device = { \ + .name = #_name, \ + .id = _id, \ + .dev = { \ + .platform_data = &_name##_id##_data, \ + }, \ + .resource = _name##_id##_resource, \ + .num_resources = ARRAY_SIZE(_name##_id##_resource), \ +} + +#define DEV_CLK(_name, devname, bus, _index) \ +static struct clk devname##_##_name = { \ + .name = #_name, \ + .dev = &devname##_device.dev, \ + .parent = &bus##_clk, \ + .mode = bus##_clk_mode, \ + .get_rate = bus##_clk_get_rate, \ + .index = _index, \ +} + +enum { + PIOA, + PIOB, + PIOC, + PIOD, +}; + +enum { + FUNC_A, + FUNC_B, +}; + +unsigned long at32ap7000_osc_rates[3] = { + [0] = 32768, + /* FIXME: these are ATSTK1002-specific */ + [1] = 20000000, + [2] = 12000000, +}; + +static unsigned long osc_get_rate(struct clk *clk) +{ + return at32ap7000_osc_rates[clk->index]; +} + +static unsigned long pll_get_rate(struct clk *clk, unsigned long control) +{ + unsigned long div, mul, rate; + + if (!(control & SM_BIT(PLLEN))) + return 0; + + div = SM_BFEXT(PLLDIV, control) + 1; + mul = SM_BFEXT(PLLMUL, control) + 1; + + rate = clk->parent->get_rate(clk->parent); + rate = (rate + div / 2) / div; + rate *= mul; + + return rate; +} + +static unsigned long pll0_get_rate(struct clk *clk) +{ + u32 control; + + control = sm_readl(&system_manager, PM_PLL0); + + return pll_get_rate(clk, control); +} + +static unsigned long pll1_get_rate(struct clk *clk) +{ + u32 control; + + control = sm_readl(&system_manager, PM_PLL1); + + return pll_get_rate(clk, control); +} + +/* + * The AT32AP7000 has five primary clock sources: One 32kHz + * oscillator, two crystal oscillators and two PLLs. + */ +static struct clk osc32k = { + .name = "osc32k", + .get_rate = osc_get_rate, + .users = 1, + .index = 0, +}; +static struct clk osc0 = { + .name = "osc0", + .get_rate = osc_get_rate, + .users = 1, + .index = 1, +}; +static struct clk osc1 = { + .name = "osc1", + .get_rate = osc_get_rate, + .index = 2, +}; +static struct clk pll0 = { + .name = "pll0", + .get_rate = pll0_get_rate, + .parent = &osc0, +}; +static struct clk pll1 = { + .name = "pll1", + .get_rate = pll1_get_rate, + .parent = &osc0, +}; + +/* + * The main clock can be either osc0 or pll0. The boot loader may + * have chosen one for us, so we don't really know which one until we + * have a look at the SM. + */ +static struct clk *main_clock; + +/* + * Synchronous clocks are generated from the main clock. 
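+ * Each of them is the main clock shifted right by a power-of-two
+ * prescaler taken from the PM_CKSEL register; see the
+ * *_clk_get_rate() functions below.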
The clocks + * must satisfy the constraint + * fCPU >= fHSB >= fPB + * i.e. each clock must not be faster than its parent. + */ +static unsigned long bus_clk_get_rate(struct clk *clk, unsigned int shift) +{ + return main_clock->get_rate(main_clock) >> shift; +}; + +static void cpu_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_CPU_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_CPU_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long cpu_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(CPUDIV)) + shift = SM_BFEXT(CPUSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void hsb_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_HSB_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_HSB_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long hsb_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(HSBDIV)) + shift = SM_BFEXT(HSBSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void pba_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_PBA_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_PBA_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long pba_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(PBADIV)) + shift = SM_BFEXT(PBASEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static void pbb_clk_mode(struct clk *clk, int enabled) +{ + struct at32_sm *sm = &system_manager; + unsigned long flags; + u32 mask; + + spin_lock_irqsave(&sm->lock, flags); + mask = sm_readl(sm, PM_PBB_MASK); + if (enabled) + mask |= 1 << clk->index; + else + mask &= ~(1 << clk->index); + sm_writel(sm, PM_PBB_MASK, mask); + spin_unlock_irqrestore(&sm->lock, flags); +} + +static unsigned long pbb_clk_get_rate(struct clk *clk) +{ + unsigned long cksel, shift = 0; + + cksel = sm_readl(&system_manager, PM_CKSEL); + if (cksel & SM_BIT(PBBDIV)) + shift = SM_BFEXT(PBBSEL, cksel) + 1; + + return bus_clk_get_rate(clk, shift); +} + +static struct clk cpu_clk = { + .name = "cpu", + .get_rate = cpu_clk_get_rate, + .users = 1, +}; +static struct clk hsb_clk = { + .name = "hsb", + .parent = &cpu_clk, + .get_rate = hsb_clk_get_rate, +}; +static struct clk pba_clk = { + .name = "pba", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = pba_clk_get_rate, + .index = 1, +}; +static struct clk pbb_clk = { + .name = "pbb", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = pbb_clk_get_rate, + .users = 1, + .index = 2, +}; + +/* -------------------------------------------------------------------- + * Generic Clock operations + * -------------------------------------------------------------------- */ + +static void genclk_mode(struct clk *clk, int enabled) +{ + u32 control; 
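+
+	/*
+	 * Each generic clock has its own control register at
+	 * PM_GCCTRL + 4 * clk->index (consistent with all the
+	 * accesses in this file): CEN gates the clock, OSCSEL and
+	 * PLLSEL select the source, and DIVEN/DIV give a divider of
+	 * 2 * (DIV + 1).  For example, a 96 MHz parent with DIV = 3
+	 * comes out at 96 / 8 = 12 MHz.
+	 */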
+ + BUG_ON(clk->index > 7); + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + if (enabled) + control |= SM_BIT(CEN); + else + control &= ~SM_BIT(CEN); + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control); +} + +static unsigned long genclk_get_rate(struct clk *clk) +{ + u32 control; + unsigned long div = 1; + + BUG_ON(clk->index > 7); + + if (!clk->parent) + return 0; + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + if (control & SM_BIT(DIVEN)) + div = 2 * (SM_BFEXT(DIV, control) + 1); + + return clk->parent->get_rate(clk->parent) / div; +} + +static long genclk_set_rate(struct clk *clk, unsigned long rate, int apply) +{ + u32 control; + unsigned long parent_rate, actual_rate, div; + + BUG_ON(clk->index > 7); + + if (!clk->parent) + return 0; + + parent_rate = clk->parent->get_rate(clk->parent); + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + + if (rate > 3 * parent_rate / 4) { + actual_rate = parent_rate; + control &= ~SM_BIT(DIVEN); + } else { + div = (parent_rate + rate) / (2 * rate) - 1; + control = SM_BFINS(DIV, div, control) | SM_BIT(DIVEN); + actual_rate = parent_rate / (2 * (div + 1)); + } + + printk("clk %s: new rate %lu (actual rate %lu)\n", + clk->name, rate, actual_rate); + + if (apply) + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, + control); + + return actual_rate; +} + +int genclk_set_parent(struct clk *clk, struct clk *parent) +{ + u32 control; + + BUG_ON(clk->index > 7); + + printk("clk %s: new parent %s (was %s)\n", + clk->name, parent->name, + clk->parent ? clk->parent->name : "(null)"); + + control = sm_readl(&system_manager, PM_GCCTRL + 4 * clk->index); + + if (parent == &osc1 || parent == &pll1) + control |= SM_BIT(OSCSEL); + else if (parent == &osc0 || parent == &pll0) + control &= ~SM_BIT(OSCSEL); + else + return -EINVAL; + + if (parent == &pll0 || parent == &pll1) + control |= SM_BIT(PLLSEL); + else + control &= ~SM_BIT(PLLSEL); + + sm_writel(&system_manager, PM_GCCTRL + 4 * clk->index, control); + clk->parent = parent; + + return 0; +} + +/* -------------------------------------------------------------------- + * System peripherals + * -------------------------------------------------------------------- */ +static struct resource sm_resource[] = { + PBMEM(0xfff00000), + NAMED_IRQ(19, "eim"), + NAMED_IRQ(20, "pm"), + NAMED_IRQ(21, "rtc"), +}; +struct platform_device at32_sm_device = { + .name = "sm", + .id = 0, + .resource = sm_resource, + .num_resources = ARRAY_SIZE(sm_resource), +}; +DEV_CLK(pclk, at32_sm, pbb, 0); + +static struct resource intc0_resource[] = { + PBMEM(0xfff00400), +}; +struct platform_device at32_intc0_device = { + .name = "intc", + .id = 0, + .resource = intc0_resource, + .num_resources = ARRAY_SIZE(intc0_resource), +}; +DEV_CLK(pclk, at32_intc0, pbb, 1); + +static struct clk ebi_clk = { + .name = "ebi", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = hsb_clk_get_rate, + .users = 1, +}; +static struct clk hramc_clk = { + .name = "hramc", + .parent = &hsb_clk, + .mode = hsb_clk_mode, + .get_rate = hsb_clk_get_rate, + .users = 1, +}; + +static struct resource smc0_resource[] = { + PBMEM(0xfff03400), +}; +DEFINE_DEV(smc, 0); +DEV_CLK(pclk, smc0, pbb, 13); +DEV_CLK(mck, smc0, hsb, 0); + +static struct platform_device pdc_device = { + .name = "pdc", + .id = 0, +}; +DEV_CLK(hclk, pdc, hsb, 4); +DEV_CLK(pclk, pdc, pba, 16); + +static struct clk pico_clk = { + .name = "pico", + .parent = &cpu_clk, + .mode = cpu_clk_mode, + .get_rate = cpu_clk_get_rate, + 
.users = 1, +}; + +/* -------------------------------------------------------------------- + * PIO + * -------------------------------------------------------------------- */ + +static struct resource pio0_resource[] = { + PBMEM(0xffe02800), + IRQ(13), +}; +DEFINE_DEV(pio, 0); +DEV_CLK(mck, pio0, pba, 10); + +static struct resource pio1_resource[] = { + PBMEM(0xffe02c00), + IRQ(14), +}; +DEFINE_DEV(pio, 1); +DEV_CLK(mck, pio1, pba, 11); + +static struct resource pio2_resource[] = { + PBMEM(0xffe03000), + IRQ(15), +}; +DEFINE_DEV(pio, 2); +DEV_CLK(mck, pio2, pba, 12); + +static struct resource pio3_resource[] = { + PBMEM(0xffe03400), + IRQ(16), +}; +DEFINE_DEV(pio, 3); +DEV_CLK(mck, pio3, pba, 13); + +void __init at32_add_system_devices(void) +{ + system_manager.eim_first_irq = NR_INTERNAL_IRQS; + + platform_device_register(&at32_sm_device); + platform_device_register(&at32_intc0_device); + platform_device_register(&smc0_device); + platform_device_register(&pdc_device); + + platform_device_register(&pio0_device); + platform_device_register(&pio1_device); + platform_device_register(&pio2_device); + platform_device_register(&pio3_device); +} + +/* -------------------------------------------------------------------- + * USART + * -------------------------------------------------------------------- */ + +static struct resource usart0_resource[] = { + PBMEM(0xffe00c00), + IRQ(7), +}; +DEFINE_DEV(usart, 0); +DEV_CLK(usart, usart0, pba, 4); + +static struct resource usart1_resource[] = { + PBMEM(0xffe01000), + IRQ(7), +}; +DEFINE_DEV(usart, 1); +DEV_CLK(usart, usart1, pba, 4); + +static struct resource usart2_resource[] = { + PBMEM(0xffe01400), + IRQ(8), +}; +DEFINE_DEV(usart, 2); +DEV_CLK(usart, usart2, pba, 5); + +static struct resource usart3_resource[] = { + PBMEM(0xffe01800), + IRQ(9), +}; +DEFINE_DEV(usart, 3); +DEV_CLK(usart, usart3, pba, 6); + +static inline void configure_usart0_pins(void) +{ + portmux_set_func(PIOA, 8, FUNC_B); /* RXD */ + portmux_set_func(PIOA, 9, FUNC_B); /* TXD */ +} + +static inline void configure_usart1_pins(void) +{ + portmux_set_func(PIOA, 17, FUNC_A); /* RXD */ + portmux_set_func(PIOA, 18, FUNC_A); /* TXD */ +} + +static inline void configure_usart2_pins(void) +{ + portmux_set_func(PIOB, 26, FUNC_B); /* RXD */ + portmux_set_func(PIOB, 27, FUNC_B); /* TXD */ +} + +static inline void configure_usart3_pins(void) +{ + portmux_set_func(PIOB, 18, FUNC_B); /* RXD */ + portmux_set_func(PIOB, 17, FUNC_B); /* TXD */ +} + +static struct platform_device *setup_usart(unsigned int id) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &usart0_device; + configure_usart0_pins(); + break; + case 1: + pdev = &usart1_device; + configure_usart1_pins(); + break; + case 2: + pdev = &usart2_device; + configure_usart2_pins(); + break; + case 3: + pdev = &usart3_device; + configure_usart3_pins(); + break; + default: + pdev = NULL; + break; + } + + return pdev; +} + +struct platform_device *__init at32_add_device_usart(unsigned int id) +{ + struct platform_device *pdev; + + pdev = setup_usart(id); + if (pdev) + platform_device_register(pdev); + + return pdev; +} + +struct platform_device *at91_default_console_device; + +void __init at32_setup_serial_console(unsigned int usart_id) +{ + at91_default_console_device = setup_usart(usart_id); +} + +/* -------------------------------------------------------------------- + * Ethernet + * -------------------------------------------------------------------- */ + +static struct eth_platform_data macb0_data; +static struct 
resource macb0_resource[] = { + PBMEM(0xfff01800), + IRQ(25), +}; +DEFINE_DEV_DATA(macb, 0); +DEV_CLK(hclk, macb0, hsb, 8); +DEV_CLK(pclk, macb0, pbb, 6); + +struct platform_device *__init +at32_add_device_eth(unsigned int id, struct eth_platform_data *data) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &macb0_device; + + portmux_set_func(PIOC, 3, FUNC_A); /* TXD0 */ + portmux_set_func(PIOC, 4, FUNC_A); /* TXD1 */ + portmux_set_func(PIOC, 7, FUNC_A); /* TXEN */ + portmux_set_func(PIOC, 8, FUNC_A); /* TXCK */ + portmux_set_func(PIOC, 9, FUNC_A); /* RXD0 */ + portmux_set_func(PIOC, 10, FUNC_A); /* RXD1 */ + portmux_set_func(PIOC, 13, FUNC_A); /* RXER */ + portmux_set_func(PIOC, 15, FUNC_A); /* RXDV */ + portmux_set_func(PIOC, 16, FUNC_A); /* MDC */ + portmux_set_func(PIOC, 17, FUNC_A); /* MDIO */ + + if (!data->is_rmii) { + portmux_set_func(PIOC, 0, FUNC_A); /* COL */ + portmux_set_func(PIOC, 1, FUNC_A); /* CRS */ + portmux_set_func(PIOC, 2, FUNC_A); /* TXER */ + portmux_set_func(PIOC, 5, FUNC_A); /* TXD2 */ + portmux_set_func(PIOC, 6, FUNC_A); /* TXD3 */ + portmux_set_func(PIOC, 11, FUNC_A); /* RXD2 */ + portmux_set_func(PIOC, 12, FUNC_A); /* RXD3 */ + portmux_set_func(PIOC, 14, FUNC_A); /* RXCK */ + portmux_set_func(PIOC, 18, FUNC_A); /* SPD */ + } + break; + + default: + return NULL; + } + + memcpy(pdev->dev.platform_data, data, sizeof(struct eth_platform_data)); + platform_device_register(pdev); + + return pdev; +} + +/* -------------------------------------------------------------------- + * SPI + * -------------------------------------------------------------------- */ +static struct resource spi0_resource[] = { + PBMEM(0xffe00000), + IRQ(3), +}; +DEFINE_DEV(spi, 0); +DEV_CLK(mck, spi0, pba, 0); + +struct platform_device *__init at32_add_device_spi(unsigned int id) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &spi0_device; + portmux_set_func(PIOA, 0, FUNC_A); /* MISO */ + portmux_set_func(PIOA, 1, FUNC_A); /* MOSI */ + portmux_set_func(PIOA, 2, FUNC_A); /* SCK */ + portmux_set_func(PIOA, 3, FUNC_A); /* NPCS0 */ + portmux_set_func(PIOA, 4, FUNC_A); /* NPCS1 */ + portmux_set_func(PIOA, 5, FUNC_A); /* NPCS2 */ + break; + + default: + return NULL; + } + + platform_device_register(pdev); + return pdev; +} + +/* -------------------------------------------------------------------- + * LCDC + * -------------------------------------------------------------------- */ +static struct lcdc_platform_data lcdc0_data; +static struct resource lcdc0_resource[] = { + { + .start = 0xff000000, + .end = 0xff000fff, + .flags = IORESOURCE_MEM, + }, + IRQ(1), +}; +DEFINE_DEV_DATA(lcdc, 0); +DEV_CLK(hclk, lcdc0, hsb, 7); +static struct clk lcdc0_pixclk = { + .name = "pixclk", + .dev = &lcdc0_device.dev, + .mode = genclk_mode, + .get_rate = genclk_get_rate, + .set_rate = genclk_set_rate, + .set_parent = genclk_set_parent, + .index = 7, +}; + +struct platform_device *__init +at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data) +{ + struct platform_device *pdev; + + switch (id) { + case 0: + pdev = &lcdc0_device; + portmux_set_func(PIOC, 19, FUNC_A); /* CC */ + portmux_set_func(PIOC, 20, FUNC_A); /* HSYNC */ + portmux_set_func(PIOC, 21, FUNC_A); /* PCLK */ + portmux_set_func(PIOC, 22, FUNC_A); /* VSYNC */ + portmux_set_func(PIOC, 23, FUNC_A); /* DVAL */ + portmux_set_func(PIOC, 24, FUNC_A); /* MODE */ + portmux_set_func(PIOC, 25, FUNC_A); /* PWR */ + portmux_set_func(PIOC, 26, FUNC_A); /* DATA0 */ + portmux_set_func(PIOC, 27, FUNC_A); /* DATA1 
*/ + portmux_set_func(PIOC, 28, FUNC_A); /* DATA2 */ + portmux_set_func(PIOC, 29, FUNC_A); /* DATA3 */ + portmux_set_func(PIOC, 30, FUNC_A); /* DATA4 */ + portmux_set_func(PIOC, 31, FUNC_A); /* DATA5 */ + portmux_set_func(PIOD, 0, FUNC_A); /* DATA6 */ + portmux_set_func(PIOD, 1, FUNC_A); /* DATA7 */ + portmux_set_func(PIOD, 2, FUNC_A); /* DATA8 */ + portmux_set_func(PIOD, 3, FUNC_A); /* DATA9 */ + portmux_set_func(PIOD, 4, FUNC_A); /* DATA10 */ + portmux_set_func(PIOD, 5, FUNC_A); /* DATA11 */ + portmux_set_func(PIOD, 6, FUNC_A); /* DATA12 */ + portmux_set_func(PIOD, 7, FUNC_A); /* DATA13 */ + portmux_set_func(PIOD, 8, FUNC_A); /* DATA14 */ + portmux_set_func(PIOD, 9, FUNC_A); /* DATA15 */ + portmux_set_func(PIOD, 10, FUNC_A); /* DATA16 */ + portmux_set_func(PIOD, 11, FUNC_A); /* DATA17 */ + portmux_set_func(PIOD, 12, FUNC_A); /* DATA18 */ + portmux_set_func(PIOD, 13, FUNC_A); /* DATA19 */ + portmux_set_func(PIOD, 14, FUNC_A); /* DATA20 */ + portmux_set_func(PIOD, 15, FUNC_A); /* DATA21 */ + portmux_set_func(PIOD, 16, FUNC_A); /* DATA22 */ + portmux_set_func(PIOD, 17, FUNC_A); /* DATA23 */ + + clk_set_parent(&lcdc0_pixclk, &pll0); + clk_set_rate(&lcdc0_pixclk, clk_get_rate(&pll0)); + break; + + default: + return NULL; + } + + memcpy(pdev->dev.platform_data, data, + sizeof(struct lcdc_platform_data)); + + platform_device_register(pdev); + return pdev; +} + +struct clk *at32_clock_list[] = { + &osc32k, + &osc0, + &osc1, + &pll0, + &pll1, + &cpu_clk, + &hsb_clk, + &pba_clk, + &pbb_clk, + &at32_sm_pclk, + &at32_intc0_pclk, + &ebi_clk, + &hramc_clk, + &smc0_pclk, + &smc0_mck, + &pdc_hclk, + &pdc_pclk, + &pico_clk, + &pio0_mck, + &pio1_mck, + &pio2_mck, + &pio3_mck, + &usart0_usart, + &usart1_usart, + &usart2_usart, + &usart3_usart, + &macb0_hclk, + &macb0_pclk, + &spi0_mck, + &lcdc0_hclk, + &lcdc0_pixclk, +}; +unsigned int at32_nr_clocks = ARRAY_SIZE(at32_clock_list); + +void __init at32_portmux_init(void) +{ + at32_init_pio(&pio0_device); + at32_init_pio(&pio1_device); + at32_init_pio(&pio2_device); + at32_init_pio(&pio3_device); +} + +void __init at32_clock_init(void) +{ + struct at32_sm *sm = &system_manager; + u32 cpu_mask = 0, hsb_mask = 0, pba_mask = 0, pbb_mask = 0; + int i; + + if (sm_readl(sm, PM_MCCTRL) & SM_BIT(PLLSEL)) + main_clock = &pll0; + else + main_clock = &osc0; + + if (sm_readl(sm, PM_PLL0) & SM_BIT(PLLOSC)) + pll0.parent = &osc1; + if (sm_readl(sm, PM_PLL1) & SM_BIT(PLLOSC)) + pll1.parent = &osc1; + + /* + * Turn on all clocks that have at least one user already, and + * turn off everything else. We only do this for module + * clocks, and even though it isn't particularly pretty to + * check the address of the mode function, it should do the + * trick... 
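+	 * The mode function effectively doubles as a tag: all module
+	 * clocks on one bus share the same mode callback, so comparing
+	 * function pointers tells us which PM_*_MASK register a
+	 * clock's index bit lives in.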
+ */ + for (i = 0; i < ARRAY_SIZE(at32_clock_list); i++) { + struct clk *clk = at32_clock_list[i]; + + if (clk->mode == &cpu_clk_mode) + cpu_mask |= 1 << clk->index; + else if (clk->mode == &hsb_clk_mode) + hsb_mask |= 1 << clk->index; + else if (clk->mode == &pba_clk_mode) + pba_mask |= 1 << clk->index; + else if (clk->mode == &pbb_clk_mode) + pbb_mask |= 1 << clk->index; + } + + sm_writel(sm, PM_CPU_MASK, cpu_mask); + sm_writel(sm, PM_HSB_MASK, hsb_mask); + sm_writel(sm, PM_PBA_MASK, pba_mask); + sm_writel(sm, PM_PBB_MASK, pbb_mask); +} diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c new file mode 100644 index 000000000000..3d0d1097389f --- /dev/null +++ b/arch/avr32/mach-at32ap/clock.c @@ -0,0 +1,148 @@ +/* + * Clock management for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on arch/arm/mach-at91rm9200/clock.c + * Copyright (C) 2005 David Brownell + * Copyright (C) 2005 Ivan Kokshaysky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/string.h> + +#include "clock.h" + +static spinlock_t clk_lock = SPIN_LOCK_UNLOCKED; + +struct clk *clk_get(struct device *dev, const char *id) +{ + int i; + + for (i = 0; i < at32_nr_clocks; i++) { + struct clk *clk = at32_clock_list[i]; + + if (clk->dev == dev && strcmp(id, clk->name) == 0) + return clk; + } + + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + /* clocks are static for now, we can't free them */ +} +EXPORT_SYMBOL(clk_put); + +static void __clk_enable(struct clk *clk) +{ + if (clk->parent) + __clk_enable(clk->parent); + if (clk->users++ == 0 && clk->mode) + clk->mode(clk, 1); +} + +int clk_enable(struct clk *clk) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_lock, flags); + __clk_enable(clk); + spin_unlock_irqrestore(&clk_lock, flags); + + return 0; +} +EXPORT_SYMBOL(clk_enable); + +static void __clk_disable(struct clk *clk) +{ + BUG_ON(clk->users == 0); + + if (--clk->users == 0 && clk->mode) + clk->mode(clk, 0); + if (clk->parent) + __clk_disable(clk->parent); +} + +void clk_disable(struct clk *clk) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_lock, flags); + __clk_disable(clk); + spin_unlock_irqrestore(&clk_lock, flags); +} +EXPORT_SYMBOL(clk_disable); + +unsigned long clk_get_rate(struct clk *clk) +{ + unsigned long flags; + unsigned long rate; + + spin_lock_irqsave(&clk_lock, flags); + rate = clk->get_rate(clk); + spin_unlock_irqrestore(&clk_lock, flags); + + return rate; +} +EXPORT_SYMBOL(clk_get_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + unsigned long flags, actual_rate; + + if (!clk->set_rate) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + actual_rate = clk->set_rate(clk, rate, 0); + spin_unlock_irqrestore(&clk_lock, flags); + + return actual_rate; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + unsigned long flags; + long ret; + + if (!clk->set_rate) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + ret = clk->set_rate(clk, rate, 1); + spin_unlock_irqrestore(&clk_lock, flags); + + return (ret < 0) ? 
ret : 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + unsigned long flags; + int ret; + + if (!clk->set_parent) + return -ENOSYS; + + spin_lock_irqsave(&clk_lock, flags); + ret = clk->set_parent(clk, parent); + spin_unlock_irqrestore(&clk_lock, flags); + + return ret; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/avr32/mach-at32ap/clock.h b/arch/avr32/mach-at32ap/clock.h new file mode 100644 index 000000000000..f953f044ba4d --- /dev/null +++ b/arch/avr32/mach-at32ap/clock.h @@ -0,0 +1,30 @@ +/* + * Clock management for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * Based on arch/arm/mach-at91rm9200/clock.c + * Copyright (C) 2005 David Brownell + * Copyright (C) 2005 Ivan Kokshaysky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clk.h> + +struct clk { + const char *name; /* Clock name/function */ + struct device *dev; /* Device the clock is used by */ + struct clk *parent; /* Parent clock, if any */ + void (*mode)(struct clk *clk, int enabled); + unsigned long (*get_rate)(struct clk *clk); + long (*set_rate)(struct clk *clk, unsigned long rate, + int apply); + int (*set_parent)(struct clk *clk, struct clk *parent); + u16 users; /* Enabled if non-zero */ + u16 index; /* Sibling index */ +}; + +extern struct clk *at32_clock_list[]; +extern unsigned int at32_nr_clocks; diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c new file mode 100644 index 000000000000..7da9c5f7a0eb --- /dev/null +++ b/arch/avr32/mach-at32ap/extint.c @@ -0,0 +1,171 @@ +/* + * External interrupt handling for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
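+ *
+ * Overview, inferred from the code below: the EIM shares its
+ * register block with the System Manager and turns a probed
+ * number of external pins into ordinary Linux IRQs starting at
+ * sm->eim_first_irq, with per-line edge/level and polarity
+ * control.  A board might claim one of these lines roughly like
+ * this (illustrative sketch only, not part of this patch):
+ *
+ *	irq = sm->eim_first_irq + pin;
+ *	set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
+ *	ret = request_irq(irq, my_handler, 0, "my-card", my_dev);
+ *
+ * where my_handler and my_dev are hypothetical board-side names.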
+ */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/platform_device.h> +#include <linux/random.h> + +#include <asm/io.h> + +#include <asm/arch/sm.h> + +#include "sm.h" + +static void eim_ack_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_mask_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_mask_ack_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_ICR, 1 << (irq - sm->eim_first_irq)); + sm_writel(sm, EIM_IDR, 1 << (irq - sm->eim_first_irq)); +} + +static void eim_unmask_irq(unsigned int irq) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + sm_writel(sm, EIM_IER, 1 << (irq - sm->eim_first_irq)); +} + +static int eim_set_irq_type(unsigned int irq, unsigned int flow_type) +{ + struct at32_sm *sm = get_irq_chip_data(irq); + unsigned int i = irq - sm->eim_first_irq; + u32 mode, edge, level; + unsigned long flags; + int ret = 0; + + flow_type &= IRQ_TYPE_SENSE_MASK; + + spin_lock_irqsave(&sm->lock, flags); + + mode = sm_readl(sm, EIM_MODE); + edge = sm_readl(sm, EIM_EDGE); + level = sm_readl(sm, EIM_LEVEL); + + switch (flow_type) { + case IRQ_TYPE_LEVEL_LOW: + mode |= 1 << i; + level &= ~(1 << i); + break; + case IRQ_TYPE_LEVEL_HIGH: + mode |= 1 << i; + level |= 1 << i; + break; + case IRQ_TYPE_EDGE_RISING: + mode &= ~(1 << i); + edge |= 1 << i; + break; + case IRQ_TYPE_EDGE_FALLING: + mode &= ~(1 << i); + edge &= ~(1 << i); + break; + default: + ret = -EINVAL; + break; + } + + sm_writel(sm, EIM_MODE, mode); + sm_writel(sm, EIM_EDGE, edge); + sm_writel(sm, EIM_LEVEL, level); + + spin_unlock_irqrestore(&sm->lock, flags); + + return ret; +} + +struct irq_chip eim_chip = { + .name = "eim", + .ack = eim_ack_irq, + .mask = eim_mask_irq, + .mask_ack = eim_mask_ack_irq, + .unmask = eim_unmask_irq, + .set_type = eim_set_irq_type, +}; + +static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) +{ + struct at32_sm *sm = desc->handler_data; + struct irq_desc *ext_desc; + unsigned long status, pending; + unsigned int i, ext_irq; + + spin_lock(&sm->lock); + + status = sm_readl(sm, EIM_ISR); + pending = status & sm_readl(sm, EIM_IMR); + + while (pending) { + i = fls(pending) - 1; + pending &= ~(1 << i); + + ext_irq = i + sm->eim_first_irq; + ext_desc = irq_desc + ext_irq; + ext_desc->handle_irq(ext_irq, ext_desc, regs); + } + + spin_unlock(&sm->lock); +} + +static int __init eim_init(void) +{ + struct at32_sm *sm = &system_manager; + unsigned int i; + unsigned int nr_irqs; + unsigned int int_irq; + u32 pattern; + + /* + * The EIM is really the same module as SM, so register + * mapping, etc. has been taken care of already. + */ + + /* + * Find out how many interrupt lines that are actually + * implemented in hardware. 
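+	 * Unimplemented mode bits should read back as zero, so after
+	 * writing ~0 to EIM_MODE the position of the highest bit that
+	 * sticks (fls) gives the number of lines.  This relies on the
+	 * usual write-ones/read-back probing behaviour of the
+	 * hardware.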
+ */ + sm_writel(sm, EIM_IDR, ~0UL); + sm_writel(sm, EIM_MODE, ~0UL); + pattern = sm_readl(sm, EIM_MODE); + nr_irqs = fls(pattern); + + sm->eim_chip = &eim_chip; + + for (i = 0; i < nr_irqs; i++) { + set_irq_chip(sm->eim_first_irq + i, &eim_chip); + set_irq_chip_data(sm->eim_first_irq + i, sm); + } + + int_irq = platform_get_irq_byname(sm->pdev, "eim"); + + set_irq_chained_handler(int_irq, demux_eim_irq); + set_irq_data(int_irq, sm); + + printk("EIM: External Interrupt Module at 0x%p, IRQ %u\n", + sm->regs, int_irq); + printk("EIM: Handling %u external IRQs, starting with IRQ %u\n", + nr_irqs, sm->eim_first_irq); + + return 0; +} +arch_initcall(eim_init); diff --git a/arch/avr32/mach-at32ap/hsmc.c b/arch/avr32/mach-at32ap/hsmc.c new file mode 100644 index 000000000000..7691721928a7 --- /dev/null +++ b/arch/avr32/mach-at32ap/hsmc.c @@ -0,0 +1,164 @@ +/* + * Static Memory Controller for AT32 chips + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define DEBUG +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include <asm/io.h> +#include <asm/arch/smc.h> + +#include "hsmc.h" + +#define NR_CHIP_SELECTS 6 + +struct hsmc { + void __iomem *regs; + struct clk *pclk; + struct clk *mck; +}; + +static struct hsmc *hsmc; + +int smc_set_configuration(int cs, const struct smc_config *config) +{ + unsigned long mul; + unsigned long offset; + u32 setup, pulse, cycle, mode; + + if (!hsmc) + return -ENODEV; + if (cs >= NR_CHIP_SELECTS) + return -EINVAL; + + /* + * cycles = x / T = x * f + * = ((x * 1000000000) * ((f * 65536) / 1000000000)) / 65536 + * = ((x * 1000000000) * (((f / 10000) * 65536) / 100000)) / 65536 + */ + mul = (clk_get_rate(hsmc->mck) / 10000) << 16; + mul /= 100000; + +#define ns2cyc(x) ((((x) * mul) + 65535) >> 16) + + setup = (HSMC_BF(NWE_SETUP, ns2cyc(config->nwe_setup)) + | HSMC_BF(NCS_WR_SETUP, ns2cyc(config->ncs_write_setup)) + | HSMC_BF(NRD_SETUP, ns2cyc(config->nrd_setup)) + | HSMC_BF(NCS_RD_SETUP, ns2cyc(config->ncs_read_setup))); + pulse = (HSMC_BF(NWE_PULSE, ns2cyc(config->nwe_pulse)) + | HSMC_BF(NCS_WR_PULSE, ns2cyc(config->ncs_write_pulse)) + | HSMC_BF(NRD_PULSE, ns2cyc(config->nrd_pulse)) + | HSMC_BF(NCS_RD_PULSE, ns2cyc(config->ncs_read_pulse))); + cycle = (HSMC_BF(NWE_CYCLE, ns2cyc(config->write_cycle)) + | HSMC_BF(NRD_CYCLE, ns2cyc(config->read_cycle))); + + switch (config->bus_width) { + case 1: + mode = HSMC_BF(DBW, HSMC_DBW_8_BITS); + break; + case 2: + mode = HSMC_BF(DBW, HSMC_DBW_16_BITS); + break; + case 4: + mode = HSMC_BF(DBW, HSMC_DBW_32_BITS); + break; + default: + return -EINVAL; + } + + if (config->nrd_controlled) + mode |= HSMC_BIT(READ_MODE); + if (config->nwe_controlled) + mode |= HSMC_BIT(WRITE_MODE); + if (config->byte_write) + mode |= HSMC_BIT(BAT); + + pr_debug("smc cs%d: setup/%08x pulse/%08x cycle/%08x mode/%08x\n", + cs, setup, pulse, cycle, mode); + + offset = cs * 0x10; + hsmc_writel(hsmc, SETUP0 + offset, setup); + hsmc_writel(hsmc, PULSE0 + offset, pulse); + hsmc_writel(hsmc, CYCLE0 + offset, cycle); + hsmc_writel(hsmc, MODE0 + offset, mode); + hsmc_readl(hsmc, MODE0); /* I/O barrier */ + + return 0; +} +EXPORT_SYMBOL(smc_set_configuration); + +static int hsmc_probe(struct platform_device *pdev) +{ + struct resource *regs; + struct clk *pclk, *mck; + int ret; + + if 
(hsmc) + return -EBUSY; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + return -ENXIO; + pclk = clk_get(&pdev->dev, "pclk"); + if (IS_ERR(pclk)) + return PTR_ERR(pclk); + mck = clk_get(&pdev->dev, "mck"); + if (IS_ERR(mck)) { + ret = PTR_ERR(mck); + goto out_put_pclk; + } + + ret = -ENOMEM; + hsmc = kzalloc(sizeof(struct hsmc), GFP_KERNEL); + if (!hsmc) + goto out_put_clocks; + + clk_enable(pclk); + clk_enable(mck); + + hsmc->pclk = pclk; + hsmc->mck = mck; + hsmc->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!hsmc->regs) + goto out_disable_clocks; + + dev_info(&pdev->dev, "Atmel Static Memory Controller at 0x%08lx\n", + (unsigned long)regs->start); + + platform_set_drvdata(pdev, hsmc); + + return 0; + +out_disable_clocks: + clk_disable(mck); + clk_disable(pclk); + kfree(hsmc); +out_put_clocks: + clk_put(mck); +out_put_pclk: + clk_put(pclk); + hsmc = NULL; + return ret; +} + +static struct platform_driver hsmc_driver = { + .probe = hsmc_probe, + .driver = { + .name = "smc", + }, +}; + +static int __init hsmc_init(void) +{ + return platform_driver_register(&hsmc_driver); +} +arch_initcall(hsmc_init); diff --git a/arch/avr32/mach-at32ap/hsmc.h b/arch/avr32/mach-at32ap/hsmc.h new file mode 100644 index 000000000000..5681276fafdb --- /dev/null +++ b/arch/avr32/mach-at32ap/hsmc.h @@ -0,0 +1,127 @@ +/* + * Register definitions for Atmel Static Memory Controller (SMC) + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_HSMC_H__ +#define __ASM_AVR32_HSMC_H__ + +/* HSMC register offsets */ +#define HSMC_SETUP0 0x0000 +#define HSMC_PULSE0 0x0004 +#define HSMC_CYCLE0 0x0008 +#define HSMC_MODE0 0x000c +#define HSMC_SETUP1 0x0010 +#define HSMC_PULSE1 0x0014 +#define HSMC_CYCLE1 0x0018 +#define HSMC_MODE1 0x001c +#define HSMC_SETUP2 0x0020 +#define HSMC_PULSE2 0x0024 +#define HSMC_CYCLE2 0x0028 +#define HSMC_MODE2 0x002c +#define HSMC_SETUP3 0x0030 +#define HSMC_PULSE3 0x0034 +#define HSMC_CYCLE3 0x0038 +#define HSMC_MODE3 0x003c +#define HSMC_SETUP4 0x0040 +#define HSMC_PULSE4 0x0044 +#define HSMC_CYCLE4 0x0048 +#define HSMC_MODE4 0x004c +#define HSMC_SETUP5 0x0050 +#define HSMC_PULSE5 0x0054 +#define HSMC_CYCLE5 0x0058 +#define HSMC_MODE5 0x005c + +/* Bitfields in SETUP0 */ +#define HSMC_NWE_SETUP_OFFSET 0 +#define HSMC_NWE_SETUP_SIZE 6 +#define HSMC_NCS_WR_SETUP_OFFSET 8 +#define HSMC_NCS_WR_SETUP_SIZE 6 +#define HSMC_NRD_SETUP_OFFSET 16 +#define HSMC_NRD_SETUP_SIZE 6 +#define HSMC_NCS_RD_SETUP_OFFSET 24 +#define HSMC_NCS_RD_SETUP_SIZE 6 + +/* Bitfields in PULSE0 */ +#define HSMC_NWE_PULSE_OFFSET 0 +#define HSMC_NWE_PULSE_SIZE 7 +#define HSMC_NCS_WR_PULSE_OFFSET 8 +#define HSMC_NCS_WR_PULSE_SIZE 7 +#define HSMC_NRD_PULSE_OFFSET 16 +#define HSMC_NRD_PULSE_SIZE 7 +#define HSMC_NCS_RD_PULSE_OFFSET 24 +#define HSMC_NCS_RD_PULSE_SIZE 7 + +/* Bitfields in CYCLE0 */ +#define HSMC_NWE_CYCLE_OFFSET 0 +#define HSMC_NWE_CYCLE_SIZE 9 +#define HSMC_NRD_CYCLE_OFFSET 16 +#define HSMC_NRD_CYCLE_SIZE 9 + +/* Bitfields in MODE0 */ +#define HSMC_READ_MODE_OFFSET 0 +#define HSMC_READ_MODE_SIZE 1 +#define HSMC_WRITE_MODE_OFFSET 1 +#define HSMC_WRITE_MODE_SIZE 1 +#define HSMC_EXNW_MODE_OFFSET 4 +#define HSMC_EXNW_MODE_SIZE 2 +#define HSMC_BAT_OFFSET 8 +#define HSMC_BAT_SIZE 1 +#define HSMC_DBW_OFFSET 12 +#define HSMC_DBW_SIZE 2 +#define HSMC_TDF_CYCLES_OFFSET 16 
+#define HSMC_TDF_CYCLES_SIZE 4 +#define HSMC_TDF_MODE_OFFSET 20 +#define HSMC_TDF_MODE_SIZE 1 +#define HSMC_PMEN_OFFSET 24 +#define HSMC_PMEN_SIZE 1 +#define HSMC_PS_OFFSET 28 +#define HSMC_PS_SIZE 2 + +/* Constants for READ_MODE */ +#define HSMC_READ_MODE_NCS_CONTROLLED 0 +#define HSMC_READ_MODE_NRD_CONTROLLED 1 + +/* Constants for WRITE_MODE */ +#define HSMC_WRITE_MODE_NCS_CONTROLLED 0 +#define HSMC_WRITE_MODE_NWE_CONTROLLED 1 + +/* Constants for EXNW_MODE */ +#define HSMC_EXNW_MODE_DISABLED 0 +#define HSMC_EXNW_MODE_RESERVED 1 +#define HSMC_EXNW_MODE_FROZEN 2 +#define HSMC_EXNW_MODE_READY 3 + +/* Constants for BAT */ +#define HSMC_BAT_BYTE_SELECT 0 +#define HSMC_BAT_BYTE_WRITE 1 + +/* Constants for DBW */ +#define HSMC_DBW_8_BITS 0 +#define HSMC_DBW_16_BITS 1 +#define HSMC_DBW_32_BITS 2 + +/* Bit manipulation macros */ +#define HSMC_BIT(name) \ + (1 << HSMC_##name##_OFFSET) +#define HSMC_BF(name,value) \ + (((value) & ((1 << HSMC_##name##_SIZE) - 1)) \ + << HSMC_##name##_OFFSET) +#define HSMC_BFEXT(name,value) \ + (((value) >> HSMC_##name##_OFFSET) \ + & ((1 << HSMC_##name##_SIZE) - 1)) +#define HSMC_BFINS(name,value,old) \ + (((old) & ~(((1 << HSMC_##name##_SIZE) - 1) \ + << HSMC_##name##_OFFSET)) | HSMC_BF(name,value)) + +/* Register access macros */ +#define hsmc_readl(port,reg) \ + readl((port)->regs + HSMC_##reg) +#define hsmc_writel(port,reg,value) \ + writel((value), (port)->regs + HSMC_##reg) + +#endif /* __ASM_AVR32_HSMC_H__ */ diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c new file mode 100644 index 000000000000..74f8c9f2f03d --- /dev/null +++ b/arch/avr32/mach-at32ap/intc.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include "intc.h" + +struct intc { + void __iomem *regs; + struct irq_chip chip; +}; + +extern struct platform_device at32_intc0_device; + +/* + * TODO: We may be able to implement mask/unmask by setting IxM flags + * in the status register. + */ +static void intc_mask_irq(unsigned int irq) +{ + +} + +static void intc_unmask_irq(unsigned int irq) +{ + +} + +static struct intc intc0 = { + .chip = { + .name = "intc", + .mask = intc_mask_irq, + .unmask = intc_unmask_irq, + }, +}; + +/* + * All interrupts go via intc at some point. + */ +asmlinkage void do_IRQ(int level, struct pt_regs *regs) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long status_reg; + + local_irq_disable(); + + irq_enter(); + + irq = intc_readl(&intc0, INTCAUSE0 - 4 * level); + desc = irq_desc + irq; + desc->handle_irq(irq, desc, regs); + + /* + * Clear all interrupt level masks so that we may handle + * interrupts during softirq processing. If this is a nested + * interrupt, interrupts must stay globally disabled until we + * return. 
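+	 * Concretely, matching the sysreg accesses below: I0M..I3M are
+	 * the per-level interrupt mask bits in the status register, so
+	 * clearing them re-enables all four priority levels.  Global
+	 * disabling is handled separately by local_irq_disable() above
+	 * and is presumably restored when the exception returns.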
+ */ + status_reg = sysreg_read(SR); + status_reg &= ~(SYSREG_BIT(I0M) | SYSREG_BIT(I1M) + | SYSREG_BIT(I2M) | SYSREG_BIT(I3M)); + sysreg_write(SR, status_reg); + + irq_exit(); +} + +void __init init_IRQ(void) +{ + extern void _evba(void); + extern void irq_level0(void); + struct resource *regs; + struct clk *pclk; + unsigned int i; + u32 offset, readback; + + regs = platform_get_resource(&at32_intc0_device, IORESOURCE_MEM, 0); + if (!regs) { + printk(KERN_EMERG "intc: no mmio resource defined\n"); + goto fail; + } + pclk = clk_get(&at32_intc0_device.dev, "pclk"); + if (IS_ERR(pclk)) { + printk(KERN_EMERG "intc: no clock defined\n"); + goto fail; + } + + clk_enable(pclk); + + intc0.regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!intc0.regs) { + printk(KERN_EMERG "intc: failed to map registers (0x%08lx)\n", + (unsigned long)regs->start); + goto fail; + } + + /* + * Initialize all interrupts to level 0 (lowest priority). The + * priority level may be changed by calling + * irq_set_priority(). + * + */ + offset = (unsigned long)&irq_level0 - (unsigned long)&_evba; + for (i = 0; i < NR_INTERNAL_IRQS; i++) { + intc_writel(&intc0, INTPR0 + 4 * i, offset); + readback = intc_readl(&intc0, INTPR0 + 4 * i); + if (readback == offset) + set_irq_chip_and_handler(i, &intc0.chip, + handle_simple_irq); + } + + /* Unmask all interrupt levels */ + sysreg_write(SR, (sysreg_read(SR) + & ~(SR_I3M | SR_I2M | SR_I1M | SR_I0M))); + + return; + +fail: + panic("Interrupt controller initialization failed!\n"); +} + diff --git a/arch/avr32/mach-at32ap/intc.h b/arch/avr32/mach-at32ap/intc.h new file mode 100644 index 000000000000..d289ca2fff13 --- /dev/null +++ b/arch/avr32/mach-at32ap/intc.h @@ -0,0 +1,327 @@ +/* + * Automatically generated by gen-header.xsl + */ +#ifndef __ASM_AVR32_PERIHP_INTC_H__ +#define __ASM_AVR32_PERIHP_INTC_H__ + +#define INTC_NUM_INT_GRPS 33 + +#define INTC_INTPR0 0x0 +# define INTC_INTPR0_INTLEV_OFFSET 30 +# define INTC_INTPR0_INTLEV_SIZE 2 +# define INTC_INTPR0_OFFSET_OFFSET 0 +# define INTC_INTPR0_OFFSET_SIZE 24 +#define INTC_INTREQ0 0x100 +# define INTC_INTREQ0_IREQUEST0_OFFSET 0 +# define INTC_INTREQ0_IREQUEST0_SIZE 1 +# define INTC_INTREQ0_IREQUEST1_OFFSET 1 +# define INTC_INTREQ0_IREQUEST1_SIZE 1 +#define INTC_INTPR1 0x4 +# define INTC_INTPR1_INTLEV_OFFSET 30 +# define INTC_INTPR1_INTLEV_SIZE 2 +# define INTC_INTPR1_OFFSET_OFFSET 0 +# define INTC_INTPR1_OFFSET_SIZE 24 +#define INTC_INTREQ1 0x104 +# define INTC_INTREQ1_IREQUEST32_OFFSET 0 +# define INTC_INTREQ1_IREQUEST32_SIZE 1 +# define INTC_INTREQ1_IREQUEST33_OFFSET 1 +# define INTC_INTREQ1_IREQUEST33_SIZE 1 +# define INTC_INTREQ1_IREQUEST34_OFFSET 2 +# define INTC_INTREQ1_IREQUEST34_SIZE 1 +# define INTC_INTREQ1_IREQUEST35_OFFSET 3 +# define INTC_INTREQ1_IREQUEST35_SIZE 1 +# define INTC_INTREQ1_IREQUEST36_OFFSET 4 +# define INTC_INTREQ1_IREQUEST36_SIZE 1 +# define INTC_INTREQ1_IREQUEST37_OFFSET 5 +# define INTC_INTREQ1_IREQUEST37_SIZE 1 +#define INTC_INTPR2 0x8 +# define INTC_INTPR2_INTLEV_OFFSET 30 +# define INTC_INTPR2_INTLEV_SIZE 2 +# define INTC_INTPR2_OFFSET_OFFSET 0 +# define INTC_INTPR2_OFFSET_SIZE 24 +#define INTC_INTREQ2 0x108 +# define INTC_INTREQ2_IREQUEST64_OFFSET 0 +# define INTC_INTREQ2_IREQUEST64_SIZE 1 +# define INTC_INTREQ2_IREQUEST65_OFFSET 1 +# define INTC_INTREQ2_IREQUEST65_SIZE 1 +# define INTC_INTREQ2_IREQUEST66_OFFSET 2 +# define INTC_INTREQ2_IREQUEST66_SIZE 1 +# define INTC_INTREQ2_IREQUEST67_OFFSET 3 +# define INTC_INTREQ2_IREQUEST67_SIZE 1 +# define INTC_INTREQ2_IREQUEST68_OFFSET 4 +# 
define INTC_INTREQ2_IREQUEST68_SIZE 1 +#define INTC_INTPR3 0xc +# define INTC_INTPR3_INTLEV_OFFSET 30 +# define INTC_INTPR3_INTLEV_SIZE 2 +# define INTC_INTPR3_OFFSET_OFFSET 0 +# define INTC_INTPR3_OFFSET_SIZE 24 +#define INTC_INTREQ3 0x10c +# define INTC_INTREQ3_IREQUEST96_OFFSET 0 +# define INTC_INTREQ3_IREQUEST96_SIZE 1 +#define INTC_INTPR4 0x10 +# define INTC_INTPR4_INTLEV_OFFSET 30 +# define INTC_INTPR4_INTLEV_SIZE 2 +# define INTC_INTPR4_OFFSET_OFFSET 0 +# define INTC_INTPR4_OFFSET_SIZE 24 +#define INTC_INTREQ4 0x110 +# define INTC_INTREQ4_IREQUEST128_OFFSET 0 +# define INTC_INTREQ4_IREQUEST128_SIZE 1 +#define INTC_INTPR5 0x14 +# define INTC_INTPR5_INTLEV_OFFSET 30 +# define INTC_INTPR5_INTLEV_SIZE 2 +# define INTC_INTPR5_OFFSET_OFFSET 0 +# define INTC_INTPR5_OFFSET_SIZE 24 +#define INTC_INTREQ5 0x114 +# define INTC_INTREQ5_IREQUEST160_OFFSET 0 +# define INTC_INTREQ5_IREQUEST160_SIZE 1 +#define INTC_INTPR6 0x18 +# define INTC_INTPR6_INTLEV_OFFSET 30 +# define INTC_INTPR6_INTLEV_SIZE 2 +# define INTC_INTPR6_OFFSET_OFFSET 0 +# define INTC_INTPR6_OFFSET_SIZE 24 +#define INTC_INTREQ6 0x118 +# define INTC_INTREQ6_IREQUEST192_OFFSET 0 +# define INTC_INTREQ6_IREQUEST192_SIZE 1 +#define INTC_INTPR7 0x1c +# define INTC_INTPR7_INTLEV_OFFSET 30 +# define INTC_INTPR7_INTLEV_SIZE 2 +# define INTC_INTPR7_OFFSET_OFFSET 0 +# define INTC_INTPR7_OFFSET_SIZE 24 +#define INTC_INTREQ7 0x11c +# define INTC_INTREQ7_IREQUEST224_OFFSET 0 +# define INTC_INTREQ7_IREQUEST224_SIZE 1 +#define INTC_INTPR8 0x20 +# define INTC_INTPR8_INTLEV_OFFSET 30 +# define INTC_INTPR8_INTLEV_SIZE 2 +# define INTC_INTPR8_OFFSET_OFFSET 0 +# define INTC_INTPR8_OFFSET_SIZE 24 +#define INTC_INTREQ8 0x120 +# define INTC_INTREQ8_IREQUEST256_OFFSET 0 +# define INTC_INTREQ8_IREQUEST256_SIZE 1 +#define INTC_INTPR9 0x24 +# define INTC_INTPR9_INTLEV_OFFSET 30 +# define INTC_INTPR9_INTLEV_SIZE 2 +# define INTC_INTPR9_OFFSET_OFFSET 0 +# define INTC_INTPR9_OFFSET_SIZE 24 +#define INTC_INTREQ9 0x124 +# define INTC_INTREQ9_IREQUEST288_OFFSET 0 +# define INTC_INTREQ9_IREQUEST288_SIZE 1 +#define INTC_INTPR10 0x28 +# define INTC_INTPR10_INTLEV_OFFSET 30 +# define INTC_INTPR10_INTLEV_SIZE 2 +# define INTC_INTPR10_OFFSET_OFFSET 0 +# define INTC_INTPR10_OFFSET_SIZE 24 +#define INTC_INTREQ10 0x128 +# define INTC_INTREQ10_IREQUEST320_OFFSET 0 +# define INTC_INTREQ10_IREQUEST320_SIZE 1 +#define INTC_INTPR11 0x2c +# define INTC_INTPR11_INTLEV_OFFSET 30 +# define INTC_INTPR11_INTLEV_SIZE 2 +# define INTC_INTPR11_OFFSET_OFFSET 0 +# define INTC_INTPR11_OFFSET_SIZE 24 +#define INTC_INTREQ11 0x12c +# define INTC_INTREQ11_IREQUEST352_OFFSET 0 +# define INTC_INTREQ11_IREQUEST352_SIZE 1 +#define INTC_INTPR12 0x30 +# define INTC_INTPR12_INTLEV_OFFSET 30 +# define INTC_INTPR12_INTLEV_SIZE 2 +# define INTC_INTPR12_OFFSET_OFFSET 0 +# define INTC_INTPR12_OFFSET_SIZE 24 +#define INTC_INTREQ12 0x130 +# define INTC_INTREQ12_IREQUEST384_OFFSET 0 +# define INTC_INTREQ12_IREQUEST384_SIZE 1 +#define INTC_INTPR13 0x34 +# define INTC_INTPR13_INTLEV_OFFSET 30 +# define INTC_INTPR13_INTLEV_SIZE 2 +# define INTC_INTPR13_OFFSET_OFFSET 0 +# define INTC_INTPR13_OFFSET_SIZE 24 +#define INTC_INTREQ13 0x134 +# define INTC_INTREQ13_IREQUEST416_OFFSET 0 +# define INTC_INTREQ13_IREQUEST416_SIZE 1 +#define INTC_INTPR14 0x38 +# define INTC_INTPR14_INTLEV_OFFSET 30 +# define INTC_INTPR14_INTLEV_SIZE 2 +# define INTC_INTPR14_OFFSET_OFFSET 0 +# define INTC_INTPR14_OFFSET_SIZE 24 +#define INTC_INTREQ14 0x138 +# define INTC_INTREQ14_IREQUEST448_OFFSET 0 +# define INTC_INTREQ14_IREQUEST448_SIZE 1 
+#define INTC_INTPR15 0x3c +# define INTC_INTPR15_INTLEV_OFFSET 30 +# define INTC_INTPR15_INTLEV_SIZE 2 +# define INTC_INTPR15_OFFSET_OFFSET 0 +# define INTC_INTPR15_OFFSET_SIZE 24 +#define INTC_INTREQ15 0x13c +# define INTC_INTREQ15_IREQUEST480_OFFSET 0 +# define INTC_INTREQ15_IREQUEST480_SIZE 1 +#define INTC_INTPR16 0x40 +# define INTC_INTPR16_INTLEV_OFFSET 30 +# define INTC_INTPR16_INTLEV_SIZE 2 +# define INTC_INTPR16_OFFSET_OFFSET 0 +# define INTC_INTPR16_OFFSET_SIZE 24 +#define INTC_INTREQ16 0x140 +# define INTC_INTREQ16_IREQUEST512_OFFSET 0 +# define INTC_INTREQ16_IREQUEST512_SIZE 1 +#define INTC_INTPR17 0x44 +# define INTC_INTPR17_INTLEV_OFFSET 30 +# define INTC_INTPR17_INTLEV_SIZE 2 +# define INTC_INTPR17_OFFSET_OFFSET 0 +# define INTC_INTPR17_OFFSET_SIZE 24 +#define INTC_INTREQ17 0x144 +# define INTC_INTREQ17_IREQUEST544_OFFSET 0 +# define INTC_INTREQ17_IREQUEST544_SIZE 1 +#define INTC_INTPR18 0x48 +# define INTC_INTPR18_INTLEV_OFFSET 30 +# define INTC_INTPR18_INTLEV_SIZE 2 +# define INTC_INTPR18_OFFSET_OFFSET 0 +# define INTC_INTPR18_OFFSET_SIZE 24 +#define INTC_INTREQ18 0x148 +# define INTC_INTREQ18_IREQUEST576_OFFSET 0 +# define INTC_INTREQ18_IREQUEST576_SIZE 1 +#define INTC_INTPR19 0x4c +# define INTC_INTPR19_INTLEV_OFFSET 30 +# define INTC_INTPR19_INTLEV_SIZE 2 +# define INTC_INTPR19_OFFSET_OFFSET 0 +# define INTC_INTPR19_OFFSET_SIZE 24 +#define INTC_INTREQ19 0x14c +# define INTC_INTREQ19_IREQUEST608_OFFSET 0 +# define INTC_INTREQ19_IREQUEST608_SIZE 1 +# define INTC_INTREQ19_IREQUEST609_OFFSET 1 +# define INTC_INTREQ19_IREQUEST609_SIZE 1 +# define INTC_INTREQ19_IREQUEST610_OFFSET 2 +# define INTC_INTREQ19_IREQUEST610_SIZE 1 +# define INTC_INTREQ19_IREQUEST611_OFFSET 3 +# define INTC_INTREQ19_IREQUEST611_SIZE 1 +#define INTC_INTPR20 0x50 +# define INTC_INTPR20_INTLEV_OFFSET 30 +# define INTC_INTPR20_INTLEV_SIZE 2 +# define INTC_INTPR20_OFFSET_OFFSET 0 +# define INTC_INTPR20_OFFSET_SIZE 24 +#define INTC_INTREQ20 0x150 +# define INTC_INTREQ20_IREQUEST640_OFFSET 0 +# define INTC_INTREQ20_IREQUEST640_SIZE 1 +#define INTC_INTPR21 0x54 +# define INTC_INTPR21_INTLEV_OFFSET 30 +# define INTC_INTPR21_INTLEV_SIZE 2 +# define INTC_INTPR21_OFFSET_OFFSET 0 +# define INTC_INTPR21_OFFSET_SIZE 24 +#define INTC_INTREQ21 0x154 +# define INTC_INTREQ21_IREQUEST672_OFFSET 0 +# define INTC_INTREQ21_IREQUEST672_SIZE 1 +#define INTC_INTPR22 0x58 +# define INTC_INTPR22_INTLEV_OFFSET 30 +# define INTC_INTPR22_INTLEV_SIZE 2 +# define INTC_INTPR22_OFFSET_OFFSET 0 +# define INTC_INTPR22_OFFSET_SIZE 24 +#define INTC_INTREQ22 0x158 +# define INTC_INTREQ22_IREQUEST704_OFFSET 0 +# define INTC_INTREQ22_IREQUEST704_SIZE 1 +# define INTC_INTREQ22_IREQUEST705_OFFSET 1 +# define INTC_INTREQ22_IREQUEST705_SIZE 1 +# define INTC_INTREQ22_IREQUEST706_OFFSET 2 +# define INTC_INTREQ22_IREQUEST706_SIZE 1 +#define INTC_INTPR23 0x5c +# define INTC_INTPR23_INTLEV_OFFSET 30 +# define INTC_INTPR23_INTLEV_SIZE 2 +# define INTC_INTPR23_OFFSET_OFFSET 0 +# define INTC_INTPR23_OFFSET_SIZE 24 +#define INTC_INTREQ23 0x15c +# define INTC_INTREQ23_IREQUEST736_OFFSET 0 +# define INTC_INTREQ23_IREQUEST736_SIZE 1 +# define INTC_INTREQ23_IREQUEST737_OFFSET 1 +# define INTC_INTREQ23_IREQUEST737_SIZE 1 +# define INTC_INTREQ23_IREQUEST738_OFFSET 2 +# define INTC_INTREQ23_IREQUEST738_SIZE 1 +#define INTC_INTPR24 0x60 +# define INTC_INTPR24_INTLEV_OFFSET 30 +# define INTC_INTPR24_INTLEV_SIZE 2 +# define INTC_INTPR24_OFFSET_OFFSET 0 +# define INTC_INTPR24_OFFSET_SIZE 24 +#define INTC_INTREQ24 0x160 +# define INTC_INTREQ24_IREQUEST768_OFFSET 0 
+# define INTC_INTREQ24_IREQUEST768_SIZE 1 +#define INTC_INTPR25 0x64 +# define INTC_INTPR25_INTLEV_OFFSET 30 +# define INTC_INTPR25_INTLEV_SIZE 2 +# define INTC_INTPR25_OFFSET_OFFSET 0 +# define INTC_INTPR25_OFFSET_SIZE 24 +#define INTC_INTREQ25 0x164 +# define INTC_INTREQ25_IREQUEST800_OFFSET 0 +# define INTC_INTREQ25_IREQUEST800_SIZE 1 +#define INTC_INTPR26 0x68 +# define INTC_INTPR26_INTLEV_OFFSET 30 +# define INTC_INTPR26_INTLEV_SIZE 2 +# define INTC_INTPR26_OFFSET_OFFSET 0 +# define INTC_INTPR26_OFFSET_SIZE 24 +#define INTC_INTREQ26 0x168 +# define INTC_INTREQ26_IREQUEST832_OFFSET 0 +# define INTC_INTREQ26_IREQUEST832_SIZE 1 +#define INTC_INTPR27 0x6c +# define INTC_INTPR27_INTLEV_OFFSET 30 +# define INTC_INTPR27_INTLEV_SIZE 2 +# define INTC_INTPR27_OFFSET_OFFSET 0 +# define INTC_INTPR27_OFFSET_SIZE 24 +#define INTC_INTREQ27 0x16c +# define INTC_INTREQ27_IREQUEST864_OFFSET 0 +# define INTC_INTREQ27_IREQUEST864_SIZE 1 +#define INTC_INTPR28 0x70 +# define INTC_INTPR28_INTLEV_OFFSET 30 +# define INTC_INTPR28_INTLEV_SIZE 2 +# define INTC_INTPR28_OFFSET_OFFSET 0 +# define INTC_INTPR28_OFFSET_SIZE 24 +#define INTC_INTREQ28 0x170 +# define INTC_INTREQ28_IREQUEST896_OFFSET 0 +# define INTC_INTREQ28_IREQUEST896_SIZE 1 +#define INTC_INTPR29 0x74 +# define INTC_INTPR29_INTLEV_OFFSET 30 +# define INTC_INTPR29_INTLEV_SIZE 2 +# define INTC_INTPR29_OFFSET_OFFSET 0 +# define INTC_INTPR29_OFFSET_SIZE 24 +#define INTC_INTREQ29 0x174 +# define INTC_INTREQ29_IREQUEST928_OFFSET 0 +# define INTC_INTREQ29_IREQUEST928_SIZE 1 +#define INTC_INTPR30 0x78 +# define INTC_INTPR30_INTLEV_OFFSET 30 +# define INTC_INTPR30_INTLEV_SIZE 2 +# define INTC_INTPR30_OFFSET_OFFSET 0 +# define INTC_INTPR30_OFFSET_SIZE 24 +#define INTC_INTREQ30 0x178 +# define INTC_INTREQ30_IREQUEST960_OFFSET 0 +# define INTC_INTREQ30_IREQUEST960_SIZE 1 +#define INTC_INTPR31 0x7c +# define INTC_INTPR31_INTLEV_OFFSET 30 +# define INTC_INTPR31_INTLEV_SIZE 2 +# define INTC_INTPR31_OFFSET_OFFSET 0 +# define INTC_INTPR31_OFFSET_SIZE 24 +#define INTC_INTREQ31 0x17c +# define INTC_INTREQ31_IREQUEST992_OFFSET 0 +# define INTC_INTREQ31_IREQUEST992_SIZE 1 +#define INTC_INTPR32 0x80 +# define INTC_INTPR32_INTLEV_OFFSET 30 +# define INTC_INTPR32_INTLEV_SIZE 2 +# define INTC_INTPR32_OFFSET_OFFSET 0 +# define INTC_INTPR32_OFFSET_SIZE 24 +#define INTC_INTREQ32 0x180 +# define INTC_INTREQ32_IREQUEST1024_OFFSET 0 +# define INTC_INTREQ32_IREQUEST1024_SIZE 1 +#define INTC_INTCAUSE0 0x20c +# define INTC_INTCAUSE0_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE0_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE1 0x208 +# define INTC_INTCAUSE1_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE1_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE2 0x204 +# define INTC_INTCAUSE2_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE2_CAUSEGRP_SIZE 6 +#define INTC_INTCAUSE3 0x200 +# define INTC_INTCAUSE3_CAUSEGRP_OFFSET 0 +# define INTC_INTCAUSE3_CAUSEGRP_SIZE 6 + +#define INTC_BIT(name) (1 << INTC_##name##_OFFSET) +#define INTC_MKBF(name, value) (((value) & ((1 << INTC_##name##_SIZE) - 1)) << INTC_##name##_OFFSET) +#define INTC_GETBF(name, value) (((value) >> INTC_##name##_OFFSET) & ((1 << INTC_##name##_SIZE) - 1)) + +#define intc_readl(port,reg) readl((port)->regs + INTC_##reg) +#define intc_writel(port,reg,value) writel((value), (port)->regs + INTC_##reg) + +#endif /* __ASM_AVR32_PERIHP_INTC_H__ */ diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c new file mode 100644 index 000000000000..d3aabfca8598 --- /dev/null +++ b/arch/avr32/mach-at32ap/pio.c @@ -0,0 +1,118 @@ +/* + * Atmel PIO2 Port 
Multiplexer support + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/platform_device.h> + +#include <asm/io.h> + +#include <asm/arch/portmux.h> + +#include "pio.h" + +#define MAX_NR_PIO_DEVICES 8 + +struct pio_device { + void __iomem *regs; + const struct platform_device *pdev; + struct clk *clk; + u32 alloc_mask; + char name[32]; +}; + +static struct pio_device pio_dev[MAX_NR_PIO_DEVICES]; + +void portmux_set_func(unsigned int portmux_id, unsigned int pin_id, + unsigned int function_id) +{ + struct pio_device *pio; + u32 mask = 1 << pin_id; + + BUG_ON(portmux_id >= MAX_NR_PIO_DEVICES); + + pio = &pio_dev[portmux_id]; + + if (function_id) + pio_writel(pio, BSR, mask); + else + pio_writel(pio, ASR, mask); + pio_writel(pio, PDR, mask); +} + +static int __init pio_probe(struct platform_device *pdev) +{ + struct pio_device *pio = NULL; + + BUG_ON(pdev->id >= MAX_NR_PIO_DEVICES); + pio = &pio_dev[pdev->id]; + BUG_ON(!pio->regs); + + /* TODO: Interrupts */ + + platform_set_drvdata(pdev, pio); + + printk(KERN_INFO "%s: Atmel Port Multiplexer at 0x%p (irq %d)\n", + pio->name, pio->regs, platform_get_irq(pdev, 0)); + + return 0; +} + +static struct platform_driver pio_driver = { + .probe = pio_probe, + .driver = { + .name = "pio", + }, +}; + +static int __init pio_init(void) +{ + return platform_driver_register(&pio_driver); +} +subsys_initcall(pio_init); + +void __init at32_init_pio(struct platform_device *pdev) +{ + struct resource *regs; + struct pio_device *pio; + + if (pdev->id >= MAX_NR_PIO_DEVICES) { + dev_err(&pdev->dev, "only %d PIO devices supported\n", + MAX_NR_PIO_DEVICES); + return; + } + + pio = &pio_dev[pdev->id]; + snprintf(pio->name, sizeof(pio->name), "pio%d", pdev->id); + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) { + dev_err(&pdev->dev, "no mmio resource defined\n"); + return; + } + + pio->clk = clk_get(&pdev->dev, "mck"); + if (IS_ERR(pio->clk)) + /* + * This is a fatal error, but if we continue we might + * be so lucky that we manage to initialize the + * console and display this message... + */ + dev_err(&pdev->dev, "no mck clock defined\n"); + else + clk_enable(pio->clk); + + pio->pdev = pdev; + pio->regs = ioremap(regs->start, regs->end - regs->start + 1); + + pio_writel(pio, ODR, ~0UL); + pio_writel(pio, PER, ~0UL); +} diff --git a/arch/avr32/mach-at32ap/pio.h b/arch/avr32/mach-at32ap/pio.h new file mode 100644 index 000000000000..cfea12351599 --- /dev/null +++ b/arch/avr32/mach-at32ap/pio.h @@ -0,0 +1,178 @@ +/* + * Atmel PIO2 Port Multiplexer support + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#ifndef __ARCH_AVR32_AT32AP_PIO_H__ +#define __ARCH_AVR32_AT32AP_PIO_H__ + +/* PIO register offsets */ +#define PIO_PER 0x0000 +#define PIO_PDR 0x0004 +#define PIO_PSR 0x0008 +#define PIO_OER 0x0010 +#define PIO_ODR 0x0014 +#define PIO_OSR 0x0018 +#define PIO_IFER 0x0020 +#define PIO_IFDR 0x0024 +#define PIO_ISFR 0x0028 +#define PIO_SODR 0x0030 +#define PIO_CODR 0x0034 +#define PIO_ODSR 0x0038 +#define PIO_PDSR 0x003c +#define PIO_IER 0x0040 +#define PIO_IDR 0x0044 +#define PIO_IMR 0x0048 +#define PIO_ISR 0x004c +#define PIO_MDER 0x0050 +#define PIO_MDDR 0x0054 +#define PIO_MDSR 0x0058 +#define PIO_PUDR 0x0060 +#define PIO_PUER 0x0064 +#define PIO_PUSR 0x0068 +#define PIO_ASR 0x0070 +#define PIO_BSR 0x0074 +#define PIO_ABSR 0x0078 +#define PIO_OWER 0x00a0 +#define PIO_OWDR 0x00a4 +#define PIO_OWSR 0x00a8 + +/* Bitfields in PER */ + +/* Bitfields in PDR */ + +/* Bitfields in PSR */ + +/* Bitfields in OER */ + +/* Bitfields in ODR */ + +/* Bitfields in OSR */ + +/* Bitfields in IFER */ + +/* Bitfields in IFDR */ + +/* Bitfields in ISFR */ + +/* Bitfields in SODR */ + +/* Bitfields in CODR */ + +/* Bitfields in ODSR */ + +/* Bitfields in PDSR */ + +/* Bitfields in IER */ + +/* Bitfields in IDR */ + +/* Bitfields in IMR */ + +/* Bitfields in ISR */ + +/* Bitfields in MDER */ + +/* Bitfields in MDDR */ + +/* Bitfields in MDSR */ + +/* Bitfields in PUDR */ + +/* Bitfields in PUER */ + +/* Bitfields in PUSR */ + +/* Bitfields in ASR */ + +/* Bitfields in BSR */ + +/* Bitfields in ABSR */ +#define PIO_P0_OFFSET 0 +#define PIO_P0_SIZE 1 +#define PIO_P1_OFFSET 1 +#define PIO_P1_SIZE 1 +#define PIO_P2_OFFSET 2 +#define PIO_P2_SIZE 1 +#define PIO_P3_OFFSET 3 +#define PIO_P3_SIZE 1 +#define PIO_P4_OFFSET 4 +#define PIO_P4_SIZE 1 +#define PIO_P5_OFFSET 5 +#define PIO_P5_SIZE 1 +#define PIO_P6_OFFSET 6 +#define PIO_P6_SIZE 1 +#define PIO_P7_OFFSET 7 +#define PIO_P7_SIZE 1 +#define PIO_P8_OFFSET 8 +#define PIO_P8_SIZE 1 +#define PIO_P9_OFFSET 9 +#define PIO_P9_SIZE 1 +#define PIO_P10_OFFSET 10 +#define PIO_P10_SIZE 1 +#define PIO_P11_OFFSET 11 +#define PIO_P11_SIZE 1 +#define PIO_P12_OFFSET 12 +#define PIO_P12_SIZE 1 +#define PIO_P13_OFFSET 13 +#define PIO_P13_SIZE 1 +#define PIO_P14_OFFSET 14 +#define PIO_P14_SIZE 1 +#define PIO_P15_OFFSET 15 +#define PIO_P15_SIZE 1 +#define PIO_P16_OFFSET 16 +#define PIO_P16_SIZE 1 +#define PIO_P17_OFFSET 17 +#define PIO_P17_SIZE 1 +#define PIO_P18_OFFSET 18 +#define PIO_P18_SIZE 1 +#define PIO_P19_OFFSET 19 +#define PIO_P19_SIZE 1 +#define PIO_P20_OFFSET 20 +#define PIO_P20_SIZE 1 +#define PIO_P21_OFFSET 21 +#define PIO_P21_SIZE 1 +#define PIO_P22_OFFSET 22 +#define PIO_P22_SIZE 1 +#define PIO_P23_OFFSET 23 +#define PIO_P23_SIZE 1 +#define PIO_P24_OFFSET 24 +#define PIO_P24_SIZE 1 +#define PIO_P25_OFFSET 25 +#define PIO_P25_SIZE 1 +#define PIO_P26_OFFSET 26 +#define PIO_P26_SIZE 1 +#define PIO_P27_OFFSET 27 +#define PIO_P27_SIZE 1 +#define PIO_P28_OFFSET 28 +#define PIO_P28_SIZE 1 +#define PIO_P29_OFFSET 29 +#define PIO_P29_SIZE 1 +#define PIO_P30_OFFSET 30 +#define PIO_P30_SIZE 1 +#define PIO_P31_OFFSET 31 +#define PIO_P31_SIZE 1 + +/* Bitfields in OWER */ + +/* Bitfields in OWDR */ + +/* Bitfields in OWSR */ + +/* Bit manipulation macros */ +#define PIO_BIT(name) (1 << PIO_##name##_OFFSET) +#define PIO_BF(name,value) (((value) & ((1 << PIO_##name##_SIZE) - 1)) << PIO_##name##_OFFSET) +#define PIO_BFEXT(name,value) (((value) >> PIO_##name##_OFFSET) & ((1 << PIO_##name##_SIZE) - 1)) +#define PIO_BFINS(name,value,old) (((old) & ~(((1 << PIO_##name##_SIZE) - 1) << 
PIO_##name##_OFFSET)) | PIO_BF(name,value)) + +/* Register access macros */ +#define pio_readl(port,reg) readl((port)->regs + PIO_##reg) +#define pio_writel(port,reg,value) writel((value), (port)->regs + PIO_##reg) + +void at32_init_pio(struct platform_device *pdev); + +#endif /* __ARCH_AVR32_AT32AP_PIO_H__ */ diff --git a/arch/avr32/mach-at32ap/sm.c b/arch/avr32/mach-at32ap/sm.c new file mode 100644 index 000000000000..03306eb0345e --- /dev/null +++ b/arch/avr32/mach-at32ap/sm.c @@ -0,0 +1,289 @@ +/* + * System Manager driver for AT32AP CPUs + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/random.h> +#include <linux/spinlock.h> + +#include <asm/intc.h> +#include <asm/io.h> +#include <asm/irq.h> + +#include <asm/arch/sm.h> + +#include "sm.h" + +#define SM_EIM_IRQ_RESOURCE 1 +#define SM_PM_IRQ_RESOURCE 2 +#define SM_RTC_IRQ_RESOURCE 3 + +#define to_eim(irqc) container_of(irqc, struct at32_sm, irqc) + +struct at32_sm system_manager; + +int __init at32_sm_init(void) +{ + struct resource *regs; + struct at32_sm *sm = &system_manager; + int ret = -ENXIO; + + regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); + if (!regs) + goto fail; + + spin_lock_init(&sm->lock); + sm->pdev = &at32_sm_device; + + ret = -ENOMEM; + sm->regs = ioremap(regs->start, regs->end - regs->start + 1); + if (!sm->regs) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); + return ret; +} + +/* + * External Interrupt Module (EIM). + * + * EIM gets level- or edge-triggered interrupts of either polarity + * from the outside and converts it to active-high level-triggered + * interrupts that the internal interrupt controller can handle. EIM + * also provides masking/unmasking of interrupts, as well as + * acknowledging of edge-triggered interrupts. + */ + +static irqreturn_t spurious_eim_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + printk(KERN_WARNING "Spurious EIM interrupt %d\n", irq); + disable_irq(irq); + return IRQ_NONE; +} + +static struct irqaction eim_spurious_action = { + .handler = spurious_eim_interrupt, +}; + +static irqreturn_t eim_handle_irq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct irq_controller * irqc = dev_id; + struct at32_sm *sm = to_eim(irqc); + unsigned long pending; + + /* + * No need to disable interrupts globally. The interrupt + * level relevant to this group must be masked all the time, + * so we know that this particular EIM instance will not be + * re-entered. 
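+ * + * For example, with pending == 0x05 the dispatch loop below computes + * i = fls(0x05) - 1 == 2, runs the handler for first_irq + 2, clears + * bit 2 from the local copy and then services first_irq + 0 on the + * next iteration, so the highest-numbered pending line always wins.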
+ */ + spin_lock(&sm->lock); + + pending = intc_get_pending(sm->irqc.irq_group); + if (unlikely(!pending)) { + printk(KERN_ERR "EIM (group %u): No interrupts pending!\n", + sm->irqc.irq_group); + goto unlock; + } + + do { + struct irqaction *action; + unsigned int i; + + i = fls(pending) - 1; + pending &= ~(1 << i); + action = sm->action[i]; + + /* Acknowledge the interrupt */ + sm_writel(sm, EIM_ICR, 1 << i); + + spin_unlock(&sm->lock); + + if (action->flags & SA_INTERRUPT) + local_irq_disable(); + action->handler(sm->irqc.first_irq + i, action->dev_id, regs); + local_irq_enable(); + spin_lock(&sm->lock); + if (action->flags & SA_SAMPLE_RANDOM) + add_interrupt_randomness(sm->irqc.first_irq + i); + } while (pending); + +unlock: + spin_unlock(&sm->lock); + return IRQ_HANDLED; +} + +static void eim_mask(struct irq_controller *irqc, unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned int i; + + i = irq - sm->irqc.first_irq; + sm_writel(sm, EIM_IDR, 1 << i); +} + +static void eim_unmask(struct irq_controller *irqc, unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned int i; + + i = irq - sm->irqc.first_irq; + sm_writel(sm, EIM_IER, 1 << i); +} + +static int eim_setup(struct irq_controller *irqc, unsigned int irq, + struct irqaction *action) +{ + struct at32_sm *sm = to_eim(irqc); + sm->action[irq - sm->irqc.first_irq] = action; + /* Acknowledge earlier interrupts */ + sm_writel(sm, EIM_ICR, (1<<(irq - sm->irqc.first_irq))); + eim_unmask(irqc, irq); + return 0; +} + +static void eim_free(struct irq_controller *irqc, unsigned int irq, + void *dev) +{ + struct at32_sm *sm = to_eim(irqc); + eim_mask(irqc, irq); + sm->action[irq - sm->irqc.first_irq] = &eim_spurious_action; +} + +static int eim_set_type(struct irq_controller *irqc, unsigned int irq, + unsigned int type) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned long flags; + u32 value, pattern; + + spin_lock_irqsave(&sm->lock, flags); + + pattern = 1 << (irq - sm->irqc.first_irq); + + value = sm_readl(sm, EIM_MODE); + if (type & IRQ_TYPE_LEVEL) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_MODE, value); + value = sm_readl(sm, EIM_EDGE); + if (type & IRQ_EDGE_RISING) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_EDGE, value); + value = sm_readl(sm, EIM_LEVEL); + if (type & IRQ_LEVEL_HIGH) + value |= pattern; + else + value &= ~pattern; + sm_writel(sm, EIM_LEVEL, value); + + spin_unlock_irqrestore(&sm->lock, flags); + + return 0; +} + +static unsigned int eim_get_type(struct irq_controller *irqc, + unsigned int irq) +{ + struct at32_sm *sm = to_eim(irqc); + unsigned long flags; + unsigned int type = 0; + u32 mode, edge, level, pattern; + + pattern = 1 << (irq - sm->irqc.first_irq); + + spin_lock_irqsave(&sm->lock, flags); + mode = sm_readl(sm, EIM_MODE); + edge = sm_readl(sm, EIM_EDGE); + level = sm_readl(sm, EIM_LEVEL); + spin_unlock_irqrestore(&sm->lock, flags); + + if (mode & pattern) + type |= IRQ_TYPE_LEVEL; + if (edge & pattern) + type |= IRQ_EDGE_RISING; + if (level & pattern) + type |= IRQ_LEVEL_HIGH; + + return type; +} + +static struct irq_controller_class eim_irq_class = { + .typename = "EIM", + .handle = eim_handle_irq, + .setup = eim_setup, + .free = eim_free, + .mask = eim_mask, + .unmask = eim_unmask, + .set_type = eim_set_type, + .get_type = eim_get_type, +}; + +static int __init eim_init(void) +{ + struct at32_sm *sm = &system_manager; + unsigned int i; + u32 pattern; + int ret; + + /* + * The EIM is really the same module as SM, so register + * 
mapping, etc. has been taken care of already. + */ + + /* + * Find out how many interrupt lines that are actually + * implemented in hardware. + */ + sm_writel(sm, EIM_IDR, ~0UL); + sm_writel(sm, EIM_MODE, ~0UL); + pattern = sm_readl(sm, EIM_MODE); + sm->irqc.nr_irqs = fls(pattern); + + ret = -ENOMEM; + sm->action = kmalloc(sizeof(*sm->action) * sm->irqc.nr_irqs, + GFP_KERNEL); + if (!sm->action) + goto out; + + for (i = 0; i < sm->irqc.nr_irqs; i++) + sm->action[i] = &eim_spurious_action; + + spin_lock_init(&sm->lock); + sm->irqc.irq_group = sm->pdev->resource[SM_EIM_IRQ_RESOURCE].start; + sm->irqc.class = &eim_irq_class; + + ret = intc_register_controller(&sm->irqc); + if (ret < 0) + goto out_free_actions; + + printk("EIM: External Interrupt Module at 0x%p, IRQ group %u\n", + sm->regs, sm->irqc.irq_group); + printk("EIM: Handling %u external IRQs, starting with IRQ%u\n", + sm->irqc.nr_irqs, sm->irqc.first_irq); + + return 0; + +out_free_actions: + kfree(sm->action); +out: + return ret; +} +arch_initcall(eim_init); diff --git a/arch/avr32/mach-at32ap/sm.h b/arch/avr32/mach-at32ap/sm.h new file mode 100644 index 000000000000..27565822ae2a --- /dev/null +++ b/arch/avr32/mach-at32ap/sm.h @@ -0,0 +1,240 @@ +/* + * Register definitions for SM + * + * System Manager + */ +#ifndef __ASM_AVR32_SM_H__ +#define __ASM_AVR32_SM_H__ + +/* SM register offsets */ +#define SM_PM_MCCTRL 0x0000 +#define SM_PM_CKSEL 0x0004 +#define SM_PM_CPU_MASK 0x0008 +#define SM_PM_HSB_MASK 0x000c +#define SM_PM_PBA_MASK 0x0010 +#define SM_PM_PBB_MASK 0x0014 +#define SM_PM_PLL0 0x0020 +#define SM_PM_PLL1 0x0024 +#define SM_PM_VCTRL 0x0030 +#define SM_PM_VMREF 0x0034 +#define SM_PM_VMV 0x0038 +#define SM_PM_IER 0x0040 +#define SM_PM_IDR 0x0044 +#define SM_PM_IMR 0x0048 +#define SM_PM_ISR 0x004c +#define SM_PM_ICR 0x0050 +#define SM_PM_GCCTRL 0x0060 +#define SM_RTC_CTRL 0x0080 +#define SM_RTC_VAL 0x0084 +#define SM_RTC_TOP 0x0088 +#define SM_RTC_IER 0x0090 +#define SM_RTC_IDR 0x0094 +#define SM_RTC_IMR 0x0098 +#define SM_RTC_ISR 0x009c +#define SM_RTC_ICR 0x00a0 +#define SM_WDT_CTRL 0x00b0 +#define SM_WDT_CLR 0x00b4 +#define SM_WDT_EXT 0x00b8 +#define SM_RC_RCAUSE 0x00c0 +#define SM_EIM_IER 0x0100 +#define SM_EIM_IDR 0x0104 +#define SM_EIM_IMR 0x0108 +#define SM_EIM_ISR 0x010c +#define SM_EIM_ICR 0x0110 +#define SM_EIM_MODE 0x0114 +#define SM_EIM_EDGE 0x0118 +#define SM_EIM_LEVEL 0x011c +#define SM_EIM_TEST 0x0120 +#define SM_EIM_NMIC 0x0124 + +/* Bitfields in PM_MCCTRL */ + +/* Bitfields in PM_CKSEL */ +#define SM_CPUSEL_OFFSET 0 +#define SM_CPUSEL_SIZE 3 +#define SM_CPUDIV_OFFSET 7 +#define SM_CPUDIV_SIZE 1 +#define SM_HSBSEL_OFFSET 8 +#define SM_HSBSEL_SIZE 3 +#define SM_HSBDIV_OFFSET 15 +#define SM_HSBDIV_SIZE 1 +#define SM_PBASEL_OFFSET 16 +#define SM_PBASEL_SIZE 3 +#define SM_PBADIV_OFFSET 23 +#define SM_PBADIV_SIZE 1 +#define SM_PBBSEL_OFFSET 24 +#define SM_PBBSEL_SIZE 3 +#define SM_PBBDIV_OFFSET 31 +#define SM_PBBDIV_SIZE 1 + +/* Bitfields in PM_CPU_MASK */ + +/* Bitfields in PM_HSB_MASK */ + +/* Bitfields in PM_PBA_MASK */ + +/* Bitfields in PM_PBB_MASK */ + +/* Bitfields in PM_PLL0 */ +#define SM_PLLEN_OFFSET 0 +#define SM_PLLEN_SIZE 1 +#define SM_PLLOSC_OFFSET 1 +#define SM_PLLOSC_SIZE 1 +#define SM_PLLOPT_OFFSET 2 +#define SM_PLLOPT_SIZE 3 +#define SM_PLLDIV_OFFSET 8 +#define SM_PLLDIV_SIZE 8 +#define SM_PLLMUL_OFFSET 16 +#define SM_PLLMUL_SIZE 8 +#define SM_PLLCOUNT_OFFSET 24 +#define SM_PLLCOUNT_SIZE 6 +#define SM_PLLTEST_OFFSET 31 +#define SM_PLLTEST_SIZE 1 + +/* Bitfields in PM_PLL1 */ + +/* Bitfields in 
PM_VCTRL */ +#define SM_VAUTO_OFFSET 0 +#define SM_VAUTO_SIZE 1 +#define SM_PM_VCTRL_VAL_OFFSET 8 +#define SM_PM_VCTRL_VAL_SIZE 7 + +/* Bitfields in PM_VMREF */ +#define SM_REFSEL_OFFSET 0 +#define SM_REFSEL_SIZE 4 + +/* Bitfields in PM_VMV */ +#define SM_PM_VMV_VAL_OFFSET 0 +#define SM_PM_VMV_VAL_SIZE 8 + +/* Bitfields in PM_IER */ + +/* Bitfields in PM_IDR */ + +/* Bitfields in PM_IMR */ + +/* Bitfields in PM_ISR */ + +/* Bitfields in PM_ICR */ +#define SM_LOCK0_OFFSET 0 +#define SM_LOCK0_SIZE 1 +#define SM_LOCK1_OFFSET 1 +#define SM_LOCK1_SIZE 1 +#define SM_WAKE_OFFSET 2 +#define SM_WAKE_SIZE 1 +#define SM_VOK_OFFSET 3 +#define SM_VOK_SIZE 1 +#define SM_VMRDY_OFFSET 4 +#define SM_VMRDY_SIZE 1 +#define SM_CKRDY_OFFSET 5 +#define SM_CKRDY_SIZE 1 + +/* Bitfields in PM_GCCTRL */ +#define SM_OSCSEL_OFFSET 0 +#define SM_OSCSEL_SIZE 1 +#define SM_PLLSEL_OFFSET 1 +#define SM_PLLSEL_SIZE 1 +#define SM_CEN_OFFSET 2 +#define SM_CEN_SIZE 1 +#define SM_CPC_OFFSET 3 +#define SM_CPC_SIZE 1 +#define SM_DIVEN_OFFSET 4 +#define SM_DIVEN_SIZE 1 +#define SM_DIV_OFFSET 8 +#define SM_DIV_SIZE 8 + +/* Bitfields in RTC_CTRL */ +#define SM_PCLR_OFFSET 1 +#define SM_PCLR_SIZE 1 +#define SM_TOPEN_OFFSET 2 +#define SM_TOPEN_SIZE 1 +#define SM_CLKEN_OFFSET 3 +#define SM_CLKEN_SIZE 1 +#define SM_PSEL_OFFSET 8 +#define SM_PSEL_SIZE 16 + +/* Bitfields in RTC_VAL */ +#define SM_RTC_VAL_VAL_OFFSET 0 +#define SM_RTC_VAL_VAL_SIZE 31 + +/* Bitfields in RTC_TOP */ +#define SM_RTC_TOP_VAL_OFFSET 0 +#define SM_RTC_TOP_VAL_SIZE 32 + +/* Bitfields in RTC_IER */ + +/* Bitfields in RTC_IDR */ + +/* Bitfields in RTC_IMR */ + +/* Bitfields in RTC_ISR */ + +/* Bitfields in RTC_ICR */ +#define SM_TOPI_OFFSET 0 +#define SM_TOPI_SIZE 1 + +/* Bitfields in WDT_CTRL */ +#define SM_KEY_OFFSET 24 +#define SM_KEY_SIZE 8 + +/* Bitfields in WDT_CLR */ + +/* Bitfields in WDT_EXT */ + +/* Bitfields in RC_RCAUSE */ +#define SM_POR_OFFSET 0 +#define SM_POR_SIZE 1 +#define SM_BOD_OFFSET 1 +#define SM_BOD_SIZE 1 +#define SM_EXT_OFFSET 2 +#define SM_EXT_SIZE 1 +#define SM_WDT_OFFSET 3 +#define SM_WDT_SIZE 1 +#define SM_NTAE_OFFSET 4 +#define SM_NTAE_SIZE 1 +#define SM_SERP_OFFSET 5 +#define SM_SERP_SIZE 1 + +/* Bitfields in EIM_IER */ + +/* Bitfields in EIM_IDR */ + +/* Bitfields in EIM_IMR */ + +/* Bitfields in EIM_ISR */ + +/* Bitfields in EIM_ICR */ + +/* Bitfields in EIM_MODE */ + +/* Bitfields in EIM_EDGE */ +#define SM_INT0_OFFSET 0 +#define SM_INT0_SIZE 1 +#define SM_INT1_OFFSET 1 +#define SM_INT1_SIZE 1 +#define SM_INT2_OFFSET 2 +#define SM_INT2_SIZE 1 +#define SM_INT3_OFFSET 3 +#define SM_INT3_SIZE 1 + +/* Bitfields in EIM_LEVEL */ + +/* Bitfields in EIM_TEST */ +#define SM_TESTEN_OFFSET 31 +#define SM_TESTEN_SIZE 1 + +/* Bitfields in EIM_NMIC */ +#define SM_EN_OFFSET 0 +#define SM_EN_SIZE 1 + +/* Bit manipulation macros */ +#define SM_BIT(name) (1 << SM_##name##_OFFSET) +#define SM_BF(name,value) (((value) & ((1 << SM_##name##_SIZE) - 1)) << SM_##name##_OFFSET) +#define SM_BFEXT(name,value) (((value) >> SM_##name##_OFFSET) & ((1 << SM_##name##_SIZE) - 1)) +#define SM_BFINS(name,value,old) (((old) & ~(((1 << SM_##name##_SIZE) - 1) << SM_##name##_OFFSET)) | SM_BF(name,value)) + +/* Register access macros */ +#define sm_readl(port,reg) readl((port)->regs + SM_##reg) +#define sm_writel(port,reg,value) writel((value), (port)->regs + SM_##reg) + +#endif /* __ASM_AVR32_SM_H__ */ diff --git a/arch/avr32/mm/Makefile b/arch/avr32/mm/Makefile new file mode 100644 index 000000000000..0066491f90d4 --- /dev/null +++ b/arch/avr32/mm/Makefile @@ -0,0 +1,6 
@@ +# +# Makefile for the Linux/AVR32 kernel. +# + +obj-y += init.o clear_page.o copy_page.o dma-coherent.o +obj-y += ioremap.o cache.o fault.o tlb.o diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c new file mode 100644 index 000000000000..450515b245a0 --- /dev/null +++ b/arch/avr32/mm/cache.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/highmem.h> +#include <linux/unistd.h> + +#include <asm/cacheflush.h> +#include <asm/cachectl.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +/* + * If you attempt to flush anything more than this, you need superuser + * privileges. The value is completely arbitrary. + */ +#define CACHEFLUSH_MAX_LEN 1024 + +void invalidate_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + + /* You asked for it, you got it */ + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + invalidate_dcache_line((void *)v); +} + +void clean_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + clean_dcache_line((void *)v); + flush_write_buffer(); +} + +void flush_dcache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.dcache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + flush_dcache_line((void *)v); + flush_write_buffer(); +} + +void invalidate_icache_region(void *start, size_t size) +{ + unsigned long v, begin, end, linesz; + + linesz = boot_cpu_data.icache.linesz; + begin = (unsigned long)start & ~(linesz - 1); + end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1); + + for (v = begin; v < end; v += linesz) + invalidate_icache_line((void *)v); +} + +static inline void __flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long v, linesz; + + linesz = boot_cpu_data.dcache.linesz; + for (v = start; v < end; v += linesz) { + clean_dcache_line((void *)v); + invalidate_icache_line((void *)v); + } + + flush_write_buffer(); +} + +/* + * This one is called after a module has been loaded. + */ +void flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long linesz; + + linesz = boot_cpu_data.dcache.linesz; + __flush_icache_range(start & ~(linesz - 1), + (end + linesz - 1) & ~(linesz - 1)); +} + +/* + * This one is called from do_no_page(), do_swap_page() and install_page().
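+ * + * Only executable mappings need the I-cache brought back in sync with + * the D-cache; kmap() is used purely to obtain a kernel virtual + * address for the page so that __flush_icache_range() can walk its + * cache lines.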
+ */ +void flush_icache_page(struct vm_area_struct *vma, struct page *page) +{ + if (vma->vm_flags & VM_EXEC) { + void *v = kmap(page); + __flush_icache_range((unsigned long)v, (unsigned long)v + PAGE_SIZE); + kunmap(page); + } +} + +/* + * This one is used by copy_to_user_page() + */ +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + if (vma->vm_flags & VM_EXEC) + flush_icache_range(addr, addr + len); +} + +asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len) +{ + int ret; + + if (len > CACHEFLUSH_MAX_LEN) { + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + } + + ret = -EFAULT; + if (!access_ok(VERIFY_WRITE, addr, len)) + goto out; + + switch (operation) { + case CACHE_IFLUSH: + flush_icache_range((unsigned long)addr, + (unsigned long)addr + len); + ret = 0; + break; + default: + ret = -EINVAL; + } + +out: + return ret; +} diff --git a/arch/avr32/mm/clear_page.S b/arch/avr32/mm/clear_page.S new file mode 100644 index 000000000000..5d70dca00699 --- /dev/null +++ b/arch/avr32/mm/clear_page.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * clear_page + * r12: P1 address (to) + */ + .text + .global clear_page +clear_page: + sub r9, r12, -PAGE_SIZE + mov r10, 0 + mov r11, 0 +0: st.d r12++, r10 + cp r12, r9 + brne 0b + mov pc, lr diff --git a/arch/avr32/mm/copy_page.S b/arch/avr32/mm/copy_page.S new file mode 100644 index 000000000000..c2b3752946b8 --- /dev/null +++ b/arch/avr32/mm/copy_page.S @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * copy_page + * + * r12 to (P1 address) + * r11 from (P1 address) + * r8-r10 scratch + */ + .text + .global copy_page +copy_page: + sub r10, r11, -(1 << PAGE_SHIFT) + /* pref r11[0] */ +1: /* pref r11[8] */ + ld.d r8, r11++ + st.d r12++, r8 + cp r11, r10 + brlo 1b + mov pc, lr diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c new file mode 100644 index 000000000000..44ab8a7bdae2 --- /dev/null +++ b/arch/avr32/mm/dma-coherent.c @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#include <linux/dma-mapping.h> + +#include <asm/addrspace.h> +#include <asm/cacheflush.h> + +void dma_cache_sync(void *vaddr, size_t size, int direction) +{ + /* + * No need to sync an uncached area + */ + if (PXSEG(vaddr) == P2SEG) + return; + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + dma_cache_inv(vaddr, size); + break; + case DMA_TO_DEVICE: /* writeback only */ + dma_cache_wback(vaddr, size); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + dma_cache_wback_inv(vaddr, size); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL(dma_cache_sync); + +static struct page *__dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page, *free, *end; + int order; + + size = PAGE_ALIGN(size); + order = get_order(size); + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + split_page(page, order); + + /* + * When accessing physical memory with valid cache data, we + * get a cache hit even if the virtual memory region is marked + * as uncached. + * + * Since the memory is newly allocated, there is no point in + * doing a writeback. If the previous owner cares, he should + * have flushed the cache before releasing the memory. + */ + invalidate_dcache_region(phys_to_virt(page_to_phys(page)), size); + + *handle = page_to_bus(page); + free = page + (size >> PAGE_SHIFT); + end = page + (1 << order); + + /* + * Free any unused pages + */ + while (free < end) { + __free_page(free); + free++; + } + + return page; +} + +static void __dma_free(struct device *dev, size_t size, + struct page *page, dma_addr_t handle) +{ + struct page *end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT); + + while (page < end) + __free_page(page++); +} + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + void *ret = NULL; + + page = __dma_alloc(dev, size, handle, gfp); + if (page) + ret = phys_to_uncached(page_to_phys(page)); + + return ret; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ + void *addr = phys_to_cached(uncached_to_phys(cpu_addr)); + struct page *page; + + pr_debug("dma_free_coherent addr %p (phys %08lx) size %u\n", + cpu_addr, (unsigned long)handle, (unsigned)size); + BUG_ON(!virt_addr_valid(addr)); + page = virt_to_page(addr); + __dma_free(dev, size, page, handle); +} +EXPORT_SYMBOL(dma_free_coherent); + +#if 0 +void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + + page = __dma_alloc(dev, size, handle, gfp); + + /* Now, map the page into P3 with write-combining turned on */ + return __ioremap(page_to_phys(page), size, _PAGE_BUFFER); +} +EXPORT_SYMBOL(dma_alloc_writecombine); + +void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ + struct page *page; + + iounmap(cpu_addr); + + page = bus_to_page(handle); + __dma_free(dev, size, page, handle); +} +EXPORT_SYMBOL(dma_free_writecombine); +#endif diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c new file mode 100644 index 000000000000..678557260a35 --- /dev/null +++ b/arch/avr32/mm/fault.c @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * Based on linux/arch/sh/mm/fault.c: + * Copyright (C) 1999 Niibe Yutaka + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by 
the Free Software Foundation. + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/pagemap.h> + +#include <asm/kdebug.h> +#include <asm/mmu_context.h> +#include <asm/sysreg.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> + +#ifdef DEBUG +static void dump_code(unsigned long pc) +{ + unsigned char *p = (unsigned char *)pc; + unsigned char val; + int i; + + printk(KERN_DEBUG "Code:"); + for (i = 0; i < 16; i++) { + if (__get_user(val, p + i)) + break; + printk(" %02x", val); + } + printk("\n"); +} +#endif + +#ifdef CONFIG_KPROBES +ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + +/* Hook to register for page fault notifications */ +int register_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&notify_page_fault_chain, nb); +} + +int unregister_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); +} + +static inline int notify_page_fault(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .trapnr = trap, + }; + return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); +} +#else +static inline int notify_page_fault(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + return NOTIFY_DONE; +} +#endif + +/* + * This routine handles page faults. It determines the address and the + * problem, and then passes it off to one of the appropriate routines. + * + * ecr is the Exception Cause Register. Possible values are: + * 5: Page not found (instruction access) + * 6: Protection fault (instruction access) + * 12: Page not found (read access) + * 13: Page not found (write access) + * 14: Protection fault (read access) + * 15: Protection fault (write access) + */ +asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; + const struct exception_table_entry *fixup; + unsigned long address; + unsigned long page; + int writeaccess = 0; + + if (notify_page_fault(DIE_PAGE_FAULT, regs, + ecr, SIGSEGV) == NOTIFY_STOP) + return; + + address = sysreg_read(TLBEAR); + + tsk = current; + mm = tsk->mm; + + /* + * If we're in an interrupt or have no user context, we must + * not take the fault... + */ + if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + goto no_context; + + local_irq_enable(); + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* + * Ok, we have a good vm_area for this memory access, so we + * can handle it... + */ +good_area: + switch (ecr) { + case ECR_PROTECTION_X: + case ECR_TLB_MISS_X: + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + break; + case ECR_PROTECTION_R: + case ECR_TLB_MISS_R: + if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + goto bad_area; + break; + case ECR_PROTECTION_W: + case ECR_TLB_MISS_W: + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + writeaccess = 1; + break; + default: + panic("Unhandled case %lu in do_page_fault!", ecr); + } + + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the + * fault.
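+ * + * The switch below dispatches on handle_mm_fault()'s return code: + * VM_FAULT_MINOR and VM_FAULT_MAJOR bump the respective fault counter + * and take the normal return path, VM_FAULT_SIGBUS and VM_FAULT_OOM + * branch to the corresponding error paths, and anything else is a + * kernel bug.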
+ */ +survive: + switch (handle_mm_fault(mm, vma, address, writeaccess)) { + case VM_FAULT_MINOR: + tsk->min_flt++; + break; + case VM_FAULT_MAJOR: + tsk->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return; + + /* + * Something tried to access memory that isn't in our memory + * map. Fix it, but check if it's kernel or user first... + */ +bad_area: + pr_debug("Bad area [%s:%u]: addr %08lx, ecr %lu\n", + tsk->comm, tsk->pid, address, ecr); + + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { + /* Hmm...we have to pass address and ecr somehow... */ + /* tsk->thread.address = address; + tsk->thread.error_code = ecr; */ +#ifdef DEBUG + show_regs(regs); + dump_code(regs->pc); + + page = sysreg_read(PTBR); + printk("ptbr = %08lx", page); + if (page) { + page = ((unsigned long *)page)[address >> 22]; + printk(" pgd = %08lx", page); + if (page & _PAGE_PRESENT) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT]; + printk(" pte = %08lx\n", page); + } + } +#endif + pr_debug("Sending SIGSEGV to PID %d...\n", + tsk->pid); + force_sig(SIGSEGV, tsk); + return; + } + +no_context: + pr_debug("No context\n"); + + /* Are we prepared to handle this kernel fault? */ + fixup = search_exception_tables(regs->pc); + if (fixup) { + regs->pc = fixup->fixup; + pr_debug("Found fixup at %08lx\n", fixup->fixup); + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have + * to terminate things with extreme prejudice. + */ + if (address < PAGE_SIZE) + printk(KERN_ALERT + "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT + "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n", address); + printk(KERN_ALERT "pc = %08lx\n", regs->pc); + + page = sysreg_read(PTBR); + printk(KERN_ALERT "ptbr = %08lx", page); + if (page) { + page = ((unsigned long *)page)[address >> 22]; + printk(" pgd = %08lx", page); + if (page & _PAGE_PRESENT) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT]; + printk(" pte = %08lx\n", page); + } + } + die("\nOops", regs, ecr); + do_exit(SIGKILL); + + /* + * We ran out of memory, or some other thing happened to us + * that made us unable to handle the page fault gracefully. + */ +out_of_memory: + printk("Out of memory\n"); + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: Killing process %s\n", tsk->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel or + * user mode. + */ + /* address, error_code, trap_no, ... */ +#ifdef DEBUG + show_regs(regs); + dump_code(regs->pc); +#endif + pr_debug("Sending SIGBUS to PID %d...\n", tsk->pid); + force_sig(SIGBUS, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + goto no_context; +} + +asmlinkage void do_bus_error(unsigned long addr, int write_access, + struct pt_regs *regs) +{ + printk(KERN_ALERT + "Bus error at physical address 0x%08lx (%s access)\n", + addr, write_access ? 
"write" : "read"); + printk(KERN_INFO "DTLB dump:\n"); + dump_dtlb(); + die("Bus Error", regs, write_access); + do_exit(SIGKILL); +} + +/* + * This functionality is currently not possible to implement because + * we're using segmentation to ensure a fixed mapping of the kernel + * virtual address space. + * + * It would be possible to implement this, but it would require us to + * disable segmentation at startup and load the kernel mappings into + * the TLB like any other pages. There will be lots of trickery to + * avoid recursive invocation of the TLB miss handler, though... + */ +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + +} +EXPORT_SYMBOL(kernel_map_pages); +#endif diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c new file mode 100644 index 000000000000..3e6c41039808 --- /dev/null +++ b/arch/avr32/mm/init.c @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/initrd.h> +#include <linux/mmzone.h> +#include <linux/bootmem.h> +#include <linux/pagemap.h> +#include <linux/pfn.h> +#include <linux/nodemask.h> + +#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/tlb.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/setup.h> +#include <asm/sections.h> + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +struct page *empty_zero_page; + +/* + * Cache of MMU context last used. + */ +unsigned long mmu_context_cache = NO_CONTEXT; + +#define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT) +#define MAX_LOW_PFN (NODE_DATA(0)->bdata->node_low_pfn) + +void show_mem(void) +{ + int total = 0, reserved = 0, cached = 0; + int slab = 0, free = 0, shared = 0; + pg_data_t *pgdat; + + printk("Mem-info:\n"); + show_free_areas(); + + for_each_online_pgdat(pgdat) { + struct page *page, *end; + + page = pgdat->node_mem_map; + end = page + pgdat->node_spanned_pages; + + do { + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (PageSlab(page)) + slab++; + else if (!page_count(page)) + free++; + else + shared += page_count(page) - 1; + page++; + } while (page < end); + } + + printk ("%d pages of RAM\n", total); + printk ("%d free pages\n", free); + printk ("%d reserved pages\n", reserved); + printk ("%d slab pages\n", slab); + printk ("%d pages shared\n", shared); + printk ("%d pages swap cached\n", cached); +} + +static void __init print_memory_map(const char *what, + struct tag_mem_range *mem) +{ + printk ("%s:\n", what); + for (; mem; mem = mem->next) { + printk (" %08lx - %08lx\n", + (unsigned long)mem->addr, + (unsigned long)(mem->addr + mem->size)); + } +} + +#define MAX_LOWMEM HIGHMEM_START +#define MAX_LOWMEM_PFN PFN_DOWN(MAX_LOWMEM) + +/* + * Sort a list of memory regions in-place by ascending address. + * + * We're using bubble sort because we only have singly linked lists + * with few elements. 
+ */ +static void __init sort_mem_list(struct tag_mem_range **pmem) +{ + int done; + struct tag_mem_range **a, **b; + + if (!*pmem) + return; + + do { + done = 1; + a = pmem, b = &(*pmem)->next; + while (*b) { + if ((*a)->addr > (*b)->addr) { + struct tag_mem_range *tmp; + tmp = (*b)->next; + (*b)->next = *a; + *a = *b; + *b = tmp; + done = 0; + } + a = &(*a)->next; + b = &(*a)->next; + } + } while (!done); +} + +/* + * Find a free memory region large enough for storing the + * bootmem bitmap. + */ +static unsigned long __init +find_bootmap_pfn(const struct tag_mem_range *mem) +{ + unsigned long bootmap_pages, bootmap_len; + unsigned long node_pages = PFN_UP(mem->size); + unsigned long bootmap_addr = mem->addr; + struct tag_mem_range *reserved = mem_reserved; + struct tag_mem_range *ramdisk = mem_ramdisk; + unsigned long kern_start = virt_to_phys(_stext); + unsigned long kern_end = virt_to_phys(_end); + + bootmap_pages = bootmem_bootmap_pages(node_pages); + bootmap_len = bootmap_pages << PAGE_SHIFT; + + /* + * Find a large enough region without reserved pages for + * storing the bootmem bitmap. We can take advantage of the + * fact that all lists have been sorted. + * + * We have to check explicitly reserved regions as well as the + * kernel image and any RAMDISK images... + * + * Oh, and we have to make sure we don't overwrite the taglist + * since we're going to use it until the bootmem allocator is + * fully up and running. + */ + while (1) { + if ((bootmap_addr < kern_end) && + ((bootmap_addr + bootmap_len) > kern_start)) + bootmap_addr = kern_end; + + while (reserved && + (bootmap_addr >= (reserved->addr + reserved->size))) + reserved = reserved->next; + + if (reserved && + ((bootmap_addr + bootmap_len) >= reserved->addr)) { + bootmap_addr = reserved->addr + reserved->size; + continue; + } + + while (ramdisk && + (bootmap_addr >= (ramdisk->addr + ramdisk->size))) + ramdisk = ramdisk->next; + + if (!ramdisk || + ((bootmap_addr + bootmap_len) < ramdisk->addr)) + break; + + bootmap_addr = ramdisk->addr + ramdisk->size; + } + + if ((PFN_PHYS(PFN_UP(bootmap_addr)) + bootmap_len) >= (mem->addr + mem->size)) + return ~0UL; + + return PFN_UP(bootmap_addr); +} + +void __init setup_bootmem(void) +{ + unsigned bootmap_size; + unsigned long first_pfn, bootmap_pfn, pages; + unsigned long max_pfn, max_low_pfn; + unsigned long kern_start = virt_to_phys(_stext); + unsigned long kern_end = virt_to_phys(_end); + unsigned node = 0; + struct tag_mem_range *bank, *res; + + sort_mem_list(&mem_phys); + sort_mem_list(&mem_reserved); + + print_memory_map("Physical memory", mem_phys); + print_memory_map("Reserved memory", mem_reserved); + + nodes_clear(node_online_map); + + if (mem_ramdisk) { +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = __va(mem_ramdisk->addr); + initrd_end = initrd_start + mem_ramdisk->size; + + print_memory_map("RAMDISK images", mem_ramdisk); + if (mem_ramdisk->next) + printk(KERN_WARNING + "Warning: Only the first RAMDISK image " + "will be used\n"); + sort_mem_list(&mem_ramdisk); +#else + printk(KERN_WARNING "RAM disk image present, but " + "no initrd support in kernel!\n"); +#endif + } + + if (mem_phys->next) + printk(KERN_WARNING "Only using first memory bank\n"); + + for (bank = mem_phys; bank; bank = NULL) { + first_pfn = PFN_UP(bank->addr); + max_low_pfn = max_pfn = PFN_DOWN(bank->addr + bank->size); + bootmap_pfn = find_bootmap_pfn(bank); + if (bootmap_pfn > max_pfn) + panic("No space for bootmem bitmap!\n"); + + if (max_low_pfn > MAX_LOWMEM_PFN) { + max_low_pfn = MAX_LOWMEM_PFN; +#ifndef
CONFIG_HIGHMEM + /* + * Lowmem is memory that can be addressed + * directly through P1/P2 + */ + printk(KERN_WARNING + "Node %u: Only %ld MiB of memory will be used.\n", + node, MAX_LOWMEM >> 20); + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else +#error HIGHMEM is not supported by AVR32 yet +#endif + } + + /* Initialize the boot-time allocator with low memory only. */ + bootmap_size = init_bootmem_node(NODE_DATA(node), bootmap_pfn, + first_pfn, max_low_pfn); + + printk("Node %u: bdata = %p, bdata->node_bootmem_map = %p\n", + node, NODE_DATA(node)->bdata, + NODE_DATA(node)->bdata->node_bootmem_map); + + /* + * Register fully available RAM pages with the bootmem + * allocator. + */ + pages = max_low_pfn - first_pfn; + free_bootmem_node (NODE_DATA(node), PFN_PHYS(first_pfn), + PFN_PHYS(pages)); + + /* + * Reserve space for the kernel image (if present in + * this node)... + */ + if ((kern_start >= PFN_PHYS(first_pfn)) && + (kern_start < PFN_PHYS(max_pfn))) { + printk("Node %u: Kernel image %08lx - %08lx\n", + node, kern_start, kern_end); + reserve_bootmem_node(NODE_DATA(node), kern_start, + kern_end - kern_start); + } + + /* ...the bootmem bitmap... */ + reserve_bootmem_node(NODE_DATA(node), + PFN_PHYS(bootmap_pfn), + bootmap_size); + + /* ...any RAMDISK images... */ + for (res = mem_ramdisk; res; res = res->next) { + if (res->addr > PFN_PHYS(max_pfn)) + break; + + if (res->addr >= PFN_PHYS(first_pfn)) { + printk("Node %u: RAMDISK %08lx - %08lx\n", + node, + (unsigned long)res->addr, + (unsigned long)(res->addr + res->size)); + reserve_bootmem_node(NODE_DATA(node), + res->addr, res->size); + } + } + + /* ...and any other reserved regions. */ + for (res = mem_reserved; res; res = res->next) { + if (res->addr > PFN_PHYS(max_pfn)) + break; + + if (res->addr >= PFN_PHYS(first_pfn)) { + printk("Node %u: Reserved %08lx - %08lx\n", + node, + (unsigned long)res->addr, + (unsigned long)(res->addr + res->size)); + reserve_bootmem_node(NODE_DATA(node), + res->addr, res->size); + } + } + + node_set_online(node); + } +} + +/* + * paging_init() sets up the page tables + * + * This routine also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + extern unsigned long _evba; + void *zero_page; + int nid; + + /* + * Make sure we can handle exceptions before enabling + * paging. Not that we should ever _get_ any exceptions this + * early, but you never know... + */ + printk("Exception vectors start at %p\n", &_evba); + sysreg_write(EVBA, (unsigned long)&_evba); + + /* + * Since we are ready to handle exceptions now, we should let + * the CPU generate them... + */ + __asm__ __volatile__ ("csrf %0" : : "i"(SR_EM_BIT)); + + /* + * Allocate the zero page. The allocator will panic if it + * can't satisfy the request, so no need to check. 
+ */ + zero_page = alloc_bootmem_low_pages_node(NODE_DATA(0), + PAGE_SIZE); + + { + pgd_t *pg_dir; + int i; + + pg_dir = swapper_pg_dir; + sysreg_write(PTBR, (unsigned long)pg_dir); + + for (i = 0; i < PTRS_PER_PGD; i++) + pgd_val(pg_dir[i]) = 0; + + enable_mmu(); + printk ("CPU: Paging enabled\n"); + } + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long low, start_pfn; + + start_pfn = pgdat->bdata->node_boot_start; + start_pfn >>= PAGE_SHIFT; + low = pgdat->bdata->node_low_pfn; + + memset(zones_size, 0, sizeof(zones_size)); + zones_size[ZONE_NORMAL] = low - start_pfn; + + printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n", + nid, start_pfn, low); + + free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL); + + printk("Node %u: mem_map starts at %p\n", + pgdat->node_id, pgdat->node_mem_map); + } + + mem_map = NODE_DATA(0)->node_mem_map; + + memset(zero_page, 0, PAGE_SIZE); + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + int nid, i; + + reservedpages = 0; + high_memory = NULL; + + /* this will put all low memory onto the freelists */ + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + unsigned long node_pages = 0; + void *node_high_memory; + + num_physpages += pgdat->node_present_pages; + + if (pgdat->node_spanned_pages != 0) + node_pages = free_all_bootmem_node(pgdat); + + totalram_pages += node_pages; + + for (i = 0; i < node_pages; i++) + if (PageReserved(pgdat->node_mem_map + i)) + reservedpages++; + + node_high_memory = (void *)((pgdat->node_start_pfn + + pgdat->node_spanned_pages) + << PAGE_SHIFT); + if (node_high_memory > high_memory) + high_memory = node_high_memory; + } + + max_mapnr = MAP_NR(high_memory); + + codesize = (unsigned long)_etext - (unsigned long)_text; + datasize = (unsigned long)_edata - (unsigned long)_data; + initsize = (unsigned long)__init_end - (unsigned long)__init_begin; + + printk ("Memory: %luk/%luk available (%dk kernel code, " + "%dk reserved, %dk data, %dk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), + totalram_pages << (PAGE_SHIFT - 10), + codesize >> 10, + reservedpages << (PAGE_SHIFT - 10), + datasize >> 10, + initsize >> 10); +} + +static inline void free_area(unsigned long addr, unsigned long end, char *s) +{ + unsigned int size = (end - addr) >> 10; + + for (; addr < end; addr += PAGE_SIZE) { + struct page *page = virt_to_page(addr); + ClearPageReserved(page); + init_page_count(page); + free_page(addr); + totalram_pages++; + } + + if (size && s) + printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n", + s, size, end - (size << 10), end); +} + +void free_initmem(void) +{ + free_area((unsigned long)__init_begin, (unsigned long)__init_end, + "init"); +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (!keep_initrd) + free_area(start, end, "initrd"); +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/avr32/mm/ioremap.c b/arch/avr32/mm/ioremap.c new file mode 100644 index 000000000000..536021877df6 --- /dev/null +++ b/arch/avr32/mm/ioremap.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/vmalloc.h> +#include <linux/module.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/addrspace.h> + +static inline int remap_area_pte(pte_t *pte, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long pfn; + + pfn = phys_addr >> PAGE_SHIFT; + do { + WARN_ON(!pte_none(*pte)); + + set_pte(pte, pfn_pte(pfn, prot)); + address += PAGE_SIZE; + pfn++; + pte++; + } while (address && (address < end)); + + return 0; +} + +static inline int remap_area_pmd(pmd_t *pmd, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long next; + + phys_addr -= address; + + do { + pte_t *pte = pte_alloc_kernel(pmd, address); + if (!pte) + return -ENOMEM; + + next = (address + PMD_SIZE) & PMD_MASK; + if (remap_area_pte(pte, address, next, + address + phys_addr, prot)) + return -ENOMEM; + + address = next; + pmd++; + } while (address && (address < end)); + return 0; +} + +static int remap_area_pud(pud_t *pud, unsigned long address, + unsigned long end, unsigned long phys_addr, + pgprot_t prot) +{ + unsigned long next; + + phys_addr -= address; + + do { + pmd_t *pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) + return -ENOMEM; + next = (address + PUD_SIZE) & PUD_MASK; + if (remap_area_pmd(pmd, address, next, + phys_addr + address, prot)) + return -ENOMEM; + + address = next; + pud++; + } while (address && address < end); + + return 0; +} + +static int remap_area_pages(unsigned long address, unsigned long phys_addr, + size_t size, pgprot_t prot) +{ + unsigned long end = address + size; + unsigned long next; + pgd_t *pgd; + int err = 0; + + phys_addr -= address; + + pgd = pgd_offset_k(address); + flush_cache_all(); + BUG_ON(address >= end); + + spin_lock(&init_mm.page_table_lock); + do { + pud_t *pud = pud_alloc(&init_mm, pgd, address); + + err = -ENOMEM; + if (!pud) + break; + + next = (address + PGDIR_SIZE) & PGDIR_MASK; + if (next < address || next > end) + next = end; + err = remap_area_pud(pud, address, next, + phys_addr + address, prot); + if (err) + break; + + address = next; + pgd++; + } while (address && (address < end)); + + spin_unlock(&init_mm.page_table_lock); + flush_tlb_all(); + return err; +} + +/* + * Re-map an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access physical + * memory directly. + */ +void __iomem *__ioremap(unsigned long phys_addr, size_t size, + unsigned long flags) +{ + void *addr; + struct vm_struct *area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* + * Check if we can simply use the P4 segment. This area is + * uncacheable, so if caching/buffering is requested, we can't + * use it. + */ + if ((phys_addr >= P4SEG) && (flags == 0)) + return (void __iomem *)phys_addr; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * XXX: When mapping regular RAM, we'd better make damn sure + * it's never used for anything else. But this is really the + * caller's responsibility... 
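+ * + * The check below asks whether phys_addr survives a round trip + * through the P2 segment macros, i.e. whether it is low enough to + * have a fixed uncached P2 alias; if it does, that alias is returned + * directly and no page tables need to be built.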
+ */ + if (PHYSADDR(P2SEGADDR(phys_addr)) == phys_addr) + return (void __iomem *)P2SEGADDR(phys_addr); + + /* Mappings have to be page-aligned */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY + | _PAGE_ACCESSED | _PAGE_TYPE_SMALL | flags); + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = area->addr; + if (remap_area_pages((unsigned long)addr, phys_addr, size, prot)) { + vunmap(addr); + return NULL; + } + + return (void __iomem *)(offset + (char *)addr); +} +EXPORT_SYMBOL(__ioremap); + +void __iounmap(void __iomem *addr) +{ + struct vm_struct *p; + + if ((unsigned long)addr >= P4SEG) + return; + + p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); + if (unlikely(!p)) { + printk (KERN_ERR "iounmap: bad address %p\n", addr); + return; + } + + kfree (p); +} +EXPORT_SYMBOL(__iounmap); diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c new file mode 100644 index 000000000000..5d0523bbe298 --- /dev/null +++ b/arch/avr32/mm/tlb.c @@ -0,0 +1,378 @@ +/* + * AVR32 TLB operations + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/mm.h> + +#include <asm/mmu_context.h> + +#define _TLBEHI_I 0x100 + +void show_dtlb_entry(unsigned int index) +{ + unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save; + unsigned long flags; + + local_irq_save(flags); + mmucr_save = sysreg_read(MMUCR); + tlbehi_save = sysreg_read(TLBEHI); + mmucr = mmucr_save & 0x13; + mmucr |= index << 14; + sysreg_write(MMUCR, mmucr); + + asm volatile("tlbr" : : : "memory"); + cpu_sync_pipeline(); + + tlbehi = sysreg_read(TLBEHI); + tlbelo = sysreg_read(TLBELO); + + printk("%2u: %c %c %02x %05x %05x %o %o %c %c %c %c\n", + index, + (tlbehi & 0x200)?'1':'0', + (tlbelo & 0x100)?'1':'0', + (tlbehi & 0xff), + (tlbehi >> 12), (tlbelo >> 12), + (tlbelo >> 4) & 7, (tlbelo >> 2) & 3, + (tlbelo & 0x200)?'1':'0', + (tlbelo & 0x080)?'1':'0', + (tlbelo & 0x001)?'1':'0', + (tlbelo & 0x002)?'1':'0'); + + sysreg_write(MMUCR, mmucr_save); + sysreg_write(TLBEHI, tlbehi_save); + cpu_sync_pipeline(); + local_irq_restore(flags); +} + +void dump_dtlb(void) +{ + unsigned int i; + + printk("ID V G ASID VPN PFN AP SZ C B W D\n"); + for (i = 0; i < 32; i++) + show_dtlb_entry(i); +} + +static unsigned long last_mmucr; + +static inline void set_replacement_pointer(unsigned shift) +{ + unsigned long mmucr, mmucr_save; + + mmucr = mmucr_save = sysreg_read(MMUCR); + + /* Does this mapping already exist?
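+ * "tlbs" probes the TLB for the VPN/ASID currently loaded into + * TLBEHI; on a miss, MMUCR.N is set and a victim entry is chosen + * below: rp = 32 - fls(tlbar) picks the lowest-numbered entry whose + * TLBARLO bit is still set (not recently accessed), and when every + * bit is clear the accounting is reset by writing ~0 to TLBARLO and + * entry 0 is reused.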
*/ + __asm__ __volatile__( + " tlbs\n" + " mfsr %0, %1" + : "=r"(mmucr) + : "i"(SYSREG_MMUCR)); + + if (mmucr & SYSREG_BIT(MMUCR_N)) { + /* Not found -- pick a not-recently-accessed entry */ + unsigned long rp; + unsigned long tlbar = sysreg_read(TLBARLO); + + rp = 32 - fls(tlbar); + if (rp == 32) { + rp = 0; + sysreg_write(TLBARLO, -1L); + } + + mmucr &= 0x13; + mmucr |= (rp << shift); + + sysreg_write(MMUCR, mmucr); + } + + last_mmucr = mmucr; +} + +static void update_dtlb(unsigned long address, pte_t pte, unsigned long asid) +{ + unsigned long vpn; + + vpn = (address & MMU_VPN_MASK) | _TLBEHI_VALID | asid; + sysreg_write(TLBEHI, vpn); + cpu_sync_pipeline(); + + set_replacement_pointer(14); + + sysreg_write(TLBELO, pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK); + + /* Let's go */ + asm volatile("nop\n\ttlbw" : : : "memory"); + cpu_sync_pipeline(); +} + +void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t pte) +{ + unsigned long flags; + + /* ptrace may call this routine */ + if (vma && current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + update_dtlb(address, pte, get_asid()); + local_irq_restore(flags); +} + +void __flush_tlb_page(unsigned long asid, unsigned long page) +{ + unsigned long mmucr, tlbehi; + + page |= asid; + sysreg_write(TLBEHI, page); + cpu_sync_pipeline(); + asm volatile("tlbs"); + mmucr = sysreg_read(MMUCR); + + if (!(mmucr & SYSREG_BIT(MMUCR_N))) { + unsigned long tlbarlo; + unsigned long entry; + + /* Clear the "valid" bit */ + tlbehi = sysreg_read(TLBEHI); + tlbehi &= ~_TLBEHI_VALID; + sysreg_write(TLBEHI, tlbehi); + cpu_sync_pipeline(); + + /* mark the entry as "not accessed" */ + entry = (mmucr >> 14) & 0x3f; + tlbarlo = sysreg_read(TLBARLO); + tlbarlo |= (0x80000000 >> entry); + sysreg_write(TLBARLO, tlbarlo); + + /* update the entry with valid bit clear */ + asm volatile("tlbw"); + cpu_sync_pipeline(); + } +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + if (vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) { + unsigned long flags, asid; + unsigned long saved_asid = MMU_NO_ASID; + + asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK; + page &= PAGE_MASK; + + local_irq_save(flags); + if (vma->vm_mm != current->mm) { + saved_asid = get_asid(); + set_asid(asid); + } + + __flush_tlb_page(asid, page); + + if (saved_asid != MMU_NO_ASID) + set_asid(saved_asid); + local_irq_restore(flags); + } +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm->context != NO_CONTEXT) { + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */ + mm->context = NO_CONTEXT; + if (mm == current->mm) + activate_context(mm); + } else { + unsigned long asid = mm->context & MMU_CONTEXT_ASID_MASK; + unsigned long saved_asid = MMU_NO_ASID; + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + if (mm != current->mm) { + saved_asid = get_asid(); + set_asid(asid); + } + + while (start < end) { + __flush_tlb_page(asid, start); + start += PAGE_SIZE; + } + if (saved_asid != MMU_NO_ASID) + set_asid(saved_asid); + } + local_irq_restore(flags); + } +} + +/* + * TODO: If this is only called for addresses > TASK_SIZE, we can probably + * skip the ASID stuff and just use the Global bit... 
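The victim choice in set_replacement_pointer() above can be modelled in isolation. TLBARLO holds one bit per DTLB entry, entry 0 in the most significant bit, and a set bit means the entry has not been accessed recently; `32 - fls(tlbar)` therefore picks the lowest-numbered stale entry, and a result of 32 means every entry was accessed, forcing the reset path. A userspace sketch, with the kernel's fls() emulated by a builtin:

	#include <assert.h>

	static int fls32(unsigned long x)	/* kernel-style fls(): fls(0) == 0 */
	{
		return x ? 32 - __builtin_clz((unsigned int)x) : 0;
	}

	int main(void)
	{
		unsigned long tlbar, rp;

		tlbar = 0x80000000UL >> 5;	/* only entry 5 marked stale */
		rp = 32 - fls32(tlbar);
		assert(rp == 5);		/* entry 5 becomes the victim */

		tlbar = 0;			/* every entry recently accessed */
		rp = 32 - fls32(tlbar);
		assert(rp == 32);		/* triggers the reset path above */
		return 0;
	}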
+ */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size > (MMU_DTLB_ENTRIES / 4)) { /* Too many entries to flush */ + flush_tlb_all(); + } else { + unsigned long asid = init_mm.context & MMU_CONTEXT_ASID_MASK; + unsigned long saved_asid = get_asid(); + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + set_asid(asid); + while (start < end) { + __flush_tlb_page(asid, start); + start += PAGE_SIZE; + } + set_asid(saved_asid); + } + local_irq_restore(flags); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + /* Invalidate all TLB entries of this process by getting a new ASID */ + if (mm->context != NO_CONTEXT) { + unsigned long flags; + + local_irq_save(flags); + mm->context = NO_CONTEXT; + if (mm == current->mm) + activate_context(mm); + local_irq_restore(flags); + } +} + +void flush_tlb_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + sysreg_write(MMUCR, sysreg_read(MMUCR) | SYSREG_BIT(MMUCR_I)); + local_irq_restore(flags); +} + +#ifdef CONFIG_PROC_FS + +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/init.h> + +static void *tlb_start(struct seq_file *tlb, loff_t *pos) +{ + static unsigned long tlb_index; + + if (*pos >= 32) + return NULL; + + tlb_index = 0; + return &tlb_index; +} + +static void *tlb_next(struct seq_file *tlb, void *v, loff_t *pos) +{ + unsigned long *index = v; + + if (*index >= 31) + return NULL; + + ++*pos; + ++*index; + return index; +} + +static void tlb_stop(struct seq_file *tlb, void *v) +{ + +} + +static int tlb_show(struct seq_file *tlb, void *v) +{ + unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags; + unsigned long *index = v; + + if (*index == 0) + seq_puts(tlb, "ID V G ASID VPN PFN AP SZ C B W D\n"); + + BUG_ON(*index >= 32); + + local_irq_save(flags); + mmucr_save = sysreg_read(MMUCR); + tlbehi_save = sysreg_read(TLBEHI); + mmucr = mmucr_save & 0x13; + mmucr |= *index << 14; + sysreg_write(MMUCR, mmucr); + + asm volatile("tlbr" : : : "memory"); + cpu_sync_pipeline(); + + tlbehi = sysreg_read(TLBEHI); + tlbelo = sysreg_read(TLBELO); + + sysreg_write(MMUCR, mmucr_save); + sysreg_write(TLBEHI, tlbehi_save); + cpu_sync_pipeline(); + local_irq_restore(flags); + + seq_printf(tlb, "%2lu: %c %c %02x %05x %05x %o %o %c %c %c %c\n", + *index, + (tlbehi & 0x200)?'1':'0', + (tlbelo & 0x100)?'1':'0', + (tlbehi & 0xff), + (tlbehi >> 12), (tlbelo >> 12), + (tlbelo >> 4) & 7, (tlbelo >> 2) & 3, + (tlbelo & 0x200)?'1':'0', + (tlbelo & 0x080)?'1':'0', + (tlbelo & 0x001)?'1':'0', + (tlbelo & 0x002)?'1':'0'); + + return 0; +} + +static struct seq_operations tlb_ops = { + .start = tlb_start, + .next = tlb_next, + .stop = tlb_stop, + .show = tlb_show, +}; + +static int tlb_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tlb_ops); +} + +static struct file_operations proc_tlb_operations = { + .open = tlb_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proctlb_init(void) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry("tlb", 0, NULL); + if (entry) + entry->proc_fops = &proc_tlb_operations; + return 0; +} +late_initcall(proctlb_init); +#endif /* CONFIG_PROC_FS */ diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index a601a17cf568..f7b171b92ea2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -27,7 +27,11 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_HARDIRQS 
bool - default n + default y + +config GENERIC_HARDIRQS_NO__DO_IRQ + bool + default y config GENERIC_TIME bool @@ -251,6 +255,12 @@ config MB93091_NO_MB endchoice endif +config FUJITSU_MB93493 + bool "MB93493 Multimedia chip" + help + Select this option if the MB93493 multimedia chip is going to be + used. + choice prompt "GP-Relative data support" default GPREL_DATA_8 diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile index 5a827b349b5e..32db3499c461 100644 --- a/arch/frv/kernel/Makefile +++ b/arch/frv/kernel/Makefile @@ -10,15 +10,14 @@ extra-y:= head.o init_task.o vmlinux.lds obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \ process.o traps.o ptrace.o signal.o dma.o \ sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \ - debug-stub.o irq.o irq-routing.o sleep.o uaccess.o + debug-stub.o irq.o sleep.o uaccess.o obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o obj-$(CONFIG_MB93091_VDK) += irq-mb93091.o -obj-$(CONFIG_MB93093_PDK) += irq-mb93093.o -obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o obj-$(CONFIG_PM) += pm.o cmode.o obj-$(CONFIG_MB93093_PDK) += pm-mb93093.o +obj-$(CONFIG_FUJITSU_MB93493) += irq-mb93493.o obj-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c index 1381abcd5cc9..369bc0a7443d 100644 --- a/arch/frv/kernel/irq-mb93091.c +++ b/arch/frv/kernel/irq-mb93091.c @@ -24,7 +24,6 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #define __reg16(ADDR) (*(volatile unsigned short *)(ADDR)) @@ -33,83 +32,131 @@ #define __get_IFR() ({ __reg16(0xffc0000c); }) #define __clr_IFR(M) do { __reg16(0xffc0000c) = ~(M); wmb(); } while(0) -static void frv_fpga_doirq(struct irq_source *source); -static void frv_fpga_control(struct irq_group *group, int irq, int on); -/*****************************************************************************/ /* - * FPGA IRQ multiplexor + * on-motherboard FPGA PIC operations */ -static struct irq_source frv_fpga[4] = { -#define __FPGA(X, M) \ - [X] = { \ - .muxname = "fpga."#X, \ - .irqmask = M, \ - .doirq = frv_fpga_doirq, \ - } +static void frv_fpga_mask(unsigned int irq) +{ + uint16_t imr = __get_IMR(); - __FPGA(0, 0x0028), - __FPGA(1, 0x0050), - __FPGA(2, 0x1c00), - __FPGA(3, 0x6386), -}; + imr |= 1 << (irq - IRQ_BASE_FPGA); -static struct irq_group frv_fpga_irqs = { - .first_irq = IRQ_BASE_FPGA, - .control = frv_fpga_control, - .sources = { - [ 1] = &frv_fpga[3], - [ 2] = &frv_fpga[3], - [ 3] = &frv_fpga[0], - [ 4] = &frv_fpga[1], - [ 5] = &frv_fpga[0], - [ 6] = &frv_fpga[1], - [ 7] = &frv_fpga[3], - [ 8] = &frv_fpga[3], - [ 9] = &frv_fpga[3], - [10] = &frv_fpga[2], - [11] = &frv_fpga[2], - [12] = &frv_fpga[2], - [13] = &frv_fpga[3], - [14] = &frv_fpga[3], - }, -}; + __set_IMR(imr); +} +static void frv_fpga_ack(unsigned int irq) +{ + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} -static void frv_fpga_control(struct irq_group *group, int index, int on) +static void frv_fpga_mask_ack(unsigned int irq) { uint16_t imr = __get_IMR(); - if (on) - imr &= ~(1 << index); - else - imr |= 1 << index; + imr |= 1 << (irq - IRQ_BASE_FPGA); + __set_IMR(imr); + + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} + +static void frv_fpga_unmask(unsigned int irq) +{ + uint16_t imr = __get_IMR(); + + imr &= ~(1 << (irq - IRQ_BASE_FPGA)); __set_IMR(imr); } -static void frv_fpga_doirq(struct irq_source *source) +static struct irq_chip frv_fpga_pic = { + .name = "mb93091", + .ack 
= frv_fpga_ack, + .mask = frv_fpga_mask, + .mask_ack = frv_fpga_mask_ack, + .unmask = frv_fpga_unmask, +}; + +/* + * FPGA PIC interrupt handler + */ +static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs) { - uint16_t mask, imr; + uint16_t imr, mask = (unsigned long) _mask; imr = __get_IMR(); - mask = source->irqmask & ~imr & __get_IFR(); - if (mask) { - __set_IMR(imr | mask); - __clr_IFR(mask); - distribute_irqs(&frv_fpga_irqs, mask); - __set_IMR(imr); + mask = mask & ~imr & __get_IFR(); + + /* poll all the triggered IRQs */ + while (mask) { + int irq; + + asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask)); + irq = 31 - irq; + mask &= ~(1 << irq); + + generic_handle_irq(IRQ_BASE_FPGA + irq, regs); } + + return IRQ_HANDLED; } +/* + * define an interrupt action for each FPGA PIC output + * - use dev_id to indicate the FPGA PIC input to output mappings + */ +static struct irqaction fpga_irq[4] = { + [0] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.0", + .dev_id = (void *) 0x0028UL, + }, + [1] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.1", + .dev_id = (void *) 0x0050UL, + }, + [2] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.2", + .dev_id = (void *) 0x1c00UL, + }, + [3] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "fpga.3", + .dev_id = (void *) 0x6386UL, + } +}; + +/* + * initialise the motherboard FPGA's PIC + */ void __init fpga_init(void) { + int irq; + + /* all PIC inputs are all set to be low-level driven, apart from the + * NMI button (15) which is fixed at falling-edge + */ __set_IMR(0x7ffe); __clr_IFR(0x0000); - frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL0); - frv_irq_route_external(&frv_fpga[1], IRQ_CPU_EXTERNAL1); - frv_irq_route_external(&frv_fpga[2], IRQ_CPU_EXTERNAL2); - frv_irq_route_external(&frv_fpga[3], IRQ_CPU_EXTERNAL3); - frv_irq_set_group(&frv_fpga_irqs); + for (irq = IRQ_BASE_FPGA + 1; irq <= IRQ_BASE_FPGA + 14; irq++) + set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_level_irq); + + set_irq_chip_and_handler(IRQ_FPGA_NMI, &frv_fpga_pic, handle_edge_irq); + + /* the FPGA drives the first four external IRQ inputs on the CPU PIC */ + setup_irq(IRQ_CPU_EXTERNAL0, &fpga_irq[0]); + setup_irq(IRQ_CPU_EXTERNAL1, &fpga_irq[1]); + setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[2]); + setup_irq(IRQ_CPU_EXTERNAL3, &fpga_irq[3]); } diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c index 48b2a6420888..a43a22158956 100644 --- a/arch/frv/kernel/irq-mb93093.c +++ b/arch/frv/kernel/irq-mb93093.c @@ -1,6 +1,6 @@ /* irq-mb93093.c: MB93093 FPGA interrupt handling * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
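The demux loop in fpga_interrupt() above leans on the FRV scan instruction; on the reading that it returns the number of leading zero bits, the loop dispatches the highest-numbered pending input first. A portable userspace rendering of the same dispatch order, borrowing fpga.0's 0x0028 mask as sample input:

	#include <stdint.h>
	#include <stdio.h>

	static void demux(uint32_t pending)
	{
		while (pending) {
			int input = 31 - __builtin_clz(pending);	/* highest set bit */

			pending &= ~(UINT32_C(1) << input);
			printf("dispatch FPGA input %d\n", input);	/* stand-in for generic_handle_irq() */
		}
	}

	int main(void)
	{
		demux(0x0028);	/* inputs 3 and 5 pending: prints 5, then 3 */
		return 0;
	}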
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -24,7 +24,6 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #define __reg16(ADDR) (*(volatile unsigned short *)(__region_CS2 + (ADDR))) @@ -33,66 +32,101 @@ #define __get_IFR() ({ __reg16(0x02); }) #define __clr_IFR(M) do { __reg16(0x02) = ~(M); wmb(); } while(0) -static void frv_fpga_doirq(struct irq_source *source); -static void frv_fpga_control(struct irq_group *group, int irq, int on); - -/*****************************************************************************/ /* - * FPGA IRQ multiplexor + * off-CPU FPGA PIC operations */ -static struct irq_source frv_fpga[4] = { -#define __FPGA(X, M) \ - [X] = { \ - .muxname = "fpga."#X, \ - .irqmask = M, \ - .doirq = frv_fpga_doirq, \ - } +static void frv_fpga_mask(unsigned int irq) +{ + uint16_t imr = __get_IMR(); - __FPGA(0, 0x0700), -}; + imr |= 1 << (irq - IRQ_BASE_FPGA); + __set_IMR(imr); +} -static struct irq_group frv_fpga_irqs = { - .first_irq = IRQ_BASE_FPGA, - .control = frv_fpga_control, - .sources = { - [ 8] = &frv_fpga[0], - [ 9] = &frv_fpga[0], - [10] = &frv_fpga[0], - }, -}; +static void frv_fpga_ack(unsigned int irq) +{ + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} + +static void frv_fpga_mask_ack(unsigned int irq) +{ + uint16_t imr = __get_IMR(); + imr |= 1 << (irq - IRQ_BASE_FPGA); + __set_IMR(imr); + + __clr_IFR(1 << (irq - IRQ_BASE_FPGA)); +} -static void frv_fpga_control(struct irq_group *group, int index, int on) +static void frv_fpga_unmask(unsigned int irq) { uint16_t imr = __get_IMR(); - if (on) - imr &= ~(1 << index); - else - imr |= 1 << index; + imr &= ~(1 << (irq - IRQ_BASE_FPGA)); __set_IMR(imr); } -static void frv_fpga_doirq(struct irq_source *source) +static struct irq_chip frv_fpga_pic = { + .name = "mb93093", + .ack = frv_fpga_ack, + .mask = frv_fpga_mask, + .mask_ack = frv_fpga_mask_ack, + .unmask = frv_fpga_unmask, +}; + +/* + * FPGA PIC interrupt handler + */ +static irqreturn_t fpga_interrupt(int irq, void *_mask, struct pt_regs *regs) { - uint16_t mask, imr; + uint16_t imr, mask = (unsigned long) _mask; imr = __get_IMR(); - mask = source->irqmask & ~imr & __get_IFR(); - if (mask) { - __set_IMR(imr | mask); - __clr_IFR(mask); - distribute_irqs(&frv_fpga_irqs, mask); - __set_IMR(imr); + mask = mask & ~imr & __get_IFR(); + + /* poll all the triggered IRQs */ + while (mask) { + int irq; + + asm("scan %1,gr0,%0" : "=r"(irq) : "r"(mask)); + irq = 31 - irq; + mask &= ~(1 << irq); + + generic_handle_irq(IRQ_BASE_FPGA + irq, regs); } + + return IRQ_HANDLED; } +/* + * define an interrupt action for each FPGA PIC output + * - use dev_id to indicate the FPGA PIC input to output mappings + */ +static struct irqaction fpga_irq[1] = { + [0] = { + .handler = fpga_interrupt, + .flags = IRQF_DISABLED, + .mask = CPU_MASK_NONE, + .name = "fpga.0", + .dev_id = (void *) 0x0700UL, + } +}; + +/* + * initialise the motherboard FPGA's PIC + */ void __init fpga_init(void) { + int irq; + + /* all PIC inputs are set to be edge triggered */ __set_IMR(0x0700); __clr_IFR(0x0000); - frv_irq_route_external(&frv_fpga[0], IRQ_CPU_EXTERNAL2); - frv_irq_set_group(&frv_fpga_irqs); + for (irq = IRQ_BASE_FPGA + 8; irq <= IRQ_BASE_FPGA + 10; irq++) + set_irq_chip_and_handler(irq, &frv_fpga_pic, handle_edge_irq); + + /* the FPGA drives external IRQ input #2 on the CPU PIC */ + setup_irq(IRQ_CPU_EXTERNAL2, &fpga_irq[0]); } diff --git 
a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c index 988d035640e1..39c0188a3498 100644 --- a/arch/frv/kernel/irq-mb93493.c +++ b/arch/frv/kernel/irq-mb93493.c @@ -1,6 +1,6 @@ /* irq-mb93493.c: MB93493 companion chip interrupt handler * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -24,84 +24,126 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #include <asm/mb93493-irqs.h> +#include <asm/mb93493-regs.h> -static void frv_mb93493_doirq(struct irq_source *source); +#define IRQ_ROUTE_ONE(X) (X##_ROUTE << (X - IRQ_BASE_MB93493)) + +#define IRQ_ROUTING \ + (IRQ_ROUTE_ONE(IRQ_MB93493_VDC) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_VCC) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_OUT) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_I2C_0) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_I2C_1) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_USB) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_LOCAL_BUS) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_PCMCIA) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_GPIO) | \ + IRQ_ROUTE_ONE(IRQ_MB93493_AUDIO_IN)) -/*****************************************************************************/ /* - * MB93493 companion chip IRQ multiplexor + * daughter board PIC operations + * - there is no way to ACK interrupts in the MB93493 chip */ -static struct irq_source frv_mb93493[2] = { - [0] = { - .muxname = "mb93493.0", - .muxdata = __region_CS3 + 0x3d0, - .doirq = frv_mb93493_doirq, - .irqmask = 0x0000, - }, - [1] = { - .muxname = "mb93493.1", - .muxdata = __region_CS3 + 0x3d4, - .doirq = frv_mb93493_doirq, - .irqmask = 0x0000, - }, -}; - -static void frv_mb93493_control(struct irq_group *group, int index, int on) +static void frv_mb93493_mask(unsigned int irq) { - struct irq_source *source; uint32_t iqsr; + volatile void *piqsr; - if ((frv_mb93493[0].irqmask & (1 << index))) - source = &frv_mb93493[0]; + if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493))) + piqsr = __addr_MB93493_IQSR(1); else - source = &frv_mb93493[1]; + piqsr = __addr_MB93493_IQSR(0); + + iqsr = readl(piqsr); + iqsr &= ~(1 << (irq - IRQ_BASE_MB93493 + 16)); + writel(iqsr, piqsr); +} - iqsr = readl(source->muxdata); - if (on) - iqsr |= 1 << (index + 16); +static void frv_mb93493_ack(unsigned int irq) +{ +} + +static void frv_mb93493_unmask(unsigned int irq) +{ + uint32_t iqsr; + volatile void *piqsr; + + if (IRQ_ROUTING & (1 << (irq - IRQ_BASE_MB93493))) + piqsr = __addr_MB93493_IQSR(1); else - iqsr &= ~(1 << (index + 16)); + piqsr = __addr_MB93493_IQSR(0); - writel(iqsr, source->muxdata); + iqsr = readl(piqsr); + iqsr |= 1 << (irq - IRQ_BASE_MB93493 + 16); + writel(iqsr, piqsr); } -static struct irq_group frv_mb93493_irqs = { - .first_irq = IRQ_BASE_MB93493, - .control = frv_mb93493_control, +static struct irq_chip frv_mb93493_pic = { + .name = "mb93093", + .ack = frv_mb93493_ack, + .mask = frv_mb93493_mask, + .mask_ack = frv_mb93493_mask, + .unmask = frv_mb93493_unmask, }; -static void frv_mb93493_doirq(struct irq_source *source) +/* + * MB93493 PIC interrupt handler + */ +static irqreturn_t mb93493_interrupt(int irq, void *_piqsr, struct pt_regs *regs) { - uint32_t mask = readl(source->muxdata); - mask = mask & (mask >> 16) & 0xffff; + volatile void *piqsr = _piqsr; + uint32_t iqsr; - if (mask) - distribute_irqs(&frv_mb93493_irqs, mask); -} + iqsr = readl(piqsr); + iqsr = iqsr & (iqsr >> 16) & 0xffff; -static void __init mb93493_irq_route(int irq, 
int source) -{ - frv_mb93493[source].irqmask |= 1 << (irq - IRQ_BASE_MB93493); - frv_mb93493_irqs.sources[irq - IRQ_BASE_MB93493] = &frv_mb93493[source]; + /* poll all the triggered IRQs */ + while (iqsr) { + int irq; + + asm("scan %1,gr0,%0" : "=r"(irq) : "r"(iqsr)); + irq = 31 - irq; + iqsr &= ~(1 << irq); + + generic_handle_irq(IRQ_BASE_MB93493 + irq, regs); + } + + return IRQ_HANDLED; } -void __init route_mb93493_irqs(void) +/* + * define an interrupt action for each MB93493 PIC output + * - use dev_id to indicate the MB93493 PIC input to output mappings + */ +static struct irqaction mb93493_irq[2] = { + [0] = { + .handler = mb93493_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "mb93493.0", + .dev_id = (void *) __addr_MB93493_IQSR(0), + }, + [1] = { + .handler = mb93493_interrupt, + .flags = IRQF_DISABLED | IRQF_SHARED, + .mask = CPU_MASK_NONE, + .name = "mb93493.1", + .dev_id = (void *) __addr_MB93493_IQSR(1), + } +}; + +/* + * initialise the motherboard MB93493's PIC + */ +void __init mb93493_init(void) { - frv_irq_route_external(&frv_mb93493[0], IRQ_CPU_MB93493_0); - frv_irq_route_external(&frv_mb93493[1], IRQ_CPU_MB93493_1); - - frv_irq_set_group(&frv_mb93493_irqs); - - mb93493_irq_route(IRQ_MB93493_VDC, IRQ_MB93493_VDC_ROUTE); - mb93493_irq_route(IRQ_MB93493_VCC, IRQ_MB93493_VCC_ROUTE); - mb93493_irq_route(IRQ_MB93493_AUDIO_IN, IRQ_MB93493_AUDIO_IN_ROUTE); - mb93493_irq_route(IRQ_MB93493_I2C_0, IRQ_MB93493_I2C_0_ROUTE); - mb93493_irq_route(IRQ_MB93493_I2C_1, IRQ_MB93493_I2C_1_ROUTE); - mb93493_irq_route(IRQ_MB93493_USB, IRQ_MB93493_USB_ROUTE); - mb93493_irq_route(IRQ_MB93493_LOCAL_BUS, IRQ_MB93493_LOCAL_BUS_ROUTE); - mb93493_irq_route(IRQ_MB93493_PCMCIA, IRQ_MB93493_PCMCIA_ROUTE); - mb93493_irq_route(IRQ_MB93493_GPIO, IRQ_MB93493_GPIO_ROUTE); - mb93493_irq_route(IRQ_MB93493_AUDIO_OUT, IRQ_MB93493_AUDIO_OUT_ROUTE); + int irq; + + for (irq = IRQ_BASE_MB93493 + 0; irq <= IRQ_BASE_MB93493 + 10; irq++) + set_irq_chip_and_handler(irq, &frv_mb93493_pic, handle_edge_irq); + + /* the MB93493 drives external IRQ inputs on the CPU PIC */ + setup_irq(IRQ_CPU_MB93493_0, &mb93493_irq[0]); + setup_irq(IRQ_CPU_MB93493_1, &mb93493_irq[1]); } diff --git a/arch/frv/kernel/irq-routing.c b/arch/frv/kernel/irq-routing.c deleted file mode 100644 index 53886adf47de..000000000000 --- a/arch/frv/kernel/irq-routing.c +++ /dev/null @@ -1,291 +0,0 @@ -/* irq-routing.c: IRQ routing - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/sched.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/serial_reg.h> -#include <asm/io.h> -#include <asm/irq-routing.h> -#include <asm/irc-regs.h> -#include <asm/serial-regs.h> -#include <asm/dma.h> - -struct irq_level frv_irq_levels[16] = { - [0 ... 
15] = { - .lock = SPIN_LOCK_UNLOCKED, - } -}; - -struct irq_group *irq_groups[NR_IRQ_GROUPS]; - -extern struct irq_group frv_cpu_irqs; - -void __init frv_irq_route(struct irq_source *source, int irqlevel) -{ - source->level = &frv_irq_levels[irqlevel]; - source->next = frv_irq_levels[irqlevel].sources; - frv_irq_levels[irqlevel].sources = source; -} - -void __init frv_irq_route_external(struct irq_source *source, int irq) -{ - int irqlevel = 0; - - switch (irq) { - case IRQ_CPU_EXTERNAL0: irqlevel = IRQ_XIRQ0_LEVEL; break; - case IRQ_CPU_EXTERNAL1: irqlevel = IRQ_XIRQ1_LEVEL; break; - case IRQ_CPU_EXTERNAL2: irqlevel = IRQ_XIRQ2_LEVEL; break; - case IRQ_CPU_EXTERNAL3: irqlevel = IRQ_XIRQ3_LEVEL; break; - case IRQ_CPU_EXTERNAL4: irqlevel = IRQ_XIRQ4_LEVEL; break; - case IRQ_CPU_EXTERNAL5: irqlevel = IRQ_XIRQ5_LEVEL; break; - case IRQ_CPU_EXTERNAL6: irqlevel = IRQ_XIRQ6_LEVEL; break; - case IRQ_CPU_EXTERNAL7: irqlevel = IRQ_XIRQ7_LEVEL; break; - default: BUG(); - } - - source->level = &frv_irq_levels[irqlevel]; - source->next = frv_irq_levels[irqlevel].sources; - frv_irq_levels[irqlevel].sources = source; -} - -void __init frv_irq_set_group(struct irq_group *group) -{ - irq_groups[group->first_irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP] = group; -} - -void distribute_irqs(struct irq_group *group, unsigned long irqmask) -{ - struct irqaction *action; - int irq; - - while (irqmask) { - asm("scan %1,gr0,%0" : "=r"(irq) : "r"(irqmask)); - if (irq < 0 || irq > 31) - asm volatile("break"); - irq = 31 - irq; - - irqmask &= ~(1 << irq); - action = group->actions[irq]; - - irq += group->first_irq; - - if (action) { - int status = 0; - -// if (!(action->flags & IRQF_DISABLED)) -// local_irq_enable(); - - do { - status |= action->flags; - action->handler(irq, action->dev_id, __frame); - action = action->next; - } while (action); - - if (status & IRQF_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - local_irq_disable(); - } - } -} - -/*****************************************************************************/ -/* - * CPU UART interrupts - */ -static void frv_cpuuart_doirq(struct irq_source *source) -{ -// uint8_t iir = readb(source->muxdata + UART_IIR * 8); -// if ((iir & 0x0f) != UART_IIR_NO_INT) - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cpuuart[2] = { -#define __CPUUART(X, A) \ - [X] = { \ - .muxname = "uart", \ - .muxdata = (volatile void __iomem *)(unsigned long)A,\ - .irqmask = 1 << IRQ_CPU_UART##X, \ - .doirq = frv_cpuuart_doirq, \ - } - - __CPUUART(0, UART0_BASE), - __CPUUART(1, UART1_BASE), -}; - -/*****************************************************************************/ -/* - * CPU DMA interrupts - */ -static void frv_cpudma_doirq(struct irq_source *source) -{ - uint32_t cstr = readl(source->muxdata + DMAC_CSTRx); - if (cstr & DMAC_CSTRx_INT) - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cpudma[8] = { -#define __CPUDMA(X, A) \ - [X] = { \ - .muxname = "dma", \ - .muxdata = (volatile void __iomem *)(unsigned long)A,\ - .irqmask = 1 << IRQ_CPU_DMA##X, \ - .doirq = frv_cpudma_doirq, \ - } - - __CPUDMA(0, 0xfe000900), - __CPUDMA(1, 0xfe000980), - __CPUDMA(2, 0xfe000a00), - __CPUDMA(3, 0xfe000a80), - __CPUDMA(4, 0xfe001000), - __CPUDMA(5, 0xfe001080), - __CPUDMA(6, 0xfe001100), - __CPUDMA(7, 0xfe001180), -}; - -/*****************************************************************************/ -/* - * CPU timer interrupts - can't tell whether they've generated an interrupt or not - */ -static void frv_cputimer_doirq(struct 
irq_source *source) -{ - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cputimer[3] = { -#define __CPUTIMER(X) \ - [X] = { \ - .muxname = "timer", \ - .muxdata = NULL, \ - .irqmask = 1 << IRQ_CPU_TIMER##X, \ - .doirq = frv_cputimer_doirq, \ - } - - __CPUTIMER(0), - __CPUTIMER(1), - __CPUTIMER(2), -}; - -/*****************************************************************************/ -/* - * external CPU interrupts - can't tell directly whether they've generated an interrupt or not - */ -static void frv_cpuexternal_doirq(struct irq_source *source) -{ - distribute_irqs(&frv_cpu_irqs, source->irqmask); -} - -struct irq_source frv_cpuexternal[8] = { -#define __CPUEXTERNAL(X) \ - [X] = { \ - .muxname = "ext", \ - .muxdata = NULL, \ - .irqmask = 1 << IRQ_CPU_EXTERNAL##X, \ - .doirq = frv_cpuexternal_doirq, \ - } - - __CPUEXTERNAL(0), - __CPUEXTERNAL(1), - __CPUEXTERNAL(2), - __CPUEXTERNAL(3), - __CPUEXTERNAL(4), - __CPUEXTERNAL(5), - __CPUEXTERNAL(6), - __CPUEXTERNAL(7), -}; - -#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16)) - -struct irq_group frv_cpu_irqs = { - .sources = { - [IRQ_CPU_UART0] = &frv_cpuuart[0], - [IRQ_CPU_UART1] = &frv_cpuuart[1], - [IRQ_CPU_TIMER0] = &frv_cputimer[0], - [IRQ_CPU_TIMER1] = &frv_cputimer[1], - [IRQ_CPU_TIMER2] = &frv_cputimer[2], - [IRQ_CPU_DMA0] = &frv_cpudma[0], - [IRQ_CPU_DMA1] = &frv_cpudma[1], - [IRQ_CPU_DMA2] = &frv_cpudma[2], - [IRQ_CPU_DMA3] = &frv_cpudma[3], - [IRQ_CPU_DMA4] = &frv_cpudma[4], - [IRQ_CPU_DMA5] = &frv_cpudma[5], - [IRQ_CPU_DMA6] = &frv_cpudma[6], - [IRQ_CPU_DMA7] = &frv_cpudma[7], - [IRQ_CPU_EXTERNAL0] = &frv_cpuexternal[0], - [IRQ_CPU_EXTERNAL1] = &frv_cpuexternal[1], - [IRQ_CPU_EXTERNAL2] = &frv_cpuexternal[2], - [IRQ_CPU_EXTERNAL3] = &frv_cpuexternal[3], - [IRQ_CPU_EXTERNAL4] = &frv_cpuexternal[4], - [IRQ_CPU_EXTERNAL5] = &frv_cpuexternal[5], - [IRQ_CPU_EXTERNAL6] = &frv_cpuexternal[6], - [IRQ_CPU_EXTERNAL7] = &frv_cpuexternal[7], - }, -}; - -/*****************************************************************************/ -/* - * route the CPU's interrupt sources - */ -void __init route_cpu_irqs(void) -{ - frv_irq_set_group(&frv_cpu_irqs); - - __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 IRQ detect levels */ - __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 IRQ detect levels */ - - /* route UART and error interrupts */ - frv_irq_route(&frv_cpuuart[0], IRQ_UART0_LEVEL); - frv_irq_route(&frv_cpuuart[1], IRQ_UART1_LEVEL); - - set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL, IRQ_UART1_LEVEL, IRQ_UART0_LEVEL); - - /* route DMA channel interrupts */ - frv_irq_route(&frv_cpudma[0], IRQ_DMA0_LEVEL); - frv_irq_route(&frv_cpudma[1], IRQ_DMA1_LEVEL); - frv_irq_route(&frv_cpudma[2], IRQ_DMA2_LEVEL); - frv_irq_route(&frv_cpudma[3], IRQ_DMA3_LEVEL); - frv_irq_route(&frv_cpudma[4], IRQ_DMA4_LEVEL); - frv_irq_route(&frv_cpudma[5], IRQ_DMA5_LEVEL); - frv_irq_route(&frv_cpudma[6], IRQ_DMA6_LEVEL); - frv_irq_route(&frv_cpudma[7], IRQ_DMA7_LEVEL); - - set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL, IRQ_DMA0_LEVEL); - set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL, IRQ_DMA4_LEVEL); - - /* route timer interrupts */ - frv_irq_route(&frv_cputimer[0], IRQ_TIMER0_LEVEL); - frv_irq_route(&frv_cputimer[1], IRQ_TIMER1_LEVEL); - frv_irq_route(&frv_cputimer[2], IRQ_TIMER2_LEVEL); - - set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL); - - /* route external interrupts */ - frv_irq_route(&frv_cpuexternal[0], IRQ_XIRQ0_LEVEL); - 
frv_irq_route(&frv_cpuexternal[1], IRQ_XIRQ1_LEVEL); - frv_irq_route(&frv_cpuexternal[2], IRQ_XIRQ2_LEVEL); - frv_irq_route(&frv_cpuexternal[3], IRQ_XIRQ3_LEVEL); - frv_irq_route(&frv_cpuexternal[4], IRQ_XIRQ4_LEVEL); - frv_irq_route(&frv_cpuexternal[5], IRQ_XIRQ5_LEVEL); - frv_irq_route(&frv_cpuexternal[6], IRQ_XIRQ6_LEVEL); - frv_irq_route(&frv_cpuexternal[7], IRQ_XIRQ7_LEVEL); - - set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL, IRQ_XIRQ4_LEVEL); - set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL, IRQ_XIRQ0_LEVEL); - -#if defined(CONFIG_MB93091_VDK) - __set_TM1(0x55550000); /* XIRQ7-0 all active low */ -#elif defined(CONFIG_MB93093_PDK) - __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */ -#else -#error dont know external IRQ trigger levels for this setup -#endif - -} /* end route_cpu_irqs() */ diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 08967010be04..5ac041c7c0a4 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -1,6 +1,6 @@ /* irq.c: FRV IRQ handling * - * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -9,13 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - #include <linux/ptrace.h> #include <linux/errno.h> #include <linux/signal.h> @@ -43,19 +36,16 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/irc-regs.h> -#include <asm/irq-routing.h> #include <asm/gdb-stub.h> -extern void __init fpga_init(void); -extern void __init route_mb93493_irqs(void); - -static void register_irq_proc (unsigned int irq); +#define set_IRR(N,A,B,C,D) __set_IRR(N, (A << 28) | (B << 24) | (C << 20) | (D << 16)) -/* - * Special irq handlers. - */ +extern void __init fpga_init(void); +#ifdef CONFIG_FUJITSU_MB93493 +extern void __init mb93493_init(void); +#endif -irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) { return IRQ_HANDLED; } +#define __reg16(ADDR) (*(volatile unsigned short *)(ADDR)) atomic_t irq_err_count; @@ -64,215 +54,86 @@ atomic_t irq_err_count; */ int show_interrupts(struct seq_file *p, void *v) { - struct irqaction *action; - struct irq_group *group; + int i = *(loff_t *) v, cpu; + struct irqaction * action; unsigned long flags; - int level, grp, ix, i, j; - - i = *(loff_t *) v; - - switch (i) { - case 0: - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); - - seq_putc(p, '\n'); - break; - case 1 ... 
NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP: - local_irq_save(flags); - - grp = (i - 1) / NR_IRQ_ACTIONS_PER_GROUP; - group = irq_groups[grp]; - if (!group) - goto skip; - - ix = (i - 1) % NR_IRQ_ACTIONS_PER_GROUP; - action = group->actions[ix]; - if (!action) - goto skip; - - seq_printf(p, "%3d: ", i - 1); - -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]); -#endif - - level = group->sources[ix]->level - frv_irq_levels; - - seq_printf(p, " %12s@%x", group->sources[ix]->muxname, level); - seq_printf(p, " %s", action->name); - - for (action = action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (i == 0) { + char cpuname[12]; + seq_printf(p, " "); + for_each_present_cpu(cpu) { + sprintf(cpuname, "CPU%d", cpu); + seq_printf(p, " %10s", cpuname); + } seq_putc(p, '\n'); -skip: - local_irq_restore(flags); - break; + } - case NR_IRQ_GROUPS * NR_IRQ_ACTIONS_PER_GROUP + 1: - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); - break; + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (action) { + seq_printf(p, "%3d: ", i); + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-"); + seq_printf(p, " %s", action->name); + for (action = action->next; + action; + action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); + } - default: - break; + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); } return 0; } - /* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. + * on-CPU PIC operations */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -void disable_irq_nosync(unsigned int irq) +static void frv_cpupic_ack(unsigned int irqlevel) { - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - unsigned long flags; - int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1); - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[idx]; - if (!source) - BUG(); - - level = source->level; - - spin_lock_irqsave(&level->lock, flags); - - if (group->control) { - if (!group->disable_cnt[idx]++) - group->control(group, idx, 0); - } else if (!level->disable_count++) { - __set_MASK(level - frv_irq_levels); - } - - spin_unlock_irqrestore(&level->lock, flags); + __clr_RC(irqlevel); + __clr_IRL(); } -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. 
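The nesting rule stated in this (now deleted) comment, and preserved by the generic IRQ layer the patch switches to, is plain reference counting: the line stays masked until every disable has been matched by an enable. A toy userspace model of the contract (the assert mirrors the "unbalanced" printk in the deleted enable_irq() below):

	#include <assert.h>

	static unsigned int disable_depth;
	static int line_masked;

	static void toy_disable(void)
	{
		if (disable_depth++ == 0)
			line_masked = 1;	/* first disable masks the line */
	}

	static void toy_enable(void)
	{
		assert(disable_depth > 0);	/* the "unbalanced" case */
		if (--disable_depth == 0)
			line_masked = 0;	/* last enable unmasks it */
	}

	int main(void)
	{
		toy_disable();
		toy_disable();
		toy_enable();
		assert(line_masked);		/* one disable still outstanding */
		toy_enable();
		assert(!line_masked);
		return 0;
	}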
- */ - -void disable_irq(unsigned int irq) +static void frv_cpupic_mask(unsigned int irqlevel) { - disable_irq_nosync(irq); - -#ifdef CONFIG_SMP - if (!local_irq_count(smp_processor_id())) { - do { - barrier(); - } while (irq_desc[irq].status & IRQ_INPROGRESS); - } -#endif + __set_MASK(irqlevel); } -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) +static void frv_cpupic_mask_ack(unsigned int irqlevel) { - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - unsigned long flags; - int idx = irq & (NR_IRQ_ACTIONS_PER_GROUP - 1); - int count; - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[idx]; - if (!source) - BUG(); - - level = source->level; - - spin_lock_irqsave(&level->lock, flags); - - if (group->control) - count = group->disable_cnt[idx]; - else - count = level->disable_count; - - switch (count) { - case 1: - if (group->control) { - if (group->actions[idx]) - group->control(group, idx, 1); - } else { - if (level->usage) - __clr_MASK(level - frv_irq_levels); - } - /* fall-through */ - - default: - count--; - break; - - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, __builtin_return_address(0)); - } + __set_MASK(irqlevel); + __clr_RC(irqlevel); + __clr_IRL(); +} - if (group->control) - group->disable_cnt[idx] = count; - else - level->disable_count = count; +static void frv_cpupic_unmask(unsigned int irqlevel) +{ + __clr_MASK(irqlevel); +} - spin_unlock_irqrestore(&level->lock, flags); +static void frv_cpupic_end(unsigned int irqlevel) +{ + __clr_MASK(irqlevel); } -EXPORT_SYMBOL(enable_irq); +static struct irq_chip frv_cpu_pic = { + .name = "cpu", + .ack = frv_cpupic_ack, + .mask = frv_cpupic_mask, + .mask_ack = frv_cpupic_mask_ack, + .unmask = frv_cpupic_unmask, + .end = frv_cpupic_end, +}; -/*****************************************************************************/ /* * handles all normal device IRQ's * - registers are referred to by the __frame variable (GR28) @@ -281,463 +142,65 @@ EXPORT_SYMBOL(enable_irq); */ asmlinkage void do_IRQ(void) { - struct irq_source *source; - int level, cpu; - irq_enter(); - - level = (__frame->tbr >> 4) & 0xf; - cpu = smp_processor_id(); - - if ((unsigned long) __frame - (unsigned long) (current + 1) < 512) - BUG(); - - __set_MASK(level); - __clr_RC(level); - __clr_IRL(); - - kstat_this_cpu.irqs[level]++; - - for (source = frv_irq_levels[level].sources; source; source = source->next) - source->doirq(source); - - __clr_MASK(level); - + generic_handle_irq(__get_IRL(), __frame); irq_exit(); +} -} /* end do_IRQ() */ - -/*****************************************************************************/ /* * handles all NMIs when not co-opted by the debugger * - registers are referred to by the __frame variable (GR28) */ asmlinkage void do_NMI(void) { -} /* end do_NMI() */ - -/*****************************************************************************/ -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * 
This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * IRQF_SHARED Interrupt is shared - * - * IRQF_DISABLED Disable local interrupts while processing - * - * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction *action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). - */ - if (irqflags & IRQF_SHARED) { - if (!dev_id) - printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", - devname, (&irq)[-1]); - } -#endif - - if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - action->mask = CPU_MASK_NONE; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. 
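A hypothetical driver written against the request_irq() contract documented above, using the 2.6.18-era three-argument handler on a shared line; the demo_device structure and its demo_* helpers are invented stubs:

	#include <linux/interrupt.h>
	#include <linux/module.h>

	struct demo_device {
		int irq;
	};

	static int demo_pending(struct demo_device *dev)	/* invented: did we interrupt? */
	{
		return 1;
	}

	static void demo_ack(struct demo_device *dev)		/* invented: clear the source */
	{
	}

	static irqreturn_t demo_isr(int irq, void *dev_id, struct pt_regs *regs)
	{
		struct demo_device *dev = dev_id;

		if (!demo_pending(dev))		/* shared line: this interrupt is not ours */
			return IRQ_NONE;

		demo_ack(dev);			/* clear the source before returning */
		return IRQ_HANDLED;
	}

	static int demo_attach(struct demo_device *dev)
	{
		/* a non-NULL dev_id is required with IRQF_SHARED */
		return request_irq(dev->irq, demo_isr, IRQF_SHARED, "demo", dev);
	}

	static void demo_detach(struct demo_device *dev)
	{
		free_irq(dev->irq, dev);	/* the same dev_id selects our handler */
	}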
- */ - -void free_irq(unsigned int irq, void *dev_id) -{ - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - struct irqaction **p, **pp; - unsigned long flags; - - if ((irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP) >= NR_IRQ_GROUPS) - return; - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - if (!source) - BUG(); - - level = source->level; - p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - - spin_lock_irqsave(&level->lock, flags); - - for (pp = p; *pp; pp = &(*pp)->next) { - struct irqaction *action = *pp; - - if (action->dev_id != dev_id) - continue; - - /* found it - remove from the list of entries */ - *pp = action->next; - - level->usage--; - - if (p == pp && group->control) - group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 0); - - if (level->usage == 0) - __set_MASK(level - frv_irq_levels); - - spin_unlock_irqrestore(&level->lock,flags); - -#ifdef CONFIG_SMP - /* Wait to make sure it's not being used on another CPU */ - while (desc->status & IRQ_INPROGRESS) - barrier(); -#endif - kfree(action); - return; - } -} - -EXPORT_SYMBOL(free_irq); - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that comes in on to an - * unassigned IRQ will cause GxICR_DETECT to be set - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - down(&probe_sem); - return 0; } -EXPORT_SYMBOL(probe_irq_on); - /* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). - */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long xmask) -{ - up(&probe_sem); - return 0; -} - -EXPORT_SYMBOL(probe_irq_mask); - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @xmask: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldnt happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. 
+ * initialise the interrupt system */ - -int probe_irq_off(unsigned long xmask) -{ - up(&probe_sem); - return -1; -} - -EXPORT_SYMBOL(probe_irq_off); - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction *new) -{ - struct irq_source *source; - struct irq_group *group; - struct irq_level *level; - struct irqaction **p, **pp; - unsigned long flags; - - group = irq_groups[irq >> NR_IRQ_LOG2_ACTIONS_PER_GROUP]; - if (!group) - BUG(); - - source = group->sources[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - if (!source) - BUG(); - - level = source->level; - - p = &group->actions[irq & (NR_IRQ_ACTIONS_PER_GROUP - 1)]; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & IRQF_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* must juggle the interrupt processing stuff with interrupts disabled */ - spin_lock_irqsave(&level->lock, flags); - - /* can't share interrupts unless all parties agree to */ - if (level->usage != 0 && !(level->flags & new->flags & IRQF_SHARED)) { - spin_unlock_irqrestore(&level->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - pp = p; - while (*pp) - pp = &(*pp)->next; - - *pp = new; - - level->usage++; - level->flags = new->flags; - - /* turn the interrupts on */ - if (level->usage == 1) - __clr_MASK(level - frv_irq_levels); - - if (p == pp && group->control) - group->control(group, irq & (NR_IRQ_ACTIONS_PER_GROUP - 1), 1); - - spin_unlock_irqrestore(&level->lock, flags); - register_irq_proc(irq); - return 0; -} - -static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; - -#define HEX_DIGITS 8 - -static unsigned int parse_hex_value (const char __user *buffer, - unsigned long count, unsigned long *ret) -{ - unsigned char hexnum [HEX_DIGITS]; - unsigned long value; - int i; - - if (!count) - return -EINVAL; - if (count > HEX_DIGITS) - count = HEX_DIGITS; - if (copy_from_user(hexnum, buffer, count)) - return -EFAULT; - - /* - * Parse the first 8 characters as a hex string, any non-hex char - * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. - */ - value = 0; - - for (i = 0; i < count; i++) { - unsigned int c = hexnum[i]; - - switch (c) { - case '0' ... '9': c -= '0'; break; - case 'a' ... 'f': c -= 'a'-10; break; - case 'A' ... 
'F': c -= 'A'-10; break; - default: - goto out; - } - value = (value << 4) | c; - } -out: - *ret = value; - return 0; -} - - -static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - unsigned long *mask = (unsigned long *) data; - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", *mask); -} - -static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned long *mask = (unsigned long *) data, full_count = count, err; - unsigned long new_value; - - show_state(); - err = parse_hex_value(buffer, count, &new_value); - if (err) - return err; - - *mask = new_value; - return full_count; -} - -#define MAX_NAMELEN 10 - -static void register_irq_proc (unsigned int irq) -{ - char name [MAX_NAMELEN]; - - if (!root_irq_dir || irq_dir[irq]) - return; - - memset(name, 0, MAX_NAMELEN); - sprintf(name, "%d", irq); - - /* create /proc/irq/1234 */ - irq_dir[irq] = proc_mkdir(name, root_irq_dir); -} - -unsigned long prof_cpu_mask = -1; - -void init_irq_proc (void) +void __init init_IRQ(void) { - struct proc_dir_entry *entry; - int i; + int level; - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", NULL); + for (level = 1; level <= 14; level++) + set_irq_chip_and_handler(level, &frv_cpu_pic, + handle_level_irq); - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - if (!entry) - return; + set_irq_handler(IRQ_CPU_TIMER0, handle_edge_irq); - entry->nlink = 1; - entry->data = (void *)&prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; - - /* - * Create entries for all existing IRQs. + /* set the trigger levels for internal interrupt sources + * - timers all falling-edge + * - ERR0 is rising-edge + * - all others are high-level */ - for (i = 0; i < NR_IRQS; i++) - register_irq_proc(i); -} + __set_IITMR(0, 0x003f0000); /* DMA0-3, TIMER0-2 */ + __set_IITMR(1, 0x20000000); /* ERR0-1, UART0-1, DMA4-7 */ + + /* route internal interrupts */ + set_IRR(4, IRQ_DMA3_LEVEL, IRQ_DMA2_LEVEL, IRQ_DMA1_LEVEL, + IRQ_DMA0_LEVEL); + set_IRR(5, 0, IRQ_TIMER2_LEVEL, IRQ_TIMER1_LEVEL, IRQ_TIMER0_LEVEL); + set_IRR(6, IRQ_GDBSTUB_LEVEL, IRQ_GDBSTUB_LEVEL, + IRQ_UART1_LEVEL, IRQ_UART0_LEVEL); + set_IRR(7, IRQ_DMA7_LEVEL, IRQ_DMA6_LEVEL, IRQ_DMA5_LEVEL, + IRQ_DMA4_LEVEL); + + /* route external interrupts */ + set_IRR(2, IRQ_XIRQ7_LEVEL, IRQ_XIRQ6_LEVEL, IRQ_XIRQ5_LEVEL, + IRQ_XIRQ4_LEVEL); + set_IRR(3, IRQ_XIRQ3_LEVEL, IRQ_XIRQ2_LEVEL, IRQ_XIRQ1_LEVEL, + IRQ_XIRQ0_LEVEL); + +#if defined(CONFIG_MB93091_VDK) + __set_TM1(0x55550000); /* XIRQ7-0 all active low */ +#elif defined(CONFIG_MB93093_PDK) + __set_TM1(0x15550000); /* XIRQ7 active high, 6-0 all active low */ +#else +#error dont know external IRQ trigger levels for this setup +#endif -/*****************************************************************************/ -/* - * initialise the interrupt system - */ -void __init init_IRQ(void) -{ - route_cpu_irqs(); fpga_init(); #ifdef CONFIG_FUJITSU_MB93493 - route_mb93493_irqs(); + mb93493_init(); #endif -} /* end init_IRQ() */ +} diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index af08ccd4ed6e..d96a57e5f030 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -43,7 +43,6 @@ #include <asm/mb-regs.h> #include <asm/mb93493-regs.h> #include <asm/gdb-stub.h> -#include <asm/irq-routing.h> #include <asm/io.h> #ifdef CONFIG_BLK_DEV_INITRD diff --git 
a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 68a77fe3bb40..3d0284bccb94 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -26,7 +26,6 @@ #include <asm/timer-regs.h> #include <asm/mb-regs.h> #include <asm/mb86943a.h> -#include <asm/irq-routing.h> #include <linux/timex.h> diff --git a/arch/frv/mb93090-mb00/pci-irq.c b/arch/frv/mb93090-mb00/pci-irq.c index 2278c80bd88c..ba587523c015 100644 --- a/arch/frv/mb93090-mb00/pci-irq.c +++ b/arch/frv/mb93090-mb00/pci-irq.c @@ -15,7 +15,6 @@ #include <asm/io.h> #include <asm/smp.h> -#include <asm/irq-routing.h> #include "pci-frv.h" diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index b5b4286f9dd4..3f3a0ed3539b 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -98,7 +98,7 @@ void show_mem(void) */ void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; /* allocate some pages for kernel housekeeping tasks */ empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index d3d40bdc2d6a..e4f4199f97ab 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -138,7 +138,7 @@ void paging_init(void) #endif { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b2751eadbc56..6189b0c28d6f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -494,7 +494,7 @@ config HIGHMEM64G endchoice choice - depends on EXPERIMENTAL && !X86_PAE + depends on EXPERIMENTAL prompt "Memory split" if EMBEDDED default VMSPLIT_3G help @@ -516,6 +516,7 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" config VMSPLIT_3G_OPT + depends on !HIGHMEM bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" @@ -794,6 +795,7 @@ config HOTPLUG_CPU config COMPAT_VDSO bool "Compat VDSO support" default y + depends on !PARAVIRT help Map the VDSO to the predictable old-style address too. 
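Several hunks in this patch shorten `{0, 0, 0}` to `{0, }` when zeroing zones_size. C zero-fills any trailing elements left unmentioned, so the short form stays correct no matter how large MAX_NR_ZONES grows. A quick self-contained check (the MAX_NR_ZONES value is assumed here):

	#include <assert.h>

	#define MAX_NR_ZONES 4			/* assumed; the point is it may grow */

	int main(void)
	{
		unsigned long zones_size[MAX_NR_ZONES] = { 0, };
		int i;

		for (i = 0; i < MAX_NR_ZONES; i++)
			assert(zones_size[i] == 0);	/* trailing elements zero-filled */
		return 0;
	}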
---help--- diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 8591f2fa920c..ff9ce4b5eaa8 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -1154,9 +1154,11 @@ out: static void set_time(void) { + struct timespec ts; if (got_clock_diff) { /* Must know time zone in order to set clock */ - xtime.tv_sec = get_cmos_time() + clock_cmos_diff; - xtime.tv_nsec = 0; + ts.tv_sec = get_cmos_time() + clock_cmos_diff; + ts.tv_nsec = 0; + do_settimeofday(&ts); } } @@ -1232,13 +1234,8 @@ static int suspend(int vetoable) restore_processor_state(); local_irq_disable(); - write_seqlock(&xtime_lock); - spin_lock(&i8253_lock); - reinit_timer(); set_time(); - - spin_unlock(&i8253_lock); - write_sequnlock(&xtime_lock); + reinit_timer(); if (err == APM_NO_ERROR) err = APM_SUCCESS; @@ -1365,9 +1362,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - write_seqlock_irq(&xtime_lock); set_time(); - write_sequnlock_irq(&xtime_lock); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(event, NULL); @@ -1383,9 +1378,7 @@ static void check_events(void) break; case APM_UPDATE_TIME: - write_seqlock_irq(&xtime_lock); set_time(); - write_sequnlock_irq(&xtime_lock); break; case APM_CRITICAL_SUSPEND: @@ -2339,6 +2332,7 @@ static int __init apm_init(void) ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); if (ret < 0) { printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); + remove_proc_entry("apm", NULL); return -ENOMEM; } @@ -2348,7 +2342,13 @@ static int __init apm_init(void) return 0; } - misc_register(&apm_device); + /* + * Note we don't actually care if the misc_device cannot be registered. + * this driver can do its job without it, even if userspace can't + * control it. just log the error + */ + if (misc_register(&apm_device)) + printk(KERN_WARNING "apm: Could not register misc device.\n"); if (HZ != 100) idle_period = (idle_period * HZ) / 100; diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 169ac8e0db68..0b61eed8bbd8 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -243,7 +243,7 @@ static DEFINE_SPINLOCK(set_atomicity_lock); * has been called. */ -static void prepare_set(void) +static void prepare_set(void) __acquires(set_atomicity_lock) { unsigned long cr0; @@ -274,7 +274,7 @@ static void prepare_set(void) mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); } -static void post_set(void) +static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ __flush_tlb(); diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S index d3ee73a3eee3..ef00bb77d7e4 100644 --- a/arch/i386/kernel/efi_stub.S +++ b/arch/i386/kernel/efi_stub.S @@ -7,7 +7,6 @@ #include <linux/linkage.h> #include <asm/page.h> -#include <asm/pgtable.h> /* * efi_call_phys(void *, ...) is a function with variable parameters. 
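The __acquires()/__releases() markers added to prepare_set() and post_set() above are sparse annotations rather than executable code: they tell the checker that a lock is deliberately taken in one function and dropped in another, silencing context-imbalance warnings. A minimal sketch of the pattern, with demo_lock invented (outside sparse the annotations expand to nothing):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	static void demo_begin(void) __acquires(demo_lock)
	{
		spin_lock(&demo_lock);
		/* critical section continues after this function returns... */
	}

	static void demo_end(void) __releases(demo_lock)
	{
		/* ...and ends here, in a different function */
		spin_unlock(&demo_lock);
	}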
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 54cfeabbc5e4..84278e0093a2 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -145,14 +145,10 @@ real_mode_gdt_entries [3] = 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct -{ - unsigned short size __attribute__ ((packed)); - unsigned long long * base __attribute__ ((packed)); -} -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, NULL }, -no_idt = { 0, NULL }; +static struct Xgt_desc_struct +real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, +real_mode_idt = { 0x3ff, 0 }, +no_idt = { 0, 0 }; /* This is 16-bit protected mode code to disable paging and the cache, diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d304..16d99444cf66 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -53,6 +53,7 @@ #include <asm/apic.h> #include <asm/e820.h> #include <asm/mpspec.h> +#include <asm/mmzone.h> #include <asm/setup.h> #include <asm/arch_hooks.h> #include <asm/sections.h> @@ -934,6 +935,24 @@ static void __init parse_cmdline_early (char ** cmdline_p) } /* + * reservetop=size reserves a hole at the top of the kernel address space which + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, + * so relocating the fixmap can be done before paging initialization. + */ +static int __init parse_reservetop(char *arg) +{ + unsigned long address; + + if (!arg) + return -EINVAL; + + address = memparse(arg, &arg); + reserve_top_address(address); + return 0; +} +early_param("reservetop", parse_reservetop); + +/* * Callback for efi_memory_walk. */ static int __init @@ -1181,7 +1200,7 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; unsigned int max_dma, low; max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; @@ -1258,7 +1277,7 @@ void __init setup_bootmem_allocator(void) */ find_smp_config(); #endif - + numa_kva_reserve(); #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index c10789d7a9d3..465188e2d701 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -634,3 +634,69 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) } } +/* + * this function sends a 'generic call function' IPI to one other CPU + * in the system. + * + * cpu is a standard Linux logical CPU number. + */ +static void +__smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = 1; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + wmb(); + /* Send a message to the target CPU and wait for it to respond */ + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (!wait) + return; + + while (atomic_read(&data.finished) != cpus) + cpu_relax(); +} + +/* + * smp_call_function_single - Run a function on another CPU + * @cpu: The target CPU. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Currently unused. + * @wait: If true, wait until the function has completed on the other CPU. + * + * Returns 0 on success, else a negative status code. + * + * Does not return until the remote CPU is nearly ready to execute <func>, + * or has already executed it. + */ + +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int me = get_cpu(); + if (cpu == me) { + WARN_ON(1); + put_cpu(); + return -EBUSY; + } + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); + spin_unlock_bh(&call_lock); + put_cpu(); + return 0; +} +EXPORT_SYMBOL(smp_call_function_single); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index f948419c888a..efe07990e7fc 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -642,9 +642,13 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); + int node = apicid_to_node(apicid); + + if (!node_online(node)) + node = first_online_node; cpu_2_logical_apicid[cpu] = apicid; - map_cpu_to_node(cpu, apicid_to_node(apicid)); + map_cpu_to_node(cpu, node); } static void unmap_cpu_to_logical_apicid(int cpu) diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index b1809c9a0899..83db411b3aa7 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -42,7 +42,7 @@ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ -#define MAX_CHUNKS_PER_NODE 4 +#define MAX_CHUNKS_PER_NODE 3 #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) struct node_memory_chunk_s { unsigned long start_pfn; @@ -135,9 +135,6 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -#if MAX_NR_ZONES != 4 -#error "MAX_NR_ZONES != 4, chunk_to_zone requires review" -#endif /* Take a chunk of pages from page frame cstart to cend and count the number * of pages in each zone, returned via zones[].
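For illustration, a minimal sketch of a caller of the smp_call_function_single() interface added above; the callback and its payload are hypothetical, not part of the patch:

/* Clear a counter on CPU 1 and wait for completion. The callback runs
 * in interrupt context on the target CPU, so it must be fast and must
 * not sleep. */
static void clear_remote_counter(void *info)
{
	unsigned long *counter = info;

	*counter = 0;
}

static int example_clear_on_cpu1(unsigned long *counter)
{
	/* nonatomic is currently unused; wait=1 spins until the remote
	 * CPU has finished running the callback */
	return smp_call_function_single(1, clear_remote_counter,
					counter, 0, 1);
}

Note that calling this from CPU 1 itself would trip the WARN_ON and return -EBUSY.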
*/ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index edd00f6cee37..1302e4ab3c4f 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -270,16 +270,19 @@ void notify_arch_cmos_timer(void) mod_timer(&sync_cmos_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; +static long clock_cmos_diff; +static unsigned long sleep_start; static int timer_suspend(struct sys_device *dev, pm_message_t state) { /* * Estimate time zone so that set_time can update the clock */ - clock_cmos_diff = -get_cmos_time(); + unsigned long ctime = get_cmos_time(); + + clock_cmos_diff = -ctime; clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); + sleep_start = ctime; return 0; } @@ -287,18 +290,29 @@ static int timer_resume(struct sys_device *dev) { unsigned long flags; unsigned long sec; - unsigned long sleep_length; - + unsigned long ctime = get_cmos_time(); + long sleep_length = (ctime - sleep_start) * HZ; + struct timespec ts; + + if (sleep_length < 0) { + printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n"); + /* The time after the resume must not be earlier than the time + * before the suspend or some nasty things will happen + */ + sleep_length = 0; + ctime = sleep_start; + } #ifdef CONFIG_HPET_TIMER if (is_hpet_enabled()) hpet_reenable(); #endif setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; + + sec = ctime + clock_cmos_diff; + ts.tv_sec = sec; + ts.tv_nsec = 0; + do_settimeofday(&ts); write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; jiffies_64 += sleep_length; wall_jiffies += sleep_length; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -334,10 +348,11 @@ extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + struct timespec ts; + ts.tv_sec = get_cmos_time(); + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + + do_settimeofday(&ts); if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); @@ -349,6 +364,7 @@ static void __init hpet_time_init(void) void __init time_init(void) { + struct timespec ts; #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -359,10 +375,10 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + ts.tv_sec = get_cmos_time(); + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + + do_settimeofday(&ts); time_init_hook(); } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 14a1376fedd1..6bf14a4e995e 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -301,23 +301,25 @@ int hpet_rtc_timer_init(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); + local_irq_restore(flags); + return 1; } static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, cnt; + unsigned int cfg, 
cnt, ticks_per_int, lost_ints; if (unlikely(!(PIE_on | AIE_on | UIE_on))) { cfg = hpet_readl(HPET_T1_CFG); @@ -332,10 +334,33 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; + ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; + hpet_t1_cmp += ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + /* + * If the interrupt handler was delayed too long, the write above tries + * to schedule the next interrupt in the past and the hardware would + * not interrupt until the counter had wrapped around. + * So we have to check that the comparator wasn't set to a past time. + */ + cnt = hpet_readl(HPET_COUNTER); + if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { + lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; + /* Make sure that, even with the time needed to execute + * this code, the next scheduled interrupt has been moved + * back to the future: */ + lost_ints++; + + hpet_t1_cmp += lost_ints * ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + if (PIE_on) + PIE_count += lost_ints; + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + hpet_rtc_int_freq); + } } /* diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7e9edafffd8a..4fcc6690be99 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -313,6 +313,8 @@ void show_registers(struct pt_regs *regs) */ if (in_kernel) { u8 __user *eip; + int code_bytes = 64; + unsigned char c; printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); @@ -320,9 +322,12 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); eip = (u8 __user *)regs->eip - 43; - for (i = 0; i < 64; i++, eip++) { - unsigned char c; - + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + /* try starting at EIP */ + eip = (u8 __user *)regs->eip; + code_bytes = 32; + } + for (i = 0; i < code_bytes; i++, eip++) { if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { printk(" Bad EIP value."); break; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 2d4f1386e2b1..1e7ac1c44ddc 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) ENTRY(phys_startup_32) jiffies = jiffies_64; + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(4); /* R__ */ +} SECTIONS { . = __KERNEL_START; @@ -26,7 +32,7 @@ SECTIONS KPROBES_TEXT *(.fixup) *(.gnu.warning) - } = 0x9090 + } :text = 0x9090 _etext = .; /* End of text section */ @@ -48,7 +54,7 @@ SECTIONS .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ *(.data) CONSTRUCTORS - } + } :data . 
= ALIGN(4096); __nosave_begin = .; @@ -184,4 +190,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + NOTES } diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index 50f6de6ff64d..f39887359e8e 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -130,7 +130,6 @@ thread(void *unused) init_timer(&wakeup_timer); sigfillset(¤t->blocked); - current->signal->tty = NULL; printk(KERN_NOTICE "Voyager starting monitor thread\n"); diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c index 5d44f4f5ff59..4de11f508c3a 100644 --- a/arch/i386/mm/boot_ioremap.c +++ b/arch/i386/mm/boot_ioremap.c @@ -29,8 +29,11 @@ */ #define BOOT_PTE_PTRS (PTRS_PER_PTE*2) -#define boot_pte_index(address) \ - (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1)) + +static unsigned long boot_pte_index(unsigned long vaddr) +{ + return __pa(vaddr) >> PAGE_SHIFT; +} static inline boot_pte_t* boot_vaddr_to_pte(void *address) { diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b8..fb5d8b747de4 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); void *node_remap_end_vaddr[MAX_NUMNODES]; void *node_remap_alloc_vaddr[MAX_NUMNODES]; - +static unsigned long kva_start_pfn; +static unsigned long kva_pages; /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -286,7 +287,6 @@ unsigned long __init setup_memory(void) { int nid; unsigned long system_start_pfn, system_max_low_pfn; - unsigned long reserve_pages; /* * When mapping a NUMA machine we allocate the node_mem_map arrays @@ -298,14 +298,23 @@ unsigned long __init setup_memory(void) find_max_pfn(); get_memcfg_numa(); - reserve_pages = calculate_numa_remap_pages(); + kva_pages = calculate_numa_remap_pages(); /* partially used pages are not usable - thus round upwards */ system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); - system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; - printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", - reserve_pages, max_low_pfn + reserve_pages); + kva_start_pfn = find_max_low_pfn() - kva_pages; + +#ifdef CONFIG_BLK_DEV_INITRD + /* Numa kva area is below the initrd */ + if (LOADER_TYPE && INITRD_START) + kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; +#endif + kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); + + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", + kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; @@ -323,7 +332,7 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn + node_remap_offset[nid]); + kva_start_pfn + node_remap_offset[nid]); /* Init the node remap allocator */ node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + (node_remap_size[nid] * PAGE_SIZE); @@ -338,7 +347,6 @@ unsigned long __init setup_memory(void) } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); - vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for_each_online_node(nid) find_max_pfn_node(nid); @@ -348,13 +356,18 @@ unsigned long __init setup_memory(void) return max_low_pfn; } +void __init numa_kva_reserve(void) +{ + 
reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); +} + void __init zone_sizes_init(void) { int nid; for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long *zholes_size; unsigned int max_dma; @@ -409,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro) zone_end_pfn = zone_start_pfn + zone->spanned_pages; printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone->zone_pgdat->node_id, + zone->name, zone_to_nid(zone), zone_start_pfn, zone_end_pfn); for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 89e8486aac34..efd0bcdac65d 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -629,6 +629,48 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); +#if 1 /* double-sanity-check paranoia */ + printk("virtual kernel memory layout:\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", + FIXADDR_START, FIXADDR_TOP, + (FIXADDR_TOP - FIXADDR_START) >> 10, + +#ifdef CONFIG_HIGHMEM + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, + (LAST_PKMAP*PAGE_SIZE) >> 10, +#endif + + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, + + (unsigned long)__va(0), (unsigned long)high_memory, + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, + + (unsigned long)&__init_begin, (unsigned long)&__init_end, + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, + + (unsigned long)&_etext, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + + (unsigned long)&_text, (unsigned long)&_etext, + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + +#ifdef CONFIG_HIGHMEM + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUG_ON(VMALLOC_END > PKMAP_BASE); +#endif + BUG_ON(VMALLOC_START > VMALLOC_END); + BUG_ON((unsigned long)high_memory > VMALLOC_START); +#endif /* double-sanity-check paranoia */ + #ifdef CONFIG_X86_PAE if (!cpu_has_pae) panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); @@ -657,7 +699,7 @@ void __init mem_init(void) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdata = &contig_page_data; - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index bd98768d8764..10126e3f8174 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -60,7 +61,9 @@ void show_mem(void) printk(KERN_INFO "%lu pages writeback\n", global_page_state(NR_WRITEBACK)); printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); + printk(KERN_INFO "%lu pages slab\n", + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)); printk(KERN_INFO "%lu pages pagetables\n", 
global_page_state(NR_PAGETABLE)); } @@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) __flush_tlb_one(vaddr); } +static int fixmaps; +#ifndef CONFIG_COMPAT_VDSO +unsigned long __FIXADDR_TOP = 0xfffff000; +EXPORT_SYMBOL(__FIXADDR_TOP); +#endif + void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { unsigned long address = __fix_to_virt(idx); @@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) return; } set_pte_pfn(address, phys >> PAGE_SHIFT, flags); + fixmaps++; +} + +/** + * reserve_top_address - reserves a hole in the top of kernel address space + * @reserve: size of hole to reserve + * + * Can be used to relocate the fixmap area and poke a hole in the top + * of kernel address space to make room for a hypervisor. + */ +void reserve_top_address(unsigned long reserve) +{ + BUG_ON(fixmaps > 0); +#ifdef CONFIG_COMPAT_VDSO + BUG_ON(reserve != 0); +#else + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; +#endif } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S index c893b897217f..8a2b50a0aaad 100644 --- a/arch/i386/power/swsusp.S +++ b/arch/i386/power/swsusp.S @@ -32,7 +32,7 @@ ENTRY(swsusp_arch_resume) movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx movl %ecx, %cr3 - movl pagedir_nosave, %edx + movl restore_pblist, %edx .p2align 4,,7 copy_loop: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index db274da7dba1..f521f2f60a78 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -66,15 +66,6 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR -config DMA_IS_DMA32 - bool - default y - -config DMA_IS_NORMAL - bool - depends on IA64_SGI_SN2 - default y - config AUDIT_ARCH bool default y diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 0176556aeecc..32c3abededc6 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -771,16 +771,19 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) { #ifdef CONFIG_ACPI_NUMA int pxm_id; + int nid; pxm_id = acpi_get_pxm(handle); - /* - * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag + * We don't support CPU-only-node hot-add. But if the system comes + * with an SRAT table, the pxm has already been parsed and the node is + * ready, so pxm_to_node(pxm) is all that is needed. + * This code is for systems whose SRAT table does not cover all + * possible cpus. */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); - + nid = acpi_map_pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; + node_cpuid[cpu].nid = nid; #endif return (0); } diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index 1cc360c83e7a..20340631179f 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -29,6 +29,36 @@ EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +void __cpuinit map_cpu_to_node(int cpu, int nid) +{ + int oldnid; + if (nid < 0) { /* fall back to node 0 */ + cpu_to_node_map[cpu] = 0; + return; + } + /* sanity check first */ + oldnid = cpu_to_node_map[cpu]; + if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) { + return; /* nothing to do */ + } + /* We don't have CPU-driven node hot-add yet; + in the usual case, the node is created from the SRAT at boot time.
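Stepping back to the i386 reserve_top_address() hook added earlier in this series: a sketch of how it is meant to be driven, with a hypothetical 64 MB hole. Booting with reservetop=64M reaches the same call through parse_reservetop() and memparse():

/* Hypothetical early-boot hook for a dynamically loaded hypervisor.
 * Must run before the first __set_fixmap() call; otherwise the
 * BUG_ON(fixmaps > 0) above fires. */
static void __init example_hypervisor_reserve(void)
{
	/* poke a 64 MB hole below the old fixmap location: __FIXADDR_TOP
	 * drops and __VMALLOC_RESERVE grows by the same amount */
	reserve_top_address(64 * 1024 * 1024);
}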
*/ + if (!node_online(nid)) + nid = first_online_node; + cpu_to_node_map[cpu] = nid; + cpu_set(cpu, node_to_cpu_mask[nid]); + return; +} + +void __cpuinit unmap_cpu_from_node(int cpu, int nid) +{ + WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); + WARN_ON(cpu_to_node_map[cpu] != nid); + cpu_to_node_map[cpu] = 0; + cpu_clear(cpu, node_to_cpu_mask[nid]); +} + + /** * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays * @@ -49,8 +79,6 @@ void __init build_cpu_to_node_map(void) node = node_cpuid[i].nid; break; } - cpu_to_node_map[cpu] = (node >= 0) ? node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); + map_cpu_to_node(cpu, node); } } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 84a7e52f56f6..7bb7696e4ce2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -34,6 +34,7 @@ #include <linux/file.h> #include <linux/poll.h> #include <linux/vfs.h> +#include <linux/smp.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/bitops.h> diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f648c610b10c..05bdf7affb43 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,6 +36,9 @@ int arch_register_cpu(int num) */ if (!can_cpei_retarget() && is_cpu_cpei_target(num)) sysfs_cpus[num].cpu.no_control = 1; +#ifdef CONFIG_NUMA + map_cpu_to_node(num, node_cpuid[num].nid); +#endif #endif return register_cpu(&sysfs_cpus[num].cpu, num); @@ -45,7 +48,8 @@ int arch_register_cpu(int num) void arch_unregister_cpu(int num) { - return unregister_cpu(&sysfs_cpus[num].cpu); + unregister_cpu(&sysfs_cpus[num].cpu); + unmap_cpu_from_node(num, cpu_to_node(num)); } EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 4c73a6763669..c58e933694d5 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ - page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 9a8a29339d2d..b632b9c1e3b3 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -32,9 +32,10 @@ #include <linux/cpumask.h> #include <linux/smp_lock.h> #include <linux/nodemask.h> +#include <linux/smp.h> + #include <asm/processor.h> #include <asm/topology.h> -#include <asm/smp.h> #include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/sal.h> diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index b71348fec1f4..bbd97c85bc5d 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -100,7 +100,7 @@ void free_initrd_mem(unsigned long, unsigned long); #ifndef CONFIG_DISCONTIGMEM unsigned long __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long max_dma; unsigned long low; unsigned long start_pfn; diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c index e4c233eef195..06e538d1be3a 100644 --- a/arch/m68knommu/mm/init.c +++ b/arch/m68knommu/mm/init.c @@ -136,7 +136,7 @@ void paging_init(void) #endif { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + 
unsigned long zones_size[MAX_NR_ZONES] = {0, }; zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT; diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c index 98244d51c154..c4fae8ff4671 100644 --- a/arch/mips/au1000/common/dbdma.c +++ b/arch/mips/au1000/common/dbdma.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL(au1xxx_ddma_add_device); */ u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, - void (*callback)(int, void *, struct pt_regs *), void *callparam) + void (*callback)(int, void *), void *callparam) { unsigned long flags; u32 used, chan, rv; @@ -248,8 +248,10 @@ au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, au1xxx_dbdma_init(); dbdma_initialized = 1; - if ((stp = find_dbdev_id(srcid)) == NULL) return 0; - if ((dtp = find_dbdev_id(destid)) == NULL) return 0; + if ((stp = find_dbdev_id(srcid)) == NULL) + return 0; + if ((dtp = find_dbdev_id(destid)) == NULL) + return 0; used = 0; rv = 0; @@ -869,7 +871,7 @@ dbdma_interrupt(int irq, void *dev_id, struct pt_regs *regs) au_sync(); if (ctp->chan_callback) - (ctp->chan_callback)(irq, ctp->chan_callparam, regs); + (ctp->chan_callback)(irq, ctp->chan_callparam); ctp->cur_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr)); return IRQ_RETVAL(1); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index c52497bb102a..5b06349af2d5 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -163,10 +163,10 @@ static int __init page_is_ram(unsigned long pagenr) void __init paging_init(void) { - unsigned long zones_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; + unsigned long zones_size[] = { 0, }; unsigned long max_dma, high, low; #ifndef CONFIG_FLATMEM - unsigned long zholes_size[] = { [0 ... MAX_NR_ZONES - 1] = 0 }; + unsigned long zholes_size[] = { 0, }; unsigned long i, j, pfn; #endif diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index efe6971fc800..16e5682b01f1 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -19,6 +19,7 @@ #include <linux/swap.h> #include <linux/bootmem.h> #include <linux/pfn.h> +#include <linux/highmem.h> #include <asm/page.h> #include <asm/sections.h> @@ -508,7 +509,7 @@ extern unsigned long setup_zero_pages(void); void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned node; pagetable_init(); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f2b96f1e0da7..25ad28d63e88 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -551,7 +551,7 @@ void show_mem(void) printk("Zone list for zone %d on node %d: ", j, i); for (k = 0; zl->zones[k] != NULL; k++) - printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name); + printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name); printk("\n"); } } @@ -809,7 +809,7 @@ void __init paging_init(void) flush_tlb_all_local(NULL); for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 }; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; /* We have an IOMMU, so all memory can go into a single ZONE_DMA zone. 
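Returning to the GFP_THISNODE change in the ia64 uncached allocator above: a hedged sketch of what the flag buys. The wrapper below is illustrative, not part of the patch:

/* With GFP_THISNODE the page allocator refuses to fall back to other
 * nodes' zones, so a NULL return means "nid is out of memory" rather
 * than a silent grant of remote pages; the caller can then try the
 * next node explicitly instead of getting non-local memory. */
static struct page *alloc_local_pages(int nid, unsigned int order)
{
	return alloc_pages_node(nid,
				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
				order);
}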
*/ diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index 7369f9a6ad25..69e8f86aa4f8 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -159,8 +159,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) isync /* Load ptr the list of pages to copy in r3 */ - lis r11,(pagedir_nosave - KERNELBASE)@h - ori r11,r11,pagedir_nosave@l + lis r11,(restore_pblist - KERNELBASE)@h + ori r11,r11,restore_pblist@l lwz r10,0(r11) /* Copy the pages. This is a very basic implementation, to diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index ab3b0765a64e..8aea3698a77b 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -117,8 +117,7 @@ static void appldata_get_mem_data(void *data) mem_data->pgpgout = ev[PGPGOUT] >> 1; mem_data->pswpin = ev[PSWPIN]; mem_data->pswpout = ev[PSWPOUT]; - mem_data->pgalloc = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] + - ev[PGALLOC_DMA]; + mem_data->pgalloc = ev[PGALLOC_NORMAL] + ev[PGALLOC_DMA]; mem_data->pgfault = ev[PGFAULT]; mem_data->pgmajfault = ev[PGMAJFAULT]; diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 786a44dba5bf..607f50ead1fd 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -15,6 +15,8 @@ #include <linux/sched.h> #include <linux/sysctl.h> #include <linux/ctype.h> +#include <linux/swap.h> +#include <linux/kthread.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -34,18 +36,18 @@ struct cmm_page_array { unsigned long pages[CMM_NR_PAGES]; }; -static long cmm_pages = 0; -static long cmm_timed_pages = 0; -static volatile long cmm_pages_target = 0; -static volatile long cmm_timed_pages_target = 0; -static long cmm_timeout_pages = 0; -static long cmm_timeout_seconds = 0; +static long cmm_pages; +static long cmm_timed_pages; +static volatile long cmm_pages_target; +static volatile long cmm_timed_pages_target; +static long cmm_timeout_pages; +static long cmm_timeout_seconds; -static struct cmm_page_array *cmm_page_list = NULL; -static struct cmm_page_array *cmm_timed_page_list = NULL; +static struct cmm_page_array *cmm_page_list; +static struct cmm_page_array *cmm_timed_page_list; +static DEFINE_SPINLOCK(cmm_lock); -static unsigned long cmm_thread_active = 0; -static struct work_struct cmm_thread_starter; +static struct task_struct *cmm_thread_ptr; static wait_queue_head_t cmm_thread_wait; static struct timer_list cmm_timer; @@ -53,71 +55,100 @@ static void cmm_timer_fn(unsigned long); static void cmm_set_timer(void); static long -cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) +cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list) { - struct cmm_page_array *pa; - unsigned long page; + struct cmm_page_array *pa, *npa; + unsigned long addr; - pa = *list; - while (pages) { - page = __get_free_page(GFP_NOIO); - if (!page) + while (nr) { + addr = __get_free_page(GFP_NOIO); + if (!addr) break; + spin_lock(&cmm_lock); + pa = *list; if (!pa || pa->index >= CMM_NR_PAGES) { /* Need a new page for the page list. 
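A note on the locking pattern the rewritten cmm_alloc_pages() uses at this point: drop the lock around the sleeping allocation, then retake it and re-check. A simplified sketch; the helper name is hypothetical, and 'list'/'lock' stand in for cmm_page_list/cmm_lock:

/* Called with *lock held; returns with it held. __get_free_page() with
 * GFP_NOIO may sleep, so the lock is dropped around it, and the list
 * head is re-examined afterwards in case another CPU grew it first. */
static struct cmm_page_array *grow_page_list(struct cmm_page_array **list,
					     spinlock_t *lock)
{
	struct cmm_page_array *pa, *npa;

	spin_unlock(lock);
	npa = (struct cmm_page_array *) __get_free_page(GFP_NOIO);
	spin_lock(lock);

	pa = *list;
	if (!npa)
		return pa;			/* allocation failed */
	if (!pa || pa->index >= CMM_NR_PAGES) {
		npa->next = pa;			/* won the race: install */
		npa->index = 0;
		*list = npa;
		return npa;
	}
	free_page((unsigned long) npa);		/* lost the race: discard */
	return pa;
}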
*/ - pa = (struct cmm_page_array *) + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *) __get_free_page(GFP_NOIO); - if (!pa) { - free_page(page); + if (!npa) { + free_page(addr); break; } - pa->next = *list; - pa->index = 0; - *list = pa; + spin_lock(&cmm_lock); + pa = *list; + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + *list = pa; + } else + free_page((unsigned long) npa); } - diag10(page); - pa->pages[pa->index++] = page; + diag10(addr); + pa->pages[pa->index++] = addr; (*counter)++; - pages--; + spin_unlock(&cmm_lock); + nr--; } - return pages; + return nr; } -static void -cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) +static long +cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) { struct cmm_page_array *pa; - unsigned long page; + unsigned long addr; + spin_lock(&cmm_lock); pa = *list; - while (pages) { + while (nr) { if (!pa || pa->index <= 0) break; - page = pa->pages[--pa->index]; + addr = pa->pages[--pa->index]; if (pa->index == 0) { pa = pa->next; free_page((unsigned long) *list); *list = pa; } - free_page(page); + free_page(addr); (*counter)--; - pages--; + nr--; } + spin_unlock(&cmm_lock); + return nr; } +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = 256; + + nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); + if (nr > 0) + nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + *freed += 256 - nr; + return NOTIFY_OK; +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + static int cmm_thread(void *dummy) { int rc; - daemonize("cmmthread"); while (1) { rc = wait_event_interruptible(cmm_thread_wait, (cmm_pages != cmm_pages_target || - cmm_timed_pages != cmm_timed_pages_target)); - if (rc == -ERESTARTSYS) { - /* Got kill signal. End thread. 
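The conversion from daemonize()/kernel_thread() to the kthread API in this hunk follows a standard shape; a minimal self-contained sketch with hypothetical names:

/* kthread_run() starts the thread, the wait condition polls
 * kthread_should_stop(), and kthread_stop() at module exit wakes the
 * thread and waits for it to return. */
static DECLARE_WAIT_QUEUE_HEAD(example_wait);
static int example_work_pending;
static struct task_struct *example_task;

static int example_thread(void *unused)
{
	while (!kthread_should_stop()) {
		wait_event_interruptible(example_wait,
					 example_work_pending ||
					 kthread_should_stop());
		example_work_pending = 0;
		/* ... do the actual work here ... */
	}
	return 0;
}

static int example_start(void)
{
	example_task = kthread_run(example_thread, NULL, "examplethread");
	return IS_ERR(example_task) ? PTR_ERR(example_task) : 0;
}

static void example_stop(void)
{
	kthread_stop(example_task);
}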
*/ - clear_bit(0, &cmm_thread_active); + cmm_timed_pages != cmm_timed_pages_target || + kthread_should_stop())); + if (kthread_should_stop() || rc == -ERESTARTSYS) { cmm_pages_target = cmm_pages; cmm_timed_pages_target = cmm_timed_pages; break; @@ -143,16 +174,8 @@ cmm_thread(void *dummy) } static void -cmm_start_thread(void) -{ - kernel_thread(cmm_thread, NULL, 0); -} - -static void cmm_kick_thread(void) { - if (!test_and_set_bit(0, &cmm_thread_active)) - schedule_work(&cmm_thread_starter); wake_up(&cmm_thread_wait); } @@ -177,21 +200,21 @@ cmm_set_timer(void) static void cmm_timer_fn(unsigned long ignored) { - long pages; + long nr; - pages = cmm_timed_pages_target - cmm_timeout_pages; - if (pages < 0) + nr = cmm_timed_pages_target - cmm_timeout_pages; + if (nr < 0) cmm_timed_pages_target = 0; else - cmm_timed_pages_target = pages; + cmm_timed_pages_target = nr; cmm_kick_thread(); cmm_set_timer(); } void -cmm_set_pages(long pages) +cmm_set_pages(long nr) { - cmm_pages_target = pages; + cmm_pages_target = nr; cmm_kick_thread(); } @@ -202,9 +225,9 @@ cmm_get_pages(void) } void -cmm_add_timed_pages(long pages) +cmm_add_timed_pages(long nr) { - cmm_timed_pages_target += pages; + cmm_timed_pages_target += nr; cmm_kick_thread(); } @@ -215,9 +238,9 @@ cmm_get_timed_pages(void) } void -cmm_set_timeout(long pages, long seconds) +cmm_set_timeout(long nr, long seconds) { - cmm_timeout_pages = pages; + cmm_timeout_pages = nr; cmm_timeout_seconds = seconds; cmm_set_timer(); } @@ -245,7 +268,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; - long pages; + long nr; int len; if (!*lenp || (*ppos && !write)) { @@ -260,17 +283,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = simple_strtoul(p, &p, 0); + nr = simple_strtoul(p, &p, 0); if (ctl == &cmm_table[0]) - cmm_set_pages(pages); + cmm_set_pages(nr); else - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else { if (ctl == &cmm_table[0]) - pages = cmm_get_pages(); + nr = cmm_get_pages(); else - pages = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", pages); + nr = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", nr); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -286,7 +309,7 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; - long pages, seconds; + long nr, seconds; int len; if (!*lenp || (*ppos && !write)) { @@ -301,10 +324,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = simple_strtoul(p, &p, 0); + nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -357,7 +380,7 @@ static struct ctl_table cmm_dir_table[] = { static void cmm_smsg_target(char *from, char *msg) { - long pages, seconds; + long nr, seconds; if (strlen(sender) > 0 && strcmp(from, sender) != 0) return; @@ -366,27 +389,27 @@ cmm_smsg_target(char *from, char *msg) if (strncmp(msg, "SHRINK", 6) == 0) { if (!cmm_skip_blanks(msg + 6, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_pages(pages); 
+ cmm_set_pages(nr); } else if (strncmp(msg, "RELEASE", 7) == 0) { if (!cmm_skip_blanks(msg + 7, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else if (strncmp(msg, "REUSE", 5) == 0) { if (!cmm_skip_blanks(msg + 5, &msg)) return; - pages = simple_strtoul(msg, &msg, 0); + nr = simple_strtoul(msg, &msg, 0); if (!cmm_skip_blanks(msg, &msg)) return; seconds = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } } #endif @@ -396,21 +419,49 @@ struct ctl_table_header *cmm_sysctl_header; static int cmm_init (void) { + int rc = -ENOMEM; + #ifdef CONFIG_CMM_PROC cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1); + if (!cmm_sysctl_header) + goto out; #endif #ifdef CONFIG_CMM_IUCV - smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); + rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); + if (rc < 0) + goto out_smsg; #endif - INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, NULL); + rc = register_oom_notifier(&cmm_oom_nb); + if (rc < 0) + goto out_oom_notify; init_waitqueue_head(&cmm_thread_wait); init_timer(&cmm_timer); - return 0; + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; + if (!rc) + goto out; + /* + * kthread_create failed. undo all the stuff from above again. + */ + unregister_oom_notifier(&cmm_oom_nb); + +out_oom_notify: +#ifdef CONFIG_CMM_IUCV + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +out_smsg: +#endif +#ifdef CONFIG_CMM_PROC + unregister_sysctl_table(cmm_sysctl_header); +#endif +out: + return rc; } static void cmm_exit(void) { + kthread_stop(cmm_thread_ptr); + unregister_oom_notifier(&cmm_oom_nb); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); #ifdef CONFIG_CMM_PROC diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c index ad8ed7d41e16..bf94eedb0a8e 100644 --- a/arch/sh/mm/cache-sh7705.c +++ b/arch/sh/mm/cache-sh7705.c @@ -30,7 +30,7 @@ #define __pte_offset(address) \ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ +#define pte_offset(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ __pte_offset(address)) static inline void cache_wback_all(void) diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c index 1169757fb38b..83295bd21aa7 100644 --- a/arch/sh64/mm/init.c +++ b/arch/sh64/mm/init.c @@ -110,7 +110,7 @@ void show_mem(void) */ void __init paging_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; pgd_init((unsigned long)swapper_pg_dir); pgd_init((unsigned long)swapper_pg_dir + diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 16e13f663ab0..b27a506309ee 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -2175,7 +2175,7 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_page, srmmu_pgd_page, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM); BTFIXUPSET_SETHI(none_mask, 0xF0000000); diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 7fdddf3c7e16..436021ceb2e7 100644 --- a/arch/sparc/mm/sun4c.c 
+++ b/arch/sparc/mm/sun4c.c @@ -2280,5 +2280,5 @@ void __init ld_mmu_sun4c(void) /* These should _never_ get called with two level tables. */ BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pgd_page, sun4c_pgd_page, BTFIXUPCALL_RETO0); + BTFIXUPSET_CALL(pgd_page_vaddr, sun4c_pgd_page, BTFIXUPCALL_RETO0); } diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c index 8135ec322c9c..642541769a17 100644 --- a/arch/sparc64/solaris/misc.c +++ b/arch/sparc64/solaris/misc.c @@ -736,20 +736,15 @@ struct exec_domain solaris_exec_domain = { extern int init_socksys(void); -#ifdef MODULE - MODULE_AUTHOR("Jakub Jelinek (jj@ultra.linux.cz), Patrik Rak (prak3264@ss1000.ms.mff.cuni.cz)"); MODULE_DESCRIPTION("Solaris binary emulation module"); MODULE_LICENSE("GPL"); -#ifdef __sparc_v9__ extern u32 tl0_solaris[8]; #define update_ttable(x) \ tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \ wmb(); \ __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3])) -#else -#endif extern u32 solaris_sparc_syscall[]; extern u32 solaris_syscall[]; @@ -757,7 +752,7 @@ extern void cleanup_socksys(void); extern u32 entry64_personality_patch; -int init_module(void) +static int __init solaris_init(void) { int ret; @@ -777,19 +772,12 @@ int init_module(void) return 0; } -void cleanup_module(void) +static void __exit solaris_exit(void) { update_ttable(solaris_syscall); cleanup_socksys(); unregister_exec_domain(&solaris_exec_domain); } -#else -int init_solaris_emul(void) -{ - register_exec_domain(&solaris_exec_domain); - init_socksys(); - return 0; -} -#endif - +module_init(solaris_init); +module_exit(solaris_exit); diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c index bc3df95bc057..7c90e41fd3be 100644 --- a/arch/sparc64/solaris/socksys.c +++ b/arch/sparc64/solaris/socksys.c @@ -168,8 +168,7 @@ static struct file_operations socksys_fops = { .release = socksys_release, }; -int __init -init_socksys(void) +int __init init_socksys(void) { int ret; struct file * file; @@ -199,8 +198,7 @@ init_socksys(void) return 0; } -void -cleanup_socksys(void) +void __exit cleanup_socksys(void) { if (unregister_chrdev(30, "socksys")) printk ("Couldn't unregister socksys character device\n"); diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 7218c754505b..e82764f75e7f 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -544,7 +544,7 @@ static struct chan *parse_chan(struct line *line, char *str, int device, ops = NULL; data = NULL; - for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ + for(i = 0; i < ARRAY_SIZE(chan_table); i++){ entry = &chan_table[i]; if(!strncmp(str, entry->key, strlen(entry->key))){ ops = entry->ops; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index b414522f7686..79610b5ce67e 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -497,7 +497,7 @@ static void mconsole_get_config(int (*get_config)(char *, char *, int, } error = NULL; - size = sizeof(default_buf)/sizeof(default_buf[0]); + size = ARRAY_SIZE(default_buf); buf = default_buf; while(1){ diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index 9bfd405c3bd8..5b2f5fe9e426 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -16,6 +16,7 @@ #include "user.h" #include "mconsole.h" #include "umid.h" +#include "user_util.h" static struct mconsole_command commands[] = { /* With uts 
namespaces, uts information becomes process-specific, so @@ -65,14 +66,14 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) struct mconsole_command *cmd; int i; - for(i=0;i<sizeof(commands)/sizeof(commands[0]);i++){ + for(i = 0; i < ARRAY_SIZE(commands); i++){ cmd = &commands[i]; if(!strncmp(req->request.data, cmd->command, strlen(cmd->command))){ - return(cmd); + return cmd; } } - return(NULL); + return NULL; } #define MIN(a,b) ((a)<(b) ? (a):(b)) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 501f95675d89..4a7966b21931 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -31,6 +31,11 @@ #include "irq_user.h" #include "irq_kern.h" +static inline void set_ether_mac(struct net_device *dev, unsigned char *addr) +{ + memcpy(dev->dev_addr, addr, ETH_ALEN); +} + #define DRIVER_NAME "uml-netdev" static DEFINE_SPINLOCK(opened_lock); @@ -242,7 +247,7 @@ static int uml_net_set_mac(struct net_device *dev, void *addr) struct sockaddr *hwaddr = addr; spin_lock(&lp->lock); - memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN); + set_ether_mac(dev, hwaddr->sa_data); spin_unlock(&lp->lock); return(0); @@ -790,13 +795,6 @@ void dev_ip_addr(void *d, unsigned char *bin_buf) memcpy(bin_buf, &in->ifa_address, sizeof(in->ifa_address)); } -void set_ether_mac(void *d, unsigned char *addr) -{ - struct net_device *dev = d; - - memcpy(dev->dev_addr, addr, ETH_ALEN); -} - struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra) { if((skb != NULL) && (skb_tailroom(skb) < extra)){ diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c index 466ff2c2f918..4c767c7adb96 100644 --- a/arch/um/drivers/pcap_kern.c +++ b/arch/um/drivers/pcap_kern.c @@ -76,7 +76,7 @@ int pcap_setup(char *str, char **mac_out, void *data) if(host_if != NULL) init->host_if = host_if; - for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){ + for(i = 0; i < ARRAY_SIZE(options); i++){ if(options[i] == NULL) continue; if(!strcmp(options[i], "promisc")) diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index b98bdd8e052a..89e1dc835a5b 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -27,7 +27,6 @@ extern int ncpus; extern char *linux_prog; extern char *gdb_init; extern int kmalloc_ok; -extern int timer_irq_inited; extern int jail; extern int nsyscalls; diff --git a/arch/um/include/longjmp.h b/arch/um/include/longjmp.h index 1b5c0131a12e..e93c6d3e893b 100644 --- a/arch/um/include/longjmp.h +++ b/arch/um/include/longjmp.h @@ -1,9 +1,12 @@ #ifndef __UML_LONGJMP_H #define __UML_LONGJMP_H -#include <setjmp.h> +#include "sysdep/archsetjmp.h" #include "os.h" +extern int setjmp(jmp_buf); +extern void longjmp(jmp_buf, int); + #define UML_LONGJMP(buf, val) do { \ longjmp(*buf, val); \ } while(0) diff --git a/arch/um/include/net_user.h b/arch/um/include/net_user.h index 800c403920bc..47ef7cb49a8e 100644 --- a/arch/um/include/net_user.h +++ b/arch/um/include/net_user.h @@ -26,7 +26,6 @@ struct net_user_info { extern void ether_user_init(void *data, void *dev); extern void dev_ip_addr(void *d, unsigned char *bin_buf); -extern void set_ether_mac(void *d, unsigned char *addr); extern void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, void *), void *arg); diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 5316e8a4a4fd..24fb6d8680e1 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -276,9 +276,11 @@ extern int setjmp_wrapper(void (*proc)(void *, void *), ...); extern void 
switch_timers(int to_real); extern void idle_sleep(int secs); +extern int set_interval(int is_virtual); +#ifdef CONFIG_MODE_TT extern void enable_timer(void); +#endif extern void disable_timer(void); -extern void user_time_init(void); extern void uml_idle_timer(void); extern unsigned long long os_nsecs(void); @@ -329,6 +331,7 @@ extern void os_set_ioignore(void); extern void init_irq_signals(int on_sigstack); /* sigio.c */ +extern int add_sigio_fd(int fd); extern int ignore_sigio_fd(int fd); extern void maybe_sigio_broken(int fd, int read); diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 83b688ca198f..f845b3629a6d 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -7,6 +7,7 @@ #define __REGISTERS_H #include "sysdep/ptrace.h" +#include "sysdep/archsetjmp.h" extern void init_thread_registers(union uml_pt_regs *to); extern int save_fp_registers(int pid, unsigned long *fp_regs); @@ -15,6 +16,6 @@ extern void save_registers(int pid, union uml_pt_regs *regs); extern void restore_registers(int pid, union uml_pt_regs *regs); extern void init_registers(int pid); extern void get_safe_registers(unsigned long * regs, unsigned long * fp_regs); -extern void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer); +extern unsigned long get_thread_reg(int reg, jmp_buf *buf); #endif diff --git a/arch/um/include/sysdep-i386/archsetjmp.h b/arch/um/include/sysdep-i386/archsetjmp.h new file mode 100644 index 000000000000..ea1ba3d42aee --- /dev/null +++ b/arch/um/include/sysdep-i386/archsetjmp.h @@ -0,0 +1,19 @@ +/* + * arch/i386/include/klibc/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned int __ebx; + unsigned int __esp; + unsigned int __ebp; + unsigned int __esi; + unsigned int __edi; + unsigned int __eip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#endif /* _SETJMP_H */ diff --git a/arch/um/include/sysdep-i386/signal.h b/arch/um/include/sysdep-i386/signal.h deleted file mode 100644 index 07518b162136..000000000000 --- a/arch/um/include/sysdep-i386/signal.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2004 PathScale, Inc - * Licensed under the GPL - */ - -#ifndef __I386_SIGNAL_H_ -#define __I386_SIGNAL_H_ - -#include <signal.h> - -#define ARCH_SIGHDLR_PARAM int sig - -#define ARCH_GET_SIGCONTEXT(sc, sig) \ - do sc = (struct sigcontext *) (&sig + 1); while(0) - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-x86_64/archsetjmp.h b/arch/um/include/sysdep-x86_64/archsetjmp.h new file mode 100644 index 000000000000..454fc60aff6d --- /dev/null +++ b/arch/um/include/sysdep-x86_64/archsetjmp.h @@ -0,0 +1,21 @@ +/* + * arch/x86_64/include/klibc/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned long __rbx; + unsigned long __rsp; + unsigned long __rbp; + unsigned long __r12; + unsigned long __r13; + unsigned long __r14; + unsigned long __r15; + unsigned long __rip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#endif /* _SETJMP_H */ diff --git a/arch/um/include/sysdep-x86_64/signal.h b/arch/um/include/sysdep-x86_64/signal.h deleted file mode 100644 index 6142897af3d1..000000000000 --- a/arch/um/include/sysdep-x86_64/signal.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2004 PathScale, Inc - * Licensed under the GPL - */ - -#ifndef __X86_64_SIGNAL_H_ -#define __X86_64_SIGNAL_H_ - -#define ARCH_SIGHDLR_PARAM int sig - -#define ARCH_GET_SIGCONTEXT(sc, sig_addr) \ - do { \ - struct ucontext *__uc; \ - asm("movq %%rdx, %0" : "=r" (__uc)); \ - sc = (struct sigcontext *) &__uc->uc_mcontext; \ - } while(0) - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index fc38a6d5906d..0561c43b4685 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -41,9 +41,11 @@ static long execve1(char *file, char __user * __user *argv, long error; #ifdef CONFIG_TTY_LOG - task_lock(current); + mutex_lock(&tty_mutex); + task_lock(current); /* FIXME: is this needed ? */ log_exec(argv, current->signal->tty); task_unlock(current); + mutex_unlock(&tty_mutex); #endif error = do_execve(file, argv, env, ¤t->thread.regs); if (error == 0){ diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 589c69a75043..ce7f233fc490 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -142,19 +142,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) .events = events, .current_events = 0 } ); - /* Critical section - locked by a spinlock because this stuff can - * be changed from interrupt handlers. The stuff above is done - * outside the lock because it allocates memory. - */ - - /* Actually, it only looks like it can be called from interrupt - * context. The culprit is reactivate_fd, which calls - * maybe_sigio_broken, which calls write_sigio_workaround, - * which calls activate_fd. However, write_sigio_workaround should - * only be called once, at boot time. That would make it clear that - * this is called only from process context, and can be locked with - * a semaphore. 
- */ spin_lock_irqsave(&irq_lock, flags); for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { if ((irq_fd->fd == fd) && (irq_fd->type == type)) { @@ -165,7 +152,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) } } - /*-------------*/ if (type == IRQ_WRITE) fd = -1; @@ -198,7 +184,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) spin_lock_irqsave(&irq_lock, flags); } - /*-------------*/ *last_irq_ptr = new_fd; last_irq_ptr = &new_fd->next; @@ -210,14 +195,14 @@ int activate_fd(int irq, int fd, int type, void *dev_id) */ maybe_sigio_broken(fd, (type == IRQ_READ)); - return(0); + return 0; out_unlock: spin_unlock_irqrestore(&irq_lock, flags); out_kfree: kfree(new_fd); out: - return(err); + return err; } static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) @@ -302,10 +287,7 @@ void reactivate_fd(int fd, int irqnum) os_set_pollfd(i, irq->fd); spin_unlock_irqrestore(&irq_lock, flags); - /* This calls activate_fd, so it has to be outside the critical - * section. - */ - maybe_sigio_broken(fd, (irq->type == IRQ_READ)); + add_sigio_fd(fd); } void deactivate_fd(int fd, int irqnum) @@ -316,11 +298,15 @@ void deactivate_fd(int fd, int irqnum) spin_lock_irqsave(&irq_lock, flags); irq = find_irq_by_fd(fd, irqnum, &i); - if (irq == NULL) - goto out; + if(irq == NULL){ + spin_unlock_irqrestore(&irq_lock, flags); + return; + } + os_set_pollfd(i, -1); - out: spin_unlock_irqrestore(&irq_lock, flags); + + ignore_sigio_fd(fd); } int deactivate_all_fds(void) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 61280167c560..93121c6d26e5 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -79,8 +79,10 @@ void mem_init(void) /* this will put all low memory onto the freelists */ totalram_pages = free_all_bootmem(); +#ifdef CONFIG_HIGHMEM totalhigh_pages = highmem >> PAGE_SHIFT; totalram_pages += totalhigh_pages; +#endif num_physpages = totalram_pages; max_pfn = totalram_pages; printk(KERN_INFO "Memory: %luk available\n", @@ -221,10 +223,13 @@ void paging_init(void) empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); - for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) + for(i = 0; i < ARRAY_SIZE(zones_size); i++) zones_size[i] = 0; + zones_size[ZONE_DMA] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); +#ifdef CONFIG_HIGHMEM zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT; +#endif free_area_init(zones_size); /* diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index f6a5a502120b..537895d68ad1 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -23,6 +23,7 @@ #include "linux/proc_fs.h" #include "linux/ptrace.h" #include "linux/random.h" +#include "linux/personality.h" #include "asm/unistd.h" #include "asm/mman.h" #include "asm/segment.h" @@ -476,7 +477,7 @@ int singlestepping(void * t) #ifndef arch_align_stack unsigned long arch_align_stack(unsigned long sp) { - if (randomize_va_space) + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 3ef73bf2e781..f602623644aa 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -22,7 +22,7 @@ static void kill_idlers(int me) struct task_struct *p; int i; - for(i = 0; i < sizeof(idle_threads)/sizeof(idle_threads[0]); i++){ + for(i = 0; i < ARRAY_SIZE(idle_threads); i++){ p = 
idle_threads[i]; if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) os_kill_process(p->thread.mode.tt.extern_pid, 0); @@ -62,14 +62,3 @@ void machine_halt(void) { machine_power_off(); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 624ca238d1fd..79c22707a637 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -55,7 +55,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, * destroy_context_skas. */ - mm->context.skas.last_page_table = pmd_page_kernel(*pmd); + mm->context.skas.last_page_table = pmd_page_vaddr(*pmd); #ifdef CONFIG_3_LEVEL_PGTABLES mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud)); #endif diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 552ca1cb9847..2454bbd9555d 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -35,9 +35,6 @@ unsigned long long sched_clock(void) return (unsigned long long)jiffies_64 * (1000000000 / HZ); } -/* Changed at early boot */ -int timer_irq_inited = 0; - static unsigned long long prev_nsecs; #ifdef CONFIG_UML_REAL_TIME_CLOCK static long long delta; /* Deviation per interval */ @@ -113,12 +110,13 @@ static void register_timer(void) err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL); if(err != 0) - printk(KERN_ERR "timer_init : request_irq failed - " + printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); - timer_irq_inited = 1; - - user_time_init(); + err = set_interval(1); + if(err != 0) + printk(KERN_ERR "register_timer : set_interval failed - " + "errno = %d\n", -err); } extern void (*late_time_init)(void); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index f5b0636f9ad7..54a5ff25645a 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -16,12 +16,12 @@ #include "os.h" static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int *index, + int r, int w, int x, struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, int (*do_ops)(union mm_context *, struct host_vm_op *, int, int, void **)) { - __u64 offset; + __u64 offset; struct host_vm_op *last; int fd, ret = 0; @@ -89,7 +89,7 @@ static int add_munmap(unsigned long addr, unsigned long len, static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int *index, int last_filled, union mm_context *mmu, void **flush, - int (*do_ops)(union mm_context *, struct host_vm_op *, + int (*do_ops)(union mm_context *, struct host_vm_op *, int, int, void **)) { struct host_vm_op *last; @@ -124,105 +124,105 @@ static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, + unsigned long end_addr, int force, int (*do_ops)(union mm_context *, struct host_vm_op *, int, int, void **)) { - pgd_t *npgd; - pud_t *npud; - pmd_t *npmd; - 
pte_t *npte; - union mm_context *mmu = &mm->context; - unsigned long addr, end; - int r, w, x; - struct host_vm_op ops[1]; - void *flush = NULL; - int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; - int ret = 0; - - if(mm == NULL) return; - - ops[0].type = NONE; - for(addr = start_addr; addr < end_addr && !ret;){ - npgd = pgd_offset(mm, addr); - if(!pgd_present(*npgd)){ - end = ADD_ROUND(addr, PGDIR_SIZE); - if(end > end_addr) - end = end_addr; - if(force || pgd_newpage(*npgd)){ - ret = add_munmap(addr, end - addr, ops, - &op_index, last_op, mmu, - &flush, do_ops); - pgd_mkuptodate(*npgd); - } - addr = end; - continue; - } - - npud = pud_offset(npgd, addr); - if(!pud_present(*npud)){ - end = ADD_ROUND(addr, PUD_SIZE); - if(end > end_addr) - end = end_addr; - if(force || pud_newpage(*npud)){ - ret = add_munmap(addr, end - addr, ops, - &op_index, last_op, mmu, - &flush, do_ops); - pud_mkuptodate(*npud); - } - addr = end; - continue; - } - - npmd = pmd_offset(npud, addr); - if(!pmd_present(*npmd)){ - end = ADD_ROUND(addr, PMD_SIZE); - if(end > end_addr) - end = end_addr; - if(force || pmd_newpage(*npmd)){ - ret = add_munmap(addr, end - addr, ops, - &op_index, last_op, mmu, - &flush, do_ops); - pmd_mkuptodate(*npmd); - } - addr = end; - continue; - } - - npte = pte_offset_kernel(npmd, addr); - r = pte_read(*npte); - w = pte_write(*npte); - x = pte_exec(*npte); + pgd_t *npgd; + pud_t *npud; + pmd_t *npmd; + pte_t *npte; + union mm_context *mmu = &mm->context; + unsigned long addr, end; + int r, w, x; + struct host_vm_op ops[1]; + void *flush = NULL; + int op_index = -1, last_op = ARRAY_SIZE(ops) - 1; + int ret = 0; + + if(mm == NULL) + return; + + ops[0].type = NONE; + for(addr = start_addr; addr < end_addr && !ret;){ + npgd = pgd_offset(mm, addr); + if(!pgd_present(*npgd)){ + end = ADD_ROUND(addr, PGDIR_SIZE); + if(end > end_addr) + end = end_addr; + if(force || pgd_newpage(*npgd)){ + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); + pgd_mkuptodate(*npgd); + } + addr = end; + continue; + } + + npud = pud_offset(npgd, addr); + if(!pud_present(*npud)){ + end = ADD_ROUND(addr, PUD_SIZE); + if(end > end_addr) + end = end_addr; + if(force || pud_newpage(*npud)){ + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); + pud_mkuptodate(*npud); + } + addr = end; + continue; + } + + npmd = pmd_offset(npud, addr); + if(!pmd_present(*npmd)){ + end = ADD_ROUND(addr, PMD_SIZE); + if(end > end_addr) + end = end_addr; + if(force || pmd_newpage(*npmd)){ + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); + pmd_mkuptodate(*npmd); + } + addr = end; + continue; + } + + npte = pte_offset_kernel(npmd, addr); + r = pte_read(*npte); + w = pte_write(*npte); + x = pte_exec(*npte); if (!pte_young(*npte)) { r = 0; w = 0; } else if (!pte_dirty(*npte)) { w = 0; } - if(force || pte_newpage(*npte)){ - if(pte_present(*npte)) - ret = add_mmap(addr, - pte_val(*npte) & PAGE_MASK, - PAGE_SIZE, r, w, x, ops, - &op_index, last_op, mmu, - &flush, do_ops); + if(force || pte_newpage(*npte)){ + if(pte_present(*npte)) + ret = add_mmap(addr, + pte_val(*npte) & PAGE_MASK, + PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); else ret = add_munmap(addr, PAGE_SIZE, ops, &op_index, last_op, mmu, &flush, do_ops); - } - else if(pte_newprot(*npte)) + } + else if(pte_newprot(*npte)) ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, &op_index, last_op, mmu, &flush, do_ops); - *npte = pte_mkuptodate(*npte); - 
addr += PAGE_SIZE; - } - + *npte = pte_mkuptodate(*npte); + addr += PAGE_SIZE; + } if(!ret) ret = (*do_ops)(mmu, ops, op_index, 1, &flush); - /* This is not an else because ret is modified above */ +/* This is not an else because ret is modified above */ if(ret) { printk("fix_range_common: failed, killing current process\n"); force_sig(SIGKILL, current); @@ -231,160 +231,160 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) { - struct mm_struct *mm; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long addr, last; - int updated = 0, err; - - mm = &init_mm; - for(addr = start; addr < end;){ - pgd = pgd_offset(mm, addr); - if(!pgd_present(*pgd)){ - last = ADD_ROUND(addr, PGDIR_SIZE); - if(last > end) - last = end; - if(pgd_newpage(*pgd)){ - updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pud = pud_offset(pgd, addr); - if(!pud_present(*pud)){ - last = ADD_ROUND(addr, PUD_SIZE); - if(last > end) - last = end; - if(pud_newpage(*pud)){ - updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pmd = pmd_offset(pud, addr); - if(!pmd_present(*pmd)){ - last = ADD_ROUND(addr, PMD_SIZE); - if(last > end) - last = end; - if(pmd_newpage(*pmd)){ - updated = 1; - err = os_unmap_memory((void *) addr, - last - addr); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - } - addr = last; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - if(!pte_present(*pte) || pte_newpage(*pte)){ - updated = 1; - err = os_unmap_memory((void *) addr, - PAGE_SIZE); - if(err < 0) - panic("munmap failed, errno = %d\n", - -err); - if(pte_present(*pte)) - map_memory(addr, - pte_val(*pte) & PAGE_MASK, - PAGE_SIZE, 1, 1, 1); - } - else if(pte_newprot(*pte)){ - updated = 1; - os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); - } - addr += PAGE_SIZE; - } - return(updated); + struct mm_struct *mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr, last; + int updated = 0, err; + + mm = &init_mm; + for(addr = start; addr < end;){ + pgd = pgd_offset(mm, addr); + if(!pgd_present(*pgd)){ + last = ADD_ROUND(addr, PGDIR_SIZE); + if(last > end) + last = end; + if(pgd_newpage(*pgd)){ + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pud = pud_offset(pgd, addr); + if(!pud_present(*pud)){ + last = ADD_ROUND(addr, PUD_SIZE); + if(last > end) + last = end; + if(pud_newpage(*pud)){ + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pmd = pmd_offset(pud, addr); + if(!pmd_present(*pmd)){ + last = ADD_ROUND(addr, PMD_SIZE); + if(last > end) + last = end; + if(pmd_newpage(*pmd)){ + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pte = pte_offset_kernel(pmd, addr); + if(!pte_present(*pte) || pte_newpage(*pte)){ + updated = 1; + err = os_unmap_memory((void *) addr, + PAGE_SIZE); + if(err < 0) + panic("munmap failed, errno = %d\n", + -err); + if(pte_present(*pte)) + map_memory(addr, + pte_val(*pte) & 
PAGE_MASK, + PAGE_SIZE, 1, 1, 1); + } + else if(pte_newprot(*pte)){ + updated = 1; + os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); + } + addr += PAGE_SIZE; + } + return(updated); } pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { - return(pgd_offset(mm, address)); + return(pgd_offset(mm, address)); } pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address) { - return(pud_offset(pgd, address)); + return(pud_offset(pgd, address)); } pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address) { - return(pmd_offset(pud, address)); + return(pmd_offset(pud, address)); } pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) { - return(pte_offset_kernel(pmd, address)); + return(pte_offset_kernel(pmd, address)); } pte_t *addr_pte(struct task_struct *task, unsigned long addr) { - pgd_t *pgd = pgd_offset(task->mm, addr); - pud_t *pud = pud_offset(pgd, addr); - pmd_t *pmd = pmd_offset(pud, addr); + pgd_t *pgd = pgd_offset(task->mm, addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); - return(pte_offset_map(pmd, addr)); + return(pte_offset_map(pmd, addr)); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); } void flush_tlb_all(void) { - flush_tlb_mm(current->mm); + flush_tlb_mm(current->mm); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); } void flush_tlb_kernel_vm(void) { - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); } void __flush_tlb_one(unsigned long addr) { - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); } void flush_tlb_mm(struct mm_struct *mm) { - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); } void force_flush_all(void) { - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ac70fa5a2e2a..e5eeaf2b6af1 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -227,9 +227,16 @@ void bad_segv(struct faultinfo fi, unsigned long ip) void relay_signal(int sig, union uml_pt_regs *regs) { - if(arch_handle_signal(sig, regs)) return; - if(!UPT_IS_USER(regs)) + if(arch_handle_signal(sig, regs)) + return; + + if(!UPT_IS_USER(regs)){ + if(sig == SIGBUS) + printk("Bus error - the /dev/shm or /tmp mount likely " + "just ran out of space\n"); panic("Kernel mode signal %d", sig); + } + current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); force_sig(sig, current); } diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 6987d1d247a2..cd15b9df5b5c 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -42,7 +42,7 @@ static int helper_child(void *arg) 
if(data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); execvp(argv[0], argv); - errval = errno; + errval = -errno; printk("helper_child - execve of '%s' failed - errno = %d\n", argv[0], errno); os_write_file(data->fd, &errval, sizeof(errval)); kill(os_getpid(), SIGKILL); @@ -62,7 +62,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, stack = *stack_out; else stack = alloc_stack(0, __cant_sleep()); if(stack == 0) - return(-ENOMEM); + return -ENOMEM; ret = os_pipe(fds, 1, 0); if(ret < 0){ @@ -95,16 +95,16 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, /* Read the errno value from the child, if the exec failed, or get 0 if * the exec succeeded because the pipe fd was set as close-on-exec. */ n = os_read_file(fds[0], &ret, sizeof(ret)); - if (n < 0) { - printk("run_helper : read on pipe failed, ret = %d\n", -n); - ret = n; - kill(pid, SIGKILL); - CATCH_EINTR(waitpid(pid, NULL, 0)); - } else if(n != 0){ - CATCH_EINTR(n = waitpid(pid, NULL, 0)); - ret = -errno; - } else { + if(n == 0) ret = pid; + else { + if(n < 0){ + printk("run_helper : read on pipe failed, ret = %d\n", + -n); + ret = n; + kill(pid, SIGKILL); + } + CATCH_EINTR(waitpid(pid, NULL, 0)); } out_close: diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index 7555bf9c33d9..a97206df5b52 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -132,7 +132,7 @@ void os_set_pollfd(int i, int fd) void os_set_ioignore(void) { - set_handler(SIGIO, SIG_IGN, 0, -1); + signal(SIGIO, SIG_IGN); } void init_irq_signals(int on_sigstack) diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 90912aaca7aa..d1c5670787dc 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -67,13 +67,32 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - signal(SIGINT, SIG_DFL); - signal(SIGTERM, SIG_DFL); - signal(SIGHUP, SIG_DFL); uml_cleanup(); exit(1); } +static void install_fatal_handler(int sig) +{ + struct sigaction action; + + /* All signals are enabled in this handler ... */ + sigemptyset(&action.sa_mask); + + /* ... including the signal being handled, plus we want the + * handler reset to the default behavior, so that if an exit + * handler is hanging for some reason, the UML will just die + * after this signal is sent a second time. + */ + action.sa_flags = SA_RESETHAND | SA_NODEFER; + action.sa_restorer = NULL; + action.sa_handler = last_ditch_exit; + if(sigaction(sig, &action, NULL) < 0){ + printf("failed to install handler for signal %d - errno = %d\n", + sig, errno); + exit(1); + } +} + #define UML_LIB_PATH ":/usr/lib/uml" static void setup_env_path(void) @@ -158,9 +177,12 @@ int main(int argc, char **argv, char **envp) } new_argv[argc] = NULL; - set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); - set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); - set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); + /* Allow these signals to bring down a UML if all other + * methods of control fail.
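The flag choice is what makes install_fatal_handler() a last-ditch mechanism: SA_RESETHAND restores SIG_DFL after the first delivery, and SA_NODEFER leaves the signal unblocked while the handler runs, so a second SIGINT/SIGTERM/SIGHUP kills the process outright even if uml_cleanup() hangs. A self-contained user-space sketch of the same pattern (illustrative; error handling trimmed, and the endless loop stands in for a stuck cleanup):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void die_once(int sig)
{
	fprintf(stderr, "cleaning up after signal %d\n", sig);
	/* The disposition is already back to SIG_DFL at this point,
	 * so a second signal terminates the process directly. */
	for (;;)
		pause();		/* simulate a hung exit handler */
}

int main(void)
{
	struct sigaction sa = {
		.sa_handler = die_once,
		.sa_flags = SA_RESETHAND | SA_NODEFER,
	};

	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGINT, &sa, NULL) < 0)
		return 1;
	for (;;)
		pause();		/* first Ctrl-C enters die_once() */
}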
+ */ + install_fatal_handler(SIGINT); + install_fatal_handler(SIGTERM); + install_fatal_handler(SIGHUP); scan_elf_aux( envp); diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 560c8063c77c..b170b4704dc4 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -114,14 +114,14 @@ static void which_tmpdir(void) } while(1){ - found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' '); + found = next(fd, buf, ARRAY_SIZE(buf), ' '); if(found != 1) break; if(!strncmp(buf, "/dev/shm", strlen("/dev/shm"))) goto found; - found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), '\n'); + found = next(fd, buf, ARRAY_SIZE(buf), '\n'); if(found != 1) break; } @@ -135,7 +135,7 @@ err: return; found: - found = next(fd, buf, sizeof(buf) / sizeof(buf[0]), ' '); + found = next(fd, buf, ARRAY_SIZE(buf), ' '); if(found != 1) goto err; diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index b98d3ca2cd1b..ff203625a4bd 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -7,7 +7,6 @@ #include <stdio.h> #include <errno.h> #include <signal.h> -#include <setjmp.h> #include <linux/unistd.h> #include <sys/mman.h> #include <sys/wait.h> @@ -247,7 +246,17 @@ void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) set_sigstack(sig_stack, pages * page_size()); flags = SA_ONSTACK; } - if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); + if(usr1_handler){ + struct sigaction sa; + + sa.sa_handler = usr1_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = flags; + sa.sa_restorer = NULL; + if(sigaction(SIGUSR1, &sa, NULL) < 0) + panic("init_new_thread_stack - sigaction failed - " + "errno = %d\n", errno); + } } void init_new_thread_signals(void) diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 0ecac563c7b3..f6457765b17d 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -43,17 +43,9 @@ struct pollfds { /* Protected by sigio_lock(). Used by the sigio thread, but the UML thread * synchronizes with it. 
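In the sigio.c hunk just below, the explicit { NULL, 0, 0 } initializers are dropped because C guarantees zero-initialization of objects with static storage duration, and zero-initializing a pointer yields a null pointer. The two forms are equivalent, as in this sketch (struct layout mirrored from the initializers, illustrative only):

#include <poll.h>
#include <stddef.h>

struct pollfds { struct pollfd *poll; int size; int used; };

static struct pollfds a;	/* zero-initialized: null poll, 0, 0 */
static struct pollfds b = { .poll = NULL, .size = 0, .used = 0 };	/* same */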
*/ -static struct pollfds current_poll = { - .poll = NULL, - .size = 0, - .used = 0 -}; - -static struct pollfds next_poll = { - .poll = NULL, - .size = 0, - .used = 0 -}; +static struct pollfds current_poll; +static struct pollfds next_poll; +static struct pollfds all_sigio_fds; static int write_sigio_thread(void *unused) { @@ -78,7 +70,8 @@ static int write_sigio_thread(void *unused) n = os_read_file(sigio_private[1], &c, sizeof(c)); if(n != sizeof(c)) printk("write_sigio_thread : " - "read failed, err = %d\n", -n); + "read on socket failed, " + "err = %d\n", -n); tmp = current_poll; current_poll = next_poll; next_poll = tmp; @@ -93,35 +86,36 @@ n = os_write_file(respond_fd, &c, sizeof(c)); if(n != sizeof(c)) - printk("write_sigio_thread : write failed, " - "err = %d\n", -n); + printk("write_sigio_thread : write on socket " + "failed, err = %d\n", -n); } } return 0; } -static int need_poll(int n) +static int need_poll(struct pollfds *polls, int n) { - if(n <= next_poll.size){ - next_poll.used = n; - return(0); + if(n <= polls->size){ + polls->used = n; + return 0; } - kfree(next_poll.poll); - next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); - if(next_poll.poll == NULL){ + kfree(polls->poll); + polls->poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); + if(polls->poll == NULL){ printk("need_poll : failed to allocate new pollfds\n"); - next_poll.size = 0; - next_poll.used = 0; - return(-1); + polls->size = 0; + polls->used = 0; + return -ENOMEM; } - next_poll.size = n; - next_poll.used = n; - return(0); + polls->size = n; + polls->used = n; + return 0; } /* Must be called with sigio_lock held, because it's needed by the marked - * critical section. */ + * critical section. + */ static void update_thread(void) { unsigned long flags; @@ -156,34 +150,39 @@ static void update_thread(void) set_signals(flags); } -static int add_sigio_fd(int fd, int read) +int add_sigio_fd(int fd) { - int err = 0, i, n, events; + struct pollfd *p; + int err = 0, i, n; sigio_lock(); + for(i = 0; i < all_sigio_fds.used; i++){ + if(all_sigio_fds.poll[i].fd == fd) + break; + } + if(i == all_sigio_fds.used) + goto out; + + p = &all_sigio_fds.poll[i]; + for(i = 0; i < current_poll.used; i++){ if(current_poll.poll[i].fd == fd) goto out; } n = current_poll.used + 1; - err = need_poll(n); + err = need_poll(&next_poll, n); if(err) goto out; for(i = 0; i < current_poll.used; i++) next_poll.poll[i] = current_poll.poll[i]; - if(read) events = POLLIN; - else events = POLLOUT; - - next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, - .events = events, - .revents = 0 }); + next_poll.poll[n - 1] = *p; update_thread(); out: sigio_unlock(); - return(err); + return err; } int ignore_sigio_fd(int fd) @@ -205,18 +204,14 @@ if(i == current_poll.used) goto out; - err = need_poll(current_poll.used - 1); + err = need_poll(&next_poll, current_poll.used - 1); if(err) goto out; for(i = 0; i < current_poll.used; i++){ p = &current_poll.poll[i]; - if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; - } - if(n == i){ - printk("ignore_sigio_fd : fd %d not found\n", fd); - err = -1; - goto out; + if(p->fd != fd) + next_poll.poll[n++] = *p; } update_thread(); @@ -234,7 +229,7 @@ static struct pollfd *setup_initial_poll(int fd) printk("setup_initial_poll : failed to allocate poll\n"); return NULL; } - *p = ((struct pollfd) { .fd = fd, + *p = ((struct pollfd) { .fd = fd, .events = POLLIN, .revents = 0 }); return p; @@ -323,6 +318,8 @@ out_close1: void
maybe_sigio_broken(int fd, int read) { + int err; + if(!isatty(fd)) return; @@ -330,7 +327,19 @@ void maybe_sigio_broken(int fd, int read) return; write_sigio_workaround(); - add_sigio_fd(fd, read); + + sigio_lock(); + err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); + if(err){ + printk("maybe_sigio_broken - failed to add pollfd\n"); + goto out; + } + all_sigio_fds.poll[all_sigio_fds.used++] = + ((struct pollfd) { .fd = fd, + .events = read ? POLLIN : POLLOUT, + .revents = 0 }); +out: + sigio_unlock(); } static void sigio_cleanup(void) diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 60e4faedf254..6b81739279d1 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -15,7 +15,6 @@ #include "user.h" #include "signal_kern.h" #include "sysdep/sigcontext.h" -#include "sysdep/signal.h" #include "sigcontext.h" #include "mode.h" #include "os.h" @@ -38,18 +37,10 @@ static int signals_enabled = 1; static int pending = 0; -void sig_handler(ARCH_SIGHDLR_PARAM) +void sig_handler(int sig, struct sigcontext *sc) { - struct sigcontext *sc; int enabled; - /* Must be the first thing that this handler does - x86_64 stores - * the sigcontext in %rdx, and we need to save it before it has a - * chance to get trashed. - */ - - ARCH_GET_SIGCONTEXT(sc, sig); - enabled = signals_enabled; if(!enabled && (sig == SIGIO)){ pending |= SIGIO_MASK; @@ -64,15 +55,8 @@ void sig_handler(ARCH_SIGHDLR_PARAM) set_signals(enabled); } -extern int timer_irq_inited; - static void real_alarm_handler(int sig, struct sigcontext *sc) { - if(!timer_irq_inited){ - signals_enabled = 1; - return; - } - if(sig == SIGALRM) switch_timers(0); @@ -84,13 +68,10 @@ static void real_alarm_handler(int sig, struct sigcontext *sc) } -void alarm_handler(ARCH_SIGHDLR_PARAM) +void alarm_handler(int sig, struct sigcontext *sc) { - struct sigcontext *sc; int enabled; - ARCH_GET_SIGCONTEXT(sc, sig); - enabled = signals_enabled; if(!signals_enabled){ if(sig == SIGVTALRM) @@ -126,6 +107,10 @@ void remove_sigstack(void) panic("disabling signal stack failed, errno = %d\n", errno); } +void (*handlers[_NSIG])(int sig, struct sigcontext *sc); + +extern void hard_handler(int sig); + void set_handler(int sig, void (*handler)(int), int flags, ...) { struct sigaction action; @@ -133,13 +118,16 @@ void set_handler(int sig, void (*handler)(int), int flags, ...) 
sigset_t sig_mask; int mask; - va_start(ap, flags); - action.sa_handler = handler; + handlers[sig] = (void (*)(int, struct sigcontext *)) handler; + action.sa_handler = hard_handler; + sigemptyset(&action.sa_mask); - while((mask = va_arg(ap, int)) != -1){ + + va_start(ap, flags); + while((mask = va_arg(ap, int)) != -1) sigaddset(&action.sa_mask, mask); - } va_end(ap); + action.sa_flags = flags; action.sa_restorer = NULL; if(sigaction(sig, &action, NULL) < 0) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 7baf90fda58b..42e3d1ed802c 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -8,7 +8,6 @@ #include <unistd.h> #include <errno.h> #include <signal.h> -#include <setjmp.h> #include <sched.h> #include "ptrace_user.h" #include <sys/wait.h> @@ -156,11 +155,15 @@ extern int __syscall_stub_start; static int userspace_tramp(void *stack) { void *addr; + int err; ptrace(PTRACE_TRACEME, 0, 0, 0); init_new_thread_signals(); - enable_timer(); + err = set_interval(1); + if(err) + panic("userspace_tramp - setting timer failed, errno = %d\n", + err); if(!proc_mm){ /* This has a pte, but it can't be mapped in with the usual @@ -190,14 +193,25 @@ static int userspace_tramp(void *stack) } } if(!ptrace_faultinfo && (stack != NULL)){ + struct sigaction sa; + unsigned long v = UML_CONFIG_STUB_CODE + (unsigned long) stub_segv_handler - (unsigned long) &__syscall_stub_start; set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); - set_handler(SIGSEGV, (void *) v, SA_ONSTACK, - SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, - SIGUSR1, -1); + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGIO); + sigaddset(&sa.sa_mask, SIGWINCH); + sigaddset(&sa.sa_mask, SIGALRM); + sigaddset(&sa.sa_mask, SIGVTALRM); + sigaddset(&sa.sa_mask, SIGUSR1); + sa.sa_flags = SA_ONSTACK; + sa.sa_handler = (void *) v; + sa.sa_restorer = NULL; + if(sigaction(SIGSEGV, &sa, NULL) < 0) + panic("userspace_tramp - setting SIGSEGV handler " + "failed - errno = %d\n", errno); } os_stop_process(os_getpid()); @@ -470,7 +484,7 @@ void thread_wait(void *sw, void *fb) *switch_buf = &buf; fork_buf = fb; if(UML_SETJMP(&buf) == 0) - siglongjmp(*fork_buf, INIT_JMP_REMOVE_SIGSTACK); + UML_LONGJMP(fork_buf, INIT_JMP_REMOVE_SIGSTACK); } void switch_threads(void *me, void *next) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 503148504009..7fe92680c7dd 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -14,7 +14,6 @@ #include <sched.h> #include <fcntl.h> #include <errno.h> -#include <setjmp.h> #include <sys/time.h> #include <sys/wait.h> #include <sys/mman.h> diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile index b3213613c41c..37806621b25d 100644 --- a/arch/um/os-Linux/sys-i386/Makefile +++ b/arch/um/os-Linux/sys-i386/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-$(CONFIG_MODE_SKAS) = registers.o tls.o +obj-$(CONFIG_MODE_SKAS) = registers.o signal.o tls.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 516f66dd87e3..7cd0369e02b3 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -5,12 +5,12 @@ #include <errno.h> #include <string.h> -#include <setjmp.h> #include "sysdep/ptrace_user.h" #include "sysdep/ptrace.h" #include "uml-config.h" #include "skas_ptregs.h" #include "registers.h" +#include "longjmp.h" #include "user.h" /* These are set once at boot time and not changed 
thereafter */ @@ -130,11 +130,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) HOST_FP_SIZE * sizeof(unsigned long)); } -void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) +unsigned long get_thread_reg(int reg, jmp_buf *buf) { - struct __jmp_buf_tag *jmpbuf = buffer; - - UPT_SET(uml_regs, EIP, jmpbuf->__jmpbuf[JB_PC]); - UPT_SET(uml_regs, UESP, jmpbuf->__jmpbuf[JB_SP]); - UPT_SET(uml_regs, EBP, jmpbuf->__jmpbuf[JB_BP]); + switch(reg){ + case EIP: return buf[0]->__eip; + case UESP: return buf[0]->__esp; + case EBP: return buf[0]->__ebp; + default: + printk("get_thread_regs - unknown register %d\n", reg); + return 0; + } } diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c new file mode 100644 index 000000000000..0d3eae518352 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/signal.c @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void (*handlers[])(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + + (*handlers[sig])(sig, sc); +} diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile index 340ef26f5944..f67842a7735b 100644 --- a/arch/um/os-Linux/sys-x86_64/Makefile +++ b/arch/um/os-Linux/sys-x86_64/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-$(CONFIG_MODE_SKAS) = registers.o +obj-$(CONFIG_MODE_SKAS) = registers.o signal.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index becd898d9398..cb8e8a263280 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -5,11 +5,11 @@ #include <errno.h> #include <string.h> -#include <setjmp.h> #include "ptrace_user.h" #include "uml-config.h" #include "skas_ptregs.h" #include "registers.h" +#include "longjmp.h" #include "user.h" /* These are set once at boot time and not changed thereafter */ @@ -78,11 +78,14 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) HOST_FP_SIZE * sizeof(unsigned long)); } -void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) +unsigned long get_thread_reg(int reg, jmp_buf *buf) { - struct __jmp_buf_tag *jmpbuf = buffer; - - UPT_SET(uml_regs, RIP, jmpbuf->__jmpbuf[JB_PC]); - UPT_SET(uml_regs, RSP, jmpbuf->__jmpbuf[JB_RSP]); - UPT_SET(uml_regs, RBP, jmpbuf->__jmpbuf[JB_RBP]); + switch(reg){ + case RIP: return buf[0]->__rip; + case RSP: return buf[0]->__rsp; + case RBP: return buf[0]->__rbp; + default: + printk("get_thread_regs - unknown register %d\n", reg); + return 0; + } } diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c new file mode 100644 index 000000000000..3f369e5f976b --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/signal.c @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void (*handlers[])(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + struct ucontext *uc; + asm("movq %%rdx, %0" : "=r" (uc)); + + (*handlers[sig])(sig, (struct sigcontext *) &uc->uc_mcontext); +} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 4ae73c0e5485..38be096e750f 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -17,20 +17,25 @@ #include "kern_constants.h" #include "os.h" -static void set_interval(int timer_type) +int set_interval(int 
is_virtual) { int usec = 1000000/hz(); + int timer_type = is_virtual ? ITIMER_VIRTUAL : ITIMER_REAL; struct itimerval interval = ((struct itimerval) { { 0, usec }, { 0, usec } }); if(setitimer(timer_type, &interval, NULL) == -1) - panic("setitimer failed - errno = %d\n", errno); + return -errno; + + return 0; } +#ifdef CONFIG_MODE_TT void enable_timer(void) { - set_interval(ITIMER_VIRTUAL); + set_interval(1); } +#endif void disable_timer(void) { @@ -40,8 +45,8 @@ void disable_timer(void) printk("disable_timer - setitimer failed, errno = %d\n", errno); /* If there are signals already queued, after unblocking ignore them */ - set_handler(SIGALRM, SIG_IGN, 0, -1); - set_handler(SIGVTALRM, SIG_IGN, 0, -1); + signal(SIGALRM, SIG_IGN); + signal(SIGVTALRM, SIG_IGN); } void switch_timers(int to_real) @@ -74,7 +79,7 @@ void uml_idle_timer(void) set_handler(SIGALRM, (__sighandler_t) alarm_handler, SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); - set_interval(ITIMER_REAL); + set_interval(0); } #endif @@ -94,8 +99,3 @@ void idle_sleep(int secs) ts.tv_nsec = 0; nanosleep(&ts, NULL); } - -void user_time_init(void) -{ - set_interval(ITIMER_VIRTUAL); -} diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c index 90b29ae9af46..1df231a26244 100644 --- a/arch/um/os-Linux/trap.c +++ b/arch/um/os-Linux/trap.c @@ -5,7 +5,6 @@ #include <stdlib.h> #include <signal.h> -#include <setjmp.h> #include "kern_util.h" #include "user_util.h" #include "os.h" diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c index 865f6a6a2590..bbb73a650370 100644 --- a/arch/um/os-Linux/uaccess.c +++ b/arch/um/os-Linux/uaccess.c @@ -4,8 +4,7 @@ * Licensed under the GPL */ -#include <setjmp.h> -#include <string.h> +#include <stddef.h> #include "longjmp.h" unsigned long __do_user_copy(void *to, const void *from, int n, diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index c47a2a7ce70e..3f5b1514e8a7 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -7,7 +7,6 @@ #include <stdlib.h> #include <unistd.h> #include <limits.h> -#include <setjmp.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/utsname.h> @@ -107,11 +106,11 @@ int setjmp_wrapper(void (*proc)(void *, void *), ...)
jmp_buf buf; int n; - n = sigsetjmp(buf, 1); + n = UML_SETJMP(&buf); if(n == 0){ va_start(args, proc); (*proc)(&buf, &args); } va_end(args); - return(n); + return n; } diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 374d61a19439..59cc70275754 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,5 +1,5 @@ obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o signal.o sigcontext.o syscalls.o sysrq.o \ + ptrace_user.o setjmp.o signal.o sigcontext.o syscalls.o sysrq.o \ sys_call_table.o tls.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c index 41b0ab2fe830..f1bcd399ac90 100644 --- a/arch/um/sys-i386/bugs.c +++ b/arch/um/sys-i386/bugs.c @@ -13,6 +13,7 @@ #include "sysdep/ptrace.h" #include "task.h" #include "os.h" +#include "user_util.h" #define MAXTOKEN 64 @@ -104,17 +105,17 @@ int cpu_feature(char *what, char *buf, int len) static int check_cpu_flag(char *feature, int *have_it) { char buf[MAXTOKEN], c; - int fd, len = sizeof(buf)/sizeof(buf[0]); + int fd, len = ARRAY_SIZE(buf); printk("Checking for host processor %s support...", feature); fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); if(fd < 0){ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); - return(0); + return 0; } *have_it = 0; - if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) + if(!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf))) goto out; c = token(fd, buf, len - 1, ' '); @@ -138,7 +139,7 @@ static int check_cpu_flag(char *feature, int *have_it) if(*have_it == 0) printk("No\n"); else if(*have_it == 1) printk("Yes\n"); os_close_file(fd); - return(1); + return 1; } #if 0 /* This doesn't work in tt mode, plus it's causing compilation problems diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index fe0877b3509c..69971b78beaf 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -424,9 +424,8 @@ void ldt_get_host_info(void) size++; } - if(size < sizeof(dummy_list)/sizeof(dummy_list[0])) { + if(size < ARRAY_SIZE(dummy_list)) host_ldt_entries = dummy_list; - } else { size = (size + 1) * sizeof(dummy_list[0]); host_ldt_entries = (short *)kmalloc(size, GFP_KERNEL); diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c index 40aa88531446..5f3cc6685820 100644 --- a/arch/um/sys-i386/ptrace_user.c +++ b/arch/um/sys-i386/ptrace_user.c @@ -15,6 +15,7 @@ #include "user.h" #include "os.h" #include "uml-config.h" +#include "user_util.h" int ptrace_getregs(long pid, unsigned long *regs_out) { @@ -51,7 +52,7 @@ static void write_debugregs(int pid, unsigned long *regs) int nregs, i; dummy = NULL; - nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); + nregs = ARRAY_SIZE(dummy->u_debugreg); for(i = 0; i < nregs; i++){ if((i == 4) || (i == 5)) continue; if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], @@ -68,7 +69,7 @@ static void read_debugregs(int pid, unsigned long *regs) int nregs, i; dummy = NULL; - nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); + nregs = ARRAY_SIZE(dummy->u_debugreg); for(i = 0; i < nregs; i++){ regs[i] = ptrace(PTRACE_PEEKUSR, pid, &dummy->u_debugreg[i], 0); diff --git a/arch/um/sys-i386/setjmp.S b/arch/um/sys-i386/setjmp.S new file mode 100644 index 000000000000..b766792c9933 --- /dev/null +++ b/arch/um/sys-i386/setjmp.S @@ -0,0 +1,58 @@ +# +# arch/i386/setjmp.S +# +# setjmp/longjmp for the i386 architecture +# + +# +# The jmp_buf is assumed to contain the 
following, in order: +# %ebx +# %esp +# %ebp +# %esi +# %edi +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: +#ifdef _REGPARM + movl %eax,%edx +#else + movl 4(%esp),%edx +#endif + popl %ecx # Return address, and adjust the stack + xorl %eax,%eax # Return value + movl %ebx,(%edx) + movl %esp,4(%edx) # Post-return %esp! + pushl %ecx # Make the call/return stack happy + movl %ebp,8(%edx) + movl %esi,12(%edx) + movl %edi,16(%edx) + movl %ecx,20(%edx) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: +#ifdef _REGPARM + xchgl %eax,%edx +#else + movl 4(%esp),%edx # jmp_ptr address + movl 8(%esp),%eax # Return value +#endif + movl (%edx),%ebx + movl 4(%edx),%esp + movl 8(%edx),%ebp + movl 12(%edx),%esi + movl 16(%edx),%edi + jmp *20(%edx) + + .size longjmp,.-longjmp diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index c19794d435d6..f41768b8e25e 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -5,8 +5,8 @@ # obj-y = bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ - sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o ksyms.o \ - tls.o + setjmp.o sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o \ + ksyms.o tls.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o obj-$(CONFIG_MODULES) += um_module.o diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/um/sys-x86_64/setjmp.S new file mode 100644 index 000000000000..45f547b4043e --- /dev/null +++ b/arch/um/sys-x86_64/setjmp.S @@ -0,0 +1,54 @@ +# +# arch/x86_64/setjmp.S +# +# setjmp/longjmp for the x86-64 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %rbx +# %rsp (post-return) +# %rbp +# %r12 +# %r13 +# %r14 +# %r15 +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: + pop %rsi # Return address, and adjust the stack + xorl %eax,%eax # Return value + movq %rbx,(%rdi) + movq %rsp,8(%rdi) # Post-return %rsp! 
+ push %rsi # Make the call/return stack happy + movq %rbp,16(%rdi) + movq %r12,24(%rdi) + movq %r13,32(%rdi) + movq %r14,40(%rdi) + movq %r15,48(%rdi) + movq %rsi,56(%rdi) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: + movl %esi,%eax # Return value (int) + movq (%rdi),%rbx + movq 8(%rdi),%rsp + movq 16(%rdi),%rbp + movq 24(%rdi),%r12 + movq 32(%rdi),%r13 + movq 40(%rdi),%r14 + movq 48(%rdi),%r15 + jmp *56(%rdi) + + .size longjmp,.-longjmp diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 6cd4878625f1..581ce9af0ec8 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -24,6 +24,10 @@ config X86 bool default y +config ZONE_DMA32 + bool + default y + config LOCKDEP_SUPPORT bool default y diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index d6d7f731f6f0..708a3cd9a27e 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/kexec.h> #include <linux/module.h> +#include <linux/mm.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -297,6 +298,53 @@ void __init e820_reserve_resources(void) } } +/* Mark pages corresponding to given address range as nosave */ +static void __init +e820_mark_nosave_range(unsigned long start, unsigned long end) +{ + unsigned long pfn, max_pfn; + + if (start >= end) + return; + + printk("Nosave address range: %016lx - %016lx\n", start, end); + max_pfn = end >> PAGE_SHIFT; + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) + if (pfn_valid(pfn)) + SetPageNosave(pfn_to_page(pfn)); +} + +/* + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for software + * suspend and suspend to RAM. + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(void) +{ + int i; + unsigned long paddr; + + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (paddr < ei->addr) + e820_mark_nosave_range(paddr, + round_up(ei->addr, PAGE_SIZE)); + + paddr = round_down(ei->addr + ei->size, PAGE_SIZE); + if (ei->type != E820_RAM) + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), + paddr); + + if (paddr >= (end_pfn << PAGE_SHIFT)) + break; + } +} + /* * Add a memory region to the kernel e820 map. 
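The e820_mark_nosave_regions() scan above keeps a cursor at the (rounded) end of the last area seen, flagging both the holes between consecutive e820 entries and the non-RAM entries themselves. A compact user-space rendering of the same loop, assuming a sorted, non-overlapping map whose first entry is RAM (addresses are illustrative; page rounding and the end-of-memory cutoff are omitted):

#include <stdio.h>

struct region { unsigned long start, end; int is_ram; };

static void mark_nosave(unsigned long start, unsigned long end)
{
	printf("nosave: %#lx - %#lx\n", start, end);
}

int main(void)
{
	struct region map[] = {
		{ 0x0,      0x9f000,   1 },	/* RAM */
		{ 0xf0000,  0x100000,  0 },	/* reserved */
		{ 0x100000, 0x4000000, 1 },	/* RAM */
	};
	unsigned long cursor = map[0].end;
	int i;

	for (i = 1; i < 3; i++) {
		if (cursor < map[i].start)	/* hole between areas */
			mark_nosave(cursor, map[i].start);
		cursor = map[i].end;
		if (!map[i].is_ram)		/* non-RAM area itself */
			mark_nosave(map[i].start, cursor);
	}
	return 0;	/* prints 0x9f000-0xf0000, then 0xf0000-0x100000 */
}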
*/ diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 34afad704824..4b39f0da17f3 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -689,6 +689,7 @@ void __init setup_arch(char **cmdline_p) */ probe_roms(); e820_reserve_resources(); + e820_mark_nosave_regions(); request_resource(&iomem_resource, &video_ram_resource); diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 975380207b46..3ae9ffddddc0 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -46,9 +46,10 @@ #include <linux/bootmem.h> #include <linux/thread_info.h> #include <linux/module.h> - #include <linux/delay.h> #include <linux/mc146818rtc.h> +#include <linux/smp.h> + #include <asm/mtrr.h> #include <asm/pgalloc.h> #include <asm/desc.h> diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 320b6fb00cca..bfbe00763c68 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S @@ -54,7 +54,7 @@ ENTRY(restore_image) movq %rcx, %cr3; movq %rax, %cr4; # turn PGE back on - movq pagedir_nosave(%rip), %rdx + movq restore_pblist(%rip), %rdx loop: testq %rdx, %rdx jz done diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 7a9b18224182..7700e6cd2bd9 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1148,23 +1148,25 @@ int hpet_rtc_timer_init(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); + local_irq_restore(flags); + return 1; } static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, cnt; + unsigned int cfg, cnt, ticks_per_int, lost_ints; if (unlikely(!(PIE_on | AIE_on | UIE_on))) { cfg = hpet_readl(HPET_T1_CFG); @@ -1179,10 +1181,33 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; + ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; + hpet_t1_cmp += ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + /* + * If the interrupt handler was delayed too long, the write above tries + * to schedule the next interrupt in the past and the hardware would + * not interrupt until the counter had wrapped around. + * So we have to check that the comparator wasn't set to a past time. 
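A worked example of the recovery arithmetic in the hpet_rtc_timer_reinit() hunk above (values illustrative): with ticks_per_int = 1000 and the comparator last set just below the 32-bit wrap point, a handler delayed by 2.5 periods still trips the signed comparison, because unsigned subtraction of a free-running counter is wraparound-safe for distances under 2^31 ticks:

#include <stdio.h>

int main(void)
{
	unsigned int ticks_per_int = 1000;
	unsigned int t1_cmp = 0xfffffc18u;	/* stale comparator */
	unsigned int cnt = t1_cmp + 2500;	/* counter wrapped past zero */

	if ((int)(cnt - t1_cmp) > 0) {		/* 2500 > 0: we are late */
		unsigned int lost = (cnt - t1_cmp) / ticks_per_int + 1;	/* 3 */
		lost++;				/* margin for the fixup code */
		t1_cmp += lost * ticks_per_int;	/* now 1500 ticks ahead */
		printf("lost %u ints, new cmp %#x\n", lost, t1_cmp);
	}
	return 0;
}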
+ */ + cnt = hpet_readl(HPET_COUNTER); + if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { + lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; + /* Make sure that, even with the time needed to execute + * this code, the next scheduled interrupt has been moved + * back to the future: */ + lost_ints++; + + hpet_t1_cmp += lost_ints * ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + if (PIE_on) + PIE_count += lost_ints; + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + hpet_rtc_int_freq); + } } /* diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index ac8ea66ccb94..4198798e1469 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -299,7 +299,7 @@ static int vmalloc_fault(unsigned long address) if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); /* Below here mismatches are bugs because these lower tables are shared */ @@ -308,7 +308,7 @@ static int vmalloc_fault(unsigned long address) pud_ref = pud_offset(pgd_ref, address); if (pud_none(*pud_ref)) return -1; - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) BUG(); pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); @@ -641,7 +641,7 @@ void vmalloc_sync_all(void) if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } spin_unlock(&pgd_lock); set_bit(pgd_index(address), insync); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index d14fb2dfbfc4..52fd42c40c86 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -536,7 +536,7 @@ int memory_add_physaddr_to_nid(u64 start) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdat = NODE_DATA(nid); - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; + struct zone *zone = pgdat->node_zones + ZONE_NORMAL; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index ab2ecccf7798..ffa111eea9da 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -851,7 +851,7 @@ static void piix_set_piomode (struct ata_port *ap, struct ata_device *adev) * @ap: Port whose timings we are configuring * @adev: Drive in question * @udma: udma mode, 0 - 6 - * @is_ich: set if the chip is an ICH device + * @isich: set if the chip is an ICH device * * Set UDMA mode for device, in host controller PCI config space. 
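In the arch/x86_64/mm/init.c hunk above, spelling the zone out as ZONE_NORMAL instead of computing MAX_NR_ZONES-2 removes a silent dependency on the size of the zone enum, which this series makes configuration-dependent (ZONE_DMA32 becomes a config option, and highmem counters become conditional). The hazard, sketched with a hypothetical enum:

#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

int main(void)
{
	/* MAX_NR_ZONES - 2 equals ZONE_NORMAL only while exactly one
	 * zone follows it; remove ZONE_HIGHMEM from the enum and the
	 * computed index silently names a different zone. */
	printf("computed %d, named %d\n", MAX_NR_ZONES - 2, ZONE_NORMAL);
	return 0;
}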
* diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 27c22feebf30..8cd730fe5dd3 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -484,7 +484,7 @@ static void nv_error_handler(struct ata_port *ap) static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version = 0; - struct ata_port_info *ppi; + struct ata_port_info *ppi[2]; struct ata_probe_ent *probe_ent; int pci_dev_busy = 0; int rc; @@ -520,8 +520,8 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = -ENOMEM; - ppi = &nv_port_info[ent->driver_data]; - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + ppi[0] = ppi[1] = &nv_port_info[ent->driver_data]; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) goto err_out_regions; diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c index 9b17375d8056..18d49fff8dc4 100644 --- a/drivers/ata/sata_sis.c +++ b/drivers/ata/sata_sis.c @@ -240,7 +240,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) struct ata_probe_ent *probe_ent = NULL; int rc; u32 genctl; - struct ata_port_info *ppi; + struct ata_port_info *ppi[2]; int pci_dev_busy = 0; u8 pmr; u8 port2_start; @@ -265,8 +265,8 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out_regions; - ppi = &sis_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + ppi[0] = ppi[1] = &sis_port_info; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index 8fc6e800011a..dd76f37be182 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -185,7 +185,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version; struct ata_probe_ent *probe_ent; - struct ata_port_info *ppi; + struct ata_port_info *ppi[2]; int rc; unsigned int board_idx = (unsigned int) ent->driver_data; int pci_dev_busy = 0; @@ -211,8 +211,8 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out_regions; - ppi = &uli_port_info; - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + ppi[0] = ppi[1] = &uli_port_info; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) { rc = -ENOMEM; goto err_out_regions; diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 7f087aef99de..a72a2389a11c 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -318,9 +318,10 @@ static void vt6421_init_addrs(struct ata_probe_ent *probe_ent, static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev) { struct ata_probe_ent *probe_ent; - struct ata_port_info *ppi = &vt6420_port_info; - - probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); + struct ata_port_info *ppi[2]; + + ppi[0] = ppi[1] = &vt6420_port_info; + probe_ent = ata_pci_init_native_mode(pdev, ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY); if (!probe_ent) return NULL; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 41e052fecd7f..f2511b42dba2 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -454,7 +454,7 @@ rate_to_atmf(unsigned rate) /* cps to atm forum format */ return (NONZERO | 
(exp << 9) | (rate & 0x1ff)); } -static void __init +static void __devinit he_init_rx_lbfp0(struct he_dev *he_dev) { unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; @@ -485,7 +485,7 @@ he_init_rx_lbfp0(struct he_dev *he_dev) he_writel(he_dev, he_dev->r0_numbuffs, RLBF0_C); } -static void __init +static void __devinit he_init_rx_lbfp1(struct he_dev *he_dev) { unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; @@ -516,7 +516,7 @@ he_init_rx_lbfp1(struct he_dev *he_dev) he_writel(he_dev, he_dev->r1_numbuffs, RLBF1_C); } -static void __init +static void __devinit he_init_tx_lbfp(struct he_dev *he_dev) { unsigned i, lbm_offset, lbufd_index, lbuf_addr, lbuf_count; @@ -546,7 +546,7 @@ he_init_tx_lbfp(struct he_dev *he_dev) he_writel(he_dev, lbufd_index - 1, TLBF_T); } -static int __init +static int __devinit he_init_tpdrq(struct he_dev *he_dev) { he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev, @@ -568,7 +568,7 @@ he_init_tpdrq(struct he_dev *he_dev) return 0; } -static void __init +static void __devinit he_init_cs_block(struct he_dev *he_dev) { unsigned clock, rate, delta; @@ -664,7 +664,7 @@ he_init_cs_block(struct he_dev *he_dev) } -static int __init +static int __devinit he_init_cs_block_rcm(struct he_dev *he_dev) { unsigned (*rategrid)[16][16]; @@ -785,7 +785,7 @@ he_init_cs_block_rcm(struct he_dev *he_dev) return 0; } -static int __init +static int __devinit he_init_group(struct he_dev *he_dev, int group) { int i; @@ -955,7 +955,7 @@ he_init_group(struct he_dev *he_dev, int group) return 0; } -static int __init +static int __devinit he_init_irq(struct he_dev *he_dev) { int i; diff --git a/drivers/base/node.c b/drivers/base/node.c index e9b0957f15d1..001e6f6b9c1b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -54,10 +54,12 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d MemUsed: %8lu kB\n" "Node %d Active: %8lu kB\n" "Node %d Inactive: %8lu kB\n" +#ifdef CONFIG_HIGHMEM "Node %d HighTotal: %8lu kB\n" "Node %d HighFree: %8lu kB\n" "Node %d LowTotal: %8lu kB\n" "Node %d LowFree: %8lu kB\n" +#endif "Node %d Dirty: %8lu kB\n" "Node %d Writeback: %8lu kB\n" "Node %d FilePages: %8lu kB\n" @@ -66,16 +68,20 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" - "Node %d Slab: %8lu kB\n", + "Node %d Slab: %8lu kB\n" + "Node %d SReclaimable: %8lu kB\n" + "Node %d SUnreclaim: %8lu kB\n", nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), nid, K(active), nid, K(inactive), +#ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), nid, K(i.freehigh), nid, K(i.totalram - i.totalhigh), nid, K(i.freeram - i.freehigh), +#endif nid, K(node_page_state(nid, NR_FILE_DIRTY)), nid, K(node_page_state(nid, NR_WRITEBACK)), nid, K(node_page_state(nid, NR_FILE_PAGES)), @@ -84,7 +90,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) nid, K(node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), nid, K(node_page_state(nid, NR_BOUNCE)), - nid, K(node_page_state(nid, NR_SLAB))); + nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) + + node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), + nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), + nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); n += hugetlb_report_node_meminfo(nid, buf + n); return n; } diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 6e6a7c7a7eff..ab6429b4a84e 100644 --- a/drivers/char/rtc.c +++ 
b/drivers/char/rtc.c @@ -209,11 +209,12 @@ static const unsigned char days_in_mo[] = */ static inline unsigned char rtc_is_updating(void) { + unsigned long flags; unsigned char uip; - spin_lock_irq(&rtc_lock); + spin_lock_irqsave(&rtc_lock, flags); uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return uip; } diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index 71f27e955d87..c7854ea57b52 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -476,13 +476,13 @@ static int auide_dma_lostirq(ide_drive_t *drive) return 0; } -static void auide_ddma_tx_callback(int irq, void *param, struct pt_regs *regs) +static void auide_ddma_tx_callback(int irq, void *param) { _auide_hwif *ahwif = (_auide_hwif*)param; ahwif->drive->waiting_for_dma = 0; } -static void auide_ddma_rx_callback(int irq, void *param, struct pt_regs *regs) +static void auide_ddma_rx_callback(int irq, void *param) { _auide_hwif *ahwif = (_auide_hwif*)param; ahwif->drive->waiting_for_dma = 0; diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c index 88bf2af2a0e7..edd7b83c3464 100644 --- a/drivers/media/video/videodev.c +++ b/drivers/media/video/videodev.c @@ -836,7 +836,7 @@ static int __video_do_ioctl(struct inode *inode, struct file *file, break; } - if (index<=0 || index >= vfd->tvnormsize) { + if (index < 0 || index >= vfd->tvnormsize) { ret=-EINVAL; break; } diff --git a/drivers/mmc/au1xmmc.c b/drivers/mmc/au1xmmc.c index fb606165af3b..61268da13957 100644 --- a/drivers/mmc/au1xmmc.c +++ b/drivers/mmc/au1xmmc.c @@ -731,7 +731,7 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios) } } -static void au1xmmc_dma_callback(int irq, void *dev_id, struct pt_regs *regs) +static void au1xmmc_dma_callback(int irq, void *dev_id) { struct au1xmmc_host *host = (struct au1xmmc_host *) dev_id; diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c index 77670741e101..feb42db10ee1 100644 --- a/drivers/net/sunlance.c +++ b/drivers/net/sunlance.c @@ -1323,9 +1323,9 @@ static const struct ethtool_ops sparc_lance_ethtool_ops = { .get_link = sparc_lance_get_link, }; -static int __init sparc_lance_probe_one(struct sbus_dev *sdev, - struct sbus_dma *ledma, - struct sbus_dev *lebuffer) +static int __devinit sparc_lance_probe_one(struct sbus_dev *sdev, + struct sbus_dma *ledma, + struct sbus_dev *lebuffer) { static unsigned version_printed; struct net_device *dev; @@ -1515,7 +1515,7 @@ fail: } /* On 4m, find the associated dma for the lance chip */ -static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev) +static struct sbus_dma * __devinit find_ledma(struct sbus_dev *sdev) { struct sbus_dma *p; @@ -1533,7 +1533,7 @@ static inline struct sbus_dma *find_ledma(struct sbus_dev *sdev) /* Find all the lance cards on the system and initialize them */ static struct sbus_dev sun4_sdev; -static int __init sparc_lance_init(void) +static int __devinit sparc_lance_init(void) { if ((idprom->id_machtype == (SM_SUN4|SM_4_330)) || (idprom->id_machtype == (SM_SUN4|SM_4_470))) { diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 372e47f7d596..5f7ba1adb309 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -1929,6 +1929,13 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port) mutex_lock(&state->mutex); +#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND + if (uart_console(port)) { + mutex_unlock(&state->mutex); + return 0; + } 
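The drivers/char/rtc.c hunk at the start of this chunk switches rtc_is_updating() from spin_lock_irq()/spin_unlock_irq() to the irqsave variants: the _irq forms unconditionally re-enable interrupts on unlock, which corrupts the state of any caller that entered with interrupts already disabled, while save/restore puts back exactly what the caller had. The shape of the fix, annotated (a kernel-style fragment mirroring that hunk, not standalone code):

	unsigned long flags;
	unsigned char uip;

	spin_lock_irqsave(&rtc_lock, flags);	/* safe with IRQs on or off */
	uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
	spin_unlock_irqrestore(&rtc_lock, flags);	/* restores caller's IRQ state */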
+#endif + if (state->info && state->info->flags & UIF_INITIALIZED) { const struct uart_ops *ops = port->ops; @@ -1967,6 +1974,13 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) mutex_lock(&state->mutex); +#ifdef CONFIG_DISABLE_CONSOLE_SUSPEND + if (uart_console(port)) { + mutex_unlock(&state->mutex); + return 0; + } +#endif + uart_change_pm(state, 0); /* diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c index 4f78f234473d..c151dcf68786 100644 --- a/drivers/video/fbsysfs.c +++ b/drivers/video/fbsysfs.c @@ -397,6 +397,12 @@ static ssize_t store_bl_curve(struct class_device *class_device, u8 tmp_curve[FB_BACKLIGHT_LEVELS]; unsigned int i; + /* Some drivers don't use framebuffer_alloc(), but those also + * don't have backlights. + */ + if (!fb_info || !fb_info->bl_dev) + return -ENODEV; + if (count != (FB_BACKLIGHT_LEVELS / 8 * 24)) return -EINVAL; @@ -430,6 +436,12 @@ static ssize_t show_bl_curve(struct class_device *class_device, char *buf) ssize_t len = 0; unsigned int i; + /* Some drivers don't use framebuffer_alloc(), but those also + * don't have backlights. + */ + if (!fb_info || !fb_info->bl_dev) + return -ENODEV; + mutex_lock(&fb_info->bl_mutex); for (i = 0; i < FB_BACKLIGHT_LEVELS; i += 8) len += snprintf(&buf[len], PAGE_SIZE, diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 8dbd44f10e9d..d96e5c14a9ca 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -32,7 +32,7 @@ static inline int autofs4_can_expire(struct dentry *dentry, if (!do_now) { /* Too young to die */ - if (time_after(ino->last_used + timeout, now)) + if (!timeout || time_after(ino->last_used + timeout, now)) return 0; /* update last_used here :- @@ -253,7 +253,7 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb, struct dentry *root = dget(sb->s_root); int do_now = how & AUTOFS_EXP_IMMEDIATE; - if (!sbi->exp_timeout || !root) + if (!root) return NULL; now = jiffies; @@ -293,7 +293,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb, int do_now = how & AUTOFS_EXP_IMMEDIATE; int exp_leaves = how & AUTOFS_EXP_LEAVES; - if ( !sbi->exp_timeout || !root ) + if (!root) return NULL; now = jiffies; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 672a3b90bc55..64802aabd1ac 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1262,7 +1262,7 @@ static void fill_elf_header(struct elfhdr *elf, int segs) return; } -static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) +static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) { phdr->p_type = PT_NOTE; phdr->p_offset = offset; @@ -1428,7 +1428,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - off_t offset = 0, dataoff; + loff_t offset = 0, dataoff; unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; int numnote; struct memelfnote *notes = NULL; @@ -1661,11 +1661,11 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) ELF_CORE_WRITE_EXTRA_DATA; #endif - if ((off_t)file->f_pos != offset) { + if (file->f_pos != offset) { /* Sanity check */ printk(KERN_WARNING - "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", - (off_t)file->f_pos, offset); + "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n", + file->f_pos, offset); } end_coredump: diff --git a/fs/buffer.c b/fs/buffer.c index 71649ef9b658..3b6d701073e7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct 
page *page) spin_lock(&mapping->private_lock); ret = drop_buffers(page, &buffers_to_free); + spin_unlock(&mapping->private_lock); if (ret) { /* * If the filesystem writes its buffers by hand (eg ext3) @@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page) */ clear_page_dirty(page); } - spin_unlock(&mapping->private_lock); out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 42da60784311..32a8caf0c41e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal, return (ret == -EIO); } +static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) +{ + int i; + + for (i = 0; i < bufs; i++) { + wbuf[i]->b_end_io = end_buffer_write_sync; + /* We use-up our safety reference in submit_bh() */ + submit_bh(WRITE, wbuf[i]); + } +} + +/* + * Submit all the data buffers to disk + */ +static void journal_submit_data_buffers(journal_t *journal, + transaction_t *commit_transaction) +{ + struct journal_head *jh; + struct buffer_head *bh; + int locked; + int bufs = 0; + struct buffer_head **wbuf = journal->j_wbuf; + + /* + * Whenever we unlock the journal and sleep, things can get added + * onto ->t_sync_datalist, so we have to keep looping back to + * write_out_data until we *know* that the list is empty. + * + * Cleanup any flushed data buffers from the data list. Even in + * abort mode, we want to flush this out as soon as possible. + */ +write_out_data: + cond_resched(); + spin_lock(&journal->j_list_lock); + + while (commit_transaction->t_sync_datalist) { + jh = commit_transaction->t_sync_datalist; + bh = jh2bh(jh); + locked = 0; + + /* Get reference just to make sure buffer does not disappear + * when we are forced to drop various locks */ + get_bh(bh); + /* If the buffer is dirty, we need to submit IO and hence + * we need the buffer lock. We try to lock the buffer without + * blocking. If we fail, we need to drop j_list_lock and do + * blocking lock_buffer(). + */ + if (buffer_dirty(bh)) { + if (test_set_buffer_locked(bh)) { + BUFFER_TRACE(bh, "needs blocking lock"); + spin_unlock(&journal->j_list_lock); + /* Write out all data to prevent deadlocks */ + journal_do_submit_data(wbuf, bufs); + bufs = 0; + lock_buffer(bh); + spin_lock(&journal->j_list_lock); + } + locked = 1; + } + /* We have to get bh_state lock. Again out of order, sigh. */ + if (!inverted_lock(journal, bh)) { + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + } + /* Someone already cleaned up the buffer? 
*/ + if (!buffer_jbd(bh) + || jh->b_transaction != commit_transaction + || jh->b_jlist != BJ_SyncData) { + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + BUFFER_TRACE(bh, "already cleaned up"); + put_bh(bh); + continue; + } + if (locked && test_clear_buffer_dirty(bh)) { + BUFFER_TRACE(bh, "needs writeout, adding to array"); + wbuf[bufs++] = bh; + __journal_file_buffer(jh, commit_transaction, + BJ_Locked); + jbd_unlock_bh_state(bh); + if (bufs == journal->j_wbufsize) { + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); + bufs = 0; + goto write_out_data; + } + } + else { + BUFFER_TRACE(bh, "writeout complete: unfile"); + __journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + journal_remove_journal_head(bh); + /* Once for our safety reference, once for + * journal_remove_journal_head() */ + put_bh(bh); + put_bh(bh); + } + + if (lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); + goto write_out_data; + } + } + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); +} + /* * journal_commit_transaction * @@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal) * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ - err = 0; - /* - * Whenever we unlock the journal and sleep, things can get added - * onto ->t_sync_datalist, so we have to keep looping back to - * write_out_data until we *know* that the list is empty. - */ - bufs = 0; - /* - * Cleanup any flushed data buffers from the data list. Even in - * abort mode, we want to flush this out as soon as possible. - */ -write_out_data: - cond_resched(); - spin_lock(&journal->j_list_lock); - - while (commit_transaction->t_sync_datalist) { - struct buffer_head *bh; - - jh = commit_transaction->t_sync_datalist; - commit_transaction->t_sync_datalist = jh->b_tnext; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - BUFFER_TRACE(bh, "locked"); - if (!inverted_lock(journal, bh)) - goto write_out_data; - __journal_temp_unlink_buffer(jh); - __journal_file_buffer(jh, commit_transaction, - BJ_Locked); - jbd_unlock_bh_state(bh); - if (lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); - goto write_out_data; - } - } else { - if (buffer_dirty(bh)) { - BUFFER_TRACE(bh, "start journal writeout"); - get_bh(bh); - wbuf[bufs++] = bh; - if (bufs == journal->j_wbufsize) { - jbd_debug(2, "submit %d writes\n", - bufs); - spin_unlock(&journal->j_list_lock); - ll_rw_block(SWRITE, bufs, wbuf); - journal_brelse_array(wbuf, bufs); - bufs = 0; - goto write_out_data; - } - } else { - BUFFER_TRACE(bh, "writeout complete: unfile"); - if (!inverted_lock(journal, bh)) - goto write_out_data; - __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - put_bh(bh); - if (lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); - goto write_out_data; - } - } - } - } - - if (bufs) { - spin_unlock(&journal->j_list_lock); - ll_rw_block(SWRITE, bufs, wbuf); - journal_brelse_array(wbuf, bufs); - spin_lock(&journal->j_list_lock); - } + journal_submit_data_buffers(journal, commit_transaction); /* * Wait for all previously submitted IO to complete. 
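
The journal_submit_data_buffers() rewrite above replaces the old ll_rw_block(SWRITE, ...) loop with explicit submit_bh() batching, and its locking follows one idiom throughout: never block on a buffer while holding j_list_lock; instead drop the spinlock, flush the pending batch, take the blocking lock, and rescan. A condensed sketch of that idiom, with hypothetical demo_* types standing in for journal heads and buffer locks:

    #include <linux/spinlock.h>
    #include <linux/list.h>
    #include <linux/mutex.h>

    struct demo_item {
        struct list_head node;
        struct mutex io_lock;           /* stands in for the buffer lock */
    };

    static void demo_drain(struct list_head *list, spinlock_t *list_lock,
                           void (*submit)(struct demo_item *))
    {
        struct demo_item *item;

    restart:
        spin_lock(list_lock);
        while (!list_empty(list)) {
            item = list_entry(list->next, struct demo_item, node);
            if (!mutex_trylock(&item->io_lock)) {
                /* May not sleep under the spinlock: drop it, take
                 * the blocking lock, then rescan since the list may
                 * have changed meanwhile. */
                spin_unlock(list_lock);
                mutex_lock(&item->io_lock);     /* may sleep */
                mutex_unlock(&item->io_lock);
                goto restart;
            }
            list_del_init(&item->node);
            spin_unlock(list_lock);
            submit(item);                       /* does the actual I/O */
            mutex_unlock(&item->io_lock);
            spin_lock(list_lock);
        }
        spin_unlock(list_lock);
    }
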
*/ + spin_lock(&journal->j_list_lock); while (commit_transaction->t_locked_list) { struct buffer_head *bh; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 942156225447..5bbd60896050 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -157,10 +157,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "SwapCached: %8lu kB\n" "Active: %8lu kB\n" "Inactive: %8lu kB\n" +#ifdef CONFIG_HIGHMEM "HighTotal: %8lu kB\n" "HighFree: %8lu kB\n" "LowTotal: %8lu kB\n" "LowFree: %8lu kB\n" +#endif "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n" "Dirty: %8lu kB\n" @@ -168,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" "Slab: %8lu kB\n" + "SReclaimable: %8lu kB\n" + "SUnreclaim: %8lu kB\n" "PageTables: %8lu kB\n" "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" @@ -183,17 +187,22 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(total_swapcache_pages), K(active), K(inactive), +#ifdef CONFIG_HIGHMEM K(i.totalhigh), K(i.freehigh), K(i.totalram-i.totalhigh), K(i.freeram-i.freehigh), +#endif K(i.totalswap), K(i.freeswap), K(global_page_state(NR_FILE_DIRTY)), K(global_page_state(NR_WRITEBACK)), K(global_page_state(NR_ANON_PAGES)), K(global_page_state(NR_FILE_MAPPED)), - K(global_page_state(NR_SLAB)), + K(global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_SLAB_RECLAIMABLE)), + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_PAGETABLE)), K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index 64d0ab98fcd8..8af56ce346ad 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -75,6 +75,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32)) #define pte_pfn(pte) (pte_val(pte) >> 32) #define mk_pte(page, pgprot) \ diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index 93eaa58b7961..49ac9bee7ced 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -230,16 +230,17 @@ extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp) extern inline unsigned long -pmd_page_kernel(pmd_t pmd) +pmd_page_vaddr(pmd_t pmd) { return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET; } #ifndef CONFIG_DISCONTIGMEM #define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32)) +#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32)) #endif -extern inline unsigned long pgd_page(pgd_t pgd) +extern inline unsigned long pgd_page_vaddr(pgd_t pgd) { return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } extern inline int pte_none(pte_t pte) { return !pte_val(pte); } @@ -293,13 +294,13 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu /* Find an entry in the second-level page table.. */ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + return (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); } /* Find an entry in the third-level page table.. 
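
The pmd_page_kernel() -> pmd_page_vaddr() and pgd_page() -> pgd_page_vaddr() renames above (repeated across every architecture in this series) separate the two views of an upper-level entry: *_page_vaddr() returns the kernel virtual address of the next-level table, while *_page() now uniformly returns its struct page. A hedged sketch of a walk built on these helpers, matching the two/three-level layout shown here (no pud level, no locking; real walkers must hold the appropriate locks, and pmd_offset()'s argument type varies by architecture):

    #include <linux/mm.h>
    #include <asm/pgtable.h>

    static pte_t *demo_walk(struct mm_struct *mm, unsigned long addr)
    {
        pgd_t *pgd = pgd_offset(mm, addr);
        pmd_t *pmd;

        if (pgd_none(*pgd))
            return NULL;
        pmd = pmd_offset(pgd, addr);            /* via pgd_page_vaddr() */
        if (pmd_none(*pmd))
            return NULL;
        return pte_offset_kernel(pmd, addr);    /* via pmd_page_vaddr() */
    }
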
*/ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) { - return (pte_t *) pmd_page_kernel(*dir) + return (pte_t *) pmd_page_vaddr(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); } diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index 8d3919c6458c..4d10d319fa34 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -224,9 +224,9 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte_at((mm),(addr),(ptep), __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) -#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) -#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) @@ -291,7 +291,7 @@ PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); clean_pmd_entry(pmdp); \ } while (0) -static inline pte_t *pmd_page_kernel(pmd_t pmd) +static inline pte_t *pmd_page_vaddr(pmd_t pmd) { unsigned long ptr; diff --git a/include/asm-arm26/pgtable.h b/include/asm-arm26/pgtable.h index 19ac9101a6bb..63a8881fae13 100644 --- a/include/asm-arm26/pgtable.h +++ b/include/asm-arm26/pgtable.h @@ -186,12 +186,12 @@ extern struct page *empty_zero_page; * return a pointer to memory (no special alignment) */ #define pmd_page(pmd) ((struct page *)(pmd_val((pmd)) & ~_PMD_PRESENT)) -#define pmd_page_kernel(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT)) +#define pmd_page_vaddr(pmd) ((pte_t *)(pmd_val((pmd)) & ~_PMD_PRESENT)) -#define pte_offset_kernel(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) -#define pte_offset_map(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) -#define pte_offset_map_nested(dir,addr) (pmd_page_kernel(*(dir)) + __pte_index(addr)) +#define pte_offset_map(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_map_nested(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) diff --git a/include/asm-avr32/Kbuild b/include/asm-avr32/Kbuild new file mode 100644 index 000000000000..8770e73ce938 --- /dev/null +++ b/include/asm-avr32/Kbuild @@ -0,0 +1,3 @@ +include include/asm-generic/Kbuild.asm + +headers-y += cachectl.h diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h new file mode 100644 index 000000000000..50bf6e31a143 --- /dev/null +++ b/include/asm-avr32/a.out.h @@ -0,0 +1,26 @@ +#ifndef __ASM_AVR32_A_OUT_H +#define __ASM_AVR32_A_OUT_H + +struct exec +{ + unsigned long a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define 
N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#ifdef __KERNEL__ + +#define STACK_TOP TASK_SIZE + +#endif + +#endif /* __ASM_AVR32_A_OUT_H */ diff --git a/include/asm-avr32/addrspace.h b/include/asm-avr32/addrspace.h new file mode 100644 index 000000000000..366794858ec7 --- /dev/null +++ b/include/asm-avr32/addrspace.h @@ -0,0 +1,43 @@ +/* + * Definitions for the address spaces of the AVR32 CPUs. Heavily based on + * include/asm-sh/addrspace.h + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ADDRSPACE_H +#define __ASM_AVR32_ADDRSPACE_H + +#ifdef CONFIG_MMU + +/* Memory segments when segmentation is enabled */ +#define P0SEG 0x00000000 +#define P1SEG 0x80000000 +#define P2SEG 0xa0000000 +#define P3SEG 0xc0000000 +#define P4SEG 0xe0000000 + +/* Returns the privileged segment base of a given address */ +#define PXSEG(a) (((unsigned long)(a)) & 0xe0000000) + +/* Returns the physical address of a PnSEG (n=1,2) address */ +#define PHYSADDR(a) (((unsigned long)(a)) & 0x1fffffff) + +/* + * Map an address to a certain privileged segment + */ +#define P1SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P1SEG)) +#define P2SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P2SEG)) +#define P3SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P3SEG)) +#define P4SEGADDR(a) ((__typeof__(a))(((unsigned long)(a) & 0x1fffffff) \ + | P4SEG)) + +#endif /* CONFIG_MMU */ + +#endif /* __ASM_AVR32_ADDRSPACE_H */ diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h new file mode 100644 index 000000000000..ce1150d4438d --- /dev/null +++ b/include/asm-avr32/arch-at32ap/at91rm9200_pdc.h @@ -0,0 +1,36 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * Peripheral Data Controller (PDC) registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
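
The segmentation macros above follow the SuperH model: P1SEG is the cached identity mapping of physical memory and P2SEG an uncached alias of the same range, so switching views is a pure address transformation. An illustration (not part of the patch; real code should normally go through the DMA API rather than aliasing segments by hand):

    #include <asm/addrspace.h>

    /* Given a P1SEG (cached) kernel pointer, derive the uncached
     * P2SEG alias of the same physical memory. */
    static inline void *demo_uncached_alias(void *cached)
    {
        unsigned long phys = PHYSADDR(cached);  /* strip segment bits */

        return (void *)P2SEGADDR(phys);
    }
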
+ */ + +#ifndef AT91RM9200_PDC_H +#define AT91RM9200_PDC_H + +#define AT91_PDC_RPR 0x100 /* Receive Pointer Register */ +#define AT91_PDC_RCR 0x104 /* Receive Counter Register */ +#define AT91_PDC_TPR 0x108 /* Transmit Pointer Register */ +#define AT91_PDC_TCR 0x10c /* Transmit Counter Register */ +#define AT91_PDC_RNPR 0x110 /* Receive Next Pointer Register */ +#define AT91_PDC_RNCR 0x114 /* Receive Next Counter Register */ +#define AT91_PDC_TNPR 0x118 /* Transmit Next Pointer Register */ +#define AT91_PDC_TNCR 0x11c /* Transmit Next Counter Register */ + +#define AT91_PDC_PTCR 0x120 /* Transfer Control Register */ +#define AT91_PDC_RXTEN (1 << 0) /* Receiver Transfer Enable */ +#define AT91_PDC_RXTDIS (1 << 1) /* Receiver Transfer Disable */ +#define AT91_PDC_TXTEN (1 << 8) /* Transmitter Transfer Enable */ +#define AT91_PDC_TXTDIS (1 << 9) /* Transmitter Transfer Disable */ + +#define AT91_PDC_PTSR 0x124 /* Transfer Status Register */ + +#endif diff --git a/include/asm-avr32/arch-at32ap/at91rm9200_usart.h b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h new file mode 100644 index 000000000000..79f851e31b9c --- /dev/null +++ b/include/asm-avr32/arch-at32ap/at91rm9200_usart.h @@ -0,0 +1,123 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * USART registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef AT91RM9200_USART_H +#define AT91RM9200_USART_H + +#define AT91_US_CR 0x00 /* Control Register */ +#define AT91_US_RSTRX (1 << 2) /* Reset Receiver */ +#define AT91_US_RSTTX (1 << 3) /* Reset Transmitter */ +#define AT91_US_RXEN (1 << 4) /* Receiver Enable */ +#define AT91_US_RXDIS (1 << 5) /* Receiver Disable */ +#define AT91_US_TXEN (1 << 6) /* Transmitter Enable */ +#define AT91_US_TXDIS (1 << 7) /* Transmitter Disable */ +#define AT91_US_RSTSTA (1 << 8) /* Reset Status Bits */ +#define AT91_US_STTBRK (1 << 9) /* Start Break */ +#define AT91_US_STPBRK (1 << 10) /* Stop Break */ +#define AT91_US_STTTO (1 << 11) /* Start Time-out */ +#define AT91_US_SENDA (1 << 12) /* Send Address */ +#define AT91_US_RSTIT (1 << 13) /* Reset Iterations */ +#define AT91_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */ +#define AT91_US_RETTO (1 << 15) /* Rearm Time-out */ +#define AT91_US_DTREN (1 << 16) /* Data Terminal Ready Enable */ +#define AT91_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable */ +#define AT91_US_RTSEN (1 << 18) /* Request To Send Enable */ +#define AT91_US_RTSDIS (1 << 19) /* Request To Send Disable */ + +#define AT91_US_MR 0x04 /* Mode Register */ +#define AT91_US_USMODE (0xf << 0) /* Mode of the USART */ +#define AT91_US_USMODE_NORMAL 0 +#define AT91_US_USMODE_RS485 1 +#define AT91_US_USMODE_HWHS 2 +#define AT91_US_USMODE_MODEM 3 +#define AT91_US_USMODE_ISO7816_T0 4 +#define AT91_US_USMODE_ISO7816_T1 6 +#define AT91_US_USMODE_IRDA 8 +#define AT91_US_USCLKS (3 << 4) /* Clock Selection */ +#define AT91_US_CHRL (3 << 6) /* Character Length */ +#define AT91_US_CHRL_5 (0 << 6) +#define AT91_US_CHRL_6 (1 << 6) +#define AT91_US_CHRL_7 (2 << 6) +#define AT91_US_CHRL_8 (3 << 6) +#define AT91_US_SYNC (1 << 8) /* Synchronous Mode Select */ +#define AT91_US_PAR (7 << 9) /* Parity Type */ +#define AT91_US_PAR_EVEN (0 << 9) +#define 
AT91_US_PAR_ODD (1 << 9) +#define AT91_US_PAR_SPACE (2 << 9) +#define AT91_US_PAR_MARK (3 << 9) +#define AT91_US_PAR_NONE (4 << 9) +#define AT91_US_PAR_MULTI_DROP (6 << 9) +#define AT91_US_NBSTOP (3 << 12) /* Number of Stop Bits */ +#define AT91_US_NBSTOP_1 (0 << 12) +#define AT91_US_NBSTOP_1_5 (1 << 12) +#define AT91_US_NBSTOP_2 (2 << 12) +#define AT91_US_CHMODE (3 << 14) /* Channel Mode */ +#define AT91_US_CHMODE_NORMAL (0 << 14) +#define AT91_US_CHMODE_ECHO (1 << 14) +#define AT91_US_CHMODE_LOC_LOOP (2 << 14) +#define AT91_US_CHMODE_REM_LOOP (3 << 14) +#define AT91_US_MSBF (1 << 16) /* Bit Order */ +#define AT91_US_MODE9 (1 << 17) /* 9-bit Character Length */ +#define AT91_US_CLKO (1 << 18) /* Clock Output Select */ +#define AT91_US_OVER (1 << 19) /* Oversampling Mode */ +#define AT91_US_INACK (1 << 20) /* Inhibit Non Acknowledge */ +#define AT91_US_DSNACK (1 << 21) /* Disable Successive NACK */ +#define AT91_US_MAX_ITER (7 << 24) /* Max Iterations */ +#define AT91_US_FILTER (1 << 28) /* Infrared Receive Line Filter */ + +#define AT91_US_IER 0x08 /* Interrupt Enable Register */ +#define AT91_US_RXRDY (1 << 0) /* Receiver Ready */ +#define AT91_US_TXRDY (1 << 1) /* Transmitter Ready */ +#define AT91_US_RXBRK (1 << 2) /* Break Received / End of Break */ +#define AT91_US_ENDRX (1 << 3) /* End of Receiver Transfer */ +#define AT91_US_ENDTX (1 << 4) /* End of Transmitter Transfer */ +#define AT91_US_OVRE (1 << 5) /* Overrun Error */ +#define AT91_US_FRAME (1 << 6) /* Framing Error */ +#define AT91_US_PARE (1 << 7) /* Parity Error */ +#define AT91_US_TIMEOUT (1 << 8) /* Receiver Time-out */ +#define AT91_US_TXEMPTY (1 << 9) /* Transmitter Empty */ +#define AT91_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */ +#define AT91_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */ +#define AT91_US_RXBUFF (1 << 12) /* Reception Buffer Full */ +#define AT91_US_NACK (1 << 13) /* Non Acknowledge */ +#define AT91_US_RIIC (1 << 16) /* Ring Indicator Input Change */ +#define AT91_US_DSRIC (1 << 17) /* Data Set Ready Input Change */ +#define AT91_US_DCDIC (1 << 18) /* Data Carrier Detect Input Change */ +#define AT91_US_CTSIC (1 << 19) /* Clear to Send Input Change */ +#define AT91_US_RI (1 << 20) /* RI */ +#define AT91_US_DSR (1 << 21) /* DSR */ +#define AT91_US_DCD (1 << 22) /* DCD */ +#define AT91_US_CTS (1 << 23) /* CTS */ + +#define AT91_US_IDR 0x0c /* Interrupt Disable Register */ +#define AT91_US_IMR 0x10 /* Interrupt Mask Register */ +#define AT91_US_CSR 0x14 /* Channel Status Register */ +#define AT91_US_RHR 0x18 /* Receiver Holding Register */ +#define AT91_US_THR 0x1c /* Transmitter Holding Register */ + +#define AT91_US_BRGR 0x20 /* Baud Rate Generator Register */ +#define AT91_US_CD (0xffff << 0) /* Clock Divider */ + +#define AT91_US_RTOR 0x24 /* Receiver Time-out Register */ +#define AT91_US_TO (0xffff << 0) /* Time-out Value */ + +#define AT91_US_TTGR 0x28 /* Transmitter Timeguard Register */ +#define AT91_US_TG (0xff << 0) /* Timeguard Value */ + +#define AT91_US_FIDI 0x40 /* FI DI Ratio Register */ +#define AT91_US_NER 0x44 /* Number of Errors Register */ +#define AT91_US_IF 0x4c /* IrDA Filter Register */ + +#endif diff --git a/include/asm-avr32/arch-at32ap/board.h b/include/asm-avr32/arch-at32ap/board.h new file mode 100644 index 000000000000..39368e18ab20 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/board.h @@ -0,0 +1,35 @@ +/* + * Platform data definitions. 
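
The AT91_US_* fields above compose by OR; for example, a plain asynchronous 8N1 configuration selects normal mode, 8-bit characters, no parity and one stop bit. A sketch, where uart_writel() is a hypothetical MMIO accessor and the include path is assumed:

    #include <asm/arch/at91rm9200_usart.h>

    /* Hypothetical register accessor, for illustration only. */
    extern void uart_writel(unsigned int reg, unsigned long value);

    static void demo_usart_8n1(void)
    {
        unsigned long mr = AT91_US_USMODE_NORMAL
                         | AT91_US_CHRL_8
                         | AT91_US_PAR_NONE
                         | AT91_US_NBSTOP_1;

        uart_writel(AT91_US_MR, mr);
        uart_writel(AT91_US_CR, AT91_US_RXEN | AT91_US_TXEN);
    }
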
+ */ +#ifndef __ASM_ARCH_BOARD_H +#define __ASM_ARCH_BOARD_H + +#include <linux/types.h> + +/* Add basic devices: system manager, interrupt controller, portmuxes, etc. */ +void at32_add_system_devices(void); + +#define AT91_NR_UART 4 +extern struct platform_device *at91_default_console_device; + +struct platform_device *at32_add_device_usart(unsigned int id); + +struct eth_platform_data { + u8 valid; + u8 mii_phy_addr; + u8 is_rmii; + u8 hw_addr[6]; +}; +struct platform_device * +at32_add_device_eth(unsigned int id, struct eth_platform_data *data); + +struct platform_device *at32_add_device_spi(unsigned int id); + +struct lcdc_platform_data { + unsigned long fbmem_start; + unsigned long fbmem_size; +}; +struct platform_device * +at32_add_device_lcdc(unsigned int id, struct lcdc_platform_data *data); + +#endif /* __ASM_ARCH_BOARD_H */ diff --git a/include/asm-avr32/arch-at32ap/init.h b/include/asm-avr32/arch-at32ap/init.h new file mode 100644 index 000000000000..43722634e069 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/init.h @@ -0,0 +1,21 @@ +/* + * AT32AP platform initialization calls. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32AP_INIT_H__ +#define __ASM_AVR32_AT32AP_INIT_H__ + +void setup_platform(void); + +/* Called by setup_platform */ +void at32_clock_init(void); +void at32_portmux_init(void); + +void at32_setup_serial_console(unsigned int usart_id); + +#endif /* __ASM_AVR32_AT32AP_INIT_H__ */ diff --git a/include/asm-avr32/arch-at32ap/portmux.h b/include/asm-avr32/arch-at32ap/portmux.h new file mode 100644 index 000000000000..4d50421262a1 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/portmux.h @@ -0,0 +1,16 @@ +/* + * AT32 portmux interface. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_AT32_PORTMUX_H__ +#define __ASM_AVR32_AT32_PORTMUX_H__ + +void portmux_set_func(unsigned int portmux_id, unsigned int pin_id, + unsigned int function_id); + +#endif /* __ASM_AVR32_AT32_PORTMUX_H__ */ diff --git a/include/asm-avr32/arch-at32ap/sm.h b/include/asm-avr32/arch-at32ap/sm.h new file mode 100644 index 000000000000..265a9ead20bf --- /dev/null +++ b/include/asm-avr32/arch-at32ap/sm.h @@ -0,0 +1,27 @@ +/* + * AT32 System Manager interface. + * + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
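
These at32_add_device_*() hooks are intended for board-specific setup code, which instantiates only the peripherals a given board actually wires up. A hypothetical board file (device IDs and MAC address invented, include path assumed) might do:

    #include <linux/init.h>
    #include <asm/arch/board.h>

    static struct eth_platform_data demo_eth_data = {
        .valid        = 1,
        .mii_phy_addr = 0,
        .is_rmii      = 1,
        .hw_addr      = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
    };

    static int __init demo_board_init(void)
    {
        at32_add_system_devices();
        at32_add_device_usart(0);               /* console */
        at32_add_device_eth(0, &demo_eth_data);
        at32_add_device_spi(0);
        return 0;
    }
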
+ */ +#ifndef __ASM_AVR32_AT32_SM_H__ +#define __ASM_AVR32_AT32_SM_H__ + +struct irq_chip; +struct platform_device; + +struct at32_sm { + spinlock_t lock; + void __iomem *regs; + struct irq_chip *eim_chip; + unsigned int eim_first_irq; + struct platform_device *pdev; +}; + +extern struct platform_device at32_sm_device; +extern struct at32_sm system_manager; + +#endif /* __ASM_AVR32_AT32_SM_H__ */ diff --git a/include/asm-avr32/arch-at32ap/smc.h b/include/asm-avr32/arch-at32ap/smc.h new file mode 100644 index 000000000000..3732b328303d --- /dev/null +++ b/include/asm-avr32/arch-at32ap/smc.h @@ -0,0 +1,60 @@ +/* + * Static Memory Controller for AT32 chips + * + * Copyright (C) 2006 Atmel Corporation + * + * Inspired by the OMAP2 General-Purpose Memory Controller interface + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ARCH_AT32AP_SMC_H +#define __ARCH_AT32AP_SMC_H + +/* + * All timing parameters are in nanoseconds. + */ +struct smc_config { + /* Delay from address valid to assertion of given strobe */ + u16 ncs_read_setup; + u16 nrd_setup; + u16 ncs_write_setup; + u16 nwe_setup; + + /* Pulse length of given strobe */ + u16 ncs_read_pulse; + u16 nrd_pulse; + u16 ncs_write_pulse; + u16 nwe_pulse; + + /* Total cycle length of given operation */ + u16 read_cycle; + u16 write_cycle; + + /* Bus width in bytes */ + u8 bus_width; + + /* + * 0: Data is sampled on rising edge of NCS + * 1: Data is sampled on rising edge of NRD + */ + unsigned int nrd_controlled:1; + + /* + * 0: Data is driven on falling edge of NCS + * 1: Data is driven on falling edge of NWR + */ + unsigned int nwe_controlled:1; + + /* + * 0: Byte select access type + * 1: Byte write access type + */ + unsigned int byte_write:1; +}; + +extern int smc_set_configuration(int cs, const struct smc_config *config); +extern struct smc_config *smc_get_configuration(int cs); + +#endif /* __ARCH_AT32AP_SMC_H */ diff --git a/include/asm-avr32/asm.h b/include/asm-avr32/asm.h new file mode 100644 index 000000000000..515c7618952b --- /dev/null +++ b/include/asm-avr32/asm.h @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
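
All struct smc_config timings above are in nanoseconds; the driver converts them to bus-clock cycles internally. A board might describe a slow 16-bit peripheral on chip select 2 like this (every timing number below is invented for illustration, and the include path is assumed):

    #include <linux/init.h>
    #include <asm/arch/smc.h>

    static struct smc_config demo_smc = {
        .ncs_read_setup  = 0,
        .nrd_setup       = 40,
        .ncs_write_setup = 0,
        .nwe_setup       = 10,
        .ncs_read_pulse  = 80,
        .nrd_pulse       = 40,
        .ncs_write_pulse = 65,
        .nwe_pulse       = 55,
        .read_cycle      = 120,
        .write_cycle     = 120,
        .bus_width       = 2,   /* 16-bit bus */
        .nrd_controlled  = 1,   /* sample data on NRD */
        .nwe_controlled  = 1,   /* drive data on NWE */
        .byte_write      = 0,
    };

    static int __init demo_smc_init(void)
    {
        return smc_set_configuration(2, &demo_smc);
    }
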
+ */ +#ifndef __ASM_AVR32_ASM_H__ +#define __ASM_AVR32_ASM_H__ + +#include <asm/sysreg.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> + +#define mask_interrupts ssrf SR_GM_BIT +#define mask_exceptions ssrf SR_EM_BIT +#define unmask_interrupts csrf SR_GM_BIT +#define unmask_exceptions csrf SR_EM_BIT + +#ifdef CONFIG_FRAME_POINTER + .macro save_fp + st.w --sp, r7 + .endm + .macro restore_fp + ld.w r7, sp++ + .endm + .macro zero_fp + mov r7, 0 + .endm +#else + .macro save_fp + .endm + .macro restore_fp + .endm + .macro zero_fp + .endm +#endif + .macro get_thread_info reg + mov \reg, sp + andl \reg, ~(THREAD_SIZE - 1) & 0xffff + .endm + + /* Save and restore registers */ + .macro save_min sr, tmp=lr + pushm lr + mfsr \tmp, \sr + zero_fp + st.w --sp, \tmp + .endm + + .macro restore_min sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + popm lr + .endm + + .macro save_half sr, tmp=lr + save_fp + pushm r8-r9,r10,r11,r12,lr + zero_fp + mfsr \tmp, \sr + st.w --sp, \tmp + .endm + + .macro restore_half sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + popm r8-r9,r10,r11,r12,lr + restore_fp + .endm + + .macro save_full_user sr, tmp=lr + stmts --sp, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr + st.w --sp, lr + zero_fp + mfsr \tmp, \sr + st.w --sp, \tmp + .endm + + .macro restore_full_user sr, tmp=lr + ld.w \tmp, sp++ + mtsr \sr, \tmp + ld.w lr, sp++ + ldmts sp++, r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,sp,lr + .endm + + /* uaccess macros */ + .macro branch_if_kernel scratch, label + get_thread_info \scratch + ld.w \scratch, \scratch[TI_flags] + bld \scratch, TIF_USERSPACE + brcc \label + .endm + + .macro ret_if_privileged scratch, addr, size, ret + sub \scratch, \size, 1 + add \scratch, \addr + retcs \ret + retmi \ret + .endm + +#endif /* __ASM_AVR32_ASM_H__ */ diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h new file mode 100644 index 000000000000..e0b9c44c126c --- /dev/null +++ b/include/asm-avr32/atomic.h @@ -0,0 +1,201 @@ +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc. + * + * But use these as seldom as possible since they are slower than + * regular operations. + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_ATOMIC_H +#define __ASM_AVR32_ATOMIC_H + +#include <asm/system.h> + +typedef struct { volatile int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } + +#define atomic_read(v) ((v)->counter) +#define atomic_set(v, i) (((v)->counter) = i) + +/* + * atomic_sub_return - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Returns the resulting value. + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + int result; + + asm volatile( + "/* atomic_sub_return */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sub %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(i) + : "cc"); + + return result; +} + +/* + * atomic_add_return - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Returns the resulting value. 
+ */ +static inline int atomic_add_return(int i, atomic_t *v) +{ + int result; + + if (__builtin_constant_p(i)) + result = atomic_sub_return(-i, v); + else + asm volatile( + "/* atomic_add_return */\n" + "1: ssrf 5\n" + " ld.w %0, %1\n" + " add %0, %3\n" + " stcond %2, %0\n" + " brne 1b" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "r"(i) + : "cc", "memory"); + + return result; +} + +/* + * atomic_sub_unless - sub unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to subtract from v... + * @u: ...unless v is equal to u. + * + * If the atomic value v is not equal to u, this function subtracts a + * from v, and returns non-zero. If v is equal to u then it returns + * zero. This is done as an atomic operation. +*/ +static inline int atomic_sub_unless(atomic_t *v, int a, int u) +{ + int tmp, result = 0; + + asm volatile( + "/* atomic_sub_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %3\n" + " cp.w %0, %5\n" + " breq 1f\n" + " sub %0, %4\n" + " stcond %2, %0\n" + " brne 1b\n" + " mov %1, 1\n" + "1:" + : "=&r"(tmp), "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(a), "ir"(u) + : "cc", "memory"); + + return result; +} + +/* + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * If the atomic value v is not equal to u, this function adds a to v, + * and returns non-zero. If v is equal to u then it returns zero. This + * is done as an atomic operation. +*/ +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int tmp, result; + + if (__builtin_constant_p(a)) + result = atomic_sub_unless(v, -a, u); + else { + result = 0; + asm volatile( + "/* atomic_add_unless */\n" + "1: ssrf 5\n" + " ld.w %0, %3\n" + " cp.w %0, %5\n" + " breq 1f\n" + " add %0, %4\n" + " stcond %2, %0\n" + " brne 1b\n" + " mov %1, 1\n" + "1:" + : "=&r"(tmp), "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "r"(a), "ir"(u) + : "cc", "memory"); + } + + return result; +} + +/* + * atomic_sub_if_positive - conditionally subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically test @v and subtract @i if @v is greater than or equal to @i. + * The function returns the old value of @v minus @i. 
+ */ +static inline int atomic_sub_if_positive(int i, atomic_t *v) +{ + int result; + + asm volatile( + "/* atomic_sub_if_positive */\n" + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sub %0, %3\n" + " brlt 1f\n" + " stcond %1, %0\n" + " brne 1b\n" + "1:" + : "=&r"(result), "=o"(v->counter) + : "m"(v->counter), "ir"(i) + : "cc", "memory"); + + return result; +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) + +#define atomic_sub(i, v) (void)atomic_sub_return(i, v) +#define atomic_add(i, v) (void)atomic_add_return(i, v) +#define atomic_dec(v) atomic_sub(1, (v)) +#define atomic_inc(v) atomic_add(1, (v)) + +#define atomic_dec_return(v) atomic_sub_return(1, v) +#define atomic_inc_return(v) atomic_add_return(1, v) + +#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) +#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) +#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) + +#define atomic_inc_not_zero(v) atomic_add_unless(v, 1, 0) +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) + +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#include <asm-generic/atomic.h> + +#endif /* __ASM_AVR32_ATOMIC_H */ diff --git a/include/asm-avr32/auxvec.h b/include/asm-avr32/auxvec.h new file mode 100644 index 000000000000..d5dd435bf8f4 --- /dev/null +++ b/include/asm-avr32/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_AVR32_AUXVEC_H +#define __ASM_AVR32_AUXVEC_H + +#endif /* __ASM_AVR32_AUXVEC_H */ diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h new file mode 100644 index 000000000000..5299f8c8e11d --- /dev/null +++ b/include/asm-avr32/bitops.h @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_BITOPS_H +#define __ASM_AVR32_BITOPS_H + +#include <asm/byteorder.h> +#include <asm/system.h> + +/* + * clear_bit() doesn't provide any barrier for the compiler + */ +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/* + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. 
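
The derived macros above are more than shorthand: atomic_inc_not_zero(), built on atomic_add_unless(v, 1, 0), is exactly what lookup paths need so that they never resurrect an object whose last reference is already gone. A sketch with hypothetical demo_* names:

    #include <asm/atomic.h>

    struct demo_obj {
        atomic_t refcount;
        /* payload */
    };

    extern void demo_free(struct demo_obj *obj);    /* hypothetical destructor */

    /* Lookup-side get: refuses to take a reference on an object whose
     * count has already dropped to zero (i.e. one being torn down). */
    static int demo_get(struct demo_obj *obj)
    {
        return atomic_inc_not_zero(&obj->refcount);
    }

    static void demo_put(struct demo_obj *obj)
    {
        if (atomic_dec_and_test(&obj->refcount))
            demo_free(obj);
    }
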
+ */ +static inline void set_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long tmp; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " sbr %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "i"(nr) + : "cc"); + } else { + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " or %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); + } +} + +/* + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long tmp; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " cbr %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "i"(nr) + : "cc"); + } else { + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " andn %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); + } +} + +/* + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp; + + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %2\n" + " eor %0, %3\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p) + : "m"(*p), "r"(mask) + : "cc"); +} + +/* + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " sbr %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "i"(nr) + : "memory", "cc"); + } else { + asm volatile( + "1: ssrf 5\n" + " ld.w %2, %3\n" + " or %0, %2, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + } + + return (old & mask) != 0; +} + +/* + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
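
Because clear_bit() is explicitly unordered, the canonical single-bit lock built from these primitives needs the smp_mb__before_clear_bit() barrier on release, exactly as the comment above warns. A minimal sketch:

    #include <asm/bitops.h>

    #define DEMO_LOCK_BIT 0

    static unsigned long demo_flags;

    static int demo_trylock(void)
    {
        /* test_and_set_bit() implies a memory barrier, so no extra
         * ordering is needed on the acquire side. */
        return !test_and_set_bit(DEMO_LOCK_BIT, &demo_flags);
    }

    static void demo_unlock(void)
    {
        smp_mb__before_clear_bit();     /* order prior stores before release */
        clear_bit(DEMO_LOCK_BIT, &demo_flags);
    }
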
+ */ +static inline int test_and_clear_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + if (__builtin_constant_p(nr)) { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " cbr %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "i"(nr) + : "memory", "cc"); + } else { + asm volatile( + "1: ssrf 5\n" + " ld.w %0, %3\n" + " mov %2, %0\n" + " andn %0, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + } + + return (old & mask) != 0; +} + +/* + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(int nr, volatile void * addr) +{ + unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG; + unsigned long mask = 1UL << (nr % BITS_PER_LONG); + unsigned long tmp, old; + + asm volatile( + "1: ssrf 5\n" + " ld.w %2, %3\n" + " eor %0, %2, %4\n" + " stcond %1, %0\n" + " brne 1b" + : "=&r"(tmp), "=o"(*p), "=&r"(old) + : "m"(*p), "r"(mask) + : "memory", "cc"); + + return (old & mask) != 0; +} + +#include <asm-generic/bitops/non-atomic.h> + +/* Find First bit Set */ +static inline unsigned long __ffs(unsigned long word) +{ + unsigned long result; + + asm("brev %1\n\t" + "clz %0,%1" + : "=r"(result), "=&r"(word) + : "1"(word)); + return result; +} + +/* Find First Zero */ +static inline unsigned long ffz(unsigned long word) +{ + return __ffs(~word); +} + +/* Find Last bit Set */ +static inline int fls(unsigned long word) +{ + unsigned long result; + + asm("clz %0,%1" : "=r"(result) : "r"(word)); + return 32 - result; +} + +unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); +unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); +unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + +/* + * ffs: find first bit set. This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + * + * The difference is that bit numbering starts at 1, and if no bit is set, + * the function returns 0. + */ +static inline int ffs(unsigned long word) +{ + if(word == 0) + return 0; + return __ffs(word) + 1; +} + +#include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/hweight.h> + +#include <asm-generic/bitops/ext2-non-atomic.h> +#include <asm-generic/bitops/ext2-atomic.h> +#include <asm-generic/bitops/minix-le.h> + +#endif /* __ASM_AVR32_BITOPS_H */ diff --git a/include/asm-avr32/bug.h b/include/asm-avr32/bug.h new file mode 100644 index 000000000000..521766bc9366 --- /dev/null +++ b/include/asm-avr32/bug.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
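
Four differently-numbered bit-search helpers coexist above, which is easy to mix up; worked through for the sample value 0x90 (binary 1001 0000):

    /*
     *  __ffs(0x90) == 4   (0-based index of lowest set bit)
     *  ffs(0x90)   == 5   (1-based, libc convention; ffs(0) == 0)
     *  fls(0x90)   == 8   (1-based index of highest set bit; fls(0) == 0)
     *  ffz(0x90)   == 0   (0-based index of lowest clear bit)
     */
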
+ */ +#ifndef __ASM_AVR32_BUG_H +#define __ASM_AVR32_BUG_H + +#ifdef CONFIG_BUG + +/* + * According to our Chief Architect, this compact opcode is very + * unlikely to ever be implemented. + */ +#define AVR32_BUG_OPCODE 0x5df0 + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#define BUG() \ + do { \ + asm volatile(".hword %0\n\t" \ + ".hword %1\n\t" \ + ".long %2" \ + : \ + : "n"(AVR32_BUG_OPCODE), \ + "i"(__LINE__), "X"(__FILE__)); \ + } while (0) + +#else + +#define BUG() \ + do { \ + asm volatile(".hword %0\n\t" \ + : : "n"(AVR32_BUG_OPCODE)); \ + } while (0) + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#define HAVE_ARCH_BUG + +#endif /* CONFIG_BUG */ + +#include <asm-generic/bug.h> + +#endif /* __ASM_AVR32_BUG_H */ diff --git a/include/asm-avr32/bugs.h b/include/asm-avr32/bugs.h new file mode 100644 index 000000000000..7635e770622e --- /dev/null +++ b/include/asm-avr32/bugs.h @@ -0,0 +1,15 @@ +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ +#ifndef __ASM_AVR32_BUGS_H +#define __ASM_AVR32_BUGS_H + +static void __init check_bugs(void) +{ + cpu_data->loops_per_jiffy = loops_per_jiffy; +} + +#endif /* __ASM_AVR32_BUGS_H */ diff --git a/include/asm-avr32/byteorder.h b/include/asm-avr32/byteorder.h new file mode 100644 index 000000000000..402ff4125cdc --- /dev/null +++ b/include/asm-avr32/byteorder.h @@ -0,0 +1,25 @@ +/* + * AVR32 endian-conversion functions. + */ +#ifndef __ASM_AVR32_BYTEORDER_H +#define __ASM_AVR32_BYTEORDER_H + +#include <asm/types.h> +#include <linux/compiler.h> + +#ifdef __CHECKER__ +extern unsigned long __builtin_bswap_32(unsigned long x); +extern unsigned short __builtin_bswap_16(unsigned short x); +#endif + +#define __arch__swab32(x) __builtin_bswap_32(x) +#define __arch__swab16(x) __builtin_bswap_16(x) + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +# define __BYTEORDER_HAS_U64__ +# define __SWAB_64_THRU_32__ +#endif + +#include <linux/byteorder/big_endian.h> + +#endif /* __ASM_AVR32_BYTEORDER_H */ diff --git a/include/asm-avr32/cache.h b/include/asm-avr32/cache.h new file mode 100644 index 000000000000..dabb955f3c00 --- /dev/null +++ b/include/asm-avr32/cache.h @@ -0,0 +1,29 @@ +#ifndef __ASM_AVR32_CACHE_H +#define __ASM_AVR32_CACHE_H + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#ifndef __ASSEMBLER__ +struct cache_info { + unsigned int ways; + unsigned int sets; + unsigned int linesz; +}; +#endif /* __ASSEMBLER */ + +/* Cache operation constants */ +#define ICACHE_FLUSH 0x00 +#define ICACHE_INVALIDATE 0x01 +#define ICACHE_LOCK 0x02 +#define ICACHE_UNLOCK 0x03 +#define ICACHE_PREFETCH 0x04 + +#define DCACHE_FLUSH 0x08 +#define DCACHE_LOCK 0x09 +#define DCACHE_UNLOCK 0x0a +#define DCACHE_INVALIDATE 0x0b +#define DCACHE_CLEAN 0x0c +#define DCACHE_CLEAN_INVAL 0x0d + +#endif /* __ASM_AVR32_CACHE_H */ diff --git a/include/asm-avr32/cachectl.h b/include/asm-avr32/cachectl.h new file mode 100644 index 000000000000..4faf1ce60061 --- /dev/null +++ b/include/asm-avr32/cachectl.h @@ -0,0 +1,11 @@ +#ifndef __ASM_AVR32_CACHECTL_H +#define __ASM_AVR32_CACHECTL_H + +/* + * Operations that can be performed through the cacheflush system call + */ + +/* Clean the data cache, then invalidate the icache */ +#define CACHE_IFLUSH 0 + +#endif /* __ASM_AVR32_CACHECTL_H */ diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h new file mode 100644 index 000000000000..f1bf1708980e --- /dev/null +++ b/include/asm-avr32/cacheflush.h @@ -0,0 +1,129 @@ +/* + * 
Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_CACHEFLUSH_H +#define __ASM_AVR32_CACHEFLUSH_H + +/* Keep includes the same across arches. */ +#include <linux/mm.h> + +#define CACHE_OP_ICACHE_INVALIDATE 0x01 +#define CACHE_OP_DCACHE_INVALIDATE 0x0b +#define CACHE_OP_DCACHE_CLEAN 0x0c +#define CACHE_OP_DCACHE_CLEAN_INVAL 0x0d + +/* + * Invalidate any cacheline containing virtual address vaddr without + * writing anything back to memory. + * + * Note that this function may corrupt unrelated data structures when + * applied on buffers that are not cacheline aligned in both ends. + */ +static inline void invalidate_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_INVALIDATE) + : "memory"); +} + +/* + * Make sure any cacheline containing virtual address vaddr is written + * to memory. + */ +static inline void clean_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN) + : "memory"); +} + +/* + * Make sure any cacheline containing virtual address vaddr is written + * to memory and then invalidate it. + */ +static inline void flush_dcache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_DCACHE_CLEAN_INVAL) + : "memory"); +} + +/* + * Invalidate any instruction cacheline containing virtual address + * vaddr. + */ +static inline void invalidate_icache_line(void *vaddr) +{ + asm volatile("cache %0[0], %1" + : + : "r"(vaddr), "n"(CACHE_OP_ICACHE_INVALIDATE) + : "memory"); +} + +/* + * Applies the above functions on all lines that are touched by the + * specified virtual address range. + */ +void invalidate_dcache_region(void *start, size_t len); +void clean_dcache_region(void *start, size_t len); +void flush_dcache_region(void *start, size_t len); +void invalidate_icache_region(void *start, size_t len); + +/* + * Make sure any pending writes are completed before continuing. + */ +#define flush_write_buffer() asm volatile("sync 0" : : : "memory") + +/* + * The following functions are called when a virtual mapping changes. + * We do not need to flush anything in this case. + */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +/* + * I think we need to implement this one to be able to reliably + * execute pages from RAMDISK. However, if we implement the + * flush_dcache_*() functions, it might not be needed anymore. + * + * #define flush_icache_page(vma, page) do { } while (0) + */ +extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); + +/* + * These are (I think) related to D-cache aliasing. We might need to + * do something here, but only for certain configurations. No such + * configurations exist at this time. + */ +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(page) do { } while (0) +#define flush_dcache_mmap_unlock(page) do { } while (0) + +/* + * These are for I/D cache coherency. In this case, we do need to + * flush with all configurations. 
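
On a non-coherent CPU like this, the region helpers above are the building blocks for driver-level DMA: clean before the device reads memory, invalidate before the CPU reads what the device wrote, and keep buffers cacheline-aligned at both ends, since invalidation hits whole lines (see the warning on invalidate_dcache_line() above). A sketch around hypothetical device hooks:

    #include <linux/types.h>
    #include <asm/cacheflush.h>

    /* Hypothetical device start/wait hooks, for illustration. */
    extern void demo_dev_start_write(void *buf, size_t len);  /* RAM -> device */
    extern void demo_dev_start_read(void *buf, size_t len);   /* device -> RAM */
    extern void demo_dev_wait(void);

    static void demo_dma_to_device(void *buf, size_t len)
    {
        clean_dcache_region(buf, len);  /* push dirty lines to RAM */
        demo_dev_start_write(buf, len);
        demo_dev_wait();
    }

    static void demo_dma_from_device(void *buf, size_t len)
    {
        /* buf must be cacheline-aligned at both ends, see above */
        invalidate_dcache_region(buf, len);
        demo_dev_start_read(buf, len);
        demo_dev_wait();
        /* CPU reads now see the device's data, not stale cache lines */
    }
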
+ */ +extern void flush_icache_range(unsigned long start, unsigned long end); +extern void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, + unsigned long addr, int len); + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) do { \ + memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ +} while(0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* __ASM_AVR32_CACHEFLUSH_H */ diff --git a/include/asm-avr32/checksum.h b/include/asm-avr32/checksum.h new file mode 100644 index 000000000000..41b7af09edc4 --- /dev/null +++ b/include/asm-avr32/checksum.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_CHECKSUM_H +#define __ASM_AVR32_CHECKSUM_H + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +unsigned int csum_partial(const unsigned char * buff, int len, + unsigned int sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +unsigned int csum_partial_copy_generic(const char *src, char *dst, int len, + int sum, int *src_err_ptr, + int *dst_err_ptr); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * verify_area(). + */ +static inline +unsigned int csum_partial_copy_nocheck(const char *src, char *dst, + int len, int sum) +{ + return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); +} + +static inline +unsigned int csum_partial_copy_from_user (const char __user *src, char *dst, + int len, int sum, int *err_ptr) +{ + return csum_partial_copy_generic((const char __force *)src, dst, len, + sum, err_ptr, NULL); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
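
These primitives compose in the obvious way; for instance, validating a received IPv4 header reduces to checking that folding the header sum, checksum field included, yields zero. A sketch (assuming the usual struct iphdr layout):

    #include <linux/ip.h>
    #include <asm/checksum.h>

    /* An intact IPv4 header checksums to zero after folding. */
    static int demo_iph_ok(struct iphdr *iph)
    {
        if (iph->ihl < 5)
            return 0;
        return ip_fast_csum((unsigned char *)iph, iph->ihl) == 0;
    }
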
+ */ +static inline unsigned short ip_fast_csum(unsigned char *iph, + unsigned int ihl) +{ + unsigned int sum, tmp; + + __asm__ __volatile__( + " ld.w %0, %1++\n" + " ld.w %3, %1++\n" + " sub %2, 4\n" + " add %0, %3\n" + " ld.w %3, %1++\n" + " adc %0, %0, %3\n" + " ld.w %3, %1++\n" + " adc %0, %0, %3\n" + " acr %0\n" + "1: ld.w %3, %1++\n" + " add %0, %3\n" + " acr %0\n" + " sub %2, 1\n" + " brne 1b\n" + " lsl %3, %0, 16\n" + " andl %0, 0\n" + " mov %2, 0xffff\n" + " add %0, %3\n" + " adc %0, %0, %2\n" + " com %0\n" + " lsr %0, 16\n" + : "=r"(sum), "=r"(iph), "=r"(ihl), "=r"(tmp) + : "1"(iph), "2"(ihl) + : "memory", "cc"); + return sum; +} + +/* + * Fold a partial checksum + */ + +static inline unsigned int csum_fold(unsigned int sum) +{ + unsigned int tmp; + + asm(" bfextu %1, %0, 0, 16\n" + " lsr %0, 16\n" + " add %0, %1\n" + " bfextu %1, %0, 16, 16\n" + " add %0, %1" + : "=&r"(sum), "=&r"(tmp) + : "0"(sum)); + + return ~sum; +} + +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + asm(" add %0, %1\n" + " adc %0, %0, %2\n" + " adc %0, %0, %3\n" + " acr %0" + : "=r"(sum) + : "r"(daddr), "r"(saddr), "r"(ntohs(len) | (proto << 16)), + "0"(sum) + : "cc"); + + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#endif /* __ASM_AVR32_CHECKSUM_H */ diff --git a/include/asm-avr32/cputime.h b/include/asm-avr32/cputime.h new file mode 100644 index 000000000000..e87e0f81cbeb --- /dev/null +++ b/include/asm-avr32/cputime.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_CPUTIME_H +#define __ASM_AVR32_CPUTIME_H + +#include <asm-generic/cputime.h> + +#endif /* __ASM_AVR32_CPUTIME_H */ diff --git a/include/asm-avr32/current.h b/include/asm-avr32/current.h new file mode 100644 index 000000000000..c7b0549eab8a --- /dev/null +++ b/include/asm-avr32/current.h @@ -0,0 +1,15 @@ +#ifndef __ASM_AVR32_CURRENT_H +#define __ASM_AVR32_CURRENT_H + +#include <linux/thread_info.h> + +struct task_struct; + +inline static struct task_struct * get_current(void) +{ + return current_thread_info()->task; +} + +#define current get_current() + +#endif /* __ASM_AVR32_CURRENT_H */ diff --git a/include/asm-avr32/delay.h b/include/asm-avr32/delay.h new file mode 100644 index 000000000000..cc3b2e3343b3 --- /dev/null +++ b/include/asm-avr32/delay.h @@ -0,0 +1,26 @@ +#ifndef __ASM_AVR32_DELAY_H +#define __ASM_AVR32_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/avr32/lib/delay.c + */ + +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +extern void __udelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); +extern void __const_udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ + __udelay(n)) + +#define ndelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? 
__bad_ndelay() : __const_udelay((n) * 5ul)) : \ + __ndelay(n)) + +#endif /* __ASM_AVR32_DELAY_H */ diff --git a/include/asm-avr32/div64.h b/include/asm-avr32/div64.h new file mode 100644 index 000000000000..d7ddd4fdeca6 --- /dev/null +++ b/include/asm-avr32/div64.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_DIV64_H +#define __ASM_AVR32_DIV64_H + +#include <asm-generic/div64.h> + +#endif /* __ASM_AVR32_DIV64_H */ diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h new file mode 100644 index 000000000000..4c40cb41cdf8 --- /dev/null +++ b/include/asm-avr32/dma-mapping.h @@ -0,0 +1,320 @@ +#ifndef __ASM_AVR32_DMA_MAPPING_H +#define __ASM_AVR32_DMA_MAPPING_H + +#include <linux/mm.h> +#include <linux/device.h> +#include <asm/scatterlist.h> +#include <asm/processor.h> +#include <asm/cacheflush.h> +#include <asm/io.h> + +extern void dma_cache_sync(void *vaddr, size_t size, int direction); + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask + * to this function. + */ +static inline int dma_supported(struct device *dev, u64 mask) +{ + /* Fix when needed. I really don't know of any limitations */ + return 1; +} + +static inline int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + return 0; +} + +/** + * dma_alloc_coherent - allocate consistent memory for DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: required memory size + * @handle: bus-specific DMA address + * + * Allocate some uncached, unbuffered memory for a device for + * performing DMA. This function allocates pages, and will + * return the CPU-viewed address, and sets @handle to be the + * device-viewed address. + */ +extern void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); + +/** + * dma_free_coherent - free memory allocated by dma_alloc_coherent + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: size of memory originally requested in dma_alloc_coherent + * @cpu_addr: CPU-view address returned from dma_alloc_coherent + * @handle: device-view address returned from dma_alloc_coherent + * + * Free (and unmap) a DMA buffer previously allocated by + * dma_alloc_coherent(). + * + * References to memory and mappings associated with cpu_addr/handle + * during and after this call executing are illegal. + */ +extern void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle); + +/** + * dma_alloc_writecombine - allocate write-combining memory for DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @size: required memory size + * @handle: bus-specific DMA address + * + * Allocate some uncached, buffered memory for a device for + * performing DMA. This function allocates pages, and will + * return the CPU-viewed address, and sets @handle to be the + * device-viewed address. 
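+ *
+ * A minimal usage sketch; dev, size and the error path are assumed
+ * caller context, not defined by this header:
+ *
+ *	dma_addr_t dma;
+ *	void *cpu = dma_alloc_writecombine(dev, size, &dma, GFP_KERNEL);
+ *	if (!cpu)
+ *		return -ENOMEM;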
+ */
+extern void *dma_alloc_writecombine(struct device *dev, size_t size,
+				    dma_addr_t *handle, gfp_t gfp);
+
+/**
+ * dma_free_writecombine - free memory allocated by dma_alloc_writecombine
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @size: size of memory originally requested in dma_alloc_writecombine
+ * @cpu_addr: CPU-view address returned from dma_alloc_writecombine
+ * @handle: device-view address returned from dma_alloc_writecombine
+ *
+ * Free (and unmap) a DMA buffer previously allocated by
+ * dma_alloc_writecombine().
+ *
+ * References to memory and mappings associated with cpu_addr/handle
+ * during and after this call executing are illegal.
+ */
+extern void dma_free_writecombine(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t handle);
+
+/**
+ * dma_map_single - map a single buffer for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @cpu_addr: CPU direct mapped address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed. The CPU
+ * can regain ownership by calling dma_unmap_single() or dma_sync_single().
+ */
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+	       enum dma_data_direction direction)
+{
+	dma_cache_sync(cpu_addr, size, direction);
+	return virt_to_bus(cpu_addr);
+}
+
+/**
+ * dma_unmap_single - unmap a single buffer previously mapped
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Unmap a single streaming mode DMA translation. The handle and size
+ * must match what was provided in the previous dma_map_single() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+
+}
+
+/**
+ * dma_map_page - map a portion of a page for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed. The CPU
+ * can regain ownership by calling dma_unmap_page() or dma_sync_single().
+ */
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size,
+	     enum dma_data_direction direction)
+{
+	return dma_map_single(dev, page_address(page) + offset,
+			      size, direction);
+}
+
+/**
+ * dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Unmap a single streaming mode DMA translation. The handle and size
+ * must match what was provided in the previous dma_map_page() call.
+ * All other usages are undefined.
+ * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static inline void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + dma_unmap_single(dev, dma_address, size, direction); +} + +/** + * dma_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + char *virt; + + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + virt = page_address(sg[i].page) + sg[i].offset; + dma_cache_sync(virt, sg[i].length, direction); + } + + return nents; +} + +/** + * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Unmap a set of streaming mode DMA translations. + * Again, CPU read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +static inline void +dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + +} + +/** + * dma_sync_single_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Make physical memory consistent for a single streaming mode DMA + * translation after a transfer. + * + * If you perform a dma_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the DMA mapping, + * you must call this function before doing so. At the next point you + * give the DMA address back to the card, you must first perform a + * dma_sync_single_for_device, and then the device again owns the + * buffer. 
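+ *
+ * The pattern described above, sketched with assumed caller context
+ * (dev, buf, len):
+ *
+ *	dma_addr_t dma;
+ *
+ *	dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
+ *	... device fills the buffer ...
+ *	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
+ *	... CPU may now safely read buf ...
+ *	dma_sync_single_for_device(dev, dma, len, DMA_FROM_DEVICE);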
+ */ +static inline void +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_cache_sync(bus_to_virt(dma_handle), size, direction); +} + +static inline void +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + dma_cache_sync(bus_to_virt(dma_handle), size, direction); +} + +/** + * dma_sync_sg_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Make physical memory consistent for a set of streaming + * mode DMA translations after a transfer. + * + * The same as dma_sync_single_for_* but for a scatter-gather list, + * same rules and usage. + */ +static inline void +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + dma_cache_sync(page_address(sg[i].page) + sg[i].offset, + sg[i].length, direction); + } +} + +static inline void +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++) { + dma_cache_sync(page_address(sg[i].page) + sg[i].offset, + sg[i].length, direction); + } +} + +/* Now for the API extensions over the pci_ one */ + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +static inline int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +static inline int dma_get_cache_alignment(void) +{ + return boot_cpu_data.dcache.linesz; +} + +#endif /* __ASM_AVR32_DMA_MAPPING_H */ diff --git a/include/asm-avr32/dma.h b/include/asm-avr32/dma.h new file mode 100644 index 000000000000..9e91205590ac --- /dev/null +++ b/include/asm-avr32/dma.h @@ -0,0 +1,8 @@ +#ifndef __ASM_AVR32_DMA_H +#define __ASM_AVR32_DMA_H + +/* The maximum address that we can perform a DMA transfer to on this platform. + * Not really applicable to AVR32, but some functions need it. 
+ */
+#define MAX_DMA_ADDRESS		0xffffffff
+
+#endif /* __ASM_AVR32_DMA_H */
diff --git a/include/asm-avr32/elf.h b/include/asm-avr32/elf.h
new file mode 100644
index 000000000000..d334b4994d2d
--- /dev/null
+++ b/include/asm-avr32/elf.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_AVR32_ELF_H
+#define __ASM_AVR32_ELF_H
+
+/* AVR32 relocation numbers */
+#define R_AVR32_NONE		0
+#define R_AVR32_32		1
+#define R_AVR32_16		2
+#define R_AVR32_8		3
+#define R_AVR32_32_PCREL	4
+#define R_AVR32_16_PCREL	5
+#define R_AVR32_8_PCREL		6
+#define R_AVR32_DIFF32		7
+#define R_AVR32_DIFF16		8
+#define R_AVR32_DIFF8		9
+#define R_AVR32_GOT32		10
+#define R_AVR32_GOT16		11
+#define R_AVR32_GOT8		12
+#define R_AVR32_21S		13
+#define R_AVR32_16U		14
+#define R_AVR32_16S		15
+#define R_AVR32_8S		16
+#define R_AVR32_8S_EXT		17
+#define R_AVR32_22H_PCREL	18
+#define R_AVR32_18W_PCREL	19
+#define R_AVR32_16B_PCREL	20
+#define R_AVR32_16N_PCREL	21
+#define R_AVR32_14UW_PCREL	22
+#define R_AVR32_11H_PCREL	23
+#define R_AVR32_10UW_PCREL	24
+#define R_AVR32_9H_PCREL	25
+#define R_AVR32_9UW_PCREL	26
+#define R_AVR32_HI16		27
+#define R_AVR32_LO16		28
+#define R_AVR32_GOTPC		29
+#define R_AVR32_GOTCALL		30
+#define R_AVR32_LDA_GOT		31
+#define R_AVR32_GOT21S		32
+#define R_AVR32_GOT18SW		33
+#define R_AVR32_GOT16S		34
+#define R_AVR32_GOT7UW		35
+#define R_AVR32_32_CPENT	36
+#define R_AVR32_CPCALL		37
+#define R_AVR32_16_CP		38
+#define R_AVR32_9W_CP		39
+#define R_AVR32_RELATIVE	40
+#define R_AVR32_GLOB_DAT	41
+#define R_AVR32_JMP_SLOT	42
+#define R_AVR32_ALIGN		43
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct pt_regs) / sizeof (elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_fpu_struct elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ( (x)->e_machine == EM_AVR32 )
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#ifdef __LITTLE_ENDIAN__
+#define ELF_DATA	ELFDATA2LSB
+#else
+#define ELF_DATA	ELFDATA2MSB
+#endif
+#define ELF_ARCH	EM_AVR32
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader. We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk. */
+
+#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports. This could be done in user space,
+   but it's not easy, and we've already done it here. */
+
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization. This is more specific in
+   intent than poking at uname or /proc/cpuinfo.
+
+   For the moment, no AVR32-specific optimized libraries exist, so
+   this is left unset, but that could change...
*/ + +#define ELF_PLATFORM (NULL) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +#endif + +#endif /* __ASM_AVR32_ELF_H */ diff --git a/include/asm-avr32/emergency-restart.h b/include/asm-avr32/emergency-restart.h new file mode 100644 index 000000000000..3e7e014776ba --- /dev/null +++ b/include/asm-avr32/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_EMERGENCY_RESTART_H +#define __ASM_AVR32_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */ diff --git a/include/asm-avr32/errno.h b/include/asm-avr32/errno.h new file mode 100644 index 000000000000..558a7249f06d --- /dev/null +++ b/include/asm-avr32/errno.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_ERRNO_H +#define __ASM_AVR32_ERRNO_H + +#include <asm-generic/errno.h> + +#endif /* __ASM_AVR32_ERRNO_H */ diff --git a/include/asm-avr32/fcntl.h b/include/asm-avr32/fcntl.h new file mode 100644 index 000000000000..14c0c4402b11 --- /dev/null +++ b/include/asm-avr32/fcntl.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_FCNTL_H +#define __ASM_AVR32_FCNTL_H + +#include <asm-generic/fcntl.h> + +#endif /* __ASM_AVR32_FCNTL_H */ diff --git a/include/asm-avr32/futex.h b/include/asm-avr32/futex.h new file mode 100644 index 000000000000..10419f14a68a --- /dev/null +++ b/include/asm-avr32/futex.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_FUTEX_H +#define __ASM_AVR32_FUTEX_H + +#include <asm-generic/futex.h> + +#endif /* __ASM_AVR32_FUTEX_H */ diff --git a/include/asm-avr32/hardirq.h b/include/asm-avr32/hardirq.h new file mode 100644 index 000000000000..267354356f60 --- /dev/null +++ b/include/asm-avr32/hardirq.h @@ -0,0 +1,34 @@ +#ifndef __ASM_AVR32_HARDIRQ_H +#define __ASM_AVR32_HARDIRQ_H + +#include <linux/threads.h> +#include <asm/irq.h> + +#ifndef __ASSEMBLY__ + +#include <linux/cache.h> + +/* entry.S is sensitive to the offsets of these fields */ +typedef struct { + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +void ack_bad_irq(unsigned int irq); + +/* Standard mappings for irq_cpustat_t above */ +#include <linux/irq_cpustat.h> + +#endif /* __ASSEMBLY__ */ + +#define HARDIRQ_BITS 12 + +/* + * The hardirq mask has to be large enough to have + * space for potentially all IRQ sources in the system + * nesting on a single CPU: + */ +#if (1 << HARDIRQ_BITS) < NR_IRQS +# error HARDIRQ_BITS is too low! +#endif + +#endif /* __ASM_AVR32_HARDIRQ_H */ diff --git a/include/asm-avr32/hw_irq.h b/include/asm-avr32/hw_irq.h new file mode 100644 index 000000000000..218b0a6bfd1b --- /dev/null +++ b/include/asm-avr32/hw_irq.h @@ -0,0 +1,9 @@ +#ifndef __ASM_AVR32_HW_IRQ_H +#define __ASM_AVR32_HW_IRQ_H + +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) +{ + /* Nothing to do */ +} + +#endif /* __ASM_AVR32_HW_IRQ_H */ diff --git a/include/asm-avr32/intc.h b/include/asm-avr32/intc.h new file mode 100644 index 000000000000..1ac9ca75e8fd --- /dev/null +++ b/include/asm-avr32/intc.h @@ -0,0 +1,128 @@ +#ifndef __ASM_AVR32_INTC_H +#define __ASM_AVR32_INTC_H + +#include <linux/sysdev.h> +#include <linux/interrupt.h> + +struct irq_controller; +struct irqaction; +struct pt_regs; + +struct platform_device; + +/* Information about the internal interrupt controller */ +struct intc_device { + /* ioremapped address of configuration block */ + void __iomem *regs; + + /* the physical device */ + struct platform_device *pdev; + + /* Number of interrupt lines per group. 
+	 */
+	unsigned int		irqs_per_group;
+
+	/* The highest group ID + 1 */
+	unsigned int		nr_groups;
+
+	/*
+	 * Bitfield indicating which groups are actually in use. The
+	 * size of the array is
+	 * ceil(group_max / (8 * sizeof(unsigned int))).
+	 */
+	unsigned int		group_mask[];
+};
+
+struct irq_controller_class {
+	/*
+	 * A short name identifying this kind of controller.
+	 */
+	const char *typename;
+	/*
+	 * Handle the IRQ. Must do any necessary acking and masking.
+	 */
+	irqreturn_t (*handle)(int irq, void *dev_id, struct pt_regs *regs);
+	/*
+	 * Register a new IRQ handler.
+	 */
+	int (*setup)(struct irq_controller *ctrl, unsigned int irq,
+		     struct irqaction *action);
+	/*
+	 * Unregister an IRQ handler.
+	 */
+	void (*free)(struct irq_controller *ctrl, unsigned int irq,
+		     void *dev_id);
+	/*
+	 * Mask the IRQ in the interrupt controller.
+	 */
+	void (*mask)(struct irq_controller *ctrl, unsigned int irq);
+	/*
+	 * Unmask the IRQ in the interrupt controller.
+	 */
+	void (*unmask)(struct irq_controller *ctrl, unsigned int irq);
+	/*
+	 * Set the type of the IRQ. See below for possible types.
+	 * Return -EINVAL if a given type is not supported.
+	 */
+	int (*set_type)(struct irq_controller *ctrl, unsigned int irq,
+			unsigned int type);
+	/*
+	 * Return the IRQ type currently set.
+	 */
+	unsigned int (*get_type)(struct irq_controller *ctrl, unsigned int irq);
+};
+
+struct irq_controller {
+	struct irq_controller_class *class;
+	unsigned int irq_group;
+	unsigned int first_irq;
+	unsigned int nr_irqs;
+	struct list_head list;
+};
+
+struct intc_group_desc {
+	struct irq_controller *ctrl;
+	irqreturn_t (*handle)(int, void *, struct pt_regs *);
+	unsigned long flags;
+	void *dev_id;
+	const char *devname;
+};
+
+/*
+ * The internal interrupt controller. Defined in board/part-specific
+ * devices.c.
+ * TODO: Should probably be defined per-cpu.
+ */
+extern struct intc_device intc;
+
+extern int request_internal_irq(unsigned int irq,
+				irqreturn_t (*handler)(int, void *, struct pt_regs *),
+				unsigned long irqflags,
+				const char *devname, void *dev_id);
+extern void free_internal_irq(unsigned int irq);
+
+/* Only used by time_init() */
+extern int setup_internal_irq(unsigned int irq, struct intc_group_desc *desc);
+
+/*
+ * Set interrupt priority for a given group. `group' can be found by
+ * using irq_to_group(irq). Priority can be from 0 (lowest) to 3
+ * (highest). Higher-priority interrupts will preempt lower-priority
+ * interrupts (unless interrupts are masked globally).
+ *
+ * This function does not check for conflicts within a group.
+ */
+extern int intc_set_priority(unsigned int group,
+			     unsigned int priority);
+
+/*
+ * Returns a bitmask of pending interrupts in a group.
+ */
+extern unsigned long intc_get_pending(unsigned int group);
+
+/*
+ * Register a new external interrupt controller. Returns the first
+ * external IRQ number that is assigned to the new controller.
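+ *
+ * Hypothetical example; the controller instance and its class are
+ * made up for illustration:
+ *
+ *	static struct irq_controller eic = {
+ *		.class   = &eic_class,
+ *		.nr_irqs = 32,
+ *	};
+ *	int first_irq = intc_register_controller(&eic);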
+ */ +extern int intc_register_controller(struct irq_controller *ctrl); + +#endif /* __ASM_AVR32_INTC_H */ diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h new file mode 100644 index 000000000000..2fc8f111dce9 --- /dev/null +++ b/include/asm-avr32/io.h @@ -0,0 +1,253 @@ +#ifndef __ASM_AVR32_IO_H +#define __ASM_AVR32_IO_H + +#include <linux/string.h> + +#ifdef __KERNEL__ + +#include <asm/addrspace.h> +#include <asm/byteorder.h> + +/* virt_to_phys will only work when address is in P1 or P2 */ +static __inline__ unsigned long virt_to_phys(volatile void *address) +{ + return PHYSADDR(address); +} + +static __inline__ void * phys_to_virt(unsigned long address) +{ + return (void *)P1SEGADDR(address); +} + +#define cached_to_phys(addr) ((unsigned long)PHYSADDR(addr)) +#define uncached_to_phys(addr) ((unsigned long)PHYSADDR(addr)) +#define phys_to_cached(addr) ((void *)P1SEGADDR(addr)) +#define phys_to_uncached(addr) ((void *)P2SEGADDR(addr)) + +/* + * Generic IO read/write. These perform native-endian accesses. Note + * that some architectures will want to re-define __raw_{read,write}w. + */ +extern void __raw_writesb(unsigned int addr, const void *data, int bytelen); +extern void __raw_writesw(unsigned int addr, const void *data, int wordlen); +extern void __raw_writesl(unsigned int addr, const void *data, int longlen); + +extern void __raw_readsb(unsigned int addr, void *data, int bytelen); +extern void __raw_readsw(unsigned int addr, void *data, int wordlen); +extern void __raw_readsl(unsigned int addr, void *data, int longlen); + +static inline void writeb(unsigned char b, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *)addr = b; +} +static inline void writew(unsigned short b, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *)addr = b; +} +static inline void writel(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *)addr = b; +} +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +static inline unsigned char readb(const volatile void __iomem *addr) +{ + return *(const volatile unsigned char __force *)addr; +} +static inline unsigned short readw(const volatile void __iomem *addr) +{ + return *(const volatile unsigned short __force *)addr; +} +static inline unsigned int readl(const volatile void __iomem *addr) +{ + return *(const volatile unsigned int __force *)addr; +} +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writesb(p, d, l) __raw_writesb((unsigned int)p, d, l) +#define writesw(p, d, l) __raw_writesw((unsigned int)p, d, l) +#define writesl(p, d, l) __raw_writesl((unsigned int)p, d, l) + +#define readsb(p, d, l) __raw_readsb((unsigned int)p, d, l) +#define readsw(p, d, l) __raw_readsw((unsigned int)p, d, l) +#define readsl(p, d, l) __raw_readsl((unsigned int)p, d, l) + +/* + * These two are only here because ALSA _thinks_ it needs them... + */ +static inline void memcpy_fromio(void * to, const volatile void __iomem *from, + unsigned long count) +{ + char *p = to; + while (count) { + count--; + *p = readb(from); + p++; + from++; + } +} + +static inline void memcpy_toio(volatile void __iomem *to, const void * from, + unsigned long count) +{ + const char *p = from; + while (count) { + count--; + writeb(*p, to); + p++; + to++; + } +} + +static inline void memset_io(volatile void __iomem *addr, unsigned char val, + unsigned long count) +{ + memset((void __force *)addr, val, count); +} + +/* + * Bad read/write accesses... 
+ */ +extern void __readwrite_bug(const char *fn); + +#define IO_SPACE_LIMIT 0xffffffff + +/* Convert I/O port address to virtual address */ +#define __io(p) ((void __iomem *)phys_to_uncached(p)) + +/* + * IO port access primitives + * ------------------------- + * + * The AVR32 doesn't have special IO access instructions; all IO is memory + * mapped. Note that these are defined to perform little endian accesses + * only. Their primary purpose is to access PCI and ISA peripherals. + * + * Note that for a big endian machine, this implies that the following + * big endian mode connectivity is in place. + * + * The machine specific io.h include defines __io to translate an "IO" + * address to a memory address. + * + * Note that we prevent GCC re-ordering or caching values in expressions + * by introducing sequence points into the in*() definitions. Note that + * __raw_* do not guarantee this behaviour. + * + * The {in,out}[bwl] macros are for emulating x86-style PCI/ISA IO space. + */ +#define outb(v, p) __raw_writeb(v, __io(p)) +#define outw(v, p) __raw_writew(cpu_to_le16(v), __io(p)) +#define outl(v, p) __raw_writel(cpu_to_le32(v), __io(p)) + +#define inb(p) __raw_readb(__io(p)) +#define inw(p) le16_to_cpu(__raw_readw(__io(p))) +#define inl(p) le32_to_cpu(__raw_readl(__io(p))) + +static inline void __outsb(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outb(*(u8 *)addr, port); + addr++; + } +} + +static inline void __insb(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u8 *)addr = inb(port); + addr++; + } +} + +static inline void __outsw(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outw(*(u16 *)addr, port); + addr += 2; + } +} + +static inline void __insw(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u16 *)addr = inw(port); + addr += 2; + } +} + +static inline void __outsl(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + outl(*(u32 *)addr, port); + addr += 4; + } +} + +static inline void __insl(unsigned long port, void *addr, unsigned int count) +{ + while (count--) { + *(u32 *)addr = inl(port); + addr += 4; + } +} + +#define outsb(port, addr, count) __outsb(port, addr, count) +#define insb(port, addr, count) __insb(port, addr, count) +#define outsw(port, addr, count) __outsw(port, addr, count) +#define insw(port, addr, count) __insw(port, addr, count) +#define outsl(port, addr, count) __outsl(port, addr, count) +#define insl(port, addr, count) __insl(port, addr, count) + +extern void __iomem *__ioremap(unsigned long offset, size_t size, + unsigned long flags); +extern void __iounmap(void __iomem *addr); + +/* + * ioremap - map bus memory into CPU space + * @offset bus address of the memory + * @size size of the resource to map + * + * ioremap performs a platform specific sequence of operations to make + * bus memory CPU accessible via the readb/.../writel functions and + * the other mmio helpers. The returned address is not guaranteed to + * be usable directly as a virtual address. 
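+ *
+ * Typical use, sketched with made-up resource and register values:
+ *
+ *	void __iomem *regs = ioremap(res_start, 0x100);
+ *	if (!regs)
+ *		return -ENOMEM;
+ *	writel(0, regs + REG_CTRL);
+ *	...
+ *	iounmap(regs);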
+ */ +#define ioremap(offset, size) \ + __ioremap((offset), (size), 0) + +#define iounmap(addr) \ + __iounmap(addr) + +#define cached(addr) P1SEGADDR(addr) +#define uncached(addr) P2SEGADDR(addr) + +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt +#define page_to_bus page_to_phys +#define bus_to_page phys_to_page + +#define dma_cache_wback_inv(_start, _size) \ + flush_dcache_region(_start, _size) +#define dma_cache_inv(_start, _size) \ + invalidate_dcache_region(_start, _size) +#define dma_cache_wback(_start, _size) \ + clean_dcache_region(_start, _size) + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_IO_H */ diff --git a/include/asm-avr32/ioctl.h b/include/asm-avr32/ioctl.h new file mode 100644 index 000000000000..c8472c1398ef --- /dev/null +++ b/include/asm-avr32/ioctl.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_IOCTL_H +#define __ASM_AVR32_IOCTL_H + +#include <asm-generic/ioctl.h> + +#endif /* __ASM_AVR32_IOCTL_H */ diff --git a/include/asm-avr32/ioctls.h b/include/asm-avr32/ioctls.h new file mode 100644 index 000000000000..0500426b7186 --- /dev/null +++ b/include/asm-avr32/ioctls.h @@ -0,0 +1,83 @@ +#ifndef __ASM_AVR32_IOCTLS_H +#define __ASM_AVR32_IOCTLS_H + +#include <asm/ioctl.h> + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt 
counts */
+#define TIOCGHAYESESP	0x545E  /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP	0x545F  /* Set Hayes ESP configuration */
+#define FIOQSIZE	0x5460
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+
+#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
+
+#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/include/asm-avr32/ipcbuf.h b/include/asm-avr32/ipcbuf.h
new file mode 100644
index 000000000000..1552c9698f5e
--- /dev/null
+++ b/include/asm-avr32/ipcbuf.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_AVR32_IPCBUF_H
+#define __ASM_AVR32_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for the AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
+	__kernel_mode_t		mode;
+	unsigned short		__pad1;
+	unsigned short		seq;
+	unsigned short		__pad2;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+#endif /* __ASM_AVR32_IPCBUF_H */
diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h
new file mode 100644
index 000000000000..f7e725707dd7
--- /dev/null
+++ b/include/asm-avr32/irq.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_AVR32_IRQ_H
+#define __ASM_AVR32_IRQ_H
+
+#define NR_INTERNAL_IRQS	64
+#define NR_EXTERNAL_IRQS	64
+#define NR_IRQS			(NR_INTERNAL_IRQS + NR_EXTERNAL_IRQS)
+
+#define irq_canonicalize(i)	(i)
+
+#endif /* __ASM_AVR32_IRQ_H */
diff --git a/include/asm-avr32/irqflags.h b/include/asm-avr32/irqflags.h
new file mode 100644
index 000000000000..93570daac38a
--- /dev/null
+++ b/include/asm-avr32/irqflags.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_IRQFLAGS_H
+#define __ASM_AVR32_IRQFLAGS_H
+
+#include <asm/sysreg.h>
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+	return sysreg_read(SR);
+}
+
+#define raw_local_save_flags(x)					\
+	do { (x) = __raw_local_save_flags(); } while (0)
+
+/*
+ * This will restore ALL status register flags, not only the interrupt
+ * mask flag.
+ *
+ * The empty asm statement informs the compiler of this fact while
+ * also serving as a barrier.
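+ *
+ * The usual save/restore pairing, for illustration:
+ *
+ *	unsigned long flags;
+ *
+ *	raw_local_irq_save(flags);
+ *	... critical section ...
+ *	raw_local_irq_restore(flags);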
+ */ +static inline void raw_local_irq_restore(unsigned long flags) +{ + sysreg_write(SR, flags); + asm volatile("" : : : "memory", "cc"); +} + +static inline void raw_local_irq_disable(void) +{ + asm volatile("ssrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory"); +} + +static inline void raw_local_irq_enable(void) +{ + asm volatile("csrf %0" : : "n"(SYSREG_GM_OFFSET) : "memory"); +} + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return (flags & SYSREG_BIT(GM)) != 0; +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +#endif /* __ASM_AVR32_IRQFLAGS_H */ diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h new file mode 100644 index 000000000000..f583b643ffb2 --- /dev/null +++ b/include/asm-avr32/kdebug.h @@ -0,0 +1,38 @@ +#ifndef __ASM_AVR32_KDEBUG_H +#define __ASM_AVR32_KDEBUG_H + +#include <linux/notifier.h> + +struct pt_regs; + +struct die_args { + struct pt_regs *regs; + int trapnr; +}; + +int register_die_notifier(struct notifier_block *nb); +int unregister_die_notifier(struct notifier_block *nb); +int register_page_fault_notifier(struct notifier_block *nb); +int unregister_page_fault_notifier(struct notifier_block *nb); +extern struct atomic_notifier_head avr32_die_chain; + +/* Grossly misnamed. */ +enum die_val { + DIE_FAULT, + DIE_BREAKPOINT, + DIE_SSTEP, + DIE_PAGE_FAULT, +}; + +static inline int notify_die(enum die_val val, struct pt_regs *regs, + int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .trapnr = trap, + }; + + return atomic_notifier_call_chain(&avr32_die_chain, val, &args); +} + +#endif /* __ASM_AVR32_KDEBUG_H */ diff --git a/include/asm-avr32/kmap_types.h b/include/asm-avr32/kmap_types.h new file mode 100644 index 000000000000..b7f5c6870107 --- /dev/null +++ b/include/asm-avr32/kmap_types.h @@ -0,0 +1,30 @@ +#ifndef __ASM_AVR32_KMAP_TYPES_H +#define __ASM_AVR32_KMAP_TYPES_H + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_PTE2, +D(10) KM_IRQ0, +D(11) KM_IRQ1, +D(12) KM_SOFTIRQ0, +D(13) KM_SOFTIRQ1, +D(14) KM_TYPE_NR +}; + +#undef D + +#endif /* __ASM_AVR32_KMAP_TYPES_H */ diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h new file mode 100644 index 000000000000..09a5cbe2f896 --- /dev/null +++ b/include/asm-avr32/kprobes.h @@ -0,0 +1,34 @@ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2005-2006 Atmel Corporation + * Copyright (C) IBM Corporation, 2002, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_KPROBES_H +#define __ASM_AVR32_KPROBES_H + +#include <linux/types.h> + +typedef u16 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */ +#define MAX_INSN_SIZE 2 + +#define ARCH_INACTIVE_KPROBE_COUNT 1 + +#define arch_remove_kprobe(p) do { } while (0) + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t insn[MAX_INSN_SIZE]; +}; + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#define flush_insn_slot(p) do { } while (0) + +#endif /* __ASM_AVR32_KPROBES_H */ diff --git a/include/asm-avr32/linkage.h b/include/asm-avr32/linkage.h new file mode 100644 index 000000000000..f7b285e910d4 --- /dev/null +++ b/include/asm-avr32/linkage.h @@ -0,0 +1,7 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .balign 2 +#define __ALIGN_STR ".balign 2" + +#endif /* __ASM_LINKAGE_H */ diff --git a/include/asm-avr32/local.h b/include/asm-avr32/local.h new file mode 100644 index 000000000000..1c1619694da3 --- /dev/null +++ b/include/asm-avr32/local.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_LOCAL_H +#define __ASM_AVR32_LOCAL_H + +#include <asm-generic/local.h> + +#endif /* __ASM_AVR32_LOCAL_H */ diff --git a/include/asm-avr32/mach/serial_at91.h b/include/asm-avr32/mach/serial_at91.h new file mode 100644 index 000000000000..1290bb32802d --- /dev/null +++ b/include/asm-avr32/mach/serial_at91.h @@ -0,0 +1,33 @@ +/* + * linux/include/asm-arm/mach/serial_at91.h + * + * Based on serial_sa1100.h by Nicolas Pitre + * + * Copyright (C) 2002 ATMEL Rousset + * + * Low level machine dependent UART functions. + */ + +struct uart_port; + +/* + * This is a temporary structure for registering these + * functions; it is intended to be discarded after boot. 
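+ *
+ * A board might fill in only the hooks it cares about, e.g. (the
+ * board_* functions are hypothetical):
+ *
+ *	static struct at91_port_fns board_fns = {
+ *		.set_mctrl = board_set_mctrl,
+ *		.get_mctrl = board_get_mctrl,
+ *	};
+ *	at91_register_uart_fns(&board_fns);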
+ */ +struct at91_port_fns { + void (*set_mctrl)(struct uart_port *, u_int); + u_int (*get_mctrl)(struct uart_port *); + void (*enable_ms)(struct uart_port *); + void (*pm)(struct uart_port *, u_int, u_int); + int (*set_wake)(struct uart_port *, u_int); + int (*open)(struct uart_port *); + void (*close)(struct uart_port *); +}; + +#if defined(CONFIG_SERIAL_AT91) +void at91_register_uart_fns(struct at91_port_fns *fns); +#else +#define at91_register_uart_fns(fns) do { } while (0) +#endif + + diff --git a/include/asm-avr32/mman.h b/include/asm-avr32/mman.h new file mode 100644 index 000000000000..648f91e7187a --- /dev/null +++ b/include/asm-avr32/mman.h @@ -0,0 +1,17 @@ +#ifndef __ASM_AVR32_MMAN_H__ +#define __ASM_AVR32_MMAN_H__ + +#include <asm-generic/mman.h> + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* __ASM_AVR32_MMAN_H__ */ diff --git a/include/asm-avr32/mmu.h b/include/asm-avr32/mmu.h new file mode 100644 index 000000000000..60c2d2650d32 --- /dev/null +++ b/include/asm-avr32/mmu.h @@ -0,0 +1,10 @@ +#ifndef __ASM_AVR32_MMU_H +#define __ASM_AVR32_MMU_H + +/* Default "unsigned long" context */ +typedef unsigned long mm_context_t; + +#define MMU_ITLB_ENTRIES 64 +#define MMU_DTLB_ENTRIES 64 + +#endif /* __ASM_AVR32_MMU_H */ diff --git a/include/asm-avr32/mmu_context.h b/include/asm-avr32/mmu_context.h new file mode 100644 index 000000000000..31add1ae8089 --- /dev/null +++ b/include/asm-avr32/mmu_context.h @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * ASID handling taken from SH implementation. + * Copyright (C) 1999 Niibe Yutaka + * Copyright (C) 2003 Paul Mundt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_MMU_CONTEXT_H +#define __ASM_AVR32_MMU_CONTEXT_H + +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> +#include <asm/sysreg.h> + +/* + * The MMU "context" consists of two things: + * (a) TLB cache version + * (b) ASID (Address Space IDentifier) + */ +#define MMU_CONTEXT_ASID_MASK 0x000000ff +#define MMU_CONTEXT_VERSION_MASK 0xffffff00 +#define MMU_CONTEXT_FIRST_VERSION 0x00000100 +#define NO_CONTEXT 0 + +#define MMU_NO_ASID 0x100 + +/* Virtual Page Number mask */ +#define MMU_VPN_MASK 0xfffff000 + +/* Cache of MMU context last used */ +extern unsigned long mmu_context_cache; + +/* + * Get MMU context if needed + */ +static inline void +get_mmu_context(struct mm_struct *mm) +{ + unsigned long mc = mmu_context_cache; + + if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0) + /* It's up to date, do nothing */ + return; + + /* It's old, we need to get new context with new version */ + mc = ++mmu_context_cache; + if (!(mc & MMU_CONTEXT_ASID_MASK)) { + /* + * We have exhausted all ASIDs of this version. + * Flush the TLB and start new cycle. + */ + flush_tlb_all(); + /* + * Fix version. Note that we avoid version #0 + * to distinguish NO_CONTEXT. 
+		 */
+		if (!mc)
+			mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION;
+	}
+	mm->context = mc;
+}
+
+/*
+ * Initialize the context related info for a new mm_struct
+ * instance.
+ */
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	mm->context = NO_CONTEXT;
+	return 0;
+}
+
+/*
+ * Destroy context related info for an mm_struct that is about
+ * to be put to rest.
+ */
+static inline void destroy_context(struct mm_struct *mm)
+{
+	/* Do nothing */
+}
+
+static inline void set_asid(unsigned long asid)
+{
+	/* XXX: We're destroying TLBEHI[8:31] */
+	sysreg_write(TLBEHI, asid & MMU_CONTEXT_ASID_MASK);
+	cpu_sync_pipeline();
+}
+
+static inline unsigned long get_asid(void)
+{
+	unsigned long asid;
+
+	asid = sysreg_read(TLBEHI);
+	return asid & MMU_CONTEXT_ASID_MASK;
+}
+
+static inline void activate_context(struct mm_struct *mm)
+{
+	get_mmu_context(mm);
+	set_asid(mm->context & MMU_CONTEXT_ASID_MASK);
+}
+
+static inline void switch_mm(struct mm_struct *prev,
+			     struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	if (likely(prev != next)) {
+		unsigned long __pgdir = (unsigned long)next->pgd;
+
+		sysreg_write(PTBR, __pgdir);
+		activate_context(next);
+	}
+}
+
+#define deactivate_mm(tsk,mm) do { } while(0)
+
+#define activate_mm(prev, next) switch_mm((prev), (next), NULL)
+
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+
+static inline void enable_mmu(void)
+{
+	sysreg_write(MMUCR, (SYSREG_BIT(MMUCR_S)
+			     | SYSREG_BIT(E)
+			     | SYSREG_BIT(MMUCR_I)));
+	nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop();
+
+	if (mmu_context_cache == NO_CONTEXT)
+		mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
+
+	set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK);
+}
+
+static inline void disable_mmu(void)
+{
+	sysreg_write(MMUCR, SYSREG_BIT(MMUCR_S));
+}
+
+#endif /* __ASM_AVR32_MMU_CONTEXT_H */
diff --git a/include/asm-avr32/module.h b/include/asm-avr32/module.h
new file mode 100644
index 000000000000..451444538a1b
--- /dev/null
+++ b/include/asm-avr32/module.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_AVR32_MODULE_H
+#define __ASM_AVR32_MODULE_H
+
+struct mod_arch_syminfo {
+	unsigned long got_offset;
+	int got_initialized;
+};
+
+struct mod_arch_specific {
+	/* Starting offset of got in the module core memory. */
+	unsigned long got_offset;
+	/* Size of the got. */
+	unsigned long got_size;
+	/* Number of symbols in syminfo. */
+	int nsyms;
+	/* Additional symbol information (got offsets). */
+	struct mod_arch_syminfo *syminfo;
+};
+
+#define Elf_Shdr		Elf32_Shdr
+#define Elf_Sym			Elf32_Sym
+#define Elf_Ehdr		Elf32_Ehdr
+
+#define MODULE_PROC_FAMILY "AVR32v1"
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
+
+#endif /* __ASM_AVR32_MODULE_H */
diff --git a/include/asm-avr32/msgbuf.h b/include/asm-avr32/msgbuf.h
new file mode 100644
index 000000000000..ac18bc4da7f7
--- /dev/null
+++ b/include/asm-avr32/msgbuf.h
@@ -0,0 +1,31 @@
+#ifndef __ASM_AVR32_MSGBUF_H
+#define __ASM_AVR32_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for the AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + unsigned long __unused1; + __kernel_time_t msg_rtime; /* last msgrcv time */ + unsigned long __unused2; + __kernel_time_t msg_ctime; /* last change time */ + unsigned long __unused3; + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* __ASM_AVR32_MSGBUF_H */ diff --git a/include/asm-avr32/mutex.h b/include/asm-avr32/mutex.h new file mode 100644 index 000000000000..458c1f7fbc18 --- /dev/null +++ b/include/asm-avr32/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. + * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include <asm-generic/mutex-dec.h> diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h new file mode 100644 index 000000000000..f0a26de06cab --- /dev/null +++ b/include/asm-avr32/namei.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AVR32_NAMEI_H +#define __ASM_AVR32_NAMEI_H + +/* This dummy routine may be changed to something useful */ +#define __emul_prefix() NULL + +#endif /* __ASM_AVR32_NAMEI_H */ diff --git a/include/asm-avr32/numnodes.h b/include/asm-avr32/numnodes.h new file mode 100644 index 000000000000..0b864d7ce330 --- /dev/null +++ b/include/asm-avr32/numnodes.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AVR32_NUMNODES_H +#define __ASM_AVR32_NUMNODES_H + +/* Max 4 nodes */ +#define NODES_SHIFT 2 + +#endif /* __ASM_AVR32_NUMNODES_H */ diff --git a/include/asm-avr32/ocd.h b/include/asm-avr32/ocd.h new file mode 100644 index 000000000000..46f73180a127 --- /dev/null +++ b/include/asm-avr32/ocd.h @@ -0,0 +1,78 @@ +/* + * AVR32 OCD Registers + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_OCD_H +#define __ASM_AVR32_OCD_H + +/* Debug Registers */ +#define DBGREG_DID 0 +#define DBGREG_DC 8 +#define DBGREG_DS 16 +#define DBGREG_RWCS 28 +#define DBGREG_RWA 36 +#define DBGREG_RWD 40 +#define DBGREG_WT 44 +#define DBGREG_DTC 52 +#define DBGREG_DTSA0 56 +#define DBGREG_DTSA1 60 +#define DBGREG_DTEA0 72 +#define DBGREG_DTEA1 76 +#define DBGREG_BWC0A 88 +#define DBGREG_BWC0B 92 +#define DBGREG_BWC1A 96 +#define DBGREG_BWC1B 100 +#define DBGREG_BWC2A 104 +#define DBGREG_BWC2B 108 +#define DBGREG_BWC3A 112 +#define DBGREG_BWC3B 116 +#define DBGREG_BWA0A 120 +#define DBGREG_BWA0B 124 +#define DBGREG_BWA1A 128 +#define DBGREG_BWA1B 132 +#define DBGREG_BWA2A 136 +#define DBGREG_BWA2B 140 +#define DBGREG_BWA3A 144 +#define DBGREG_BWA3B 148 +#define DBGREG_BWD3A 153 +#define DBGREG_BWD3B 156 + +#define DBGREG_PID 284 + +#define SABAH_OCD 0x01 +#define SABAH_ICACHE 0x02 +#define SABAH_MEM_CACHED 0x04 +#define SABAH_MEM_UNCACHED 0x05 + +/* Fields in the Development Control register */ +#define DC_SS_BIT 8 + +#define DC_SS (1 << DC_SS_BIT) +#define DC_DBE (1 << 13) +#define DC_RID (1 << 27) +#define DC_ORP (1 << 28) +#define DC_MM (1 << 29) +#define DC_RES (1 << 30) + +/* Fields in the Development Status register */ +#define DS_SSS (1 << 0) +#define DS_SWB (1 << 1) +#define DS_HWB (1 << 2) +#define DS_BP_SHIFT 8 +#define DS_BP_MASK (0xff << DS_BP_SHIFT) + +#define __mfdr(addr) \ +({ \ + register unsigned long value; \ + asm volatile("mfdr %0, %1" : "=r"(value) : "i"(addr)); \ + value; \ +}) +#define __mtdr(addr, value) \ + asm volatile("mtdr %0, %1" : : "i"(addr), "r"(value)) + +#endif /* __ASM_AVR32_OCD_H */ diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h new file mode 100644 index 000000000000..0f630b3e9932 --- /dev/null +++ b/include/asm-avr32/page.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PAGE_H +#define __ASM_AVR32_PAGE_H + +#ifdef __KERNEL__ + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#ifdef __ASSEMBLY__ +#define PAGE_SIZE (1 << PAGE_SHIFT) +#else +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PTE_MASK PAGE_MASK + +#ifndef __ASSEMBLY__ + +#include <asm/addrspace.h> + +extern void clear_page(void *to); +extern void copy_page(void *to, void *from); + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. 
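+ *
+ * e.g. a raw value must be wrapped and unwrapped explicitly, so pte,
+ * pgd and pgprot values cannot be mixed up silently (illustrative,
+ * raw_bits assumed):
+ *
+ *	pte_t pte = __pte(raw_bits);
+ *	unsigned long raw = pte_val(pte);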
+ */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) + +/* FIXME: These should be removed soon */ +extern unsigned long memory_start, memory_end; + +/* Pure 2^n version of get_order */ +static inline int get_order(unsigned long size) +{ + unsigned lz; + + size = (size - 1) >> PAGE_SHIFT; + asm("clz %0, %1" : "=r"(lz) : "r"(size)); + return 32 - lz; +} + +#endif /* !__ASSEMBLY__ */ + +/* Align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) + +/* + * The hardware maps the virtual addresses 0x80000000 -> 0x9fffffff + * permanently to the physical addresses 0x00000000 -> 0x1fffffff when + * segmentation is enabled. We want to make use of this in order to + * minimize TLB pressure. + */ +#define PAGE_OFFSET (0x80000000UL) + +/* + * ALSA uses virt_to_page() on DMA pages, which I'm not entirely sure + * is a good idea. Anyway, we can't simply subtract PAGE_OFFSET here + * in that case, so we'll have to mask out the three most significant + * bits of the address instead... + * + * What's the difference between __pa() and virt_to_phys() anyway? + */ +#define __pa(x) PHYSADDR(x) +#define __va(x) ((void *)(P1SEGADDR(x))) + +#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT) + +#define phys_to_page(phys) (pfn_to_page(phys >> PAGE_SHIFT)) +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) + +#ifndef CONFIG_NEED_MULTIPLE_NODES + +#define PHYS_PFN_OFFSET (CONFIG_PHYS_OFFSET >> PAGE_SHIFT) + +#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET) +#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr)) +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +/* + * Memory above this physical address will be considered highmem. 
+ */ +#define HIGHMEM_START 0x20000000UL + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_PAGE_H */ diff --git a/include/asm-avr32/param.h b/include/asm-avr32/param.h new file mode 100644 index 000000000000..34bc8d4c3b29 --- /dev/null +++ b/include/asm-avr32/param.h @@ -0,0 +1,23 @@ +#ifndef __ASM_AVR32_PARAM_H +#define __ASM_AVR32_PARAM_H + +#ifdef __KERNEL__ +# define HZ CONFIG_HZ +# define USER_HZ 100 /* User interfaces are in "ticks" */ +# define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */ +#endif + +#ifndef HZ +# define HZ 100 +#endif + +/* TODO: Should be configurable */ +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +# define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 + +#endif /* __ASM_AVR32_PARAM_H */ diff --git a/include/asm-avr32/pci.h b/include/asm-avr32/pci.h new file mode 100644 index 000000000000..0f5f134b896a --- /dev/null +++ b/include/asm-avr32/pci.h @@ -0,0 +1,8 @@ +#ifndef __ASM_AVR32_PCI_H__ +#define __ASM_AVR32_PCI_H__ + +/* We don't support PCI yet, but some drivers require this file anyway */ + +#define PCI_DMA_BUS_IS_PHYS (1) + +#endif /* __ASM_AVR32_PCI_H__ */ diff --git a/include/asm-avr32/percpu.h b/include/asm-avr32/percpu.h new file mode 100644 index 000000000000..69227b4cd0d4 --- /dev/null +++ b/include/asm-avr32/percpu.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_PERCPU_H +#define __ASM_AVR32_PERCPU_H + +#include <asm-generic/percpu.h> + +#endif /* __ASM_AVR32_PERCPU_H */ diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h new file mode 100644 index 000000000000..7492cfb92ced --- /dev/null +++ b/include/asm-avr32/pgalloc.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_PGALLOC_H +#define __ASM_AVR32_PGALLOC_H + +#include <asm/processor.h> +#include <linux/threads.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) + +static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + struct page *pte) +{ + set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte))); +} + +/* + * Allocate and free page tables + */ +static __inline__ pgd_t *pgd_alloc(struct mm_struct *mm) +{ + unsigned int pgd_size = (USER_PTRS_PER_PGD * sizeof(pgd_t)); + pgd_t *pgd = (pgd_t *)kmalloc(pgd_size, GFP_KERNEL); + + if (pgd) + memset(pgd, 0, pgd_size); + + return pgd; +} + +static inline void pgd_free(pgd_t *pgd) +{ + kfree(pgd); +} + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + int count = 0; + pte_t *pte; + + do { + pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + if (pte) + clear_page(pte); + else { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } + } while (!pte && (count++ < 10)); + + return pte; +} + +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + int count = 0; + struct page *pte; + + do { + pte = alloc_pages(GFP_KERNEL, 0); + if (pte) + clear_page(page_address(pte)); + else { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ); + } + } while (!pte && (count++ < 10)); + + return pte; +} + +static inline void pte_free_kernel(pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline void pte_free(struct page *pte) +{ + __free_page(pte); +} + +#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) + +#define check_pgt_cache() do { } while(0) + +#endif /* __ASM_AVR32_PGALLOC_H */ diff --git a/include/asm-avr32/pgtable-2level.h b/include/asm-avr32/pgtable-2level.h new file mode 100644 index 000000000000..425dd567b5b9 --- /dev/null +++ b/include/asm-avr32/pgtable-2level.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ASM_AVR32_PGTABLE_2LEVEL_H + +#include <asm-generic/pgtable-nopmd.h> + +/* + * Traditional 2-level paging structure + */ +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +#define PTRS_PER_PTE 1024 + +#ifndef __ASSEMBLY__ +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) (*(pteptr) = pteval) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep, pteval) + +/* + * (pmds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) 
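+ *
+ * Generic code can thus still write, with pfn and prot assumed
+ * (illustrative only):
+ *
+ *	set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));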
+ */ +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) + +#define pte_pfn(x) ((unsigned long)(((x).pte >> PAGE_SHIFT))) +#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PGTABLE_2LEVEL_H */ diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h new file mode 100644 index 000000000000..6b8ca9db2bd5 --- /dev/null +++ b/include/asm-avr32/pgtable.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PGTABLE_H +#define __ASM_AVR32_PGTABLE_H + +#include <asm/addrspace.h> + +#ifndef __ASSEMBLY__ +#include <linux/sched.h> + +#endif /* !__ASSEMBLY__ */ + +/* + * Use two-level page tables just as the i386 (without PAE) + */ +#include <asm/pgtable-2level.h> + +/* + * The following code might need some cleanup when the values are + * final... + */ +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) +#define FIRST_USER_ADDRESS 0 + +#define PTE_PHYS_MASK 0x1ffff000 + +#ifndef __ASSEMBLY__ +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void paging_init(void); + +/* + * ZERO_PAGE is a global shared page that is always zero: used for + * zero-mapped memory areas etc. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8 MiB value just means that there will be a 8 MiB "hole" + * after the uncached physical memory (P2 segment) until the vmalloc + * area starts. That means that any out-of-bounds memory accesses will + * hopefully be caught; we don't know if the end of the P1/P2 segments + * are actually used for anything, but it is anyway safer to let the + * MMU catch these kinds of errors than to rely on the memory bus. + * + * A "hole" of the same size is added to the end of the P3 segment as + * well. It might seem wasteful to use 16 MiB of virtual address space + * on this, but we do have 512 MiB of it... + * + * The vmalloc() routines leave a hole of 4 KiB between each vmalloced + * area for the same reason. + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) +#define VMALLOC_START (P3SEG + VMALLOC_OFFSET) +#define VMALLOC_END (P4SEG - VMALLOC_OFFSET) +#endif /* !__ASSEMBLY__ */ + +/* + * Page flags. Some of these flags are not directly supported by + * hardware, so we have to emulate them. 
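+ *
+ * Illustrative note (an addition, not from the original patch):
+ * _PAGE_PRESENT and _PAGE_ACCESSED below are examples of such
+ * emulated flags; the hardware ignores them, and kernel code
+ * maintains and tests them via the accessors defined further down,
+ * e.g.
+ *
+ *	pte = pte_mkyoung(pte);			set ACCESSED in software
+ *	if (pte_present(pte) && pte_young(pte))
+ *		...	the page is mapped and was recently referenced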
+ */
+#define _TLBEHI_BIT_VALID 9
+#define _TLBEHI_VALID (1 << _TLBEHI_BIT_VALID)
+
+#define _PAGE_BIT_WT 0 /* W-bit : write-through */
+#define _PAGE_BIT_DIRTY 1 /* D-bit : page changed */
+#define _PAGE_BIT_SZ0 2 /* SZ0-bit : Size of page */
+#define _PAGE_BIT_SZ1 3 /* SZ1-bit : Size of page */
+#define _PAGE_BIT_EXECUTE 4 /* X-bit : execute access allowed */
+#define _PAGE_BIT_RW 5 /* AP0-bit : write access allowed */
+#define _PAGE_BIT_USER 6 /* AP1-bit : user space access allowed */
+#define _PAGE_BIT_BUFFER 7 /* B-bit : bufferable */
+#define _PAGE_BIT_GLOBAL 8 /* G-bit : global (ignore ASID) */
+#define _PAGE_BIT_CACHABLE 9 /* C-bit : cachable */
+
+/* If we drop support for 1K pages, we get two extra bits */
+#define _PAGE_BIT_PRESENT 10
+#define _PAGE_BIT_ACCESSED 11 /* software: page was accessed */
+
+/* The following flags are only valid when !PRESENT */
+#define _PAGE_BIT_FILE 0 /* software: pagecache or swap? */
+
+#define _PAGE_WT (1 << _PAGE_BIT_WT)
+#define _PAGE_DIRTY (1 << _PAGE_BIT_DIRTY)
+#define _PAGE_EXECUTE (1 << _PAGE_BIT_EXECUTE)
+#define _PAGE_RW (1 << _PAGE_BIT_RW)
+#define _PAGE_USER (1 << _PAGE_BIT_USER)
+#define _PAGE_BUFFER (1 << _PAGE_BIT_BUFFER)
+#define _PAGE_GLOBAL (1 << _PAGE_BIT_GLOBAL)
+#define _PAGE_CACHABLE (1 << _PAGE_BIT_CACHABLE)
+
+/* Software flags */
+#define _PAGE_ACCESSED (1 << _PAGE_BIT_ACCESSED)
+#define _PAGE_PRESENT (1 << _PAGE_BIT_PRESENT)
+#define _PAGE_FILE (1 << _PAGE_BIT_FILE)
+
+/*
+ * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is
+ * usually called _PAGE_PROTNONE on other architectures.
+ *
+ * XXX: Find out if _PAGE_PROTNONE is equivalent to !_PAGE_USER. If
+ * so, we can encode all possible page sizes (although we can't really
+ * support 1K pages anyway due to the _PAGE_PRESENT and _PAGE_ACCESSED
+ * bits)
+ */
+#define _PAGE_TYPE_MASK ((1 << _PAGE_BIT_SZ0) | (1 << _PAGE_BIT_SZ1))
+#define _PAGE_TYPE_NONE (0 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_SMALL (1 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_MEDIUM (2 << _PAGE_BIT_SZ0)
+#define _PAGE_TYPE_LARGE (3 << _PAGE_BIT_SZ0)
+
+/*
+ * Mask which drops software flags. We currently can't handle more than
+ * 512 MiB of physical memory, so we can use bits 29-31 for other
+ * stuff.
With a fixed 4K page size, we can use bits 10-11 as well as + * bits 2-3 (SZ) + */ +#define _PAGE_FLAGS_HARDWARE_MASK 0xfffff3ff + +#define _PAGE_FLAGS_CACHE_MASK (_PAGE_CACHABLE | _PAGE_BUFFER | _PAGE_WT) + +/* TODO: Check for saneness */ +/* User-mode page table flags (to be set in a pgd or pmd entry) */ +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \ + | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +/* Kernel-mode page table flags */ +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_TYPE_SMALL | _PAGE_RW \ + | _PAGE_ACCESSED | _PAGE_DIRTY) +/* Flags that may be modified by software */ +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY \ + | _PAGE_FLAGS_CACHE_MASK) + +#define _PAGE_FLAGS_READ (_PAGE_CACHABLE | _PAGE_BUFFER) +#define _PAGE_FLAGS_WRITE (_PAGE_FLAGS_READ | _PAGE_RW | _PAGE_DIRTY) + +#define _PAGE_NORMAL(x) __pgprot((x) | _PAGE_PRESENT | _PAGE_TYPE_SMALL \ + | _PAGE_ACCESSED) + +#define PAGE_NONE (_PAGE_ACCESSED | _PAGE_TYPE_NONE) +#define PAGE_READ (_PAGE_FLAGS_READ | _PAGE_USER) +#define PAGE_EXEC (_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_USER) +#define PAGE_WRITE (_PAGE_FLAGS_WRITE | _PAGE_USER) +#define PAGE_KERNEL _PAGE_NORMAL(_PAGE_FLAGS_WRITE | _PAGE_EXECUTE | _PAGE_GLOBAL) +#define PAGE_KERNEL_RO _PAGE_NORMAL(_PAGE_FLAGS_READ | _PAGE_EXECUTE | _PAGE_GLOBAL) + +#define _PAGE_P(x) _PAGE_NORMAL((x) & ~(_PAGE_RW | _PAGE_DIRTY)) +#define _PAGE_S(x) _PAGE_NORMAL(x) + +#define PAGE_COPY _PAGE_P(PAGE_WRITE | PAGE_READ) + +#ifndef __ASSEMBLY__ +/* + * The hardware supports flags for write- and execute access. Read is + * always allowed if the page is loaded into the TLB, so the "-w-", + * "--x" and "-wx" mappings are implemented as "rw-", "r-x" and "rwx", + * respectively. + * + * The "---" case is handled by software; the page will simply not be + * loaded into the TLB if the page type is _PAGE_TYPE_NONE. + */ + +#define __P000 __pgprot(PAGE_NONE) +#define __P001 _PAGE_P(PAGE_READ) +#define __P010 _PAGE_P(PAGE_WRITE) +#define __P011 _PAGE_P(PAGE_WRITE | PAGE_READ) +#define __P100 _PAGE_P(PAGE_EXEC) +#define __P101 _PAGE_P(PAGE_EXEC | PAGE_READ) +#define __P110 _PAGE_P(PAGE_EXEC | PAGE_WRITE) +#define __P111 _PAGE_P(PAGE_EXEC | PAGE_WRITE | PAGE_READ) + +#define __S000 __pgprot(PAGE_NONE) +#define __S001 _PAGE_S(PAGE_READ) +#define __S010 _PAGE_S(PAGE_WRITE) +#define __S011 _PAGE_S(PAGE_WRITE | PAGE_READ) +#define __S100 _PAGE_S(PAGE_EXEC) +#define __S101 _PAGE_S(PAGE_EXEC | PAGE_READ) +#define __S110 _PAGE_S(PAGE_EXEC | PAGE_WRITE) +#define __S111 _PAGE_S(PAGE_EXEC | PAGE_WRITE | PAGE_READ) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) + +#define pte_clear(mm,addr,xp) \ + do { \ + set_pte_at(mm, addr, xp, __pte(0)); \ + } while (0) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} +static inline int pte_write(pte_t pte) +{ + return pte_val(pte) & _PAGE_RW; +} +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXECUTE; +} +static inline int pte_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_DIRTY; +} +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + +/* + * The following only work if pte_present() is not true. 
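+ *
+ * A hedged decoding sketch (an addition, not from the original
+ * patch): generic code distinguishes the two !present cases roughly
+ * like this, using the encoding macros near the end of this file:
+ *
+ *	if (pte_file(pte))
+ *		pgoff = pte_to_pgoff(pte);		nonlinear file mapping
+ *	else
+ *		entry = __pte_to_swp_entry(pte);	swap entry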
+ */ +static inline int pte_file(pte_t pte) +{ + return pte_val(pte) & _PAGE_FILE; +} + +/* Mutator functions for PTE bits */ +static inline pte_t pte_rdprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); + return pte; +} +static inline pte_t pte_wrprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); + return pte; +} +static inline pte_t pte_exprotect(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_EXECUTE)); + return pte; +} +static inline pte_t pte_mkclean(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); + return pte; +} +static inline pte_t pte_mkold(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); + return pte; +} +static inline pte_t pte_mkread(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); + return pte; +} +static inline pte_t pte_mkwrite(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); + return pte; +} +static inline pte_t pte_mkexec(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_EXECUTE)); + return pte; +} +static inline pte_t pte_mkdirty(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); + return pte; +} +static inline pte_t pte_mkyoung(pte_t pte) +{ + set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); + return pte; +} + +#define pmd_none(x) (!pmd_val(x)) +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) \ + != _KERNPG_TABLE) + +/* + * Permanent address of a page. We don't support highmem, so this is + * trivial. + */ +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +#define pte_page(x) phys_to_page(pte_val(x) & PTE_PHYS_MASK) + +/* + * Mark the prot value as uncacheable and unbufferable + */ +#define pgprot_noncached(prot) \ + __pgprot(pgprot_val(prot) & ~(_PAGE_BUFFER | _PAGE_CACHABLE)) + +/* + * Mark the prot value as uncacheable but bufferable + */ +#define pgprot_writecombine(prot) \ + __pgprot((pgprot_val(prot) & ~_PAGE_CACHABLE) | _PAGE_BUFFER) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * extern pte_t mk_pte(struct page *page, pgprot_t pgprot) + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) + | pgprot_val(newprot))); + return pte; +} + +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +#define pmd_page(pmd) (phys_to_page(pmd_val(pmd))) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) +#define pgd_offset_current(address) \ + ((pgd_t *)__mfsr(SYSREG_PTBR) + pgd_index(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* Find an entry in the third-level page table.. 
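+ *
+ * An illustrative full walk (an addition, not from the original
+ * patch), using the folded pud/pmd levels from pgtable-nopmd.h:
+ *
+ *	pgd_t *pgd = pgd_offset(mm, addr);
+ *	pmd_t *pmd = pmd_offset(pud_offset(pgd, addr), addr);
+ *	pte_t *pte = pte_offset_kernel(pmd, addr);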
*/ +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) +#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +struct vm_area_struct; +extern void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte); + +/* + * Encode and decode a swap entry + * + * Constraints: + * _PAGE_FILE at bit 0 + * _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE) + * _PAGE_PRESENT at bit 10 + * + * We encode the type into bits 4-9 and offset into bits 11-31. This + * gives us a 21 bits offset, or 2**21 * 4K = 8G usable swap space per + * device, and 64 possible types. + * + * NOTE: We should set ZEROs at the position of _PAGE_PRESENT + * and _PAGE_PROTNONE bits + */ +#define __swp_type(x) (((x).val >> 4) & 0x3f) +#define __swp_offset(x) ((x).val >> 11) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +/* + * Encode and decode a nonlinear file mapping entry. We have to + * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't + * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?) + */ +#define PTE_FILE_MAX_BITS 30 +#define pte_to_pgoff(pte) (((pte_val(pte) >> 1) & 0x1ff) \ + | ((pte_val(pte) >> 11) << 9)) +#define pgoff_to_pte(off) ((pte_t) { ((((off) & 0x1ff) << 1) \ + | (((off) >> 9) << 11) \ + | _PAGE_FILE) }) + +typedef pte_t *pte_addr_t; + +#define kern_addr_valid(addr) (1) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#define MK_IOSPACE_PFN(space, pfn) (pfn) +#define GET_IOSPACE(pfn) 0 +#define GET_PFN(pfn) (pfn) + +/* No page table caches to initialize (?) */ +#define pgtable_cache_init() do { } while(0) + +#include <asm-generic/pgtable.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PGTABLE_H */ diff --git a/include/asm-avr32/poll.h b/include/asm-avr32/poll.h new file mode 100644 index 000000000000..736e29755dfc --- /dev/null +++ b/include/asm-avr32/poll.h @@ -0,0 +1,27 @@ +#ifndef __ASM_AVR32_POLL_H +#define __ASM_AVR32_POLL_H + +/* These are specified by iBCS2 */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +/* The rest seem to be more-or-less nonstandard. Check them! */ +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 +#define POLLREMOVE 0x1000 +#define POLLRDHUP 0x2000 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif /* __ASM_AVR32_POLL_H */ diff --git a/include/asm-avr32/posix_types.h b/include/asm-avr32/posix_types.h new file mode 100644 index 000000000000..2831b039b349 --- /dev/null +++ b/include/asm-avr32/posix_types.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_POSIX_TYPES_H +#define __ASM_AVR32_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef unsigned short __kernel_old_dev_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { +#if defined(__KERNEL__) || defined(__USE_ALL) + int val[2]; +#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */ + int __val[2]; +#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */ +} __kernel_fsid_t; + +#if defined(__KERNEL__) + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); +} + + +#undef __FD_ISSET +static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *__p) +{ + unsigned long *__tmp = __p->fds_bits; + int __i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + __tmp[ 8] = 0; __tmp[ 9] = 0; + __tmp[10] = 0; __tmp[11] = 0; + __tmp[12] = 0; __tmp[13] = 0; + __tmp[14] = 0; __tmp[15] = 0; + return; + + case 8: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + return; + + case 4: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + return; + } + } + __i = __FDSET_LONGS; + while (__i) { + __i--; + *__tmp = 0; + __tmp++; + } +} + +#endif /* defined(__KERNEL__) */ + +#endif /* __ASM_AVR32_POSIX_TYPES_H */ diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h new file mode 100644 index 000000000000..f6913778a45f --- /dev/null +++ b/include/asm-avr32/processor.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under 
the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PROCESSOR_H +#define __ASM_AVR32_PROCESSOR_H + +#include <asm/page.h> +#include <asm/cache.h> + +#define TASK_SIZE 0x80000000 + +#ifndef __ASSEMBLY__ + +static inline void *current_text_addr(void) +{ + register void *pc asm("pc"); + return pc; +} + +enum arch_type { + ARCH_AVR32A, + ARCH_AVR32B, + ARCH_MAX +}; + +enum cpu_type { + CPU_MORGAN, + CPU_AT32AP, + CPU_MAX +}; + +enum tlb_config { + TLB_NONE, + TLB_SPLIT, + TLB_UNIFIED, + TLB_INVALID +}; + +struct avr32_cpuinfo { + struct clk *clk; + unsigned long loops_per_jiffy; + enum arch_type arch_type; + enum cpu_type cpu_type; + unsigned short arch_revision; + unsigned short cpu_revision; + enum tlb_config tlb_config; + + struct cache_info icache; + struct cache_info dcache; +}; + +extern struct avr32_cpuinfo boot_cpu_data; + +#ifdef CONFIG_SMP +extern struct avr32_cpuinfo cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's + */ +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + +#define cpu_relax() barrier() +#define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") + +struct cpu_context { + unsigned long sr; + unsigned long pc; + unsigned long ksp; /* Kernel stack pointer */ + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; +}; + +/* This struct contains the CPU context as stored by switch_to() */ +struct thread_struct { + struct cpu_context cpu_context; + unsigned long single_step_addr; + u16 single_step_insn; +}; + +#define INIT_THREAD { \ + .cpu_context = { \ + .ksp = sizeof(init_stack) + (long)&init_stack, \ + }, \ +} + +/* + * Do necessary setup to start up a newly executed thread. 
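+ *
+ * Hedged usage sketch (an addition, not from the original patch;
+ * this is roughly how a binfmt loader would call it, with elf_entry
+ * and bprm assumed from the surrounding loader code):
+ *
+ *	start_thread(regs, elf_entry, bprm->p);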
+ */ +#define start_thread(regs, new_pc, new_sp) \ + do { \ + set_fs(USER_DS); \ + memset(regs, 0, sizeof(*regs)); \ + regs->sr = MODE_USER; \ + regs->pc = new_pc & ~1; \ + regs->sp = new_sp; \ + } while(0) + +struct task_struct; + +/* Free all resources held by a thread */ +extern void release_thread(struct task_struct *); + +/* Create a kernel thread without removing it from tasklists */ +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while(0) + +/* Return saved PC of a blocked thread */ +#define thread_saved_pc(tsk) ((tsk)->thread.cpu_context.pc) + +struct pt_regs; +void show_trace(struct task_struct *task, unsigned long *stack, + struct pt_regs *regs); + +extern unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) ((tsk)->thread.cpu_context.pc) +#define KSTK_ESP(tsk) ((tsk)->thread.cpu_context.ksp) + +#define ARCH_HAS_PREFETCH + +static inline void prefetch(const void *x) +{ + const char *c = x; + asm volatile("pref %0" : : "r"(c)); +} +#define PREFETCH_STRIDE L1_CACHE_BYTES + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PROCESSOR_H */ diff --git a/include/asm-avr32/ptrace.h b/include/asm-avr32/ptrace.h new file mode 100644 index 000000000000..60f0f19a81f1 --- /dev/null +++ b/include/asm-avr32/ptrace.h @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_PTRACE_H +#define __ASM_AVR32_PTRACE_H + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 + +/* + * Status Register bits + */ +#define SR_H 0x40000000 +#define SR_R 0x20000000 +#define SR_J 0x10000000 +#define SR_DM 0x08000000 +#define SR_D 0x04000000 +#define MODE_NMI 0x01c00000 +#define MODE_EXCEPTION 0x01800000 +#define MODE_INT3 0x01400000 +#define MODE_INT2 0x01000000 +#define MODE_INT1 0x00c00000 +#define MODE_INT0 0x00800000 +#define MODE_SUPERVISOR 0x00400000 +#define MODE_USER 0x00000000 +#define MODE_MASK 0x01c00000 +#define SR_EM 0x00200000 +#define SR_I3M 0x00100000 +#define SR_I2M 0x00080000 +#define SR_I1M 0x00040000 +#define SR_I0M 0x00020000 +#define SR_GM 0x00010000 + +#define SR_H_BIT 30 +#define SR_R_BIT 29 +#define SR_J_BIT 28 +#define SR_DM_BIT 27 +#define SR_D_BIT 26 +#define MODE_SHIFT 22 +#define SR_EM_BIT 21 +#define SR_I3M_BIT 20 +#define SR_I2M_BIT 19 +#define SR_I1M_BIT 18 +#define SR_I0M_BIT 17 +#define SR_GM_BIT 16 + +/* The user-visible part */ +#define SR_L 0x00000020 +#define SR_Q 0x00000010 +#define SR_V 0x00000008 +#define SR_N 0x00000004 +#define SR_Z 0x00000002 +#define SR_C 0x00000001 + +#define SR_L_BIT 5 +#define SR_Q_BIT 4 +#define SR_V_BIT 3 +#define SR_N_BIT 2 +#define SR_Z_BIT 1 +#define SR_C_BIT 0 + +/* + * The order is defined by the stmts instruction. r0 is stored first, + * so it gets the highest address. + * + * Registers 0-12 are general-purpose registers (r12 is normally used for + * the function return value). 
+ * Register 13 is the stack pointer + * Register 14 is the link register + * Register 15 is the program counter (retrieved from the RAR sysreg) + */ +#define FRAME_SIZE_FULL 72 +#define REG_R12_ORIG 68 +#define REG_R0 64 +#define REG_R1 60 +#define REG_R2 56 +#define REG_R3 52 +#define REG_R4 48 +#define REG_R5 44 +#define REG_R6 40 +#define REG_R7 36 +#define REG_R8 32 +#define REG_R9 28 +#define REG_R10 24 +#define REG_R11 20 +#define REG_R12 16 +#define REG_SP 12 +#define REG_LR 8 + +#define FRAME_SIZE_MIN 8 +#define REG_PC 4 +#define REG_SR 0 + +#ifndef __ASSEMBLY__ +struct pt_regs { + /* These are always saved */ + unsigned long sr; + unsigned long pc; + + /* These are sometimes saved */ + unsigned long lr; + unsigned long sp; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; + + /* Only saved on system call */ + unsigned long r12_orig; +}; + +#ifdef __KERNEL__ +# define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER) +extern void show_regs (struct pt_regs *); + +static __inline__ int valid_user_regs(struct pt_regs *regs) +{ + /* + * Some of the Java bits might be acceptable if/when we + * implement some support for that stuff... + */ + if ((regs->sr & 0xffff0000) == 0) + return 1; + + /* + * Force status register flags to be sane and report this + * illegal behaviour... + */ + regs->sr &= 0x0000ffff; + return 0; +} + +#define instruction_pointer(regs) ((regs)->pc) + +#define profile_pc(regs) instruction_pointer(regs) + +#endif /* __KERNEL__ */ + +#endif /* ! __ASSEMBLY__ */ + +#endif /* __ASM_AVR32_PTRACE_H */ diff --git a/include/asm-avr32/resource.h b/include/asm-avr32/resource.h new file mode 100644 index 000000000000..c6dd101472b1 --- /dev/null +++ b/include/asm-avr32/resource.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_RESOURCE_H +#define __ASM_AVR32_RESOURCE_H + +#include <asm-generic/resource.h> + +#endif /* __ASM_AVR32_RESOURCE_H */ diff --git a/include/asm-avr32/scatterlist.h b/include/asm-avr32/scatterlist.h new file mode 100644 index 000000000000..bfe7d753423c --- /dev/null +++ b/include/asm-avr32/scatterlist.h @@ -0,0 +1,21 @@ +#ifndef __ASM_AVR32_SCATTERLIST_H +#define __ASM_AVR32_SCATTERLIST_H + +struct scatterlist { + struct page *page; + unsigned int offset; + dma_addr_t dma_address; + unsigned int length; +}; + +/* These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->length) + +#define ISA_DMA_THRESHOLD (0xffffffff) + +#endif /* __ASM_AVR32_SCATTERLIST_H */ diff --git a/include/asm-avr32/sections.h b/include/asm-avr32/sections.h new file mode 100644 index 000000000000..aa14252e4181 --- /dev/null +++ b/include/asm-avr32/sections.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_SECTIONS_H +#define __ASM_AVR32_SECTIONS_H + +#include <asm-generic/sections.h> + +#endif /* __ASM_AVR32_SECTIONS_H */ diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h new file mode 100644 index 000000000000..ef99ddccc10c --- /dev/null +++ b/include/asm-avr32/semaphore.h @@ -0,0 +1,109 @@ +/* + * SMP- and interrupt-safe semaphores. 
+ * + * Copyright (C) 2006 Atmel Corporation + * + * Based on include/asm-i386/semaphore.h + * Copyright (C) 1996 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SEMAPHORE_H +#define __ASM_AVR32_SEMAPHORE_H + +#include <linux/linkage.h> + +#include <asm/system.h> +#include <asm/atomic.h> +#include <linux/wait.h> +#include <linux/rwsem.h> + +struct semaphore { + atomic_t count; + int sleepers; + wait_queue_head_t wait; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .count = ATOMIC_INIT(n), \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} + +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) + +static inline void sema_init (struct semaphore *sem, int val) +{ + atomic_set(&sem->count, val); + sem->sleepers = 0; + init_waitqueue_head(&sem->wait); +} + +static inline void init_MUTEX (struct semaphore *sem) +{ + sema_init(sem, 1); +} + +static inline void init_MUTEX_LOCKED (struct semaphore *sem) +{ + sema_init(sem, 0); +} + +void __down(struct semaphore * sem); +int __down_interruptible(struct semaphore * sem); +void __up(struct semaphore * sem); + +/* + * This is ugly, but we want the default case to fall through. + * "__down_failed" is a special asm handler that calls the C + * routine that actually waits. See arch/i386/kernel/semaphore.c + */ +static inline void down(struct semaphore * sem) +{ + might_sleep(); + if (unlikely(atomic_dec_return (&sem->count) < 0)) + __down (sem); +} + +/* + * Interruptible try to acquire a semaphore. If we obtained + * it, return zero. If we were interrupted, returns -EINTR + */ +static inline int down_interruptible(struct semaphore * sem) +{ + int ret = 0; + + might_sleep(); + if (unlikely(atomic_dec_return (&sem->count) < 0)) + ret = __down_interruptible (sem); + return ret; +} + +/* + * Non-blockingly attempt to down() a semaphore. + * Returns zero if we acquired it + */ +static inline int down_trylock(struct semaphore * sem) +{ + return atomic_dec_if_positive(&sem->count) < 0; +} + +/* + * Note! This is subtle. We jump to wake people up only if + * the semaphore was negative (== somebody was waiting on it). + * The default case (no contention) will result in NO + * jumps for both down() and up(). + */ +static inline void up(struct semaphore * sem) +{ + if (unlikely(atomic_inc_return (&sem->count) <= 0)) + __up (sem); +} + +#endif /*__ASM_AVR32_SEMAPHORE_H */ diff --git a/include/asm-avr32/sembuf.h b/include/asm-avr32/sembuf.h new file mode 100644 index 000000000000..e472216e0c97 --- /dev/null +++ b/include/asm-avr32/sembuf.h @@ -0,0 +1,25 @@ +#ifndef __ASM_AVR32_SEMBUF_H +#define __ASM_AVR32_SEMBUF_H + +/* +* The semid64_ds structure for AVR32 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + unsigned long __unused1; + __kernel_time_t sem_ctime; /* last change time */ + unsigned long __unused2; + unsigned long sem_nsems; /* no. 
of semaphores in array */
+ unsigned long __unused3;
+ unsigned long __unused4;
+};
+
+#endif /* __ASM_AVR32_SEMBUF_H */
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h
new file mode 100644
index 000000000000..10193da4113b
--- /dev/null
+++ b/include/asm-avr32/setup.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/include/asm-arm/setup.h
+ * Copyright (C) 1997-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_SETUP_H__
+#define __ASM_AVR32_SETUP_H__
+
+#define COMMAND_LINE_SIZE 256
+
+/* Magic number indicating that a tag table is present */
+#define ATAG_MAGIC 0xa2a25441
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Generic memory range, used by several tags.
+ *
+ * addr is always physical.
+ * size is measured in bytes.
+ * next is for use by the OS, e.g. for grouping regions into
+ * linked lists.
+ */
+struct tag_mem_range {
+ u32 addr;
+ u32 size;
+ struct tag_mem_range * next;
+};
+
+/* The list ends with an ATAG_NONE node. */
+#define ATAG_NONE 0x00000000
+
+struct tag_header {
+ u32 size;
+ u32 tag;
+};
+
+/* The list must start with an ATAG_CORE node */
+#define ATAG_CORE 0x54410001
+
+struct tag_core {
+ u32 flags;
+ u32 pagesize;
+ u32 rootdev;
+};
+
+/* It is allowed to have multiple ATAG_MEM nodes */
+#define ATAG_MEM 0x54410002
+/* ATAG_MEM uses tag_mem_range */
+
+/* command line: \0 terminated string */
+#define ATAG_CMDLINE 0x54410003
+
+struct tag_cmdline {
+ char cmdline[1]; /* this is the minimum size */
+};
+
+/* Ramdisk image (may be compressed) */
+#define ATAG_RDIMG 0x54410004
+/* ATAG_RDIMG uses tag_mem_range */
+
+/* Information about various clocks present in the system */
+#define ATAG_CLOCK 0x54410005
+
+struct tag_clock {
+ u32 clock_id; /* Which clock are we talking about? */
+ u32 clock_flags; /* Special features */
+ u64 clock_hz; /* Clock speed in Hz */
+};
+
+/* The clock types we know about */
+#define CLOCK_BOOTCPU 0
+
+/* Memory reserved for the system (e.g. the bootloader) */
+#define ATAG_RSVD_MEM 0x54410006
+/* ATAG_RSVD_MEM uses tag_mem_range */
+
+/* Ethernet information */
+
+#define ATAG_ETHERNET 0x54410007
+
+struct tag_ethernet {
+ u8 mac_index;
+ u8 mii_phy_addr;
+ u8 hw_address[6];
+};
+
+#define ETH_INVALID_PHY 0xff
+
+struct tag {
+ struct tag_header hdr;
+ union {
+ struct tag_core core;
+ struct tag_mem_range mem_range;
+ struct tag_cmdline cmdline;
+ struct tag_clock clock;
+ struct tag_ethernet ethernet;
+ } u;
+};
+
+struct tagtable {
+ u32 tag;
+ int (*parse)(struct tag *);
+};
+
+#define __tag __attribute_used__ __attribute__((__section__(".taglist")))
+#define __tagtable(tag, fn) \
+ static struct tagtable __tagtable_##fn __tag = { tag, fn }
+
+#define tag_member_present(tag,member) \
+ ((unsigned long)(&((struct tag *)0L)->member + 1) \
+ <= (tag)->hdr.size * 4)
+
+#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size))
+#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2)
+
+#define for_each_tag(t,base) \
+ for (t = base; t->hdr.size; t = tag_next(t))
+
+extern struct tag_mem_range *mem_phys;
+extern struct tag_mem_range *mem_reserved;
+extern struct tag_mem_range *mem_ramdisk;
+
+extern struct tag *bootloader_tags;
+
+extern void setup_bootmem(void);
+extern void setup_processor(void);
+extern void board_setup_fbmem(unsigned long fbmem_start,
+ unsigned long fbmem_size);
+
+/* Chip-specific hook to enable the use of SDRAM */
+void chip_enable_sdram(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_AVR32_SETUP_H__ */
diff --git a/include/asm-avr32/shmbuf.h b/include/asm-avr32/shmbuf.h
new file mode 100644
index 000000000000..c62fba41739a
--- /dev/null
+++ b/include/asm-avr32/shmbuf.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_AVR32_SHMBUF_H
+#define __ASM_AVR32_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for AVR32 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+ struct ipc64_perm shm_perm; /* operation perms */
+ size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_time_t shm_atime; /* last attach time */
+ unsigned long __unused1;
+ __kernel_time_t shm_dtime; /* last detach time */
+ unsigned long __unused2;
+ __kernel_time_t shm_ctime; /* last change time */
+ unsigned long __unused3;
+ __kernel_pid_t shm_cpid; /* pid of creator */
+ __kernel_pid_t shm_lpid; /* pid of last operator */
+ unsigned long shm_nattch; /* no.
of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* __ASM_AVR32_SHMBUF_H */ diff --git a/include/asm-avr32/shmparam.h b/include/asm-avr32/shmparam.h new file mode 100644 index 000000000000..3681266c77f7 --- /dev/null +++ b/include/asm-avr32/shmparam.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_SHMPARAM_H +#define __ASM_AVR32_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* __ASM_AVR32_SHMPARAM_H */ diff --git a/include/asm-avr32/sigcontext.h b/include/asm-avr32/sigcontext.h new file mode 100644 index 000000000000..e04062b5f39f --- /dev/null +++ b/include/asm-avr32/sigcontext.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SIGCONTEXT_H +#define __ASM_AVR32_SIGCONTEXT_H + +struct sigcontext { + unsigned long oldmask; + + /* CPU registers */ + unsigned long sr; + unsigned long pc; + unsigned long lr; + unsigned long sp; + unsigned long r12; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long r7; + unsigned long r6; + unsigned long r5; + unsigned long r4; + unsigned long r3; + unsigned long r2; + unsigned long r1; + unsigned long r0; +}; + +#endif /* __ASM_AVR32_SIGCONTEXT_H */ diff --git a/include/asm-avr32/siginfo.h b/include/asm-avr32/siginfo.h new file mode 100644 index 000000000000..5ee93f40a8a8 --- /dev/null +++ b/include/asm-avr32/siginfo.h @@ -0,0 +1,6 @@ +#ifndef _AVR32_SIGINFO_H +#define _AVR32_SIGINFO_H + +#include <asm-generic/siginfo.h> + +#endif diff --git a/include/asm-avr32/signal.h b/include/asm-avr32/signal.h new file mode 100644 index 000000000000..caffefeeba1f --- /dev/null +++ b/include/asm-avr32/signal.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SIGNAL_H +#define __ASM_AVR32_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifdef __KERNEL__ +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 32 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. 
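+ *
+ * Illustrative note (an addition for clarity): in this legacy
+ * userspace view, sigset_t below is a single word, so signal nr
+ * occupies bit (nr - 1), e.g.
+ *
+ *	mask = 1UL << (SIGINT - 1);
+ *
+ * whereas the kernel-side sigset_t above spreads _NSIG == 64 bits
+ * over _NSIG_WORDS 32-bit words.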
*/ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + +/* + * SA_FLAGS values: + * + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_SIGINFO deliver the signal with SIGINFO structs + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NODEFER prevents the current signal from being masked in the handler. + * SA_RESETHAND clears the handler when the signal is delivered. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_RESTORER 0x04000000 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal.h> + +#ifdef __KERNEL__ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + __sigrestore_t sa_restorer; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; +#else +/* Here we must cater to libcs that poke about in kernel headers. 
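+ *
+ * Illustrative userspace-side usage (an addition, not from the
+ * original patch), picking the siginfo flavour through the accessor
+ * macros defined below:
+ *
+ *	struct sigaction act;
+ *	act.sa_sigaction = my_handler;	(hypothetical handler)
+ *	act.sa_flags = SA_SIGINFO;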
*/ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +#include <asm/sigcontext.h> +#undef __HAVE_ARCH_SIG_BITOPS + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* __KERNEL__ */ + +#endif diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h new file mode 100644 index 000000000000..543229de8173 --- /dev/null +++ b/include/asm-avr32/socket.h @@ -0,0 +1,53 @@ +#ifndef __ASM_AVR32_SOCKET_H +#define __ASM_AVR32_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 + +#endif /* __ASM_AVR32_SOCKET_H */ diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h new file mode 100644 index 000000000000..84f3d65b3b3b --- /dev/null +++ b/include/asm-avr32/sockios.h @@ -0,0 +1,12 @@ +#ifndef __ASM_AVR32_SOCKIOS_H +#define __ASM_AVR32_SOCKIOS_H + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp */ + +#endif /* __ASM_AVR32_SOCKIOS_H */ diff --git a/include/asm-avr32/stat.h b/include/asm-avr32/stat.h new file mode 100644 index 000000000000..e72881e10230 --- /dev/null +++ b/include/asm-avr32/stat.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_STAT_H +#define __ASM_AVR32_STAT_H + +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; +}; + +#define STAT_HAVE_NSEC 1 + +struct stat64 { + unsigned long long st_dev; + + unsigned long long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + + unsigned long st_uid; + unsigned long st_gid; + + unsigned long long st_rdev; + + long long st_size; + unsigned long __pad1; /* align 64-bit st_blocks */ + unsigned long st_blksize; + + unsigned long long st_blocks; /* Number 512-byte blocks allocated. */ + + unsigned long st_atime; + unsigned long st_atime_nsec; + + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __ASM_AVR32_STAT_H */ diff --git a/include/asm-avr32/statfs.h b/include/asm-avr32/statfs.h new file mode 100644 index 000000000000..2961bd18c50e --- /dev/null +++ b/include/asm-avr32/statfs.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_STATFS_H +#define __ASM_AVR32_STATFS_H + +#include <asm-generic/statfs.h> + +#endif /* __ASM_AVR32_STATFS_H */ diff --git a/include/asm-avr32/string.h b/include/asm-avr32/string.h new file mode 100644 index 000000000000..c91a623cd585 --- /dev/null +++ b/include/asm-avr32/string.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_STRING_H +#define __ASM_AVR32_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *b, int c, size_t len); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *to, const void *from, size_t len); + +#endif /* __ASM_AVR32_STRING_H */ diff --git a/include/asm-avr32/sysreg.h b/include/asm-avr32/sysreg.h new file mode 100644 index 000000000000..f91975f330f6 --- /dev/null +++ b/include/asm-avr32/sysreg.h @@ -0,0 +1,332 @@ +/* + * AVR32 System Registers + * + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_SYSREG_H__ +#define __ASM_AVR32_SYSREG_H__ + +/* sysreg register offsets */ +#define SYSREG_SR 0x0000 +#define SYSREG_EVBA 0x0004 +#define SYSREG_ACBA 0x0008 +#define SYSREG_CPUCR 0x000c +#define SYSREG_ECR 0x0010 +#define SYSREG_RSR_SUP 0x0014 +#define SYSREG_RSR_INT0 0x0018 +#define SYSREG_RSR_INT1 0x001c +#define SYSREG_RSR_INT2 0x0020 +#define SYSREG_RSR_INT3 0x0024 +#define SYSREG_RSR_EX 0x0028 +#define SYSREG_RSR_NMI 0x002c +#define SYSREG_RSR_DBG 0x0030 +#define SYSREG_RAR_SUP 0x0034 +#define SYSREG_RAR_INT0 0x0038 +#define SYSREG_RAR_INT1 0x003c +#define SYSREG_RAR_INT2 0x0040 +#define SYSREG_RAR_INT3 0x0044 +#define SYSREG_RAR_EX 0x0048 +#define SYSREG_RAR_NMI 0x004c +#define SYSREG_RAR_DBG 0x0050 +#define SYSREG_JECR 0x0054 +#define SYSREG_JOSP 0x0058 +#define SYSREG_JAVA_LV0 0x005c +#define SYSREG_JAVA_LV1 0x0060 +#define SYSREG_JAVA_LV2 0x0064 +#define SYSREG_JAVA_LV3 0x0068 +#define SYSREG_JAVA_LV4 0x006c +#define SYSREG_JAVA_LV5 0x0070 +#define SYSREG_JAVA_LV6 0x0074 +#define SYSREG_JAVA_LV7 0x0078 +#define SYSREG_JTBA 0x007c +#define SYSREG_JBCR 0x0080 +#define SYSREG_CONFIG0 0x0100 +#define SYSREG_CONFIG1 0x0104 +#define SYSREG_COUNT 0x0108 +#define SYSREG_COMPARE 0x010c +#define SYSREG_TLBEHI 0x0110 +#define SYSREG_TLBELO 0x0114 +#define SYSREG_PTBR 0x0118 +#define SYSREG_TLBEAR 0x011c +#define SYSREG_MMUCR 0x0120 +#define SYSREG_TLBARLO 0x0124 +#define SYSREG_TLBARHI 0x0128 +#define SYSREG_PCCNT 0x012c +#define SYSREG_PCNT0 0x0130 +#define SYSREG_PCNT1 0x0134 +#define SYSREG_PCCR 0x0138 +#define SYSREG_BEAR 0x013c + +/* Bitfields in SR */ +#define SYSREG_SR_C_OFFSET 0 +#define SYSREG_SR_C_SIZE 1 +#define SYSREG_Z_OFFSET 1 +#define SYSREG_Z_SIZE 1 +#define SYSREG_SR_N_OFFSET 2 +#define SYSREG_SR_N_SIZE 1 +#define SYSREG_SR_V_OFFSET 3 +#define SYSREG_SR_V_SIZE 1 +#define SYSREG_Q_OFFSET 4 +#define SYSREG_Q_SIZE 1 +#define SYSREG_GM_OFFSET 16 +#define SYSREG_GM_SIZE 1 +#define SYSREG_I0M_OFFSET 17 +#define SYSREG_I0M_SIZE 1 +#define SYSREG_I1M_OFFSET 18 +#define SYSREG_I1M_SIZE 1 +#define SYSREG_I2M_OFFSET 19 +#define SYSREG_I2M_SIZE 1 +#define SYSREG_I3M_OFFSET 20 +#define SYSREG_I3M_SIZE 1 +#define SYSREG_EM_OFFSET 21 +#define SYSREG_EM_SIZE 1 +#define SYSREG_M0_OFFSET 22 +#define SYSREG_M0_SIZE 1 +#define SYSREG_M1_OFFSET 23 +#define SYSREG_M1_SIZE 1 +#define SYSREG_M2_OFFSET 24 +#define SYSREG_M2_SIZE 1 +#define SYSREG_SR_D_OFFSET 26 +#define SYSREG_SR_D_SIZE 1 +#define SYSREG_DM_OFFSET 27 +#define SYSREG_DM_SIZE 1 +#define SYSREG_SR_J_OFFSET 28 +#define SYSREG_SR_J_SIZE 1 +#define SYSREG_R_OFFSET 29 +#define SYSREG_R_SIZE 1 +#define SYSREG_H_OFFSET 30 +#define SYSREG_H_SIZE 1 + +/* Bitfields in EVBA */ + +/* Bitfields in ACBA */ + +/* Bitfields in CPUCR */ +#define SYSREG_BI_OFFSET 0 +#define SYSREG_BI_SIZE 1 +#define SYSREG_BE_OFFSET 1 +#define SYSREG_BE_SIZE 1 +#define SYSREG_FE_OFFSET 2 +#define SYSREG_FE_SIZE 1 +#define SYSREG_RE_OFFSET 3 +#define SYSREG_RE_SIZE 1 +#define SYSREG_IBE_OFFSET 4 +#define SYSREG_IBE_SIZE 1 +#define SYSREG_IEE_OFFSET 5 +#define SYSREG_IEE_SIZE 1 + +/* Bitfields in ECR */ +#define SYSREG_ECR_OFFSET 0 +#define SYSREG_ECR_SIZE 32 + +/* Bitfields in RSR_SUP */ + +/* Bitfields in RSR_INT0 */ + +/* Bitfields in RSR_INT1 */ + +/* Bitfields in RSR_INT2 */ + +/* Bitfields in RSR_INT3 */ + +/* Bitfields in RSR_EX */ + +/* Bitfields in RSR_NMI */ + +/* Bitfields in RSR_DBG */ + +/* Bitfields in RAR_SUP */ + +/* Bitfields in RAR_INT0 */ + +/* Bitfields in RAR_INT1 */ + +/* Bitfields in RAR_INT2 */ + +/* Bitfields in 
RAR_INT3 */ + +/* Bitfields in RAR_EX */ + +/* Bitfields in RAR_NMI */ + +/* Bitfields in RAR_DBG */ + +/* Bitfields in JECR */ + +/* Bitfields in JOSP */ + +/* Bitfields in JAVA_LV0 */ + +/* Bitfields in JAVA_LV1 */ + +/* Bitfields in JAVA_LV2 */ + +/* Bitfields in JAVA_LV3 */ + +/* Bitfields in JAVA_LV4 */ + +/* Bitfields in JAVA_LV5 */ + +/* Bitfields in JAVA_LV6 */ + +/* Bitfields in JAVA_LV7 */ + +/* Bitfields in JTBA */ + +/* Bitfields in JBCR */ + +/* Bitfields in CONFIG0 */ +#define SYSREG_CONFIG0_D_OFFSET 1 +#define SYSREG_CONFIG0_D_SIZE 1 +#define SYSREG_CONFIG0_S_OFFSET 2 +#define SYSREG_CONFIG0_S_SIZE 1 +#define SYSREG_O_OFFSET 3 +#define SYSREG_O_SIZE 1 +#define SYSREG_P_OFFSET 4 +#define SYSREG_P_SIZE 1 +#define SYSREG_CONFIG0_J_OFFSET 5 +#define SYSREG_CONFIG0_J_SIZE 1 +#define SYSREG_F_OFFSET 6 +#define SYSREG_F_SIZE 1 +#define SYSREG_MMUT_OFFSET 7 +#define SYSREG_MMUT_SIZE 3 +#define SYSREG_AR_OFFSET 10 +#define SYSREG_AR_SIZE 3 +#define SYSREG_AT_OFFSET 13 +#define SYSREG_AT_SIZE 3 +#define SYSREG_PROCESSORREVISION_OFFSET 16 +#define SYSREG_PROCESSORREVISION_SIZE 8 +#define SYSREG_PROCESSORID_OFFSET 24 +#define SYSREG_PROCESSORID_SIZE 8 + +/* Bitfields in CONFIG1 */ +#define SYSREG_DASS_OFFSET 0 +#define SYSREG_DASS_SIZE 3 +#define SYSREG_DLSZ_OFFSET 3 +#define SYSREG_DLSZ_SIZE 3 +#define SYSREG_DSET_OFFSET 6 +#define SYSREG_DSET_SIZE 4 +#define SYSREG_IASS_OFFSET 10 +#define SYSREG_IASS_SIZE 2 +#define SYSREG_ILSZ_OFFSET 13 +#define SYSREG_ILSZ_SIZE 3 +#define SYSREG_ISET_OFFSET 16 +#define SYSREG_ISET_SIZE 4 +#define SYSREG_DMMUSZ_OFFSET 20 +#define SYSREG_DMMUSZ_SIZE 6 +#define SYSREG_IMMUSZ_OFFSET 26 +#define SYSREG_IMMUSZ_SIZE 6 + +/* Bitfields in COUNT */ + +/* Bitfields in COMPARE */ + +/* Bitfields in TLBEHI */ +#define SYSREG_ASID_OFFSET 0 +#define SYSREG_ASID_SIZE 8 +#define SYSREG_TLBEHI_I_OFFSET 8 +#define SYSREG_TLBEHI_I_SIZE 1 +#define SYSREG_TLBEHI_V_OFFSET 9 +#define SYSREG_TLBEHI_V_SIZE 1 +#define SYSREG_VPN_OFFSET 10 +#define SYSREG_VPN_SIZE 22 + +/* Bitfields in TLBELO */ +#define SYSREG_W_OFFSET 0 +#define SYSREG_W_SIZE 1 +#define SYSREG_TLBELO_D_OFFSET 1 +#define SYSREG_TLBELO_D_SIZE 1 +#define SYSREG_SZ_OFFSET 2 +#define SYSREG_SZ_SIZE 2 +#define SYSREG_AP_OFFSET 4 +#define SYSREG_AP_SIZE 3 +#define SYSREG_B_OFFSET 7 +#define SYSREG_B_SIZE 1 +#define SYSREG_G_OFFSET 8 +#define SYSREG_G_SIZE 1 +#define SYSREG_TLBELO_C_OFFSET 9 +#define SYSREG_TLBELO_C_SIZE 1 +#define SYSREG_PFN_OFFSET 10 +#define SYSREG_PFN_SIZE 22 + +/* Bitfields in PTBR */ + +/* Bitfields in TLBEAR */ + +/* Bitfields in MMUCR */ +#define SYSREG_E_OFFSET 0 +#define SYSREG_E_SIZE 1 +#define SYSREG_M_OFFSET 1 +#define SYSREG_M_SIZE 1 +#define SYSREG_MMUCR_I_OFFSET 2 +#define SYSREG_MMUCR_I_SIZE 1 +#define SYSREG_MMUCR_N_OFFSET 3 +#define SYSREG_MMUCR_N_SIZE 1 +#define SYSREG_MMUCR_S_OFFSET 4 +#define SYSREG_MMUCR_S_SIZE 1 +#define SYSREG_DLA_OFFSET 8 +#define SYSREG_DLA_SIZE 6 +#define SYSREG_DRP_OFFSET 14 +#define SYSREG_DRP_SIZE 6 +#define SYSREG_ILA_OFFSET 20 +#define SYSREG_ILA_SIZE 6 +#define SYSREG_IRP_OFFSET 26 +#define SYSREG_IRP_SIZE 6 + +/* Bitfields in TLBARLO */ + +/* Bitfields in TLBARHI */ + +/* Bitfields in PCCNT */ + +/* Bitfields in PCNT0 */ + +/* Bitfields in PCNT1 */ + +/* Bitfields in PCCR */ + +/* Bitfields in BEAR */ + +/* Constants for ECR */ +#define ECR_UNRECOVERABLE 0 +#define ECR_TLB_MULTIPLE 1 +#define ECR_BUS_ERROR_WRITE 2 +#define ECR_BUS_ERROR_READ 3 +#define ECR_NMI 4 +#define ECR_ADDR_ALIGN_X 5 +#define ECR_PROTECTION_X 6 +#define ECR_DEBUG 7 +#define 
ECR_ILLEGAL_OPCODE 8 +#define ECR_UNIMPL_INSTRUCTION 9 +#define ECR_PRIVILEGE_VIOLATION 10 +#define ECR_FPE 11 +#define ECR_COPROC_ABSENT 12 +#define ECR_ADDR_ALIGN_R 13 +#define ECR_ADDR_ALIGN_W 14 +#define ECR_PROTECTION_R 15 +#define ECR_PROTECTION_W 16 +#define ECR_DTLB_MODIFIED 17 +#define ECR_TLB_MISS_X 20 +#define ECR_TLB_MISS_R 24 +#define ECR_TLB_MISS_W 28 + +/* Bit manipulation macros */ +#define SYSREG_BIT(name) (1 << SYSREG_##name##_OFFSET) +#define SYSREG_BF(name,value) (((value) & ((1 << SYSREG_##name##_SIZE) - 1)) << SYSREG_##name##_OFFSET) +#define SYSREG_BFEXT(name,value) (((value) >> SYSREG_##name##_OFFSET) & ((1 << SYSREG_##name##_SIZE) - 1)) +#define SYSREG_BFINS(name,value,old) (((old) & ~(((1 << SYSREG_##name##_SIZE) - 1) << SYSREG_##name##_OFFSET)) | SYSREG_BF(name,value)) + +#ifdef __CHECKER__ +extern unsigned long __builtin_mfsr(unsigned long reg); +extern void __builtin_mtsr(unsigned long reg, unsigned long value); +#endif + +/* Register access macros */ +#define sysreg_read(reg) __builtin_mfsr(SYSREG_##reg) +#define sysreg_write(reg, value) __builtin_mtsr(SYSREG_##reg, value) + +#endif /* __ASM_AVR32_SYSREG_H__ */ diff --git a/include/asm-avr32/system.h b/include/asm-avr32/system.h new file mode 100644 index 000000000000..ac596058697d --- /dev/null +++ b/include/asm-avr32/system.h @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_SYSTEM_H +#define __ASM_AVR32_SYSTEM_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#include <asm/ptrace.h> +#include <asm/sysreg.h> + +#define xchg(ptr,x) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) + +#define nop() asm volatile("nop") + +#define mb() asm volatile("" : : : "memory") +#define rmb() mb() +#define wmb() asm volatile("sync 0" : : : "memory") +#define read_barrier_depends() do { } while(0) +#define set_mb(var, value) do { var = value; mb(); } while(0) + +/* + * Help PathFinder and other Nexus-compliant debuggers keep track of + * the current PID by emitting an Ownership Trace Message each time we + * switch task. + */ +#ifdef CONFIG_OWNERSHIP_TRACE +#include <asm/ocd.h> +#define finish_arch_switch(prev) \ + do { \ + __mtdr(DBGREG_PID, prev->pid); \ + __mtdr(DBGREG_PID, current->pid); \ + } while(0) +#endif + +/* + * switch_to(prev, next, last) should switch from task `prev' to task + * `next'. `prev' will never be the same as `next'. + * + * We just delegate everything to the __switch_to assembly function, + * which is implemented in arch/avr32/kernel/switch_to.S + * + * mb() tells GCC not to cache `current' across this call. 
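+ *
+ * Illustrative call site (an addition, not from the original patch;
+ * this is roughly what the scheduler's context_switch() does):
+ *
+ *	switch_to(prev, next, prev);
+ *
+ * so that `prev' afterwards refers to the task we just switched away
+ * from.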
+ */ +struct cpu_context; +struct task_struct; +extern struct task_struct *__switch_to(struct task_struct *, + struct cpu_context *, + struct cpu_context *); +#define switch_to(prev, next, last) \ + do { \ + last = __switch_to(prev, &prev->thread.cpu_context + 1, \ + &next->thread.cpu_context); \ + } while (0) + +#ifdef CONFIG_SMP +# error "The AVR32 port does not support SMP" +#else +# define smp_mb() barrier() +# define smp_rmb() barrier() +# define smp_wmb() barrier() +# define smp_read_barrier_depends() do { } while(0) +#endif + +#include <linux/irqflags.h> + +extern void __xchg_called_with_bad_pointer(void); + +#ifdef __CHECKER__ +extern unsigned long __builtin_xchg(void *ptr, unsigned long x); +#endif + +#define xchg_u32(val, m) __builtin_xchg((void *)m, val) + +static inline unsigned long __xchg(unsigned long x, + volatile void *ptr, + int size) +{ + switch(size) { + case 4: + return xchg_u32(x, ptr); + default: + __xchg_called_with_bad_pointer(); + return x; + } +} + +static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, + unsigned long new) +{ + __u32 ret; + + asm volatile( + "1: ssrf 5\n" + " ld.w %[ret], %[m]\n" + " cp.w %[ret], %[old]\n" + " brne 2f\n" + " stcond %[m], %[new]\n" + " brne 1b\n" + "2:\n" + : [ret] "=&r"(ret), [m] "=m"(*m) + : "m"(m), [old] "ir"(old), [new] "r"(new) + : "memory", "cc"); + return ret; +} + +extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( + volatile int * m, unsigned long old, unsigned long new); +#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); + case 8: + return __cmpxchg_u64(ptr, old, new); + } + + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, old, new) \ + ((typeof(*(ptr)))__cmpxchg((ptr), (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +struct pt_regs; +extern void __die(const char *, struct pt_regs *, unsigned long, + const char *, const char *, unsigned long); +extern void __die_if_kernel(const char *, struct pt_regs *, unsigned long, + const char *, const char *, unsigned long); + +#define die(msg, regs, err) \ + __die(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__) +#define die_if_kernel(msg, regs, err) \ + __die_if_kernel(msg, regs, err, __FILE__ ":", __FUNCTION__, __LINE__) + +#define arch_align_stack(x) (x) + +#endif /* __ASM_AVR32_SYSTEM_H */ diff --git a/include/asm-avr32/termbits.h b/include/asm-avr32/termbits.h new file mode 100644 index 000000000000..9dc6eacafa33 --- /dev/null +++ b/include/asm-avr32/termbits.h @@ -0,0 +1,173 @@ +#ifndef __ASM_AVR32_TERMBITS_H +#define __ASM_AVR32_TERMBITS_H + +#include <linux/posix_types.h> + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 
+#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate (not used) */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* __ASM_AVR32_TERMBITS_H */ diff --git a/include/asm-avr32/termios.h b/include/asm-avr32/termios.h new file mode 100644 index 000000000000..615bc0639e5c --- /dev/null +++ b/include/asm-avr32/termios.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TERMIOS_H +#define __ASM_AVR32_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +/* line disciplines */ +#define N_TTY 0 +#define N_SLIP 1 +#define N_MOUSE 2 +#define N_PPP 3 +#define N_STRIP 4 +#define N_AX25 5 +#define N_X25 6 /* X.25 async */ +#define N_6PACK 7 +#define N_MASC 8 /* Reserved for Mobitex module <kaz@cafe.net> */ +#define N_R3964 9 /* Reserved for Simatic R3964 module */ +#define N_PROFIBUS_FDL 10 /* Reserved for Profibus <Dave@mvhi.com> */ +#define N_IRDA 11 /* Linux IR - http://irda.sourceforge.net/ */ +#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */ +#define N_HDLC 13 /* synchronous HDLC */ +#define N_SYNC_PPP 14 /* synchronous PPP */ +#define N_HCI 15 /* Bluetooth HCI UART */ + +#ifdef __KERNEL__ +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +#include <asm-generic/termios.h> + +#endif /* __KERNEL__ */ + +#endif /* __ASM_AVR32_TERMIOS_H */ diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h new file mode 100644 index 000000000000..d1f5b35ebd54 --- /dev/null +++ b/include/asm-avr32/thread_info.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_THREAD_INFO_H +#define __ASM_AVR32_THREAD_INFO_H + +#include <asm/page.h> + +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +struct task_struct; +struct exec_domain; + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + __u32 cpu; + __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + struct restart_block restart_block; + __u8 supervisor_stack[0]; +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .restart_block = { \ + .fn = do_no_restart_syscall \ + } \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* + * Get the thread information struct from C. + * We do the usual trick and use the lower end of the stack for this + */ +static inline struct thread_info *current_thread_info(void) +{ + unsigned long addr = ~(THREAD_SIZE - 1); + + asm("and %0, sp" : "=r"(addr) : "0"(addr)); + return (struct thread_info *)addr; +} + +/* thread information allocation */ +#define alloc_thread_info(ti) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER)) +#define free_thread_info(ti) free_pages((unsigned long)(ti), 1) +#define get_thread_info(ti) get_task_struct((ti)->task) +#define put_thread_info(ti) put_task_struct((ti)->task) + +#endif /* !__ASSEMBLY__ */ + +#define PREEMPT_ACTIVE 0x40000000 + +/* + * Thread information flags + * - these are process state flags that various assembly files may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ +#define TIF_BREAKPOINT 5 /* true if we should break after return */ +#define TIF_SINGLE_STEP 6 /* single step after next break */ +#define TIF_MEMDIE 7 +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal */ +#define TIF_USERSPACE 31 /* true if FS sets userspace */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT) +#define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP) +#define _TIF_MEMDIE (1 << TIF_MEMDIE) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) + +/* XXX: These two masks must never span more than 16 bits! 
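+ * (Editor's note, not part of the original patch: _TIF_WORK_MASK
+ * 0x0000013e is _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED |
+ * _TIF_POLLING_NRFLAG | _TIF_BREAKPOINT | _TIF_RESTORE_SIGMASK;
+ * _TIF_ALLWORK_MASK 0x0000013f adds _TIF_SYSCALL_TRACE; and
+ * _TIF_DBGWORK_MASK 0x0000017e is _TIF_WORK_MASK plus _TIF_SINGLE_STEP.)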
*/ +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK 0x0000013e +/* work to do on any return to userspace */ +#define _TIF_ALLWORK_MASK 0x0000013f +/* work to do on return from debug mode */ +#define _TIF_DBGWORK_MASK 0x0000017e + +#endif /* __ASM_AVR32_THREAD_INFO_H */ diff --git a/include/asm-avr32/timex.h b/include/asm-avr32/timex.h new file mode 100644 index 000000000000..5e44ecb3ce0c --- /dev/null +++ b/include/asm-avr32/timex.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TIMEX_H +#define __ASM_AVR32_TIMEX_H + +/* + * This is the frequency of the timer used for Linux's timer interrupt. + * The value should be defined as accurately as possible, or under certain + * circumstances Linux timekeeping might become inaccurate or fail. + * + * For many systems the exact clockrate of the timer isn't known, but due to + * the way this value is used we can get away with a wrong value as long + * as this value is: + * + * - a multiple of HZ + * - a divisor of the actual rate + * + * 500000 is a good such cheat value. + * + * The obscure number 1193182 is the same as used by the original i8254 + * timer in legacy PC hardware; the chip is never found in AVR32 systems. + */ +#define CLOCK_TICK_RATE 500000 /* Underlying HZ */ + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles (void) +{ + return 0; +} + +extern int read_current_timer(unsigned long *timer_value); +#define ARCH_HAS_READ_CURRENT_TIMER 1 + +#endif /* __ASM_AVR32_TIMEX_H */ diff --git a/include/asm-avr32/tlb.h b/include/asm-avr32/tlb.h new file mode 100644 index 000000000000..5c55f9ce7c7d --- /dev/null +++ b/include/asm-avr32/tlb.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TLB_H +#define __ASM_AVR32_TLB_H + +#define tlb_start_vma(tlb, vma) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end) + +#define tlb_end_vma(tlb, vma) \ + flush_tlb_range(vma, vma->vm_start, vma->vm_end) + +#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while(0) + +/* + * Flush whole TLB for MM + */ +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +#include <asm-generic/tlb.h> + +/* + * For debugging purposes + */ +extern void show_dtlb_entry(unsigned int index); +extern void dump_dtlb(void); + +#endif /* __ASM_AVR32_TLB_H */ diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h new file mode 100644 index 000000000000..730e268f81f3 --- /dev/null +++ b/include/asm-avr32/tlbflush.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_AVR32_TLBFLUSH_H +#define __ASM_AVR32_TLBFLUSH_H + +#include <asm/mmu.h> + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes' TLB entries + * - flush_tlb_mm(mm) flushes the specified mm context TLBs + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + */ +extern void flush_tlb(void); +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void __flush_tlb_page(unsigned long asid, unsigned long page); + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* Nothing to do */ +} + +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +#endif /* __ASM_AVR32_TLBFLUSH_H */ diff --git a/include/asm-avr32/topology.h b/include/asm-avr32/topology.h new file mode 100644 index 000000000000..5b766cbb4806 --- /dev/null +++ b/include/asm-avr32/topology.h @@ -0,0 +1,6 @@ +#ifndef __ASM_AVR32_TOPOLOGY_H +#define __ASM_AVR32_TOPOLOGY_H + +#include <asm-generic/topology.h> + +#endif /* __ASM_AVR32_TOPOLOGY_H */ diff --git a/include/asm-avr32/traps.h b/include/asm-avr32/traps.h new file mode 100644 index 000000000000..6a8fb944f414 --- /dev/null +++ b/include/asm-avr32/traps.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TRAPS_H +#define __ASM_AVR32_TRAPS_H + +#include <linux/list.h> + +struct undef_hook { + struct list_head node; + u32 insn_mask; + u32 insn_val; + int (*fn)(struct pt_regs *regs, u32 insn); +}; + +void register_undef_hook(struct undef_hook *hook); +void unregister_undef_hook(struct undef_hook *hook); + +#endif /* __ASM_AVR32_TRAPS_H */ diff --git a/include/asm-avr32/types.h b/include/asm-avr32/types.h new file mode 100644 index 000000000000..3f47db9675af --- /dev/null +++ b/include/asm-avr32/types.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_TYPES_H +#define __ASM_AVR32_TYPES_H + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the + * header files exported to user space */ +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#define BITS_PER_LONG 32 + +#ifndef __ASSEMBLY__ + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +/* Dma addresses are 32-bits wide. */ + +typedef u32 dma_addr_t; + +#ifdef CONFIG_LBD +typedef u64 sector_t; +#define HAVE_SECTOR_T +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + + +#endif /* __ASM_AVR32_TYPES_H */ diff --git a/include/asm-avr32/uaccess.h b/include/asm-avr32/uaccess.h new file mode 100644 index 000000000000..821deb5a9d28 --- /dev/null +++ b/include/asm-avr32/uaccess.h @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_UACCESS_H +#define __ASM_AVR32_UACCESS_H + +#include <linux/errno.h> +#include <linux/sched.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +typedef struct { + unsigned int is_user_space; +} mm_segment_t; + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons (Data Segment Register?), these macros are misnamed. + */ +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) +#define segment_eq(a,b) ((a).is_user_space == (b).is_user_space) + +#define USER_ADDR_LIMIT 0x80000000 + +#define KERNEL_DS MAKE_MM_SEG(0) +#define USER_DS MAKE_MM_SEG(1) + +#define get_ds() (KERNEL_DS) + +static inline mm_segment_t get_fs(void) +{ + return MAKE_MM_SEG(test_thread_flag(TIF_USERSPACE)); +} + +static inline void set_fs(mm_segment_t s) +{ + if (s.is_user_space) + set_thread_flag(TIF_USERSPACE); + else + clear_thread_flag(TIF_USERSPACE); +} + +/* + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + * + * We do the following checks: + * 1. Is the access from kernel space? + * 2. Does (addr + size) set the carry bit? + * 3. Is (addr + size) a negative number (i.e. >= 0x80000000)? + * + * If yes on the first check, access is granted. + * If yes on any of the others, access is denied. + */ +#define __range_ok(addr, size) \ + (test_thread_flag(TIF_USERSPACE) \ + && (((unsigned long)(addr) >= 0x80000000) \ + || ((unsigned long)(size) > 0x80000000) \ + || (((unsigned long)(addr) + (unsigned long)(size)) > 0x80000000))) + +#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0)) + +static inline int +verify_area(int type, const void __user *addr, unsigned long size) +{ + return access_ok(type, addr, size) ? 0 : -EFAULT; +} + +/* Generic arbitrary sized copy. 
Return the number of bytes NOT copied */ +extern __kernel_size_t __copy_user(void *to, const void *from, + __kernel_size_t n); + +extern __kernel_size_t copy_to_user(void __user *to, const void *from, + __kernel_size_t n); +extern __kernel_size_t copy_from_user(void *to, const void __user *from, + __kernel_size_t n); + +static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, + __kernel_size_t n) +{ + return __copy_user((void __force *)to, from, n); +} +static inline __kernel_size_t __copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + return __copy_user(to, (const void __force *)from, n); +} + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +/* + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ +#define put_user(x,ptr) \ + __put_user_check((x),(ptr),sizeof(*(ptr))) + +/* + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) \ + __get_user_check((x),(ptr),sizeof(*(ptr))) + +/* + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ +#define __put_user(x,ptr) \ + __put_user_nocheck((x),(ptr),sizeof(*(ptr))) + +/* + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. 
+ * On error, the variable @x is set to zero. + */ +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + +extern int __get_user_bad(void); +extern int __put_user_bad(void); + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \ + int __gu_err = 0; \ + \ + switch (size) { \ + case 1: __get_user_asm("ub", __gu_val, ptr, __gu_err); break; \ + case 2: __get_user_asm("uh", __gu_val, ptr, __gu_err); break; \ + case 4: __get_user_asm("w", __gu_val, ptr, __gu_err); break; \ + case 8: __get_user_asm("d", __gu_val, ptr, __gu_err); break; \ + default: __gu_err = __get_user_bad(); break; \ + } \ + \ + x = __gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __gu_val = (typeof(*(ptr)) __force)0; \ + const typeof(*(ptr)) __user * __gu_addr = (ptr); \ + int __gu_err = 0; \ + \ + if (access_ok(VERIFY_READ, __gu_addr, size)) { \ + switch (size) { \ + case 1: \ + __get_user_asm("ub", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 2: \ + __get_user_asm("uh", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 4: \ + __get_user_asm("w", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + case 8: \ + __get_user_asm("d", __gu_val, __gu_addr, \ + __gu_err); \ + break; \ + default: \ + __gu_err = __get_user_bad(); \ + break; \ + } \ + } else { \ + __gu_err = -EFAULT; \ + } \ + x = __gu_val; \ + __gu_err; \ +}) + +#define __get_user_asm(suffix, __gu_val, ptr, __gu_err) \ + asm volatile( \ + "1: ld." suffix " %1, %3 \n" \ + "2: \n" \ + " .section .fixup, \"ax\" \n" \ + "3: mov %0, %4 \n" \ + " rjmp 2b \n" \ + " .previous \n" \ + " .section __ex_table, \"a\" \n" \ + " .long 1b, 3b \n" \ + " .previous \n" \ + : "=r"(__gu_err), "=r"(__gu_val) \ + : "0"(__gu_err), "m"(*(ptr)), "i"(-EFAULT)) + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + switch (size) { \ + case 1: __put_user_asm("b", ptr, __pu_val, __pu_err); break; \ + case 2: __put_user_asm("h", ptr, __pu_val, __pu_err); break; \ + case 4: __put_user_asm("w", ptr, __pu_val, __pu_err); break; \ + case 8: __put_user_asm("d", ptr, __pu_val, __pu_err); break; \ + default: __pu_err = __put_user_bad(); break; \ + } \ + __pu_err; \ +}) + +#define __put_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + typeof(*(ptr)) __user *__pu_addr = (ptr); \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \ + switch (size) { \ + case 1: \ + __put_user_asm("b", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 2: \ + __put_user_asm("h", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 4: \ + __put_user_asm("w", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 8: \ + __put_user_asm("d", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + default: \ + __pu_err = __put_user_bad(); \ + break; \ + } \ + } else { \ + __pu_err = -EFAULT; \ + } \ + __pu_err; \ +}) + +#define __put_user_asm(suffix, ptr, __pu_val, __gu_err) \ + asm volatile( \ + "1: st." 
suffix " %1, %3 \n" \ + "2: \n" \ + " .section .fixup, \"ax\" \n" \ + "3: mov %0, %4 \n" \ + " rjmp 2b \n" \ + " .previous \n" \ + " .section __ex_table, \"a\" \n" \ + " .long 1b, 3b \n" \ + " .previous \n" \ + : "=r"(__gu_err), "=m"(*(ptr)) \ + : "0"(__gu_err), "r"(__pu_val), "i"(-EFAULT)) + +extern __kernel_size_t clear_user(void __user *addr, __kernel_size_t size); +extern __kernel_size_t __clear_user(void __user *addr, __kernel_size_t size); + +extern long strncpy_from_user(char *dst, const char __user *src, long count); +extern long __strncpy_from_user(char *dst, const char __user *src, long count); + +extern long strnlen_user(const char __user *__s, long __n); +extern long __strnlen_user(const char __user *__s, long __n); + +#define strlen_user(s) strnlen_user(s, ~0UL >> 1) + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +#endif /* __ASM_AVR32_UACCESS_H */ diff --git a/include/asm-avr32/ucontext.h b/include/asm-avr32/ucontext.h new file mode 100644 index 000000000000..ac7259c2a799 --- /dev/null +++ b/include/asm-avr32/ucontext.h @@ -0,0 +1,12 @@ +#ifndef __ASM_AVR32_UCONTEXT_H +#define __ASM_AVR32_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext * uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; +}; + +#endif /* __ASM_AVR32_UCONTEXT_H */ diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h new file mode 100644 index 000000000000..3042723fcbfd --- /dev/null +++ b/include/asm-avr32/unaligned.h @@ -0,0 +1,25 @@ +#ifndef __ASM_AVR32_UNALIGNED_H +#define __ASM_AVR32_UNALIGNED_H + +/* + * AVR32 can handle some unaligned accesses, depending on the + * implementation. The AVR32 AP implementation can handle unaligned + * words, but halfwords must be halfword-aligned, and doublewords must + * be word-aligned. + * + * TODO: Make all this CPU-specific and optimize. + */ + +#include <linux/string.h> + +/* Use memmove here, so gcc does not insert a __builtin_memcpy. */ + +#define get_unaligned(ptr) \ + ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) + +#define put_unaligned(val, ptr) \ + ({ __typeof__(*(ptr)) __tmp = (val); \ + memmove((ptr), &__tmp, sizeof(*(ptr))); \ + (void)0; }) + +#endif /* __ASM_AVR32_UNALIGNED_H */ diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h new file mode 100644 index 000000000000..1f528f92690d --- /dev/null +++ b/include/asm-avr32/unistd.h @@ -0,0 +1,387 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_AVR32_UNISTD_H +#define __ASM_AVR32_UNISTD_H + +/* + * This file contains the system call numbers. 
+ */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_umask 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_chown 16 +#define __NR_lchown 17 +#define __NR_lseek 18 +#define __NR__llseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount2 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 28 +#define __NR_utime 29 +#define __NR_stat 30 +#define __NR_fstat 31 +#define __NR_lstat 32 +#define __NR_access 33 +#define __NR_chroot 34 +#define __NR_sync 35 +#define __NR_fsync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_clone 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_getcwd 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_setfsuid 52 +#define __NR_setfsgid 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 56 +#define __NR_mremap 57 +#define __NR_setresuid 58 +#define __NR_getresuid 59 +#define __NR_setreuid 60 +#define __NR_setregid 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_rt_sigaction 67 +#define __NR_rt_sigreturn 68 +#define __NR_rt_sigprocmask 69 +#define __NR_rt_sigpending 70 +#define __NR_rt_sigtimedwait 71 +#define __NR_rt_sigqueueinfo 72 +#define __NR_rt_sigsuspend 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 /* SuS compliant getrlimit */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_fchdir 84 +#define __NR_readlink 85 +#define __NR_pread 86 +#define __NR_pwrite 87 +#define __NR_swapon 88 +#define __NR_reboot 89 +#define __NR_mmap2 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_wait4 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_vhangup 101 +#define __NR_sigaltstack 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_swapoff 106 +#define __NR_sysinfo 107 +#define __NR_ipc 108 +#define __NR_sendfile 109 +#define __NR_setdomainname 110 +#define __NR_uname 111 +#define __NR_adjtimex 112 +#define __NR_mprotect 113 +#define __NR_vfork 114 +#define __NR_init_module 115 +#define __NR_delete_module 116 +#define __NR_quotactl 117 +#define __NR_getpgid 118 +#define __NR_bdflush 119 +#define __NR_sysfs 120 +#define __NR_personality 121 +#define __NR_afs_syscall 122 /* Syscall for Andrew File System */ +#define __NR_getdents 123 +#define __NR_flock 124 +#define __NR_msync 125 +#define __NR_readv 126 +#define __NR_writev 127 +#define __NR_getsid 128 +#define __NR_fdatasync 129 +#define __NR__sysctl 130 +#define __NR_mlock 131 +#define __NR_munlock 132 +#define __NR_mlockall 133 +#define __NR_munlockall 134 +#define __NR_sched_setparam 135 +#define __NR_sched_getparam 136 +#define 
__NR_sched_setscheduler 137 +#define __NR_sched_getscheduler 138 +#define __NR_sched_yield 139 +#define __NR_sched_get_priority_max 140 +#define __NR_sched_get_priority_min 141 +#define __NR_sched_rr_get_interval 142 +#define __NR_nanosleep 143 +#define __NR_poll 144 +#define __NR_nfsservctl 145 +#define __NR_setresgid 146 +#define __NR_getresgid 147 +#define __NR_prctl 148 +#define __NR_socket 149 +#define __NR_bind 150 +#define __NR_connect 151 +#define __NR_listen 152 +#define __NR_accept 153 +#define __NR_getsockname 154 +#define __NR_getpeername 155 +#define __NR_socketpair 156 +#define __NR_send 157 +#define __NR_recv 158 +#define __NR_sendto 159 +#define __NR_recvfrom 160 +#define __NR_shutdown 161 +#define __NR_setsockopt 162 +#define __NR_getsockopt 163 +#define __NR_sendmsg 164 +#define __NR_recvmsg 165 +#define __NR_truncate64 166 +#define __NR_ftruncate64 167 +#define __NR_stat64 168 +#define __NR_lstat64 169 +#define __NR_fstat64 170 +#define __NR_pivot_root 171 +#define __NR_mincore 172 +#define __NR_madvise 173 +#define __NR_getdents64 174 +#define __NR_fcntl64 175 +#define __NR_gettid 176 +#define __NR_readahead 177 +#define __NR_setxattr 178 +#define __NR_lsetxattr 179 +#define __NR_fsetxattr 180 +#define __NR_getxattr 181 +#define __NR_lgetxattr 182 +#define __NR_fgetxattr 183 +#define __NR_listxattr 184 +#define __NR_llistxattr 185 +#define __NR_flistxattr 186 +#define __NR_removexattr 187 +#define __NR_lremovexattr 188 +#define __NR_fremovexattr 189 +#define __NR_tkill 190 +#define __NR_sendfile64 191 +#define __NR_futex 192 +#define __NR_sched_setaffinity 193 +#define __NR_sched_getaffinity 194 +#define __NR_capget 195 +#define __NR_capset 196 +#define __NR_io_setup 197 +#define __NR_io_destroy 198 +#define __NR_io_getevents 199 +#define __NR_io_submit 200 +#define __NR_io_cancel 201 +#define __NR_fadvise64 202 +#define __NR_exit_group 203 +#define __NR_lookup_dcookie 204 +#define __NR_epoll_create 205 +#define __NR_epoll_ctl 206 +#define __NR_epoll_wait 207 +#define __NR_remap_file_pages 208 +#define __NR_set_tid_address 209 + +#define __NR_timer_create 210 +#define __NR_timer_settime 211 +#define __NR_timer_gettime 212 +#define __NR_timer_getoverrun 213 +#define __NR_timer_delete 214 +#define __NR_clock_settime 215 +#define __NR_clock_gettime 216 +#define __NR_clock_getres 217 +#define __NR_clock_nanosleep 218 +#define __NR_statfs64 219 +#define __NR_fstatfs64 220 +#define __NR_tgkill 221 + /* 222 reserved for tux */ +#define __NR_utimes 223 +#define __NR_fadvise64_64 224 + +#define __NR_cacheflush 225 + +#define __NR_vserver 226 +#define __NR_mq_open 227 +#define __NR_mq_unlink 228 +#define __NR_mq_timedsend 229 +#define __NR_mq_timedreceive 230 +#define __NR_mq_notify 231 +#define __NR_mq_getsetattr 232 +#define __NR_kexec_load 233 +#define __NR_waitid 234 +#define __NR_add_key 235 +#define __NR_request_key 236 +#define __NR_keyctl 237 +#define __NR_ioprio_set 238 +#define __NR_ioprio_get 239 +#define __NR_inotify_init 240 +#define __NR_inotify_add_watch 241 +#define __NR_inotify_rm_watch 242 +#define __NR_openat 243 +#define __NR_mkdirat 244 +#define __NR_mknodat 245 +#define __NR_fchownat 246 +#define __NR_futimesat 247 +#define __NR_fstatat64 248 +#define __NR_unlinkat 249 +#define __NR_renameat 250 +#define __NR_linkat 251 +#define __NR_symlinkat 252 +#define __NR_readlinkat 253 +#define __NR_fchmodat 254 +#define __NR_faccessat 255 +#define __NR_pselect6 256 +#define __NR_ppoll 257 +#define __NR_unshare 258 +#define __NR_set_robust_list 259 +#define 
__NR_get_robust_list 260 +#define __NR_splice 261 +#define __NR_sync_file_range 262 +#define __NR_tee 263 +#define __NR_vmsplice 264 + +#define NR_syscalls 265 + + +/* + * AVR32 calling convention for system calls: + * - System call number in r8 + * - Parameters in r12 and downwards to r9 as well as r6 and r5. + * - Return value in r12 + */ + +/* + * user-visible error numbers are in the range -1 - -124: see + * <asm-generic/errno.h> + */ + +#define __syscall_return(type, res) do { \ + if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + errno = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + +#ifdef __KERNEL__ +#define __ARCH_WANT_IPC_PARSE_VERSION +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_WAITPID +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND +#endif + +#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__) + +#include <linux/types.h> +#include <linux/linkage.h> +#include <asm/signal.h> + +struct pt_regs; + +/* + * we need this inline - forking from kernel space will result + * in NO COPY ON WRITE (!!!), until an execve is executed. This + * is no problem, but for the stack. This is handled by not letting + * main() use the stack at all after fork(). Thus, no function + * calls - which means inline code for fork too, as otherwise we + * would use the stack upon exit from 'fork()'. + * + * Actually only pause and fork are needed inline, so that there + * won't be any messing with the stack from main(), but we define + * some others too. 
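+ *
+ * Editor's note, not part of the original patch: execve() below is one
+ * such inline -- it loads the syscall number into r8 and its three
+ * arguments into r12, r11 and r10 before issuing "scall", matching the
+ * calling convention documented earlier in this file.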
+ */ +static inline int execve(const char *file, char **argv, char **envp) +{ + register long scno asm("r8") = __NR_execve; + register long sc1 asm("r12") = (long)file; + register long sc2 asm("r11") = (long)argv; + register long sc3 asm("r10") = (long)envp; + int res; + + asm volatile("scall" + : "=r"(sc1) + : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3) + : "lr", "memory"); + res = sc1; + __syscall_return(int, res); +} + +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + struct pt_regs *regs); +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs); +asmlinkage int sys_pipe(unsigned long __user *filedes); +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset); +asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len); +asmlinkage int sys_fork(struct pt_regs *regs); +asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, + unsigned long parent_tidptr, + unsigned long child_tidptr, struct pt_regs *regs); +asmlinkage int sys_vfork(struct pt_regs *regs); +asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv, + char __user *__user *uenvp, struct pt_regs *regs); + +#endif + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall"); + +#endif /* __ASM_AVR32_UNISTD_H */ diff --git a/include/asm-avr32/user.h b/include/asm-avr32/user.h new file mode 100644 index 000000000000..060fb3acee49 --- /dev/null +++ b/include/asm-avr32/user.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2004-2006 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Note: We may not need these definitions for AVR32, as we don't + * support a.out. + */ +#ifndef __ASM_AVR32_USER_H +#define __ASM_AVR32_USER_H + +#include <linux/types.h> +#include <asm/ptrace.h> +#include <asm/page.h> + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd). The file contents are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. 
+ */ + +struct user_fpu_struct { + /* We have no FPU (yet) */ +}; + +struct user { + struct pt_regs regs; /* entire machine state */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long int signal; /* signal causing core dump */ + struct regs * u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_DATA_START_ADDR (u.start_data) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* __ASM_AVR32_USER_H */ diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h index 5d76c1c0d6c9..c94a7107019c 100644 --- a/include/asm-cris/pgtable.h +++ b/include/asm-cris/pgtable.h @@ -253,7 +253,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) { pmd_val(*pmdp) = _PAGE_TABLE | (unsigned long) ptep; } #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -271,7 +271,7 @@ static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address) #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h index 980ae1b0cd28..1f70d47148bd 100644 --- a/include/asm-frv/bitops.h +++ b/include/asm-frv/bitops.h @@ -157,23 +157,105 @@ static inline int __test_bit(int nr, const volatile void * addr) __constant_test_bit((nr),(addr)) : \ __test_bit((nr),(addr))) -#include <asm-generic/bitops/ffs.h> -#include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/find.h> -/* - * fls: find last bit set. +/** + * fls - find last bit set + * @x: the word to search + * + * This is defined the same way as ffs: + * - return 32..1 to indicate bit 31..0 most significant bit set + * - return 0 to indicate no bits set */ #define fls(x) \ ({ \ int bit; \ \ - asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x)); \ + asm(" subcc %1,gr0,gr0,icc0 \n" \ + " ckne icc0,cc4 \n" \ + " cscan.p %1,gr0,%0 ,cc4,#1 \n" \ + " csub %0,%0,%0 ,cc4,#0 \n" \ + " csub %2,%0,%0 ,cc4,#1 \n" \ + : "=&r"(bit) \ + : "r"(x), "r"(32) \ + : "icc0", "cc4" \ + ); \ \ - bit ? 
33 - bit : bit; \ + bit; \ }) -#include <asm-generic/bitops/fls64.h> +/** + * fls64 - find last bit set in a 64-bit value + * @n: the value to search + * + * This is defined the same way as ffs: + * - return 64..1 to indicate bit 63..0 most significant bit set + * - return 0 to indicate no bits set + */ +static inline __attribute__((const)) +int fls64(u64 n) +{ + union { + u64 ll; + struct { u32 h, l; }; + } _; + int bit, x, y; + + _.ll = n; + + asm(" subcc.p %3,gr0,gr0,icc0 \n" + " subcc %4,gr0,gr0,icc1 \n" + " ckne icc0,cc4 \n" + " ckne icc1,cc5 \n" + " norcr cc4,cc5,cc6 \n" + " csub.p %0,%0,%0 ,cc6,1 \n" + " orcr cc5,cc4,cc4 \n" + " andcr cc4,cc5,cc4 \n" + " cscan.p %3,gr0,%0 ,cc4,0 \n" + " setlos #64,%1 \n" + " cscan.p %4,gr0,%0 ,cc4,1 \n" + " setlos #32,%2 \n" + " csub.p %1,%0,%0 ,cc4,0 \n" + " csub %2,%0,%0 ,cc4,1 \n" + : "=&r"(bit), "=r"(x), "=r"(y) + : "0r"(_.h), "r"(_.l) + : "icc0", "icc1", "cc4", "cc5", "cc6" + ); + return bit; + +} + +/** + * ffs - find first bit set + * @x: the word to search + * + * - return 32..1 to indicate bit 31..0 most least significant bit set + * - return 0 to indicate no bits set + */ +static inline __attribute__((const)) +int ffs(int x) +{ + /* Note: (x & -x) gives us a mask that is the least significant + * (rightmost) 1-bit of the value in x. + */ + return fls(x & -x); +} + +/** + * __ffs - find first bit set + * @x: the word to search + * + * - return 31..0 to indicate bit 31..0 most least significant bit set + * - if no bits are set in x, the result is undefined + */ +static inline __attribute__((const)) +int __ffs(unsigned long x) +{ + int bit; + asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x & -x)); + return 31 - bit; +} + #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/hweight.h> diff --git a/include/asm-frv/cpu-irqs.h b/include/asm-frv/cpu-irqs.h index 5cd691e1f8c4..478f3498fcfe 100644 --- a/include/asm-frv/cpu-irqs.h +++ b/include/asm-frv/cpu-irqs.h @@ -14,36 +14,6 @@ #ifndef __ASSEMBLY__ -#include <asm/irq-routing.h> - -#define IRQ_BASE_CPU (NR_IRQ_ACTIONS_PER_GROUP * 0) - -/* IRQ IDs presented to drivers */ -enum { - IRQ_CPU__UNUSED = IRQ_BASE_CPU, - IRQ_CPU_UART0, - IRQ_CPU_UART1, - IRQ_CPU_TIMER0, - IRQ_CPU_TIMER1, - IRQ_CPU_TIMER2, - IRQ_CPU_DMA0, - IRQ_CPU_DMA1, - IRQ_CPU_DMA2, - IRQ_CPU_DMA3, - IRQ_CPU_DMA4, - IRQ_CPU_DMA5, - IRQ_CPU_DMA6, - IRQ_CPU_DMA7, - IRQ_CPU_EXTERNAL0, - IRQ_CPU_EXTERNAL1, - IRQ_CPU_EXTERNAL2, - IRQ_CPU_EXTERNAL3, - IRQ_CPU_EXTERNAL4, - IRQ_CPU_EXTERNAL5, - IRQ_CPU_EXTERNAL6, - IRQ_CPU_EXTERNAL7, -}; - /* IRQ to level mappings */ #define IRQ_GDBSTUB_LEVEL 15 #define IRQ_UART_LEVEL 13 @@ -82,6 +52,30 @@ enum { #define IRQ_XIRQ6_LEVEL 7 #define IRQ_XIRQ7_LEVEL 8 +/* IRQ IDs presented to drivers */ +#define IRQ_CPU__UNUSED IRQ_BASE_CPU +#define IRQ_CPU_UART0 (IRQ_BASE_CPU + IRQ_UART0_LEVEL) +#define IRQ_CPU_UART1 (IRQ_BASE_CPU + IRQ_UART1_LEVEL) +#define IRQ_CPU_TIMER0 (IRQ_BASE_CPU + IRQ_TIMER0_LEVEL) +#define IRQ_CPU_TIMER1 (IRQ_BASE_CPU + IRQ_TIMER1_LEVEL) +#define IRQ_CPU_TIMER2 (IRQ_BASE_CPU + IRQ_TIMER2_LEVEL) +#define IRQ_CPU_DMA0 (IRQ_BASE_CPU + IRQ_DMA0_LEVEL) +#define IRQ_CPU_DMA1 (IRQ_BASE_CPU + IRQ_DMA1_LEVEL) +#define IRQ_CPU_DMA2 (IRQ_BASE_CPU + IRQ_DMA2_LEVEL) +#define IRQ_CPU_DMA3 (IRQ_BASE_CPU + IRQ_DMA3_LEVEL) +#define IRQ_CPU_DMA4 (IRQ_BASE_CPU + IRQ_DMA4_LEVEL) +#define IRQ_CPU_DMA5 (IRQ_BASE_CPU + IRQ_DMA5_LEVEL) +#define IRQ_CPU_DMA6 (IRQ_BASE_CPU + IRQ_DMA6_LEVEL) +#define IRQ_CPU_DMA7 (IRQ_BASE_CPU + IRQ_DMA7_LEVEL) +#define IRQ_CPU_EXTERNAL0 (IRQ_BASE_CPU + IRQ_XIRQ0_LEVEL) 
+#define IRQ_CPU_EXTERNAL1 (IRQ_BASE_CPU + IRQ_XIRQ1_LEVEL) +#define IRQ_CPU_EXTERNAL2 (IRQ_BASE_CPU + IRQ_XIRQ2_LEVEL) +#define IRQ_CPU_EXTERNAL3 (IRQ_BASE_CPU + IRQ_XIRQ3_LEVEL) +#define IRQ_CPU_EXTERNAL4 (IRQ_BASE_CPU + IRQ_XIRQ4_LEVEL) +#define IRQ_CPU_EXTERNAL5 (IRQ_BASE_CPU + IRQ_XIRQ5_LEVEL) +#define IRQ_CPU_EXTERNAL6 (IRQ_BASE_CPU + IRQ_XIRQ6_LEVEL) +#define IRQ_CPU_EXTERNAL7 (IRQ_BASE_CPU + IRQ_XIRQ7_LEVEL) + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_CPU_IRQS_H */ diff --git a/include/asm-frv/hardirq.h b/include/asm-frv/hardirq.h index 7581b5a7559a..fc47515822a2 100644 --- a/include/asm-frv/hardirq.h +++ b/include/asm-frv/hardirq.h @@ -26,5 +26,10 @@ typedef struct { #error SMP not available on FR-V #endif /* CONFIG_SMP */ +extern atomic_t irq_err_count; +static inline void ack_bad_irq(int irq) +{ + atomic_inc(&irq_err_count); +} #endif diff --git a/include/asm-frv/irq-routing.h b/include/asm-frv/irq-routing.h deleted file mode 100644 index ac3ab900a1dc..000000000000 --- a/include/asm-frv/irq-routing.h +++ /dev/null @@ -1,70 +0,0 @@ -/* irq-routing.h: multiplexed IRQ routing - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ASM_IRQ_ROUTING_H -#define _ASM_IRQ_ROUTING_H - -#ifndef __ASSEMBLY__ - -#include <linux/spinlock.h> -#include <asm/irq.h> - -struct irq_source; -struct irq_level; - -/* - * IRQ action distribution sets - */ -struct irq_group { - int first_irq; /* first IRQ distributed here */ - void (*control)(struct irq_group *group, int index, int on); - - struct irqaction *actions[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ action chains */ - struct irq_source *sources[NR_IRQ_ACTIONS_PER_GROUP]; /* IRQ sources */ - int disable_cnt[NR_IRQ_ACTIONS_PER_GROUP]; /* disable counts */ -}; - -/* - * IRQ source manager - */ -struct irq_source { - struct irq_source *next; - struct irq_level *level; - const char *muxname; - volatile void __iomem *muxdata; - unsigned long irqmask; - - void (*doirq)(struct irq_source *source); -}; - -/* - * IRQ level management (per CPU IRQ priority / entry vector) - */ -struct irq_level { - int usage; - int disable_count; - unsigned long flags; /* current IRQF_DISABLED and IRQF_SHARED settings */ - spinlock_t lock; - struct irq_source *sources; -}; - -extern struct irq_level frv_irq_levels[16]; -extern struct irq_group *irq_groups[NR_IRQ_GROUPS]; - -extern void frv_irq_route(struct irq_source *source, int irqlevel); -extern void frv_irq_route_external(struct irq_source *source, int irq); -extern void frv_irq_set_group(struct irq_group *group); -extern void distribute_irqs(struct irq_group *group, unsigned long irqmask); -extern void route_cpu_irqs(void); - -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IRQ_ROUTING_H */ diff --git a/include/asm-frv/irq.h b/include/asm-frv/irq.h index 58b619215a50..8fefd6b827aa 100644 --- a/include/asm-frv/irq.h +++ b/include/asm-frv/irq.h @@ -1,6 +1,6 @@ /* irq.h: FRV IRQ definitions * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -12,32 +12,22 @@ #ifndef _ASM_IRQ_H_ #define _ASM_IRQ_H_ - -/* - * the system has an on-CPU PIC and another PIC on the FPGA and other PICs on other peripherals, - * so we do some routing in irq-routing.[ch] to reduce the number of false-positives seen by - * drivers - */ - /* this number is used when no interrupt has been assigned */ #define NO_IRQ (-1) -#define NR_IRQ_LOG2_ACTIONS_PER_GROUP 5 -#define NR_IRQ_ACTIONS_PER_GROUP (1 << NR_IRQ_LOG2_ACTIONS_PER_GROUP) -#define NR_IRQ_GROUPS 4 -#define NR_IRQS (NR_IRQ_ACTIONS_PER_GROUP * NR_IRQ_GROUPS) +#define NR_IRQS 48 +#define IRQ_BASE_CPU (0 * 16) +#define IRQ_BASE_FPGA (1 * 16) +#define IRQ_BASE_MB93493 (2 * 16) /* probe returns a 32-bit IRQ mask:-/ */ -#define MIN_PROBE_IRQ (NR_IRQS - 32) +#define MIN_PROBE_IRQ (NR_IRQS - 32) +#ifndef __ASSEMBLY__ static inline int irq_canonicalize(int irq) { return irq; } - -extern void disable_irq_nosync(unsigned int irq); -extern void disable_irq(unsigned int irq); -extern void enable_irq(unsigned int irq); - +#endif #endif /* _ASM_IRQ_H_ */ diff --git a/include/asm-frv/mb93091-fpga-irqs.h b/include/asm-frv/mb93091-fpga-irqs.h index 341bfc52a0eb..19778c5ba9d6 100644 --- a/include/asm-frv/mb93091-fpga-irqs.h +++ b/include/asm-frv/mb93091-fpga-irqs.h @@ -12,11 +12,9 @@ #ifndef _ASM_MB93091_FPGA_IRQS_H #define _ASM_MB93091_FPGA_IRQS_H -#ifndef __ASSEMBLY__ - -#include <asm/irq-routing.h> +#include <asm/irq.h> -#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1) +#ifndef __ASSEMBLY__ /* IRQ IDs presented to drivers */ enum { diff --git a/include/asm-frv/mb93093-fpga-irqs.h b/include/asm-frv/mb93093-fpga-irqs.h index 1e0f11c2fcdb..590266b1a6d3 100644 --- a/include/asm-frv/mb93093-fpga-irqs.h +++ b/include/asm-frv/mb93093-fpga-irqs.h @@ -12,11 +12,9 @@ #ifndef _ASM_MB93093_FPGA_IRQS_H #define _ASM_MB93093_FPGA_IRQS_H -#ifndef __ASSEMBLY__ - -#include <asm/irq-routing.h> +#include <asm/irq.h> -#define IRQ_BASE_FPGA (NR_IRQ_ACTIONS_PER_GROUP * 1) +#ifndef __ASSEMBLY__ /* IRQ IDs presented to drivers */ enum { diff --git a/include/asm-frv/mb93493-irqs.h b/include/asm-frv/mb93493-irqs.h index 15096e731325..82c7aeddd333 100644 --- a/include/asm-frv/mb93493-irqs.h +++ b/include/asm-frv/mb93493-irqs.h @@ -12,11 +12,9 @@ #ifndef _ASM_MB93493_IRQS_H #define _ASM_MB93493_IRQS_H -#ifndef __ASSEMBLY__ - -#include <asm/irq-routing.h> +#include <asm/irq.h> -#define IRQ_BASE_MB93493 (NR_IRQ_ACTIONS_PER_GROUP * 2) +#ifndef __ASSEMBLY__ /* IRQ IDs presented to drivers */ enum { diff --git a/include/asm-frv/mb93493-regs.h b/include/asm-frv/mb93493-regs.h index c54aa9d14468..8a1f6aac8cf1 100644 --- a/include/asm-frv/mb93493-regs.h +++ b/include/asm-frv/mb93493-regs.h @@ -15,6 +15,7 @@ #include <asm/mb-regs.h> #include <asm/mb93493-irqs.h> +#define __addr_MB93493(X) ((volatile unsigned long *)(__region_CS3 + (X))) #define __get_MB93493(X) ({ *(volatile unsigned long *)(__region_CS3 + (X)); }) #define __set_MB93493(X,V) \ @@ -26,6 +27,7 @@ do { \ #define __set_MB93493_STSR(X,V) __set_MB93493(0x3c0 + (X) * 4, (V)) #define MB93493_STSR_EN +#define __addr_MB93493_IQSR(X) __addr_MB93493(0x3d0 + (X) * 4) #define __get_MB93493_IQSR(X) __get_MB93493(0x3d0 + (X) * 4) #define __set_MB93493_IQSR(X,V) __set_MB93493(0x3d0 + (X) * 4, (V)) diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 7af7485e889e..2fb3c6f05e03 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -217,7 +217,7 
@@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) } #define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) /* * allocating and freeing a pud is trivial: the 1-entry pud is @@ -246,7 +246,7 @@ static inline void pud_clear(pud_t *pud) { } #define set_pud(pudptr, pudval) set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval }) #define pud_page(pud) (pmd_page((pmd_t){ pud })) -#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud })) +#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud })) /* * (pmds are folded into pgds so this doesn't get actually called, @@ -362,7 +362,7 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address) #define pmd_bad(x) (pmd_val(x) & xAMPRx_SS) #define pmd_clear(xp) do { __set_pmd(xp, 0); } while(0) -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #ifndef CONFIG_DISCONTIGMEM @@ -458,7 +458,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 68c6fea994d9..7b88d3931e34 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -21,6 +21,10 @@ #define pud_present(pud) 1 #define pud_ERROR(pud) do { } while (0) #define pud_clear(pud) pgd_clear(pud) +#define pud_val(pud) pgd_val(pud) +#define pud_populate(mm, pud, pmd) pgd_populate(mm, pud, pmd) +#define pud_page(pud) pgd_page(pud) +#define pud_page_vaddr(pud) pgd_page_vaddr(pud) #undef pud_free_tlb #define pud_free_tlb(tlb, x) do { } while (0) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index e160e04290fb..6d45ee5472af 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -14,7 +14,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu])) +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) #define __get_cpu_var(var) per_cpu(var, smp_processor_id()) #define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id()) diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index c8d53ba20e19..29ff5d84d8c3 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -47,7 +47,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) #define __pmd(x) ((pmd_t) { __pud(x) } ) #define pud_page(pud) (pmd_page((pmd_t){ pud })) -#define pud_page_kernel(pud) (pmd_page_kernel((pmd_t){ pud })) +#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud })) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 82e29f0ce467..566464500558 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -44,7 +44,7 @@ static 
inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define __pud(x) ((pud_t) { __pgd(x) } ) #define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_kernel(pgd) (pud_page_kernel((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) /* * allocating and freeing a pud is trivial: the 1-entry pud is diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index c2059a3a0621..349260cd86ed 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1,6 +1,8 @@ #ifndef _ASM_GENERIC_PGTABLE_H #define _ASM_GENERIC_PGTABLE_H +#ifndef __ASSEMBLY__ + #ifndef __HAVE_ARCH_PTEP_ESTABLISH /* * Establish a new mapping: @@ -188,7 +190,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres }) #endif -#ifndef __ASSEMBLY__ /* * When walking page tables, we usually want to skip any p?d_none entries; * and any p?d_bad entries - reporting the error before resetting to none. diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db5a3732f106..253ae1328271 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -194,3 +194,6 @@ .stab.index 0 : { *(.stab.index) } \ .stab.indexstr 0 : { *(.stab.indexstr) } \ .comment 0 : { *(.comment) } + +#define NOTES \ + .notes : { *(.note.*) } :note diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index b75a348d0c1c..147e4ac1ebf0 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += debugreg.h header-y += ldt.h +header-y += ptrace-abi.h header-y += ucontext.h unifdef-y += mtrr.h diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 9cf20cacf76e..576ae01d71c8 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -21,8 +21,7 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); WARN_ON(size == 0); flush_write_buffers(); return virt_to_phys(ptr); @@ -32,8 +31,7 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); } static inline int @@ -42,8 +40,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, { int i; - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); WARN_ON(nents == 0 || sg[0].length == 0); for (i = 0; i < nents; i++ ) { diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index a48cc3f7ccc6..02428cb36621 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,7 +19,11 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. 
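The new NOTES macro in vmlinux.lds.h collects all ELF .note.* sections into a dedicated note program header; it only links if the architecture's script also declares that header. A hypothetical sketch of the expected arrangement in an arch vmlinux.lds.S (the section list is elided):

    /* Sketch of arch linker-script usage (hypothetical). */
    PHDRS {
            text PT_LOAD FLAGS(5);          /* R_E */
            note PT_NOTE FLAGS(0);          /* note segment */
    }
    SECTIONS {
            /* ... the usual text/data output sections ... */
            NOTES           /* gathers *(.note.*) into the :note header */
            /* ... */
    }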
*/ -#define __FIXADDR_TOP 0xfffff000 +#ifndef CONFIG_COMPAT_VDSO +extern unsigned long __FIXADDR_TOP; +#else +#define __FIXADDR_TOP 0xfffff000 +#endif #ifndef __ASSEMBLY__ #include <linux/kernel.h> @@ -93,6 +97,7 @@ enum fixed_addresses { extern void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags); +extern void reserve_top_address(unsigned long reserve); #define set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 22cb07cc8f32..61b073322006 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void) } extern int early_pfn_to_nid(unsigned long pfn); +extern void numa_kva_reserve(void); #else /* !CONFIG_NUMA */ + #define get_memcfg_numa get_memcfg_numa_flat #define get_zholes_size(n) (0) + +static inline void numa_kva_reserve(void) +{ +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 2756d4b04c27..201c86a6711e 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -21,8 +21,9 @@ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR #define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) -#define pte_same(a, b) ((a).pte_low == (b).pte_low) + #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) #define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index dccb1b3337ad..0d899173232e 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -77,7 +77,7 @@ static inline void pud_clear (pud_t * pud) { } #define pud_page(pud) \ ((struct page *) __va(pud_val(pud) & PAGE_MASK)) -#define pud_page_kernel(pud) \ +#define pud_page_vaddr(pud) \ ((unsigned long) __va(pud_val(pud) & PAGE_MASK)) @@ -105,6 +105,7 @@ static inline void pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t res; @@ -117,6 +118,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return res; } +#define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) { return a.pte_low == b.pte_low && a.pte_high == b.pte_high; diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 09697fec3d2b..0dc051a8078b 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -246,6 +246,23 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p # include <asm/pgtable-2level.h> #endif +/* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. 
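With __FIXADDR_TOP now a variable (unless CONFIG_COMPAT_VDSO pins it at 0xfffff000), the top of kernel virtual address space can be pushed down early in boot via the new reserve_top_address() hook. A minimal sketch, assuming the call must run before the first set_fixmap():

    /* Sketch: in early arch setup, leave a 16 MB hole at the very
     * top of kernel virtual space (e.g. for a hypervisor).  Must
     * precede any fixmap use. */
    reserve_top_address(16 * 1024 * 1024);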
+ */ +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ +do { \ + if (dirty) { \ + (ptep)->pte_low = (entry).pte_low; \ + flush_tlb_page(vma, address); \ + } \ +} while (0) + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { if (!pte_dirty(*ptep)) @@ -253,6 +270,7 @@ static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); } +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { if (!pte_young(*ptep)) @@ -260,6 +278,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); } +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; @@ -272,6 +291,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long return pte; } +#define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); @@ -364,11 +384,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* @@ -411,23 +431,8 @@ extern void noexec_setup(const char *str); /* * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. - * - * Also, we only update the dirty/accessed state if we set - * the dirty bit by hand in the kernel, since the hardware - * will do the accessed bit for us, and we don't want to - * race with other CPU's that might be updating the dirty - * bit at the same time. 
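Defining each __HAVE_ARCH_* marker right next to the overriding definition, rather than in a list at the bottom of the header, keeps asm-generic/pgtable.h from installing its fallbacks and is harder to get out of sync. For illustration, the accessed-bit helper is the primitive that reclaim-style page aging builds on; a hedged sketch with an invented name:

    /* Sketch: has this pte been referenced since we last looked?
     * Clears the hardware accessed bit as a side effect. */
    static int pte_was_referenced(struct vm_area_struct *vma,
                                  unsigned long address, pte_t *ptep)
    {
            return ptep_test_and_clear_young(vma, address, ptep);
    }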
*/ #define update_mmu_cache(vma,address,pte) do { } while (0) -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ - do { \ - if (__dirty) { \ - (__ptep)->pte_low = (__entry).pte_low; \ - flush_tlb_page(__vma, __address); \ - } \ - } while (0) - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_FLATMEM @@ -441,12 +446,6 @@ extern void noexec_setup(const char *str); #define GET_IOSPACE(pfn) 0 #define GET_PFN(pfn) (pfn) -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define __HAVE_ARCH_PTE_SAME #include <asm-generic/pgtable.h> #endif /* _I386_PGTABLE_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index b32346d62e10..2277127696d2 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -143,6 +143,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + /* * Generic CPUID function * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx @@ -150,24 +162,18 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} */ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op), "c"(0)); + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); } /* Some CPUID calls want 'count' to be placed in ecx */ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) + int *edx) { - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op), "c" (count)); + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); } /* @@ -175,42 +181,30 @@ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, */ static inline unsigned int cpuid_eax(unsigned int op) { - unsigned int eax; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); + cpuid(op, &eax, &ebx, &ecx, &edx); return eax; } static inline unsigned int cpuid_ebx(unsigned int op) { - unsigned int eax, ebx; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); + cpuid(op, &eax, &ebx, &ecx, &edx); return ebx; } static inline unsigned int cpuid_ecx(unsigned int op) { - unsigned int eax, ecx; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); + cpuid(op, &eax, &ebx, &ecx, &edx); return ecx; } static inline unsigned int cpuid_edx(unsigned int op) { - unsigned int eax, edx; + unsigned int eax, ebx, ecx, edx; - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); + cpuid(op, &eax, &ebx, &ecx, &edx); return edx; } diff --git a/include/asm-i386/ptrace-abi.h b/include/asm-i386/ptrace-abi.h new file mode 100644 index 000000000000..a44901817a26 --- /dev/null +++ b/include/asm-i386/ptrace-abi.h @@ -0,0 +1,39 @@ +#ifndef I386_PTRACE_ABI_H +#define I386_PTRACE_ABI_H + 
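Funnelling all the cpuid helpers above through one __cpuid() primitive removes four near-identical asm blocks and their fragile clobber lists. A sketch of the consolidated interface in use (leaf 0 returns the vendor string in ebx/edx/ecx order; memcpy is from linux/string.h):

    /* Sketch: read the 12-byte CPU vendor identifier. */
    static void get_vendor(char vendor[13])
    {
            unsigned int eax, ebx, ecx, edx;

            cpuid(0, &eax, &ebx, &ecx, &edx);
            memcpy(vendor + 0, &ebx, 4);    /* e.g. "Genu" */
            memcpy(vendor + 4, &edx, 4);    /* e.g. "ineI" */
            memcpy(vendor + 8, &ecx, 4);    /* e.g. "ntel" */
            vendor[12] = '\0';
    }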
+#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#define PTRACE_OLDSETOPTIONS 21 + +#define PTRACE_GET_THREAD_AREA 25 +#define PTRACE_SET_THREAD_AREA 26 + +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + +#endif diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index f324c53b6f9a..1910880fcd40 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -1,24 +1,7 @@ #ifndef _I386_PTRACE_H #define _I386_PTRACE_H -#define EBX 0 -#define ECX 1 -#define EDX 2 -#define ESI 3 -#define EDI 4 -#define EBP 5 -#define EAX 6 -#define DS 7 -#define ES 8 -#define FS 9 -#define GS 10 -#define ORIG_EAX 11 -#define EIP 12 -#define CS 13 -#define EFL 14 -#define UESP 15 -#define SS 16 -#define FRAME_SIZE 17 +#include <asm/ptrace-abi.h> /* this struct defines the way the registers are stored on the stack during a system call. */ @@ -41,22 +24,6 @@ struct pt_regs { int xss; }; -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_GETFPXREGS 18 -#define PTRACE_SETFPXREGS 19 - -#define PTRACE_OLDSETOPTIONS 21 - -#define PTRACE_GET_THREAD_AREA 25 -#define PTRACE_SET_THREAD_AREA 26 - -#define PTRACE_SYSEMU 31 -#define PTRACE_SYSEMU_SINGLESTEP 32 - #ifdef __KERNEL__ #include <asm/vm86.h> diff --git a/include/asm-i386/sync_bitops.h b/include/asm-i386/sync_bitops.h new file mode 100644 index 000000000000..c94d51c993ee --- /dev/null +++ b/include/asm-i386/sync_bitops.h @@ -0,0 +1,156 @@ +#ifndef _I386_SYNC_BITOPS_H +#define _I386_SYNC_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#define ADDR (*(volatile long *) addr) + +/** + * sync_set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writting portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_set_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__("lock; btsl %1,%0" + :"+m" (ADDR) + :"Ir" (nr) + : "memory"); +} + +/** + * sync_clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * sync_clear_bit() is atomic and may not be reordered. 
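Moving the register indices and ptrace request numbers into ptrace-abi.h gives userspace and UML a header with no kernel-only baggage. The indices are word offsets into the user area, so the classic debugger idiom scales them by the word size; a hypothetical userspace sketch (EIP comes from <asm/ptrace-abi.h> via <asm/ptrace.h>):

    /* Sketch (userspace): fetch a stopped child's instruction pointer.
     * EIP is a word index; PTRACE_PEEKUSER wants a byte offset. */
    #include <sys/ptrace.h>
    #include <sys/types.h>

    long read_child_eip(pid_t pid)
    {
            return ptrace(PTRACE_PEEKUSER, pid,
                          (void *)(EIP * sizeof(long)), NULL);
    }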
However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void sync_clear_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__("lock; btrl %1,%0" + :"+m" (ADDR) + :"Ir" (nr) + : "memory"); +} + +/** + * sync_change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. It may be + * reordered on other architectures than x86. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void sync_change_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__("lock; btcl %1,%0" + :"+m" (ADDR) + :"Ir" (nr) + : "memory"); +} + +/** + * sync_test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It may be reordered on other architectures than x86. + * It also implies a memory barrier. + */ +static inline int sync_test_and_set_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__("lock; btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * sync_test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It can be reorderdered on other architectures other than x86. + * It also implies a memory barrier. + */ +static inline int sync_test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__("lock; btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * sync_test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int sync_test_and_change_bit(int nr, volatile unsigned long* addr) +{ + int oldbit; + + __asm__ __volatile__("lock; btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __always_inline int sync_const_test_bit(int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *)addr)[nr >> 5])) != 0; +} + +static inline int sync_var_test_bit(int nr, const volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define sync_test_bit(nr,addr) \ + (__builtin_constant_p(nr) ? 
\ + sync_constant_test_bit((nr),(addr)) : \ + sync_var_test_bit((nr),(addr))) + +#undef ADDR + +#endif /* _I386_SYNC_BITOPS_H */ diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index 098bcee94e38..a6dabbcd6e6a 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define sync_cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return old; } +/* + * Always use locked operations when touching memory shared with a + * hypervisor, since the system may be SMP even if the guest kernel + * isn't. + */ +static inline unsigned long __sync_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. It may be necessary to diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h index e5a8260593a5..e0a1d173e42d 100644 --- a/include/asm-ia64/numa.h +++ b/include/asm-ia64/numa.h @@ -64,6 +64,10 @@ extern int paddr_to_nid(unsigned long paddr); #define local_nodeid (cpu_to_node_map[smp_processor_id()]) +extern void map_cpu_to_node(int cpu, int nid); +extern void unmap_cpu_from_node(int cpu, int nid); + + #else /* !CONFIG_NUMA */ #define paddr_to_nid(addr) 0 diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 228981cadf8f..553182747722 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -275,21 +275,23 @@ ia64_phys_addr_valid (unsigned long addr) #define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) #define pmd_present(pmd) (pmd_val(pmd) != 0UL) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) #define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) #define pud_present(pud) (pud_val(pud) != 0UL) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) +#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) #ifdef CONFIG_PGTABLE_4 #define pgd_none(pgd) (!pgd_val(pgd)) #define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) 
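The sync_ variants exist because a uniprocessor guest can still share memory with other physical CPUs through a hypervisor, so the lock prefix must never be compiled away. (Note, incidentally, that the helper above is defined as sync_const_test_bit while the sync_test_bit() macro calls sync_constant_test_bit(); one of the two spellings has to give.) A hedged sketch of the intended use of the matching sync_cmpxchg(), with an invented shared-slot layout:

    /* Sketch: claim a slot in a page shared with the hypervisor.
     * Layout is invented for illustration: 0 = free, 1 = taken.
     * Locked even on UP guests, hence sync_cmpxchg. */
    static int claim_shared_slot(volatile unsigned short *slot)
    {
            return sync_cmpxchg(slot, 0, 1) == 0;
    }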
+#define pgd_page(pgd) virt_to_page((pgd_val(pgd) + PAGE_OFFSET)) #endif /* @@ -360,19 +362,19 @@ pgd_offset (struct mm_struct *mm, unsigned long address) #ifdef CONFIG_PGTABLE_4 /* Find an entry in the second-level page table.. */ #define pud_offset(dir,addr) \ - ((pud_t *) pgd_page(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #endif /* Find an entry in the third-level page table.. */ #define pmd_offset(dir,addr) \ - ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) /* * Find an entry in the third-level page table. This looks more complicated than it * should be because some platforms place page tables in high memory. */ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) +#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr) #define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 719ff309ce09..74bde1c2bb1a 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -122,8 +122,6 @@ extern void __init smp_build_cpu_map(void); extern void __init init_smp_config (void); extern void smp_do_timer (struct pt_regs *regs); -extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, - int retry, int wait); extern void smp_send_reschedule (int cpu); extern void lock_ipi_calllock(void); extern void unlock_ipi_calllock(void); diff --git a/include/asm-m32r/pgtable-2level.h b/include/asm-m32r/pgtable-2level.h index be0f167e344a..6a674e3d37a2 100644 --- a/include/asm-m32r/pgtable-2level.h +++ b/include/asm-m32r/pgtable-2level.h @@ -52,9 +52,13 @@ static inline int pgd_present(pgd_t pgd) { return 1; } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ +#define pgd_page_vaddr(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#ifndef CONFIG_DISCONTIGMEM +#define pgd_page(pgd) (mem_map + ((pgd_val(pgd) >> PAGE_SHIFT) - PFN_BASE)) +#endif /* !CONFIG_DISCONTIGMEM */ + static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h index 1983b7f4527a..1c15ba7ce319 100644 --- a/include/asm-m32r/pgtable.h +++ b/include/asm-m32r/pgtable.h @@ -336,7 +336,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) pmd_val(*pmdp) = (((unsigned long) ptep) & PAGE_MASK); } -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #ifndef CONFIG_DISCONTIGMEM @@ -358,7 +358,7 @@ static inline void pmd_set(pmd_t * pmdp, pte_t * ptep) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *)pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h index 
1ccc7338a54b..61e4406ed96a 100644 --- a/include/asm-m68k/motorola_pgtable.h +++ b/include/asm-m68k/motorola_pgtable.h @@ -150,6 +150,7 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp) #define pgd_bad(pgd) ((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE) #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_TABLE) #define pgd_clear(pgdp) ({ pgd_val(*pgdp) = 0; }) +#define pgd_page(pgd) (mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT)) #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h index d5b38a247e5a..eeb0c3115b6a 100644 --- a/include/asm-mips/mach-au1x00/au1xxx_dbdma.h +++ b/include/asm-mips/mach-au1x00/au1xxx_dbdma.h @@ -316,7 +316,7 @@ typedef struct dbdma_chan_config { au1x_ddma_desc_t *chan_desc_base; au1x_ddma_desc_t *get_ptr, *put_ptr, *cur_ptr; void *chan_callparam; - void (*chan_callback)(int, void *, struct pt_regs *); + void (*chan_callback)(int, void *); } chan_tab_t; #define DEV_FLAGS_INUSE (1 << 0) @@ -334,8 +334,8 @@ typedef struct dbdma_chan_config { * meaningful name. The 'callback' is called during dma completion * interrupt. */ -u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, - void (*callback)(int, void *, struct pt_regs *), void *callparam); +extern u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid, + void (*callback)(int, void *), void *callparam); #define DBDMA_MEM_CHAN DSCR_CMD0_ALWAYS diff --git a/include/asm-mips/pgtable-32.h b/include/asm-mips/pgtable-32.h index 4b26d8528133..d20f2e9b28be 100644 --- a/include/asm-mips/pgtable-32.h +++ b/include/asm-mips/pgtable-32.h @@ -156,9 +156,9 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset(dir, address) \ - ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address)) + ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) diff --git a/include/asm-mips/pgtable-64.h b/include/asm-mips/pgtable-64.h index e3db93212eab..c59a1e21f5b0 100644 --- a/include/asm-mips/pgtable-64.h +++ b/include/asm-mips/pgtable-64.h @@ -178,24 +178,26 @@ static inline void pud_clear(pud_t *pudp) /* to find an entry in a page-table-directory */ #define pgd_offset(mm,addr) ((mm)->pgd + pgd_index(addr)) -static inline unsigned long pud_page(pud_t pud) +static inline unsigned long pud_page_vaddr(pud_t pud) { return pud_val(pud); } +#define pud_phys(pud) (pud_val(pud) - PAGE_OFFSET) +#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) /* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address) { - return (pmd_t *) pud_page(*pud) + pmd_index(address); + return (pmd_t *) pud_page_vaddr(*pud) + pmd_index(address); } /* Find an entry in the third-level page table.. 
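The Au1xxx DBDMA completion callback loses its pt_regs argument, in line with the tree-wide change to interrupt-handler prototypes. A sketch of a driver-side callback under the new two-argument signature (all names invented; complete() is from linux/completion.h):

    /* Sketch: DBDMA completion callback, new form. */
    static void my_dma_done(int irq, void *callparam)
    {
            struct my_dev *dev = callparam;

            complete(&dev->dma_done);       /* unblock the waiter */
    }

    /* Registration is unchanged apart from the callback type: */
    chan = au1xxx_dbdma_chan_alloc(srcid, destid, my_dma_done, dev);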
*/ #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset(dir, address) \ - ((pte_t *) (pmd_page_kernel(*dir)) + __pte_offset(address)) + ((pte_t *) (pmd_page_vaddr(*dir)) + __pte_offset(address)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + __pte_offset(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address)) #define pte_offset_map_nested(dir, address) \ diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index a36ca1be17f2..1ca4d1e185c7 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -87,7 +87,7 @@ extern void paging_init(void); */ #define pmd_phys(pmd) (pmd_val(pmd) - PAGE_OFFSET) #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) -#define pmd_page_kernel(pmd) pmd_val(pmd) +#define pmd_page_vaddr(pmd) pmd_val(pmd) #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h index 5066c54dae0a..c0b61e0d1497 100644 --- a/include/asm-parisc/pgtable.h +++ b/include/asm-parisc/pgtable.h @@ -303,7 +303,8 @@ static inline void pmd_clear(pmd_t *pmd) { #if PT_NLEVELS == 3 -#define pgd_page(pgd) ((unsigned long) __va(pgd_address(pgd))) +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) +#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) /* For 64 bit we have three level tables */ @@ -382,7 +383,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_address(pmd))) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_address(pmd))) #define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd))) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) @@ -400,7 +401,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #if PT_NLEVELS == 3 #define pmd_offset(dir,address) \ -((pmd_t *) pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) +((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) #else #define pmd_offset(dir,addr) ((pmd_t *) dir) #endif @@ -408,7 +409,7 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) /* Find an entry in the third-level page table.. 
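The rename that runs through all of these headers draws a long-overdue distinction: p?d_page() yields the struct page of the next-level table, p?d_page_vaddr() its kernel virtual address. A sketch of a full four-level walk in the new spelling (p?d_none/p?d_bad and huge-page checks elided for brevity):

    /* Sketch: walk to the pte for 'addr' (no validity checks). */
    static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
    {
            pgd_t *pgd = pgd_offset(mm, addr);
            pud_t *pud = pud_offset(pgd, addr);  /* via pgd_page_vaddr() */
            pmd_t *pmd = pmd_offset(pud, addr);  /* via pud_page_vaddr() */

            return pte_offset_kernel(pmd, addr); /* via pmd_page_vaddr() */
    }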
*/ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) \ - ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address)) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h index e7036155672e..345d9b07b3e2 100644 --- a/include/asm-powerpc/pgtable-4k.h +++ b/include/asm-powerpc/pgtable-4k.h @@ -88,10 +88,11 @@ #define pgd_bad(pgd) (pgd_val(pgd) == 0) #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) -#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) #define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page(*(pgdp))) + \ + (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index 8dbf5ad8150f..10f52743f4ff 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h @@ -196,8 +196,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) || (pmd_val(pmd) & PMD_BAD_BITS)) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) -#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) #define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) #define pud_none(pud) (!pud_val(pud)) @@ -205,7 +205,8 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) || (pud_val(pud) & PUD_BAD_BITS)) #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) -#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS) +#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) +#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) @@ -219,10 +220,10 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) #define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) #define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index 51fa7c662917..b1fdbf40dba2 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -526,7 +526,7 @@ static inline int pgd_bad(pgd_t pgd) { return 0; } static inline int pgd_present(pgd_t pgd) { return 1; } #define pgd_clear(xp) do { } while (0) -#define pgd_page(pgd) \ +#define pgd_page_vaddr(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) /* @@ -720,12 +720,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, 
unsigned long pfn, * of the pte page. -- paulus */ #ifndef CONFIG_BOOKE -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) \ (mem_map + (pmd_val(pmd) >> PAGE_SHIFT)) #else -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) (pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) \ (mem_map + (__pa(pmd_val(pmd)) >> PAGE_SHIFT)) @@ -748,7 +748,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, addr) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir, addr) \ ((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr)) #define pte_offset_map_nested(dir, addr) \ diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index 28b3517e787c..495ad99c7635 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -15,18 +15,20 @@ */ #if defined(__s390x__) && defined(MODULE) -#define __reloc_hide(var,offset) \ - (*({ unsigned long *__ptr; \ - asm ( "larl %0,per_cpu__"#var"@GOTENT" \ - : "=a" (__ptr) : "X" (per_cpu__##var) ); \ - (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) +#define __reloc_hide(var,offset) (*({ \ + extern int simple_indentifier_##var(void); \ + unsigned long *__ptr; \ + asm ( "larl %0,per_cpu__"#var"@GOTENT" \ + : "=a" (__ptr) : "X" (per_cpu__##var) ); \ + (typeof(&per_cpu__##var))((*__ptr) + (offset)); })) #else -#define __reloc_hide(var, offset) \ - (*({ unsigned long __ptr; \ - asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ - (typeof(&per_cpu__##var)) (__ptr + (offset)); })) +#define __reloc_hide(var, offset) (*({ \ + extern int simple_indentifier_##var(void); \ + unsigned long __ptr; \ + asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ + (typeof(&per_cpu__##var)) (__ptr + (offset)); })) #endif diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 1a07028d575e..e965309fedac 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -664,11 +664,13 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_page_kernel(pmd) (pmd_val(pmd) & PAGE_MASK) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) #define pmd_page(pmd) (mem_map+(pmd_val(pmd) >> PAGE_SHIFT)) -#define pgd_page_kernel(pgd) (pgd_val(pgd) & PAGE_MASK) +#define pgd_page_vaddr(pgd) (pgd_val(pgd) & PAGE_MASK) + +#define pgd_page(pgd) (mem_map+(pgd_val(pgd) >> PAGE_SHIFT)) /* to find an entry in a page-table-directory */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -690,14 +692,14 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) /* Find an entry in the second-level page table.. */ #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pmd_offset(dir,addr) \ - ((pmd_t *) pgd_page_kernel(*(dir)) + pmd_index(addr)) + ((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(addr)) #endif /* __s390x__ */ /* Find an entry in the third-level page table.. 
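Note that only pte_offset_kernel() changes spelling here; pte_offset_map() still goes through pmd_page() plus kmap_atomic() on configurations where pte pages can live in highmem, which is why every map must be paired with an unmap. The canonical pattern, as a sketch:

    /* Sketch: pte pages may be in highmem, so map, copy out, unmap. */
    pte_t *ptep = pte_offset_map(pmd, addr);
    pte_t entry = *ptep;

    pte_unmap(ptep);        /* drops the kmap_atomic slot, if any */
    /* ... work with 'entry' ... */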
*/ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) \ - ((pte_t *) pmd_page_kernel(*(pmd)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address)) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index a3a4e5fd30d7..578c2209fa76 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -337,6 +337,8 @@ struct notifier_block; int register_idle_notifier(struct notifier_block *nb); int unregister_idle_notifier(struct notifier_block *nb); +#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL + #endif /* diff --git a/include/asm-sh/pgtable-2level.h b/include/asm-sh/pgtable-2level.h index b0528aa3cb1f..b525db6f61c6 100644 --- a/include/asm-sh/pgtable-2level.h +++ b/include/asm-sh/pgtable-2level.h @@ -50,9 +50,12 @@ static inline void pgd_clear (pgd_t * pgdp) { } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ +#define pgd_page_vaddr(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) \ + (phys_to_page(pgd_val(pgd))) + static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h index dcd23a03683d..40d41a78041e 100644 --- a/include/asm-sh/pgtable.h +++ b/include/asm-sh/pgtable.h @@ -225,7 +225,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; } -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) \ @@ -242,7 +242,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) #define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address) #define pte_unmap(pte) do { } while (0) diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h index 54c7821893f5..6b97c4cb1d64 100644 --- a/include/asm-sh64/pgtable.h +++ b/include/asm-sh64/pgtable.h @@ -190,7 +190,9 @@ static inline int pgd_bad(pgd_t pgd) { return 0; } #endif -#define pgd_page(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK)) +#define pgd_page_vaddr(pgd_entry) ((unsigned long) (pgd_val(pgd_entry) & PAGE_MASK)) +#define pgd_page(pgd) (virt_to_page(pgd_val(pgd))) + /* * PMD defines. Middle level. 
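ARCH_LOW_ADDRESS_LIMIT gives s390 a way to cap "low" boot-time allocations at 2 GB, reflecting its 31-bit addressing constraints. Presumably the generic bootmem code supplies a default when an architecture stays silent; a sketch of that assumed arrangement:

    /* Sketch (assumed consumer side): */
    #ifndef ARCH_LOW_ADDRESS_LIMIT
    #define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL    /* no special cap */
    #endif

    /* alloc_bootmem_low() would then allocate with goal 0 and
     * limit ARCH_LOW_ADDRESS_LIMIT. */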
@@ -219,7 +221,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) #define pmd_none(pmd_entry) (pmd_val((pmd_entry)) == _PMD_EMPTY) #define pmd_bad(pmd_entry) ((pmd_val(pmd_entry) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) -#define pmd_page_kernel(pmd_entry) \ +#define pmd_page_vaddr(pmd_entry) \ ((unsigned long) __va(pmd_val(pmd_entry) & PAGE_MASK)) #define pmd_page(pmd) \ diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h index 226c6475c9a2..4f0a5ba0d6a0 100644 --- a/include/asm-sparc/pgtable.h +++ b/include/asm-sparc/pgtable.h @@ -143,10 +143,10 @@ extern unsigned long empty_zero_page; /* */ BTFIXUPDEF_CALL_CONST(struct page *, pmd_page, pmd_t) -BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page, pgd_t) +BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page_vaddr, pgd_t) #define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd) -#define pgd_page(pgd) BTFIXUP_CALL(pgd_page)(pgd) +#define pgd_page_vaddr(pgd) BTFIXUP_CALL(pgd_page_vaddr)(pgd) BTFIXUPDEF_SETHI(none_mask) BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t) diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index ebfe395cfb87..b12be7a869f6 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -630,8 +630,9 @@ static inline unsigned long pte_present(pte_t pte) #define __pmd_page(pmd) \ ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL))) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) -#define pud_page(pud) \ +#define pud_page_vaddr(pud) \ ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL))) +#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (0) #define pmd_present(pmd) (pmd_val(pmd) != 0U) @@ -653,7 +654,7 @@ static inline unsigned long pte_present(pte_t pte) /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ - ((pmd_t *) pud_page(*(pudp)) + \ + ((pmd_t *) pud_page_vaddr(*(pudp)) + \ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) /* Find an entry in the third-level page table.. */ diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h index ffe017f6b64b..6050e0eb257e 100644 --- a/include/asm-um/pgtable-2level.h +++ b/include/asm-um/pgtable-2level.h @@ -41,7 +41,7 @@ static inline void pgd_mkuptodate(pgd_t pgd) { } #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) -#define pmd_page_kernel(pmd) \ +#define pmd_page_vaddr(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h index 786c25727289..ca0c2a92a112 100644 --- a/include/asm-um/pgtable-3level.h +++ b/include/asm-um/pgtable-3level.h @@ -74,11 +74,12 @@ extern inline void pud_clear (pud_t *pud) set_pud(pud, __pud(0)); } -#define pud_page(pud) \ +#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) +#define pud_page_vaddr(pud) \ ((struct page *) __va(pud_val(pud) & PAGE_MASK)) /* Find an entry in the second-level page table.. 
*/ -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ +#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \ pmd_index(address)) static inline unsigned long pte_pfn(pte_t pte) diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index ac64eb955868..4862daf8b906 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -349,7 +349,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return pte; } -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] @@ -389,7 +389,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) #define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h index 824c28896382..afa4fe1ca9f1 100644 --- a/include/asm-um/processor-generic.h +++ b/include/asm-um/processor-generic.h @@ -138,9 +138,7 @@ extern struct cpuinfo_um cpu_data[]; #ifdef CONFIG_MODE_SKAS #define KSTK_REG(tsk, reg) \ - ({ union uml_pt_regs regs; \ - get_thread_regs(®s, tsk->thread.mode.skas.switch_buf); \ - UPT_REG(®s, reg); }) + get_thread_reg(reg, tsk->thread.mode.skas.switch_buf) #else #define KSTK_REG(tsk, reg) (0xbadbabe) #endif diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h index a36f5371b36b..99c87c5ce994 100644 --- a/include/asm-um/ptrace-generic.h +++ b/include/asm-um/ptrace-generic.h @@ -8,19 +8,7 @@ #ifndef __ASSEMBLY__ - -#define pt_regs pt_regs_subarch -#define show_regs show_regs_subarch -#define send_sigtrap send_sigtrap_subarch - -#include "asm/arch/ptrace.h" - -#undef pt_regs -#undef show_regs -#undef send_sigtrap -#undef user_mode -#undef instruction_pointer - +#include "asm/arch/ptrace-abi.h" #include "sysdep/ptrace.h" struct pt_regs { diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h index c894e68b1f96..2074483e6ca4 100644 --- a/include/asm-um/ptrace-x86_64.h +++ b/include/asm-um/ptrace-x86_64.h @@ -11,15 +11,11 @@ #include "asm/errno.h" #include "asm/host_ldt.h" -#define signal_fault signal_fault_x86_64 #define __FRAME_OFFSETS /* Needed to get the R* macros */ #include "asm/ptrace-generic.h" -#undef signal_fault #define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 -void signal_fault(struct pt_regs_subarch *regs, void *frame, char *where); - #define FS_BASE (21 * sizeof(unsigned long)) #define GS_BASE (22 * sizeof(unsigned long)) #define DS (23 * sizeof(unsigned long)) diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index 40f2f13fe174..1ee9b07f3fe6 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -11,6 +11,7 @@ header-y += debugreg.h header-y += ldt.h header-y += msr.h header-y += prctl.h +header-y += ptrace-abi.h header-y += setup.h header-y += sigcontext32.h header-y += ucontext.h diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index 670a3388e70a..f65674832318 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h @@ -46,6 +46,7 @@ extern void setup_memory_region(void); 
extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); +extern void e820_mark_nosave_regions(void); extern void e820_print_map(char *who); extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 08dd9f9dda81..bffb2f886a51 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -21,9 +21,15 @@ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) -#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) -#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) +#define per_cpu(var, cpu) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); })) +#define __get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) +#define __raw_get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) /* A macro to avoid #include hell... */ #define percpu_modcopy(pcpudst, src, size) \ diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index a31ab4e68a9b..51eba2395171 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -101,9 +101,6 @@ static inline void pgd_clear (pgd_t * pgd) set_pgd(pgd, __pgd(0)); } -#define pud_page(pud) \ -((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) - #define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0)) struct mm_struct; @@ -326,7 +323,8 @@ static inline int pmd_large(pmd_t pte) { /* * Level 4 access. */ -#define pgd_page(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK)) +#define pgd_page_vaddr(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK)) +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address)) @@ -335,16 +333,18 @@ static inline int pmd_large(pmd_t pte) { /* PUD - Level3 access */ /* to find an entry in a page-table-directory. 
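The curious extern declaration folded into per_cpu() and friends here, and into the asm-generic and s390 versions earlier, is a compile-time check: pasting the argument into a declared function name forces it to be a plain identifier, so per_cpu(*ptr, cpu) or per_cpu(x + 1, cpu) now fails to build instead of silently computing a bogus address. (The spelling simple_indentifier_ follows the patch.) A sketch of the mechanism in isolation, stripped of the per-cpu relocation:

    /* Sketch: the identifier check on its own. */
    #define IDENT_CHECK(var) (*({                           \
            extern int simple_indentifier_##var(void);      \
            &var; }))

    int counter;

    void example(void)
    {
            int ok = IDENT_CHECK(counter);          /* compiles */
            /* int bad = IDENT_CHECK(counter + 1);  would not: the
             * paste yields "simple_indentifier_counter + 1(void)" */
            (void)ok;
    }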
*/ +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address)) #define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) /* PMD - Level 2 access */ -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \ pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) @@ -382,7 +382,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ pte_index(address)) /* x86-64 always has all page tables mapped. */ diff --git a/include/asm-x86_64/ptrace-abi.h b/include/asm-x86_64/ptrace-abi.h new file mode 100644 index 000000000000..19184b0806b1 --- /dev/null +++ b/include/asm-x86_64/ptrace-abi.h @@ -0,0 +1,51 @@ +#ifndef _X86_64_PTRACE_ABI_H +#define _X86_64_PTRACE_ABI_H + +#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 +/* arguments: interrupts/non tracing syscalls only save upto here*/ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* = ERROR */ +/* end of arguments */ +/* cpu exception frame or undefined in case of fast syscall. */ +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 +#define ARGOFFSET R11 +#endif /* __ASSEMBLY__ */ + +/* top of stack page */ +#define FRAME_SIZE 168 + +#define PTRACE_OLDSETOPTIONS 21 + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +/* only useful for access 32bit programs */ +#define PTRACE_GET_THREAD_AREA 25 +#define PTRACE_SET_THREAD_AREA 26 + +#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */ + +#endif diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h index ca6f15ff61d4..ab827dc381d7 100644 --- a/include/asm-x86_64/ptrace.h +++ b/include/asm-x86_64/ptrace.h @@ -1,40 +1,9 @@ #ifndef _X86_64_PTRACE_H #define _X86_64_PTRACE_H -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) -#define R15 0 -#define R14 8 -#define R13 16 -#define R12 24 -#define RBP 32 -#define RBX 40 -/* arguments: interrupts/non tracing syscalls only save upto here*/ -#define R11 48 -#define R10 56 -#define R9 64 -#define R8 72 -#define RAX 80 -#define RCX 88 -#define RDX 96 -#define RSI 104 -#define RDI 112 -#define ORIG_RAX 120 /* = ERROR */ -/* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. 
*/ -#define RIP 128 -#define CS 136 -#define EFLAGS 144 -#define RSP 152 -#define SS 160 -#define ARGOFFSET R11 -#endif /* __ASSEMBLY__ */ +#include <asm/ptrace-abi.h> -/* top of stack page */ -#define FRAME_SIZE 168 - -#define PTRACE_OLDSETOPTIONS 21 - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLY__ struct pt_regs { unsigned long r15; @@ -45,7 +14,7 @@ struct pt_regs { unsigned long rbx; /* arguments: non interrupts/non tracing syscalls only save upto here*/ unsigned long r11; - unsigned long r10; + unsigned long r10; unsigned long r9; unsigned long r8; unsigned long rax; @@ -54,32 +23,18 @@ struct pt_regs { unsigned long rsi; unsigned long rdi; unsigned long orig_rax; -/* end of arguments */ +/* end of arguments */ /* cpu exception frame or undefined */ unsigned long rip; unsigned long cs; - unsigned long eflags; - unsigned long rsp; + unsigned long eflags; + unsigned long rsp; unsigned long ss; -/* top of stack page */ +/* top of stack page */ }; #endif -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_GETFPXREGS 18 -#define PTRACE_SETFPXREGS 19 - -/* only useful for access 32bit programs */ -#define PTRACE_GET_THREAD_AREA 25 -#define PTRACE_SET_THREAD_AREA 26 - -#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */ - #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #define user_mode(regs) (!!((regs)->cs & 3)) #define user_mode_vm(regs) user_mode(regs) diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 6805e1feb300..ce97f65e1d10 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -48,8 +48,6 @@ extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); void smp_stop_cpu(void); -extern int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int retry, int wait); extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h index 7b15afb70c56..a47cc734c20c 100644 --- a/include/asm-xtensa/pgtable.h +++ b/include/asm-xtensa/pgtable.h @@ -218,7 +218,7 @@ extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)]; /* * The pmd contains the kernel virtual address of the pte page. */ -#define pmd_page_kernel(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long)(pmd_val(pmd) & PAGE_MASK)) #define pmd_page(pmd) virt_to_page(pmd_val(pmd)) /* @@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Find an entry in the third-level page table.. */ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir,addr) \ - ((pte_t*) pmd_page_kernel(*(dir)) + pte_index(addr)) + ((pte_t*) pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir),(addr)) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index e319c649e4fd..31e9abb6d977 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -4,11 +4,8 @@ #ifndef _LINUX_BOOTMEM_H #define _LINUX_BOOTMEM_H -#include <asm/pgtable.h> -#include <asm/dma.h> -#include <linux/cache.h> -#include <linux/init.h> #include <linux/mmzone.h> +#include <asm/dma.h> /* * simple boot-time physical memory area allocator. 
@@ -41,45 +38,64 @@ typedef struct bootmem_data { struct list_head list; } bootmem_data_t; -extern unsigned long __init bootmem_bootmap_pages (unsigned long); -extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); -extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, unsigned long goal, - unsigned long limit); +extern unsigned long bootmem_bootmap_pages(unsigned long); +extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); +extern void free_bootmem(unsigned long addr, unsigned long size); +extern void *__alloc_bootmem(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern void __init reserve_bootmem (unsigned long addr, unsigned long size); +extern void reserve_bootmem(unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ - __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ - __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0) + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low((x), PAGE_SIZE, 0) + __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -extern unsigned long __init free_all_bootmem (void); -extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); -extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); -extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); -extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); + +extern unsigned long free_all_bootmem(void); +extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); +extern void *__alloc_bootmem_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern unsigned long init_bootmem_node(pg_data_t *pgdat, + unsigned long freepfn, + unsigned long startpfn, + unsigned long endpfn); +extern void reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size); +extern void 
free_bootmem_node(pg_data_t *pgdat, + unsigned long addr, + unsigned long size); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0) + __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP @@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size) { return NULL; } -#endif +#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ extern unsigned long __meminitdata nr_kernel_pages; extern unsigned long nr_all_pages; -extern void *__init alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long limit); +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long limit); #define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ #define HASH_EARLY 0x00000002 /* Allocating during early boot? */ diff --git a/include/linux/console.h b/include/linux/console.h index 3bdf2155e565..76a1807726eb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -120,9 +120,14 @@ extern void console_stop(struct console *); extern void console_start(struct console *); extern int is_console_locked(void); +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); +#else +static inline void suspend_console(void) {} +static inline void resume_console(void) {} +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /* Some debug stub to catch some of the obvious races in the VT code */ #if 1 diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8fb344a9abd8..3fef7d67aedc 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu); static inline int cpu_is_offline(int cpu) { return 0; } #endif +#ifdef CONFIG_SUSPEND_SMP +extern int disable_nonboot_cpus(void); +extern void enable_nonboot_cpus(void); +#else +static inline int disable_nonboot_cpus(void) { return 0; } +static inline void enable_nonboot_cpus(void) {} +#endif + #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 2d7671c92c0b..d6f4ec467a4b 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -169,6 +169,12 @@ enum { DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* DCCP CCIDS */ +enum { + DCCPC_CCID2 = 2, + DCCPC_CCID3 = 3, +}; + /* DCCP features */ enum { DCCPF_RESERVED = 0, @@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 -#define DCCPF_INITIAL_CCID 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 #define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -404,6 +410,7 @@ struct dccp_service_list { }; 
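
Illustrative userland sketch (not from the patch) of the service-code rule the dccp.h changes just below encode: a connecting socket should set its single service code before connect(), otherwise it falls back to DCCP_SERVICE_CODE_IS_ABSENT (0). The numeric fallbacks here are assumptions for toolchains whose headers predate DCCP support.

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef SOCK_DCCP			/* assumed values for pre-DCCP libc headers */
#define SOCK_DCCP		6
#define IPPROTO_DCCP		33
#endif
#ifndef SOL_DCCP
#define SOL_DCCP		269
#define DCCP_SOCKOPT_SERVICE	2
#endif

int open_dccp_client(void)
{
	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
	unsigned int service = htonl(42);	/* placeholder service code */

	if (fd >= 0)
		setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
			   &service, sizeof(service));
	return fd;	/* ready for connect() */
}
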
#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) +#define DCCP_SERVICE_CODE_IS_ABSENT 0 static inline int dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) @@ -484,11 +491,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk) return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; } -static inline int dccp_service_not_initialized(const struct sock *sk) -{ - return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h index 6a5796c81c90..666e0a5f00fc 100644 --- a/include/linux/elf-em.h +++ b/include/linux/elf-em.h @@ -31,6 +31,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_H8_300 46 /* Renesas H8/300,300H,H8S */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ +#define EM_AVR32 0x18ad /* Atmel AVR32 */ /* * This is an interim value that we will use until the committee comes diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h new file mode 100644 index 000000000000..67396db141e8 --- /dev/null +++ b/include/linux/elfnote.h @@ -0,0 +1,90 @@ +#ifndef _LINUX_ELFNOTE_H +#define _LINUX_ELFNOTE_H +/* + * Helper macros to generate ELF Note structures, which are put into a + * PT_NOTE segment of the final vmlinux image. These are useful for + * including name-value pairs of metadata into the kernel binary (or + * modules?) for use by external programs. + * + * Each note has three parts: a name, a type and a desc. The name is + * intended to distinguish the note's originator, so it would be a + * company, project, subsystem, etc; it must be in a suitable form for + * use in a section name. The type is an integer which is used to tag + * the data, and is considered to be within the "name" namespace (so + * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The + * "desc" field is the actual data. There are no constraints on the + * desc field's contents, though typically they're fairly small. + * + * All notes from a given NAME are put into a section named + * .note.NAME. When the kernel image is finally linked, all the notes + * are packed into a single .notes section, which is mapped into the + * PT_NOTE segment. Because notes for a given name are grouped into + * the same section, they'll all be adjacent in the output file. + * + * This file defines macros for both C and assembler use. Their + * syntax is slightly different, but they're semantically similar. + * + * See the ELF specification for more detail about ELF notes. + */ + +#ifdef __ASSEMBLER__ +/* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and + * desc data with appropriate padding. The 'desctype' argument is the + * assembler pseudo op defining the type of the data e.g. .asciz while + * 'descdata' is the data itself e.g. "hello, world". + * + * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") + * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) + */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection ; +#else /* !__ASSEMBLER__ */ +#include <linux/elf.h> +/* + * Use an anonymous structure which matches the shape of + * Elf{32,64}_Nhdr, but includes the name and desc data. The size and + * type of name and desc depend on the macro arguments. "name" must + * be a literal string, and "desc" must be passed by value. You may + * only define one note per line, since __LINE__ is used to generate + * unique symbols. + */
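
Illustrative (not part of the patch): using the C-side ELFNOTE32() wrapper defined just below, a subsystem can emit a note like this at file scope; after the final link, readelf -n lists it under the packed .notes segment. Vendor, type and value are placeholders in the spirit of the XYZCo examples above.

#include <linux/elfnote.h>

/* emits a 4-byte note named "XYZCo" with type 0x100 */
ELFNOTE32("XYZCo", 0x100, 0x00010002);
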
+#define _ELFNOTE_PASTE(a,b) a##b +#define _ELFNOTE(size, name, unique, type, desc) \ + static const struct { \ + struct elf##size##_note _nhdr; \ + unsigned char _name[sizeof(name)] \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + typeof(desc) _desc \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + } _ELFNOTE_PASTE(_note_, unique) \ + __attribute_used__ \ + __attribute__((section(".note." name), \ + aligned(sizeof(Elf##size##_Word)), \ + unused)) = { \ + { \ + sizeof(name), \ + sizeof(desc), \ + type, \ + }, \ + name, \ + desc \ + } +#define ELFNOTE(size, name, type, desc) \ + _ELFNOTE(size, name, __LINE__, type, desc) + +#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) +#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) +#endif /* __ASSEMBLER__ */ + +#endif /* _LINUX_ELFNOTE_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cc9e60844484..8b34aabfe4c6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,17 +9,16 @@ struct vm_area_struct; /* * GFP bitmasks.. + * + * Zone modifiers (see linux/mmzone.h - low three bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. */ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) -#ifdef CONFIG_DMA_IS_DMA32 -#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ -#elif BITS_PER_LONG < 64 -#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ -#else -#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ -#endif +#define __GFP_DMA32 ((__force gfp_t)0x04u) /* * Action modifiers - doesn't change the zoning @@ -46,6 +45,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) @@ -54,7 +54,7 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) @@ -67,6 +67,8 @@ struct vm_area_struct; #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) + /* Flag - indicates that the buffer will be suitable for DMA.
Ignored on some platforms, used as appropriate on others */ @@ -76,11 +78,19 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 -static inline int gfp_zone(gfp_t gfp) +static inline enum zone_type gfp_zone(gfp_t flags) { - int zone = GFP_ZONEMASK & (__force int) gfp; - BUG_ON(zone >= GFP_ZONETYPES); - return zone; + if (flags & __GFP_DMA) + return ZONE_DMA; +#ifdef CONFIG_ZONE_DMA32 + if (flags & __GFP_DMA32) + return ZONE_DMA32; +#endif +#ifdef CONFIG_HIGHMEM + if (flags & __GFP_HIGHMEM) + return ZONE_HIGHMEM; +#endif + return ZONE_NORMAL; } /* diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 85ce7ef9a512..fd7d12daa94f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); +extern unsigned long totalhigh_pages; #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } +#define totalhigh_pages 0 + +#ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { might_sleep(); @@ -41,6 +45,7 @@ static inline void *kmap(struct page *page) #define kunmap_atomic(addr, idx) do { } while (0) #define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) +#endif #endif /* CONFIG_HIGHMEM */ diff --git a/include/linux/irq.h b/include/linux/irq.h index fbf6d901e9c2..48d3cb3b6a47 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *, * Monolithic do_IRQ implementation. * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) */ +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); +#endif /* * Architectures call this to let the generic IRQ layer @@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs) { struct irq_desc *desc = irq_desc + irq; +#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ + desc->handle_irq(irq, desc, regs); +#else if (likely(desc->handle_irq)) desc->handle_irq(irq, desc, regs); else __do_IRQ(irq, regs); +#endif } /* Handling of unhandled and spurious interrupts: */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b2ae4fdce8b..e44a37e2c71c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ extern const char linux_banner[]; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) #define KERN_EMERG "<0>" /* system is unusable */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 72440f0a443d..09f0f575ddff 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); extern unsigned slab_node(struct mempolicy *policy); -extern int policy_zone; +extern enum zone_type policy_zone; -static inline void check_highest_zone(int k) +static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone) policy_zone = k; diff --git a/include/linux/mm.h b/include/linux/mm.h index 224178a000d2..856f0ee7e84a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -15,6 +15,7 @@ #include <linux/fs.h> 
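
Illustrative (not from the patch): the DIV_ROUND_UP() helper added to kernel.h above is the usual add-then-divide rounding idiom, e.g. DIV_ROUND_UP(10, 4) == 3 and DIV_ROUND_UP(8, 4) == 2. A typical use is sizing a page array for a byte range:

#include <linux/kernel.h>
#include <asm/page.h>

static inline unsigned long bytes_to_pages(unsigned long len)
{
	return DIV_ROUND_UP(len, PAGE_SIZE);	/* 1..PAGE_SIZE bytes need 1 page */
}
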
#include <linux/mutex.h> #include <linux/debug_locks.h> +#include <linux/backing-dev.h> struct mempolicy; struct anon_vma; @@ -218,7 +219,8 @@ struct inode; * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using - * a page. + * a page, though if it is a pagecache page, rmap structures can tell us + * who is mapping it. */ struct page { unsigned long flags; /* Atomic flags, some possibly @@ -278,6 +280,12 @@ struct page { */ #include <linux/page-flags.h> +#ifdef CONFIG_DEBUG_VM +#define VM_BUG_ON(cond) BUG_ON(cond) +#else +#define VM_BUG_ON(condition) do { } while(0) +#endif + /* * Methods to modify the page usage count. * @@ -292,12 +300,11 @@ struct page { */ /* - * Drop a ref, return true if the logical refcount fell to zero (the page has - * no users) + * Drop a ref, return true if the refcount fell to zero (the page has no users) */ static inline int put_page_testzero(struct page *page) { - BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON(atomic_read(&page->_count) == 0); return atomic_dec_and_test(&page->_count); } @@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { + VM_BUG_ON(PageCompound(page)); return atomic_inc_not_zero(&page->_count); } -extern void FASTCALL(__page_cache_release(struct page *)); - static inline int page_count(struct page *page) { if (unlikely(PageCompound(page))) @@ -323,6 +329,7 @@ static inline void get_page(struct page *page) { if (unlikely(PageCompound(page))) page = (struct page *)page_private(page); + VM_BUG_ON(atomic_read(&page->_count) == 0); atomic_inc(&page->_count); } @@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order); * For the non-reserved pages, page_count(page) denotes a reference count. * page_count() == 0 means the page is free. page->lru is then used for * freelist management in the buddy allocator. - * page_count() == 1 means the page is used for exactly one purpose - * (e.g. a private data page of one process). + * page_count() > 0 means the page has been allocated. + * + * Pages are allocated by the slab allocator in order to provide memory + * to kmalloc and kmem_cache_alloc. In this case, the management of the + * page, and the fields in 'struct page' are the responsibility of mm/slab.c + * unless a particular usage is carefully commented. (the responsibility of + * freeing the kmalloc memory is the caller's, of course). * - * A page may be used for kmalloc() or anyone else who does a - * __get_free_page(). In this case the page_count() is at least 1, and - * all other fields are unused but should be 0 or NULL. The - * management of this page is the responsibility of the one who uses - * it. + * A page may be used by anyone else who does a __get_free_page(). + * In this case, page_count still tracks the references, and should only + * be used through the normal accessor functions. The top bits of page->flags + * and page->virtual store page management information, but all other fields + * are unused and could be used privately, carefully. The management of this + * page is the responsibility of the one who allocated it, and those who have + * subsequently been given references to it. 
* - * The other pages (we may call them "pagecache pages") are completely + * managed by the Linux memory manager: I/O, buffers, swapping etc. * The following discussion applies only to them. * - * A page may belong to an inode's memory mapping. In this case, - * page->mapping is the pointer to the inode, and page->index is the - * file offset of the page, in units of PAGE_CACHE_SIZE. + * A pagecache page contains an opaque `private' member, which belongs to the + * page's address_space. Usually, this is the address of a circular list of + * the page's disk buffers. PG_private must be set to tell the VM to call + * into the filesystem to release these pages. * - * A page contains an opaque `private' member, which belongs to the - * page's address_space. Usually, this is the address of a circular - * list of the page's disk buffers. + * A page may belong to an inode's memory mapping. In this case, page->mapping + * is the pointer to the inode, and page->index is the file offset of the page, + * in units of PAGE_CACHE_SIZE. * - * For pages belonging to inodes, the page_count() is the number of - * attaches, plus 1 if `private' contains something, plus one for - * the page cache itself. + * If pagecache pages are not associated with an inode, they are said to be + * anonymous pages. These may become associated with the swapcache, and in that + * case PG_swapcache is set, and page->private is an offset into the swapcache. * - * Instead of keeping dirty/clean pages in per address-space lists, we instead - * now tag pages as dirty/under writeback in the radix tree. + * In either case (swapcache or inode backed), the pagecache itself holds one + * reference to the page. Setting PG_private should also increment the + * refcount. Each user mapping also has a reference to the page. * - * There is also a per-mapping radix tree mapping index to the page - * in memory if present. The tree is rooted at mapping->root. + * The pagecache pages are stored in a per-mapping radix tree, which is + * rooted at mapping->page_tree, and indexed by offset. + * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space + * lists, we instead now tag pages as dirty/writeback in the radix tree. * - * All process pages can do I/O: + * All pagecache pages may be subject to I/O: * - inode pages may need to be read from disk, * - inode pages which have been modified and are MAP_SHARED may need - * to be written to disk, - * - private pages which have been modified may need to be swapped out - * to swap space and (later) to be read back into memory. + * to be written back to the inode on disk, + * - anonymous pages (including MAP_PRIVATE file mappings) which have been + * modified may need to be swapped out to swap space and (later) to be read + * back into memory. */
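
Illustrative sketch (not from the patch) of the reference rules described in the rewritten comment above: a pagecache lookup takes its own reference, which the caller must drop when done.

#include <linux/pagemap.h>

static int page_is_cached(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_get_page(mapping, index);	/* +1 ref on success */

	if (!page)
		return 0;
	/* ... inspect PageUptodate(page), PageDirty(page), ... */
	page_cache_release(page);				/* drop the lookup ref */
	return 1;
}
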
/* @@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order); #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) -static inline unsigned long page_zonenum(struct page *page) +static inline enum zone_type page_zonenum(struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page) return zone_table[page_zone_id(page)]; } +static inline unsigned long zone_to_nid(struct zone *zone) +{ + return zone->zone_pgdat->node_id; +} + static inline unsigned long page_to_nid(struct page *page) { if (FLAGS_HAS_NODE) return (page->flags >> NODES_PGSHIFT) & NODES_MASK; else - return page_zone(page)->zone_pgdat->node_id; + return zone_to_nid(page_zone(page)); } static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -static inline void set_page_zone(struct page *page, unsigned long zone) +static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } + static inline void set_page_node(struct page *page, unsigned long node) { page->flags &= ~(NODES_MASK << NODES_PGSHIFT); @@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline void set_page_links(struct page *page, unsigned long zone, +static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); @@ -802,6 +827,39 @@ struct shrinker; extern struct shrinker *set_shrinker(int, shrinker_t); extern void remove_shrinker(struct shrinker *shrinker); +/* + * Some shared mappings will want the pages marked read-only + * to track write events. If so, we'll downgrade vm_page_prot + * to the private version (using protection_map[] without the + * VM_SHARED bit). + */ +static inline int vma_wants_writenotify(struct vm_area_struct *vma) +{ + unsigned int vm_flags = vma->vm_flags; + + /* If it was private or non-writable, the write bit is already clear */ + if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) + return 0; + + /* The backer wishes to know when pages are first written to? */ + if (vma->vm_ops && vma->vm_ops->page_mkwrite) + return 1; + + /* The open routine did something to the protections already? */ + if (pgprot_val(vma->vm_page_prot) != + pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)])) + return 0; + + /* Specialty mapping? */ + if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) + return 0; + + /* Can the mapping track the dirty pages? */ + return vma->vm_file && vma->vm_file->f_mapping && + mapping_cap_account_dirty(vma->vm_file->f_mapping); +} + extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f45163c528e8..3693f1a52788 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -51,7 +51,8 @@ enum zone_stat_item { NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
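
Returning to vma_wants_writenotify() above: an illustrative caller pattern (the real call site in this series lives in mm/mmap.c and is not shown in this diff). Shared writable mappings whose backing store can account dirtying get the private, write-protected protection, so the first store faults into page_mkwrite()/dirty accounting.

	vma->vm_page_prot = protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
	if (vma_wants_writenotify(vma))
		/* drop VM_SHARED: leave the PTEs write-protected initially */
		vma->vm_page_prot = protection_map[vm_flags &
					(VM_READ|VM_WRITE|VM_EXEC)];
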
only modified from process context */ NR_FILE_PAGES, - NR_SLAB, /* Pages used by slab allocator */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_FILE_DIRTY, NR_WRITEBACK, @@ -88,53 +89,68 @@ struct per_cpu_pageset { #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) #endif -#define ZONE_DMA 0 -#define ZONE_DMA32 1 -#define ZONE_NORMAL 2 -#define ZONE_HIGHMEM 3 - -#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ - +enum zone_type { + /* + * ZONE_DMA is used when there are devices that are not able + * to do DMA to all of addressable memory (ZONE_NORMAL). Then we + * carve out the portion of memory that is needed for these devices. + * The range is arch specific. + * + * Some examples + * + * Architecture Limit + * --------------------------- + * parisc, ia64, sparc <4G + * s390 <2G + * arm26 <48M + * arm Various + * alpha Unlimited or 0-16MB. + * + * i386, x86_64 and multiple other arches + * <16M. + */ + ZONE_DMA, +#ifdef CONFIG_ZONE_DMA32 + /* + * x86_64 needs two ZONE_DMAs because it supports devices that are + * only able to do DMA to the lower 16M but also 32 bit devices that + * can only do DMA areas below 4G. + */ + ZONE_DMA32, +#endif + /* + * Normal addressable memory is in ZONE_NORMAL. DMA operations can be + * performed on pages in ZONE_NORMAL if the DMA devices support + * transfers to all addressable memory. + */ + ZONE_NORMAL, +#ifdef CONFIG_HIGHMEM + /* + * A memory area that is only addressable by the kernel through + * mapping portions into its own address space. This is for example + * used by i386 to allow the kernel to address the memory beyond + * 900MB. The kernel will set up special mappings (page + * table entries on i386) for each page that the kernel needs to + * access. + */ + ZONE_HIGHMEM, +#endif + MAX_NR_ZONES +}; /* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the * allocator in the gfp_mask, in the zone modifier bits. These bits * are used to select a priority ordered list of memory zones which - * match the requested limits. GFP_ZONEMASK defines which bits within - * the gfp_mask should be considered as zone modifiers. Each valid - * combination of the zone modifier bits has a corresponding list - * of zones (in node_zonelists). Thus for two zone modifiers there - * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will - * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible - * combinations of zone modifiers in "zone modifier space". - * - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists. - * If the left most zone modifier is a "loner" then the highest valid - * zonelist would be four allowing us to allocate only five zonelists. - * Use the first form for GFP_ZONETYPES when the left most bit is not - * a "loner", otherwise use the second. - * - * NOTE! Make sure this matches the zones in <linux/gfp.h> + * match the requested limits. 
See gfp_zone() in include/linux/gfp.h */ -#define GFP_ZONEMASK 0x07 -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -/* - * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a 32bit PC we have 4 zones: - * - * ZONE_DMA < 16 MB ISA DMA capable memory - * ZONE_DMA32 0 MB Empty - * ZONE_NORMAL 16-896 MB direct mapped by the kernel - * ZONE_HIGHMEM > 896 MB only page cache and user processes - */ +#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM) +#define ZONES_SHIFT 1 +#else +#define ZONES_SHIFT 2 +#endif struct zone { /* Fields commonly accessed by the page allocator */ @@ -154,7 +170,8 @@ struct zone { /* * zone reclaim becomes active if more unmapped pages exist. */ - unsigned long min_unmapped_ratio; + unsigned long min_unmapped_pages; + unsigned long min_slab_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -266,7 +283,6 @@ struct zone { char *name; } ____cacheline_internodealigned_in_smp; - /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the @@ -304,7 +320,7 @@ struct zonelist { struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; - struct zonelist node_zonelists[GFP_ZONETYPES]; + struct zonelist node_zonelists[MAX_NR_ZONES]; int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; @@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone) return (!!zone->present_pages); } -static inline int is_highmem_idx(int idx) +static inline int is_highmem_idx(enum zone_type idx) { +#ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM); +#else + return 0; +#endif } -static inline int is_normal_idx(int idx) +static inline int is_normal_idx(enum zone_type idx) { return (idx == ZONE_NORMAL); } @@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx) */ static inline int is_highmem(struct zone *zone) { +#ifdef CONFIG_HIGHMEM return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; +#else + return 0; +#endif } static inline int is_normal(struct zone *zone) @@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone) static inline int is_dma32(struct zone *zone) { +#ifdef CONFIG_ZONE_DMA32 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; +#else + return 0; +#endif } static inline int is_dma(struct zone *zone) @@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. 
*/ diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9a285cecf249..312bd2ffee33 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,8 @@ header-y += xt_connmark.h header-y += xt_CONNMARK.h header-y += xt_conntrack.h header-y += xt_dccp.h +header-y += xt_dscp.h +header-y += xt_DSCP.h header-y += xt_esp.h header-y += xt_helper.h header-y += xt_length.h diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5748642e9f36..9d7921dd50f0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -13,24 +13,25 @@ * PG_reserved is set for special pages, which can never be swapped out. Some * of them might not even exist (eg empty_bad_page)... * - * The PG_private bitflag is set if page->private contains a valid value. + * The PG_private bitflag is set on pagecache pages if they contain filesystem + * specific data (which is normally at page->private). It can be used by + * private allocations for its own usage. * - * During disk I/O, PG_locked is used. This bit is set before I/O and - * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks - * waiting for the I/O on this page to complete. + * During initiation of disk I/O, PG_locked is set. This bit is set before I/O + * and cleared when writeback _starts_ or when read _completes_. PG_writeback + * is set before writeback starts and cleared when it finishes. + * + * PG_locked also pins a page in pagecache, and blocks truncation of the file + * while it is held. + * + * page_waitqueue(page) is a wait queue of all tasks waiting for the page + * to become unlocked. * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * - * For choosing which pages to swap out, inode pages carry a PG_referenced bit, - * which is set any time the system accesses that page through the (mapping, - * index) hash table. This referenced bit, together with the referenced bit - * in the page tables, is used to manipulate page->age and move the page across - * the active, inactive_dirty and inactive_clean lists. - * - * Note that the referenced bit, the page->lru list_head and the active, - * inactive_dirty and inactive_clean lists are protected by the - * zone->lru_lock, and *NOT* by the usual PG_locked bit! + * PG_referenced, PG_reclaim are used for page reclaim for anonymous and + * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * @@ -42,6 +43,10 @@ * space, they need to be kmapped separately for doing IO on the pages. The * struct page (these bits with information) are always mapped into kernel * address space... + * + * PG_buddy is set to indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + * */ /* @@ -74,7 +79,7 @@ #define PG_checked 8 /* kill me in 2.5.<early>. 
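
Illustrative sketch (not from the patch) of the PG_private contract restated above: a filesystem attaching private data sets the flag, stores the value, and takes the extra reference that pairs with the drop in its release path.

#include <linux/mm.h>
#include <linux/page-flags.h>

static void attach_fs_private(struct page *page, unsigned long data)
{
	get_page(page);			/* PG_private pins one extra reference */
	SetPagePrivate(page);
	set_page_private(page, data);
}
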
*/ #define PG_arch_1 9 #define PG_reserved 10 -#define PG_private 11 /* Has something at ->private */ +#define PG_private 11 /* If pagecache, has fs-private data */ #define PG_writeback 12 /* Page is under writeback */ #define PG_nosave 13 /* Used for system suspend/resume */ @@ -83,7 +88,7 @@ #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ #define PG_reclaim 17 /* To be reclaimed asap */ -#define PG_nosave_free 18 /* Free, should not be written */ +#define PG_nosave_free 18 /* Used for system suspend/resume */ #define PG_buddy 19 /* Page is free, on buddy lists */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2f5d27f60e..64f950925151 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } extern void FASTCALL(__lock_page(struct page *page)); +extern void FASTCALL(__lock_page_nosync(struct page *page)); extern void FASTCALL(unlock_page(struct page *page)); +/* + * lock_page may only be called if we have the page's inode pinned. + */ static inline void lock_page(struct page *page) { might_sleep(); if (TestSetPageLocked(page)) __lock_page(page); } + +/* + * lock_page_nosync should only be used if we can't pin the page's inode. + * Doesn't play quite so well with block device plugging. + */ +static inline void lock_page_nosync(struct page *page) +{ + might_sleep(); + if (TestSetPageLocked(page)) + __lock_page_nosync(page); +} /* * This is exported only for wait_on_page_locked/wait_on_page_writeback. diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cb9039a21f2a..3835a9642f13 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,9 +1,12 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H + #include <linux/spinlock.h> /* For preempt_disable() */ #include <linux/slab.h> /* For kmalloc() */ #include <linux/smp.h> #include <linux/string.h> /* For memset() */ +#include <linux/cpumask.h> + #include <asm/percpu.h> /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ @@ -11,8 +14,14 @@ #define PERCPU_ENOUGH_ROOM 32768 #endif -/* Must be an lvalue. */ -#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) +/* + * Must be an lvalue. Since @var must be a simple identifier, + * we force a syntax error here if it isn't. + */ +#define get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + preempt_disable(); \ + &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() #ifdef CONFIG_SMP @@ -21,39 +30,77 @@ struct percpu_data { void *ptrs[NR_CPUS]; }; +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) /* - * Use this to get to a cpu's version of the per-cpu object allocated using - * alloc_percpu. Non-atomic access to the current CPU's version should + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should * probably be combined with get_cpu()/put_cpu(). 
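
Illustrative (not from the patch): the reworked get_cpu_var() reads as before for the common counter pattern, but now rejects anything that is not a plain identifier at compile time, because the dummy extern declaration it emits cannot be formed from an expression.

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_hits);

static void count_hit(void)
{
	get_cpu_var(my_hits)++;		/* disables preemption */
	put_cpu_var(my_hits);		/* re-enables preemption */
}
/* get_cpu_var(*indirect) now fails to compile rather than misbehave. */
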
*/ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ +#define percpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *__alloc_percpu(size_t size); -extern void free_percpu(const void *); +extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu); +extern void percpu_depopulate(void *__pdata, int cpu); +extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask); +extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask); +extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); +extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) + +static inline void percpu_depopulate(void *__pdata, int cpu) +{ +} + +static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ +} -static inline void *__alloc_percpu(size_t size) +static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, + int cpu) { - void *ret = kmalloc(size, GFP_KERNEL); - if (ret) - memset(ret, 0, size); - return ret; + return percpu_ptr(__pdata, cpu); } -static inline void free_percpu(const void *ptr) -{ - kfree(ptr); + +static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) +{ + return 0; +} + +static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +{ + return kzalloc(size, gfp); +} + +static inline void percpu_free(void *__pdata) +{ + kfree(__pdata); } #endif /* CONFIG_SMP */ -/* Simple wrapper for the common case: zeros memory. 
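
Illustrative sketch (not from the patch) of the reworked dynamic interface, using the percpu_alloc()/percpu_ptr()/percpu_free() wrappers defined just below; per the comment above, current-CPU access is bracketed with get_cpu()/put_cpu().

#include <linux/percpu.h>

static int percpu_demo(void)
{
	void *pdata = percpu_alloc(sizeof(unsigned long), GFP_KERNEL);
	unsigned long *ctr;
	int cpu;

	if (!pdata)
		return -ENOMEM;
	cpu = get_cpu();
	ctr = percpu_ptr(pdata, cpu);	/* this CPU's copy */
	(*ctr)++;
	put_cpu();
	percpu_free(pdata);
	return 0;
}
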
*/ -#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) +#define percpu_populate_mask(__pdata, size, gfp, mask) \ + __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) +#define percpu_depopulate_mask(__pdata, mask) \ + __percpu_depopulate_mask((__pdata), &(mask)) +#define percpu_alloc_mask(size, gfp, mask) \ + __percpu_alloc_mask((size), (gfp), &(mask)) + +#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) + +/* (legacy) interface for use without CPU hotplug handling */ + +#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ + cpu_possible_map) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define free_percpu(ptr) percpu_free((ptr)) +#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index a376bd4ade39..81e9299ca148 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,21 +3,25 @@ #ifdef CONFIG_PM_TRACE +extern int pm_trace_enabled; + struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) set_trace_device(dev) -#define TRACE_RESUME(user) do { \ - void *tracedata; \ - asm volatile("movl $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.long %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ +#define TRACE_RESUME(user) do { \ + if (pm_trace_enabled) { \ + void *tracedata; \ + asm volatile("movl $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n" \ + "\t.long %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_resume_trace(tracedata, user); \ + } \ } while (0) #else diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf97b0900014..db2c1df4fef9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *, */ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +/* + * Cleans the PTEs of shared mappings. + * (and since clean PTEs should also be readonly, write protects them too) + * + * returns the number of cleaned PTEs. + */ +int page_mkclean(struct page *); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) @@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); #define page_referenced(page,l) TestClearPageReferenced(page) #define try_to_unmap(page, refs) SWAP_FAIL +static inline int page_mkclean(struct page *page) +{ + return 0; +} + + #endif /* CONFIG_MMU */ /* diff --git a/include/linux/selinux.h b/include/linux/selinux.h index aad4e390d6a5..d1b7ca6c1c57 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); /** * selinux_audit_rule_match - determine if a context ID matches a rule. - * @ctxid: the context ID to check + * @sid: the context ID to check * @field: the field this rule refers to * @op: the operater the rule uses * @rule: pointer to the audit rule to check against @@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. 
*/ -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx); @@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, void selinux_audit_set_callback(int (*callback)(void)); /** - * selinux_task_ctxid - determine a context ID for a process. - * @tsk: the task object - * @ctxid: ID value returned via this - * - * On return, ctxid will contain an ID for the context. This value - * should only be used opaquely. - */ -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); - -/** - * selinux_ctxid_to_string - map a security context ID to a string - * @ctxid: security context ID to be converted. + * selinux_sid_to_string - map a security context ID to a string + * @sid: security context ID to be converted. * @ctx: address of context string to be returned * @ctxlen: length of returned context string. * @@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); * string will be allocated internally, and the caller must call * kfree() on it after use. */ -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen); /** * selinux_get_inode_sid - get the inode's security context ID @@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule) return; } -static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { @@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void)) return; } -static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) -{ - *ctxid = 0; -} - -static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { *ctx = NULL; *ctxlen = 0; diff --git a/include/linux/slab.h b/include/linux/slab.h index 45ad55b70d1c..66d6eb78d1c6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -203,7 +202,30 @@ extern int slab_is_available(void); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); -extern void *kmalloc_node(size_t size, gfp_t flags, int node); +extern void *__kmalloc_node(size_t size, gfp_t flags, int node); + +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size)) { + int i = 0; +#define CACHE(x) \ + if (size <= x) \ + goto found; \ + else \ + i++; +#include "kmalloc_sizes.h" +#undef CACHE + { + extern void __you_cannot_kmalloc_that_much(void); + __you_cannot_kmalloc_that_much(); + } +found: + return kmem_cache_alloc_node((flags & GFP_DMA) ? 
+ malloc_sizes[i].cs_dmacachep : + malloc_sizes[i].cs_cachep, flags, node); + } + return __kmalloc_node(size, flags, node); +} #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) { @@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); /* SLOB allocator routines */ void kmem_cache_init(void); -struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags); struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, unsigned long, void (*)(void *, struct kmem_cache *, unsigned long), @@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; -extern atomic_t slab_reclaim_pages; - #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 837e8bce1349..51649987f691 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus); */ int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); +int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, + int retry, int wait); + /* * Call a function on all processors */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 96e31aa64cc7..b1237f16ecde 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -10,29 +10,11 @@ #include <linux/pm.h> /* page backup entry */ -typedef struct pbe { +struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ struct pbe *next; -} suspend_pagedir_t; - -#define for_each_pbe(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = pbe->next) - -#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe)) -#define PB_PAGE_SKIP (PBES_PER_PAGE-1) - -#define for_each_pb_page(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next) - - -#define SWAP_FILENAME_MAXLENGTH 32 - - -extern dev_t swsusp_resume_device; - -/* mm/vmscan.c */ -extern int shrink_mem(void); +}; /* mm/page_alloc.c */ extern void drain_local_pages(void); @@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {} static inline int software_suspend(void) { printk("Warning: fake suspend called\n"); - return -EPERM; + return -ENOSYS; } #endif /* CONFIG_PM */ -#ifdef CONFIG_SUSPEND_SMP -extern void disable_nonboot_cpus(void); -extern void enable_nonboot_cpus(void); -#else -static inline void disable_nonboot_cpus(void) {} -static inline void enable_nonboot_cpus(void) {} -#endif - void save_processor_state(void); void restore_processor_state(void); struct saved_context; diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e59184c9096..e7c36ba2a2db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,10 @@ #include <asm/atomic.h> #include <asm/page.h> +struct notifier_block; + +struct bio; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -156,13 +160,14 @@ struct swap_list_t { /* linux/mm/oom_kill.c */ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); +extern int register_oom_notifier(struct notifier_block *nb); +extern int unregister_oom_notifier(struct notifier_block *nb); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; -extern unsigned long totalhigh_pages; extern unsigned 
long totalreserve_pages; extern long nr_swap_pages; extern unsigned int nr_free_pages(void); @@ -190,6 +195,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; extern int sysctl_min_unmapped_ratio; +extern int sysctl_min_slab_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 @@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int rw_swap_page_sync(int, swp_entry_t, struct page *); +extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain); +extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err); /* linux/mm/swap_state.c */ extern struct address_space swapper_space; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 736ed917a4f8..eca555781d05 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -191,6 +191,7 @@ enum VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ + VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 71b6363caaaf..dee88c6b6fa7 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); -extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, - pgprot_t prot, int node); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2d9b1b60798a..176c7f797339 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -18,7 +18,19 @@ * generated will simply be the increment of a global address. 
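
Illustrative sketch (not from the patch) for the register_oom_notifier() hook added in the swap.h hunk above. The convention assumed here, taken from the chain's caller in mm/oom_kill.c, is that the void * argument points to an unsigned long count of freed pages; a callback that releases memory adds to it so the allocator can retry instead of killing a task. Names are placeholders.

#include <linux/notifier.h>
#include <linux/swap.h>

static int balloon_oom_notify(struct notifier_block *nb,
			      unsigned long unused, void *parm)
{
	unsigned long *freed = parm;

	*freed += 0;	/* balloon_release_pages() would go here */
	return NOTIFY_OK;
}

static struct notifier_block balloon_oom_nb = {
	.notifier_call	= balloon_oom_notify,
};

/* in the driver's init path: register_oom_notifier(&balloon_oom_nb); */
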
*/ -#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH +#ifdef CONFIG_ZONE_DMA32 +#define DMA32_ZONE(xx) xx##_DMA32, +#else +#define DMA32_ZONE(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define HIGHMEM_ZONE(xx) , xx##_HIGH +#else +#define HIGHMEM_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), @@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node, struct zone *zones = NODE_DATA(node)->node_zones; return -#ifndef CONFIG_DMA_IS_NORMAL -#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64 +#ifdef CONFIG_ZONE_DMA32 zone_page_state(&zones[ZONE_DMA32], item) + #endif zone_page_state(&zones[ZONE_NORMAL], item) + -#endif #ifdef CONFIG_HIGHMEM zone_page_state(&zones[ZONE_HIGHMEM], item) + #endif diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0422036af4eb..56a23a0e7f2e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); +void set_page_dirty_balance(struct page *page); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 59406e0dc5b2..2d72496c2029 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -130,8 +130,9 @@ extern int cipso_v4_rbm_strictvalid; int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg); int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, const char *domain); @@ -152,14 +153,11 @@ static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) return NULL; } -static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +static inline int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - return NULL; -} - -static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - return NULL; + return 0; } static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, @@ -205,6 +203,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); int cipso_v4_socket_setattr(const struct socket *sock, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, @@ -225,6 +224,12 @@ static inline int cipso_v4_socket_setattr(const struct socket *sock, return -ENOSYS; } +static inline int cipso_v4_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 
dd5780b36919..6692430063fd 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -57,9 +57,8 @@ * The payload is dependent on the subsystem specified in the * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c - * file. All of the fields in the NetLabel payload are NETLINK attributes, the - * length of each field is the length of the NETLINK attribute payload, see - * include/net/netlink.h for more information on NETLINK attributes. + * file. All of the fields in the NetLabel payload are NETLINK attributes, see + * the include/net/netlink.h file for more information on NETLINK attributes. * */ @@ -82,50 +81,6 @@ #define NETLBL_NLTYPE_UNLABELED 5 #define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" -/* NetLabel return codes */ -#define NETLBL_E_OK 0 - -/* - * Helper functions - */ - -#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) -#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) -#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) - -/** - * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer - * @head: the amount of headroom in bytes - * @body: the desired size (minus headroom) in bytes - * @gfp_flags: the alloc flags to pass to alloc_skb() - * - * Description: - * Allocate a NETLINK message buffer based on the sizes given in @head and - * @body. If @head is greater than zero skb_reserve() is called to reserve - * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on - * success, NULL on failure. - * - */ -static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, - size_t body, - gfp_t gfp_flags) -{ - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); - if (skb == NULL) - return NULL; - if (head > 0) { - skb_reserve(skb, head); - if (skb_tailroom(skb) < body) { - kfree_skb(skb); - return NULL; - } - } - - return skb; -} - /* * NetLabel - Kernel API for accessing the network packet label mappings. * @@ -238,6 +193,8 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, #ifdef CONFIG_NETLABEL int netlbl_socket_setattr(const struct socket *sock, const struct netlbl_lsm_secattr *secattr); +int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr); int netlbl_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_getattr(const struct sk_buff *skb, @@ -250,6 +207,12 @@ static inline int netlbl_socket_setattr(const struct socket *sock, return -ENOSYS; } +static inline int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int netlbl_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 11dc2e7f679a..4ab68a7a636a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -146,11 +146,13 @@ * nla_ok(nla, remaining) does nla fit into remaining bytes? 
* nla_next(nla, remaining) get next netlink attribute + * nla_validate_nested() validate a stream of nested attributes * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes + * nla_for_each_nested() loop over the nested attributes *========================================================================= */ @@ -950,6 +952,24 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) } /** + * nla_validate_nested - Validate a stream of nested attributes + * @start: container attribute + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the nested attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documentation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_validate_nested(struct nlattr *start, int maxtype, + struct nla_policy *policy) +{ + return nla_validate(nla_data(start), nla_len(start), maxtype, policy); +} + +/** * nla_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @head: head of attribute stream diff --git a/kernel/audit.c b/kernel/audit.c index 963fd15c9621..f9889ee77825 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -244,7 +244,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -267,7 +267,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -293,7 +293,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -321,7 +321,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid) char *ctx = NULL; u32 len; int rc; - if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) + if ((rc = selinux_sid_to_string(sid, &ctx, &len))) return rc; else audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, @@ -538,7 +538,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; if (sid) { - if ((err = selinux_ctxid_to_string( + if ((err = selinux_sid_to_string( sid, &ctx, &len))) return err; else @@ -576,7 +576,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) "user pid=%d uid=%u auid=%u", pid, uid, loginuid); if (sid) { - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( sid, &ctx, &len)) { audit_log_format(ab, " ssid=%u", sid); @@ -614,7 +614,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) loginuid, sid); break; case AUDIT_SIGNAL_INFO: - err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len); + err = selinux_sid_to_string(audit_sig_sid, &ctx,
&len); if (err) return err; sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a44879b0c72f..1a58a81fb09d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1398,7 +1398,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, if (sid) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string(sid, &ctx, &len)) + if (selinux_sid_to_string(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); else audit_log_format(ab, " subj=%s", ctx); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1bd8827a0102..fb83c5cb8c32 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -385,7 +385,7 @@ static int audit_filter_rules(struct task_struct *tsk, logged upon error */ if (f->se_rule) { if (need_sid) { - selinux_task_ctxid(tsk, &sid); + selinux_get_task_sid(tsk, &sid); need_sid = 0; } result = selinux_audit_rule_match(sid, f->type, @@ -898,7 +898,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (axi->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( axi->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", axi->osid); @@ -1005,7 +1005,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_ctxid_to_string( + if (selinux_sid_to_string( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; diff --git a/kernel/cpu.c b/kernel/cpu.c index f230f9ae01c2..32c96628463e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); +/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. + * Should always be manipulated under cpu_add_remove_lock + */ +static int cpu_hotplug_disabled; + #ifdef CONFIG_HOTPLUG_CPU /* Crappy recursive lock-takers in cpufreq! 
Complain loudly about idiots */ @@ -108,30 +113,25 @@ static int take_cpu_down(void *unused) return 0; } -int cpu_down(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int _cpu_down(unsigned int cpu) { int err; struct task_struct *p; cpumask_t old_allowed, tmp; - mutex_lock(&cpu_add_remove_lock); - if (num_online_cpus() == 1) { - err = -EBUSY; - goto out; - } + if (num_online_cpus() == 1) + return -EBUSY; - if (!cpu_online(cpu)) { - err = -EINVAL; - goto out; - } + if (!cpu_online(cpu)) + return -EINVAL; err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { printk("%s: attempt to take down CPU %u failed\n", __FUNCTION__, cpu); - err = -EINVAL; - goto out; + return -EINVAL; } /* Ensure that we are not runnable on dying cpu */ @@ -179,22 +179,32 @@ out_thread: err = kthread_stop(p); out_allowed: set_cpus_allowed(current, old_allowed); -out: + return err; +} + +int cpu_down(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_down(cpu); + mutex_unlock(&cpu_add_remove_lock); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ -int __devinit cpu_up(unsigned int cpu) +/* Requires cpu_add_remove_lock to be held */ +static int __devinit _cpu_up(unsigned int cpu) { int ret; void *hcpu = (void *)(long)cpu; - mutex_lock(&cpu_add_remove_lock); - if (cpu_online(cpu) || !cpu_present(cpu)) { - ret = -EINVAL; - goto out; - } + if (cpu_online(cpu) || !cpu_present(cpu)) + return -EINVAL; ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { @@ -219,7 +229,95 @@ out_notify: if (ret != 0) blocking_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); + + return ret; +} + +int __devinit cpu_up(unsigned int cpu) +{ + int err = 0; + + mutex_lock(&cpu_add_remove_lock); + if (cpu_hotplug_disabled) + err = -EBUSY; + else + err = _cpu_up(cpu); + + mutex_unlock(&cpu_add_remove_lock); + return err; +} + +#ifdef CONFIG_SUSPEND_SMP +static cpumask_t frozen_cpus; + +int disable_nonboot_cpus(void) +{ + int cpu, first_cpu, error; + + mutex_lock(&cpu_add_remove_lock); + first_cpu = first_cpu(cpu_present_map); + if (!cpu_online(first_cpu)) { + error = _cpu_up(first_cpu); + if (error) { + printk(KERN_ERR "Could not bring CPU%d up.\n", + first_cpu); + goto out; + } + } + error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu)); + if (error) { + printk(KERN_ERR "Could not run on CPU%d\n", first_cpu); + goto out; + } + /* We take down all of the non-boot CPUs in one shot to avoid races + * with the userspace trying to use the CPU hotplug at the same time + */ + cpus_clear(frozen_cpus); + printk("Disabling non-boot CPUs ...\n"); + for_each_online_cpu(cpu) { + if (cpu == first_cpu) + continue; + error = _cpu_down(cpu); + if (!error) { + cpu_set(cpu, frozen_cpus); + printk("CPU%d is down\n", cpu); + } else { + printk(KERN_ERR "Error taking CPU%d down: %d\n", + cpu, error); + break; + } + } + if (!error) { + BUG_ON(num_online_cpus() > 1); + /* Make sure the CPUs won't be enabled by someone else */ + cpu_hotplug_disabled = 1; + } else { + printk(KERN_ERR "Non-boot CPUs are not disabled"); + } out: mutex_unlock(&cpu_add_remove_lock); - return ret; + return error; +} + +void enable_nonboot_cpus(void) +{ + int cpu, error; + + /* Allow everyone to use the CPU hotplug again */ + mutex_lock(&cpu_add_remove_lock); + cpu_hotplug_disabled = 0; + mutex_unlock(&cpu_add_remove_lock); + + printk("Enabling non-boot CPUs ...\n"); + 
for_each_cpu_mask(cpu, frozen_cpus) { + error = cpu_up(cpu); + if (!error) { + printk("CPU%d is up\n", cpu); + continue; + } + printk(KERN_WARNING "Error taking CPU%d up: %d\n", + cpu, error); + } + cpus_clear(frozen_cpus); } +#endif diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4ea6f0dc2fc5..cff41511269f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2245,7 +2245,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) int i; for (i = 0; zl->zones[i]; i++) { - int nid = zl->zones[i]->zone_pgdat->node_id; + int nid = zone_to_nid(zl->zones[i]); if (node_isset(nid, current->mems_allowed)) return 1; @@ -2316,9 +2316,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) const struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ - if (in_interrupt()) + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; - node = z->zone_pgdat->node_id; + node = zone_to_nid(z); might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); if (node_isset(node, current->mems_allowed)) return 1; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 48a53f68af96..4c6cdbaed661 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -154,6 +154,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, return retval; } +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ /** * __do_IRQ - original all in one highlevel IRQ handler * @irq: the interrupt number @@ -253,6 +254,7 @@ out: return 1; } +#endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/module.c b/kernel/module.c index 2a19cd47c046..b7fe6e840963 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1054,6 +1054,12 @@ static int mod_sysfs_setup(struct module *mod, { int err; + if (!module_subsys.kset.subsys) { + printk(KERN_ERR "%s: module_subsys not initialized\n", + mod->name); + err = -EINVAL; + goto out; + } memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); if (err) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 619ecabf7c58..4b6e2f18e056 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -36,6 +36,17 @@ config PM_DEBUG code. This is helpful when debugging and reporting various PM bugs, like suspend support. +config DISABLE_CONSOLE_SUSPEND + bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" + depends on PM && PM_DEBUG + default n + ---help--- + This option turns off the console suspend mechanism that prevents + debug messages from reaching the console during the suspend/resume + operations. This may be helpful when debugging device drivers' + suspend/resume routines, but may itself lead to problems, for example + if netconsole is used. 
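For reference, the new option only appears when the prerequisites in its depends line are met; a .config fragment selecting it (shown only to illustrate the dependency chain, not part of the patch) would be:

	CONFIG_PM=y
	CONFIG_PM_DEBUG=y
	CONFIG_DISABLE_CONSOLE_SUSPEND=y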
+ config PM_TRACE bool "Suspend/resume event tracing" depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 8d0af3d37a4b..38725f526afc 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,6 +7,4 @@ obj-y := main.o process.o console.o obj-$(CONFIG_PM_LEGACY) += pm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o swap.o user.o -obj-$(CONFIG_SUSPEND_SMP) += smp.o - obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e13e74067845..7c7b9b65e365 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> +#include <linux/cpu.h> #include "power.h" @@ -72,7 +73,10 @@ static int prepare_processes(void) int error; pm_prepare_console(); - disable_nonboot_cpus(); + + error = disable_nonboot_cpus(); + if (error) + goto enable_cpus; if (freeze_processes()) { error = -EBUSY; @@ -84,6 +88,7 @@ static int prepare_processes(void) return 0; thaw: thaw_processes(); +enable_cpus: enable_nonboot_cpus(); pm_restore_console(); return error; diff --git a/kernel/power/main.c b/kernel/power/main.c index 6d295c776794..873228c71dab 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/console.h> +#include <linux/cpu.h> +#include <linux/resume-trace.h> #include "power.h" @@ -51,7 +53,7 @@ void pm_set_ops(struct pm_ops * ops) static int suspend_prepare(suspend_state_t state) { - int error = 0; + int error; unsigned int free_pages; if (!pm_ops || !pm_ops->enter) @@ -59,12 +61,9 @@ static int suspend_prepare(suspend_state_t state) pm_prepare_console(); - disable_nonboot_cpus(); - - if (num_online_cpus() != 1) { - error = -EPERM; + error = disable_nonboot_cpus(); + if (error) goto Enable_cpu; - } if (freeze_processes()) { error = -EAGAIN; @@ -283,10 +282,39 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n power_attr(state); +#ifdef CONFIG_PM_TRACE +int pm_trace_enabled; + +static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf, "%d\n", pm_trace_enabled); +} + +static ssize_t +pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +{ + int val; + + if (sscanf(buf, "%d", &val) == 1) { + pm_trace_enabled = !!val; + return n; + } + return -EINVAL; +} + +power_attr(pm_trace); + +static struct attribute * g[] = { + &state_attr.attr, + &pm_trace_attr.attr, + NULL, +}; +#else static struct attribute * g[] = { &state_attr.attr, NULL, }; +#endif /* CONFIG_PM_TRACE */ static struct attribute_group attr_group = { .attrs = g, diff --git a/kernel/power/power.h b/kernel/power/power.h index 57a792982fb9..bfe999f7b272 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -38,8 +38,6 @@ extern struct subsystem power_subsys; /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; -extern struct pbe *pagedir_nosave; - /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; extern int in_suspend; @@ -50,21 +48,62 @@ extern asmlinkage int swsusp_arch_resume(void); extern unsigned int count_data_pages(void); +/** + * Auxiliary structure used for reading the snapshot image data and + * metadata from and writing them to the list of page backup entries + * (PBEs) which is the main data structure of swsusp. 
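A sketch of the consumer loop this interface implies may help here (hypothetical caller; write_page_to_swap() is invented for illustration and error handling is elided):

	struct snapshot_handle handle;
	int n;

	memset(&handle, 0, sizeof(handle));
	while ((n = snapshot_read_next(&handle, PAGE_SIZE)) > 0)
		write_page_to_swap(data_of(handle), n);	/* hypothetical sink */

As the code further below shows, snapshot_read_next() returns the number of bytes made available at data_of(handle), 0 once the whole image has been handed out, and a negative error code on failure.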
+ * + * Using struct snapshot_handle we can transfer the image, including its + * metadata, as a continuous sequence of bytes with the help of + * snapshot_read_next() and snapshot_write_next(). + * + * The code that writes the image to a storage or transfers it to + * the user land is required to use snapshot_read_next() for this + * purpose and it should not make any assumptions regarding the internal + * structure of the image. Similarly, the code that reads the image from + * a storage or transfers it from the user land is required to use + * snapshot_write_next(). + * + * This may allow us to change the internal structure of the image + * in the future with considerably less effort. + */ + struct snapshot_handle { - loff_t offset; - unsigned int page; - unsigned int page_offset; - unsigned int prev; - struct pbe *pbe, *last_pbe; - void *buffer; - unsigned int buf_offset; + loff_t offset; /* number of the last byte ready for reading + * or writing in the sequence + */ + unsigned int cur; /* number of the block of PAGE_SIZE bytes the + * next operation will refer to (ie. current) + */ + unsigned int cur_offset; /* offset with respect to the current + * block (for the next operation) + */ + unsigned int prev; /* number of the block of PAGE_SIZE bytes that + * was the current one previously + */ + void *buffer; /* address of the block to read from + * or write to + */ + unsigned int buf_offset; /* location to read from or write to, + * given as a displacement from 'buffer' + */ + int sync_read; /* Set to one to notify the caller of + * snapshot_write_next() that it may + * need to call wait_on_bio_chain() + */ }; +/* This macro returns the address from/to which the caller of + * snapshot_read_next()/snapshot_write_next() is allowed to + * read/write data after the function returns + */ #define data_of(handle) ((handle).buffer + (handle).buf_offset) +extern unsigned int snapshot_additional_pages(struct zone *zone); extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); -int snapshot_image_loaded(struct snapshot_handle *handle); +extern int snapshot_image_loaded(struct snapshot_handle *handle); +extern void snapshot_free_unused_memory(struct snapshot_handle *handle); #define SNAPSHOT_IOC_MAGIC '3' #define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1) diff --git a/kernel/power/smp.c b/kernel/power/smp.c deleted file mode 100644 index 5957312b2d68..000000000000 --- a/kernel/power/smp.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * drivers/power/smp.c - Functions for stopping other CPUs. - * - * Copyright 2004 Pavel Machek <pavel@suse.cz> - * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> - * - * This file is released under the GPLv2. 
- */ - -#undef DEBUG - -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/suspend.h> -#include <linux/module.h> -#include <linux/cpu.h> -#include <asm/atomic.h> -#include <asm/tlbflush.h> - -/* This is protected by pm_sem semaphore */ -static cpumask_t frozen_cpus; - -void disable_nonboot_cpus(void) -{ - int cpu, error; - - error = 0; - cpus_clear(frozen_cpus); - printk("Freezing cpus ...\n"); - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - error = cpu_down(cpu); - if (!error) { - cpu_set(cpu, frozen_cpus); - printk("CPU%d is down\n", cpu); - continue; - } - printk("Error taking cpu %d down: %d\n", cpu, error); - } - BUG_ON(raw_smp_processor_id() != 0); - if (error) - panic("cpus not sleeping"); -} - -void enable_nonboot_cpus(void) -{ - int cpu, error; - - printk("Thawing cpus ...\n"); - for_each_cpu_mask(cpu, frozen_cpus) { - error = cpu_up(cpu); - if (!error) { - printk("CPU%d is up\n", cpu); - continue; - } - printk("Error taking cpu %d up: %d\n", cpu, error); - panic("Not enough cpus"); - } - cpus_clear(frozen_cpus); -} - diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 75d4886e648e..1b84313cbab5 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -34,10 +34,12 @@ #include "power.h" -struct pbe *pagedir_nosave; +/* List of PBEs used for creating and restoring the suspend image */ +struct pbe *restore_pblist; + static unsigned int nr_copy_pages; static unsigned int nr_meta_pages; -static unsigned long *buffer; +static void *buffer; #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void) @@ -156,240 +158,637 @@ static inline int save_highmem(void) {return 0;} static inline int restore_highmem(void) {return 0;} #endif -static int pfn_is_nosave(unsigned long pfn) +/** + * @safe_needed - on resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * used before suspend. + * + * The unsafe pages are marked with the PG_nosave_free flag + * and we count them using unsafe_pages + */ + +#define PG_ANY 0 +#define PG_SAFE 1 +#define PG_UNSAFE_CLEAR 1 +#define PG_UNSAFE_KEEP 0 + +static unsigned int allocated_unsafe_pages; + +static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) { - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); + void *res; + + res = (void *)get_zeroed_page(gfp_mask); + if (safe_needed) + while (res && PageNosaveFree(virt_to_page(res))) { + /* The page is unsafe, mark it for swsusp_free() */ + SetPageNosave(virt_to_page(res)); + allocated_unsafe_pages++; + res = (void *)get_zeroed_page(gfp_mask); + } + if (res) { + SetPageNosave(virt_to_page(res)); + SetPageNosaveFree(virt_to_page(res)); + } + return res; +} + +unsigned long get_safe_page(gfp_t gfp_mask) +{ + return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE); } /** - * saveable - Determine whether a page should be cloned or not. - * @pfn: The page - * - * We save a page if it's Reserved, and not in the range of pages - * statically defined as 'unsaveable', or if it isn't reserved, and - * isn't part of a free chunk of pages. 
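The two modes of the allocator just introduced differ only in the safe_needed argument; a sketch of the intended usage (names as defined above):

	/* suspend: any free page frame will do */
	void *buf = alloc_image_page(GFP_ATOMIC, PG_ANY);

	/* resume: retry until a frame is found that does not conflict with
	 * the pages used before suspend; rejected frames are marked for
	 * swsusp_free() and counted in allocated_unsafe_pages */
	unsigned long addr = get_safe_page(GFP_ATOMIC);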
+ * free_image_page - free page represented by @addr, allocated with + * alloc_image_page (page flags set by it must be cleared) */ -static int saveable(struct zone *zone, unsigned long *zone_pfn) +static inline void free_image_page(void *addr, int clear_nosave_free) { - unsigned long pfn = *zone_pfn + zone->zone_start_pfn; - struct page *page; + ClearPageNosave(virt_to_page(addr)); + if (clear_nosave_free) + ClearPageNosaveFree(virt_to_page(addr)); + free_page((unsigned long)addr); +} - if (!pfn_valid(pfn)) - return 0; +/* struct linked_page is used to build chains of pages */ - page = pfn_to_page(pfn); - BUG_ON(PageReserved(page) && PageNosave(page)); - if (PageNosave(page)) - return 0; - if (PageReserved(page) && pfn_is_nosave(pfn)) - return 0; - if (PageNosaveFree(page)) - return 0; +#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) - return 1; -} +struct linked_page { + struct linked_page *next; + char data[LINKED_PAGE_DATA_SIZE]; +} __attribute__((packed)); -unsigned int count_data_pages(void) +static inline void +free_list_of_pages(struct linked_page *list, int clear_page_nosave) { - struct zone *zone; - unsigned long zone_pfn; - unsigned int n = 0; + while (list) { + struct linked_page *lp = list->next; - for_each_zone (zone) { - if (is_highmem(zone)) - continue; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - n += saveable(zone, &zone_pfn); + free_image_page(list, clear_page_nosave); + list = lp; } - return n; } -static void copy_data_pages(struct pbe *pblist) +/** + * struct chain_allocator is used for allocating small objects out of + * a linked list of pages called 'the chain'. + * + * The chain grows each time when there is no room for a new object in + * the current page. The allocated objects cannot be freed individually. + * It is only possible to free them all at once, by freeing the entire + * chain. + * + * NOTE: The chain allocator may be inefficient if the allocated objects + * are not much smaller than PAGE_SIZE. + */ + +struct chain_allocator { + struct linked_page *chain; /* the chain */ + unsigned int used_space; /* total size of objects allocated out + * of the current page + */ + gfp_t gfp_mask; /* mask for allocating pages */ + int safe_needed; /* if set, only "safe" pages are allocated */ +}; + +static void +chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) { - struct zone *zone; - unsigned long zone_pfn; - struct pbe *pbe, *p; + ca->chain = NULL; + ca->used_space = LINKED_PAGE_DATA_SIZE; + ca->gfp_mask = gfp_mask; + ca->safe_needed = safe_needed; +} - pbe = pblist; - for_each_zone (zone) { - if (is_highmem(zone)) - continue; - mark_free_pages(zone); - /* This is necessary for swsusp_free() */ - for_each_pb_page (p, pblist) - SetPageNosaveFree(virt_to_page(p)); - for_each_pbe (p, pblist) - SetPageNosaveFree(virt_to_page(p->address)); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - if (saveable(zone, &zone_pfn)) { - struct page *page; - long *src, *dst; - int n; - - page = pfn_to_page(zone_pfn + zone->zone_start_pfn); - BUG_ON(!pbe); - pbe->orig_address = (unsigned long)page_address(page); - /* copy_page and memcpy are not usable for copying task structs. 
*/ - dst = (long *)pbe->address; - src = (long *)pbe->orig_address; - for (n = PAGE_SIZE / sizeof(long); n; n--) - *dst++ = *src++; - pbe = pbe->next; - } - } +static void *chain_alloc(struct chain_allocator *ca, unsigned int size) +{ + void *ret; + + if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { + struct linked_page *lp; + + lp = alloc_image_page(ca->gfp_mask, ca->safe_needed); + if (!lp) + return NULL; + + lp->next = ca->chain; + ca->chain = lp; + ca->used_space = 0; } - BUG_ON(pbe); + ret = ca->chain->data + ca->used_space; + ca->used_space += size; + return ret; } +static void chain_free(struct chain_allocator *ca, int clear_page_nosave) +{ + free_list_of_pages(ca->chain, clear_page_nosave); + memset(ca, 0, sizeof(struct chain_allocator)); +} /** - * free_pagedir - free pages allocated with alloc_pagedir() + * Data types related to memory bitmaps. + * + * Memory bitmap is a structure consisting of many linked lists of + * objects. The main list's elements are of type struct zone_bitmap + * and each of them corresponds to one zone. For each zone bitmap + * object there is a list of objects of type struct bm_block that + * represent blocks of bit chunks in which information is + * stored. + * + * struct memory_bitmap contains a pointer to the main list of zone + * bitmap objects, a struct bm_position used for browsing the bitmap, + * and a pointer to the list of pages used for allocating all of the + * zone bitmap objects and bitmap block objects. + * + * NOTE: It has to be possible to lay out the bitmap in memory + * using only allocations of order 0. Additionally, the bitmap is + * designed to work with an arbitrary number of zones (this is over the + * top for now, but let's avoid making unnecessary assumptions ;-). + * + * struct zone_bitmap contains a pointer to a list of bitmap block + * objects and a pointer to the bitmap block object that has been + * most recently used for setting bits. Additionally, it contains the + * pfns that correspond to the start and end of the represented zone. + * + * struct bm_block contains a pointer to the memory page in which + * information is stored (in the form of a block of bit chunks + * of type unsigned long each). It also contains the pfns that + * correspond to the start and end of the represented memory area and + * the number of bit chunks in the block. + * + * NOTE: Memory bitmaps are used for two types of operations only: + * "set a bit" and "find the next bit set". Moreover, the searching + * is always carried out after all of the "set a bit" operations + * on a given bitmap.
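In other words, the only calling sequence the bitmaps are designed for is roughly the following (a sketch; the helpers are defined below):

	memory_bm_set_bit(bm, pfn);		/* repeated while populating */
	/* ... all bits set ... */
	memory_bm_position_reset(bm);		/* required before the first lookup */
	for (pfn = memory_bm_next_pfn(bm); pfn != BM_END_OF_MAP;
	     pfn = memory_bm_next_pfn(bm))
		/* use pfn */;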
*/ -static void free_pagedir(struct pbe *pblist, int clear_nosave_free) +#define BM_END_OF_MAP (~0UL) + +#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long)) +#define BM_BITS_PER_CHUNK (sizeof(long) << 3) +#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) + +struct bm_block { + struct bm_block *next; /* next element of the list */ + unsigned long start_pfn; /* pfn represented by the first bit */ + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ + unsigned int size; /* number of bit chunks */ + unsigned long *data; /* chunks of bits representing pages */ +}; + +struct zone_bitmap { + struct zone_bitmap *next; /* next element of the list */ + unsigned long start_pfn; /* minimal pfn in this zone */ + unsigned long end_pfn; /* maximal pfn in this zone plus 1 */ + struct bm_block *bm_blocks; /* list of bitmap blocks */ + struct bm_block *cur_block; /* recently used bitmap block */ +}; + +/* struct bm_position is used for browsing memory bitmaps */ + +struct bm_position { + struct zone_bitmap *zone_bm; + struct bm_block *block; + int chunk; + int bit; +}; + +struct memory_bitmap { + struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */ + struct linked_page *p_list; /* list of pages used to store zone + * bitmap objects and bitmap block + * objects + */ + struct bm_position cur; /* most recently used bit position */ +}; + +/* Functions that operate on memory bitmaps */ + +static inline void memory_bm_reset_chunk(struct memory_bitmap *bm) { - struct pbe *pbe; + bm->cur.chunk = 0; + bm->cur.bit = -1; +} - while (pblist) { - pbe = (pblist + PB_PAGE_SKIP)->next; - ClearPageNosave(virt_to_page(pblist)); - if (clear_nosave_free) - ClearPageNosaveFree(virt_to_page(pblist)); - free_page((unsigned long)pblist); - pblist = pbe; - } +static void memory_bm_position_reset(struct memory_bitmap *bm) +{ + struct zone_bitmap *zone_bm; + + zone_bm = bm->zone_bm_list; + bm->cur.zone_bm = zone_bm; + bm->cur.block = zone_bm->bm_blocks; + memory_bm_reset_chunk(bm); } +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); + /** - * fill_pb_page - Create a list of PBEs on a given memory page + * create_bm_block_list - create a list of block bitmap objects */ -static inline void fill_pb_page(struct pbe *pbpage) +static inline struct bm_block * +create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca) { - struct pbe *p; + struct bm_block *bblist = NULL; + + while (nr_blocks-- > 0) { + struct bm_block *bb; - p = pbpage; - pbpage += PB_PAGE_SKIP; - do - p->next = p + 1; - while (++p < pbpage); + bb = chain_alloc(ca, sizeof(struct bm_block)); + if (!bb) + return NULL; + + bb->next = bblist; + bblist = bb; + } + return bblist; } /** - * create_pbe_list - Create a list of PBEs on top of a given chain - * of memory pages allocated with alloc_pagedir() + * create_zone_bm_list - create a list of zone bitmap objects */ -static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) +static inline struct zone_bitmap * +create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca) { - struct pbe *pbpage, *p; - unsigned int num = PBES_PER_PAGE; + struct zone_bitmap *zbmlist = NULL; - for_each_pb_page (pbpage, pblist) { - if (num >= nr_pages) - break; + while (nr_zones-- > 0) { + struct zone_bitmap *zbm; + + zbm = chain_alloc(ca, sizeof(struct zone_bitmap)); + if (!zbm) + return NULL; + + zbm->next = zbmlist; + zbmlist = zbm; + } + return zbmlist; +} + +/** + * memory_bm_create - allocate memory for a memory bitmap + */ + +static int
+memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) +{ + struct chain_allocator ca; + struct zone *zone; + struct zone_bitmap *zone_bm; + struct bm_block *bb; + unsigned int nr; + + chain_init(&ca, gfp_mask, safe_needed); - fill_pb_page(pbpage); - num += PBES_PER_PAGE; + /* Compute the number of zones */ + nr = 0; + for_each_zone (zone) + if (populated_zone(zone) && !is_highmem(zone)) + nr++; + + /* Allocate the list of zones bitmap objects */ + zone_bm = create_zone_bm_list(nr, &ca); + bm->zone_bm_list = zone_bm; + if (!zone_bm) { + chain_free(&ca, PG_UNSAFE_CLEAR); + return -ENOMEM; } - if (pbpage) { - for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) - p->next = p + 1; - p->next = NULL; + + /* Initialize the zone bitmap objects */ + for_each_zone (zone) { + unsigned long pfn; + + if (!populated_zone(zone) || is_highmem(zone)) + continue; + + zone_bm->start_pfn = zone->zone_start_pfn; + zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages; + /* Allocate the list of bitmap block objects */ + nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + bb = create_bm_block_list(nr, &ca); + zone_bm->bm_blocks = bb; + zone_bm->cur_block = bb; + if (!bb) + goto Free; + + nr = zone->spanned_pages; + pfn = zone->zone_start_pfn; + /* Initialize the bitmap block objects */ + while (bb) { + unsigned long *ptr; + + ptr = alloc_image_page(gfp_mask, safe_needed); + bb->data = ptr; + if (!ptr) + goto Free; + + bb->start_pfn = pfn; + if (nr >= BM_BITS_PER_BLOCK) { + pfn += BM_BITS_PER_BLOCK; + bb->size = BM_CHUNKS_PER_BLOCK; + nr -= BM_BITS_PER_BLOCK; + } else { + /* This is executed only once in the loop */ + pfn += nr; + bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK); + } + bb->end_pfn = pfn; + bb = bb->next; + } + zone_bm = zone_bm->next; } + bm->p_list = ca.chain; + memory_bm_position_reset(bm); + return 0; + +Free: + bm->p_list = ca.chain; + memory_bm_free(bm, PG_UNSAFE_CLEAR); + return -ENOMEM; } -static unsigned int unsafe_pages; +/** + * memory_bm_free - free memory occupied by the memory bitmap @bm + */ + +static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) +{ + struct zone_bitmap *zone_bm; + + /* Free the list of bit blocks for each zone_bitmap object */ + zone_bm = bm->zone_bm_list; + while (zone_bm) { + struct bm_block *bb; + + bb = zone_bm->bm_blocks; + while (bb) { + if (bb->data) + free_image_page(bb->data, clear_nosave_free); + bb = bb->next; + } + zone_bm = zone_bm->next; + } + free_list_of_pages(bm->p_list, clear_nosave_free); + bm->zone_bm_list = NULL; +} /** - * @safe_needed - on resume, for storing the PBE list and the image, - * we can only use memory pages that do not conflict with the pages - * used before suspend. + * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds + * to given pfn. The cur_zone_bm member of @bm and the cur_block member + * of @bm->cur_zone_bm are updated. * - * The unsafe pages are marked with the PG_nosave_free flag - * and we count them using unsafe_pages + * If the bit cannot be set, the function returns -EINVAL . 
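For orientation, with 4 KiB pages and 64-bit longs (an assumption for this example, not something the patch depends on) the constants above work out to:

	BM_BITS_PER_CHUNK   = 8 * 8    = 64
	BM_CHUNKS_PER_BLOCK = 4096 / 8 = 512
	BM_BITS_PER_BLOCK   = 4096 * 8 = 32768

so one bm_block tracks 32768 page frames, i.e. 128 MiB of memory, per page of bitmap storage.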
*/ -static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) +static int +memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) { - void *res; - - res = (void *)get_zeroed_page(gfp_mask); - if (safe_needed) - while (res && PageNosaveFree(virt_to_page(res))) { - /* The page is unsafe, mark it for swsusp_free() */ - SetPageNosave(virt_to_page(res)); - unsafe_pages++; - res = (void *)get_zeroed_page(gfp_mask); + struct zone_bitmap *zone_bm; + struct bm_block *bb; + + /* Check if the pfn is from the current zone */ + zone_bm = bm->cur.zone_bm; + if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { + zone_bm = bm->zone_bm_list; + /* We don't assume that the zones are sorted by pfns */ + while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { + zone_bm = zone_bm->next; + if (unlikely(!zone_bm)) + return -EINVAL; } - if (res) { - SetPageNosave(virt_to_page(res)); - SetPageNosaveFree(virt_to_page(res)); + bm->cur.zone_bm = zone_bm; } - return res; + /* Check if the pfn corresponds to the current bitmap block */ + bb = zone_bm->cur_block; + if (pfn < bb->start_pfn) + bb = zone_bm->bm_blocks; + + while (pfn >= bb->end_pfn) { + bb = bb->next; + if (unlikely(!bb)) + return -EINVAL; + } + zone_bm->cur_block = bb; + pfn -= bb->start_pfn; + set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK); + return 0; } -unsigned long get_safe_page(gfp_t gfp_mask) +/* Two auxiliary functions for memory_bm_next_pfn */ + +/* Find the first set bit in the given chunk, if there is one */ + +static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p) { - return (unsigned long)alloc_image_page(gfp_mask, 1); + bit++; + while (bit < BM_BITS_PER_CHUNK) { + if (test_bit(bit, chunk_p)) + return bit; + + bit++; + } + return -1; +} + +/* Find a chunk containing some bits set in given block of bits */ + +static inline int next_chunk_in_block(int n, struct bm_block *bb) +{ + n++; + while (n < bb->size) { + if (bb->data[n]) + return n; + + n++; + } + return -1; } /** - * alloc_pagedir - Allocate the page directory. - * - * First, determine exactly how many pages we need and - * allocate them. + * memory_bm_next_pfn - find the pfn that corresponds to the next set bit + * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is + * returned. * - * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE - * struct pbe elements (pbes) and the last element in the page points - * to the next page. + * It is required to run memory_bm_position_reset() before the first call to + * this function. 
+ */ + +static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) +{ + struct zone_bitmap *zone_bm; + struct bm_block *bb; + int chunk; + int bit; + + do { + bb = bm->cur.block; + do { + chunk = bm->cur.chunk; + bit = bm->cur.bit; + do { + bit = next_bit_in_chunk(bit, bb->data + chunk); + if (bit >= 0) + goto Return_pfn; + + chunk = next_chunk_in_block(chunk, bb); + bit = -1; + } while (chunk >= 0); + bb = bb->next; + bm->cur.block = bb; + memory_bm_reset_chunk(bm); + } while (bb); + zone_bm = bm->cur.zone_bm->next; + if (zone_bm) { + bm->cur.zone_bm = zone_bm; + bm->cur.block = zone_bm->bm_blocks; + memory_bm_reset_chunk(bm); + } + } while (zone_bm); + memory_bm_position_reset(bm); + return BM_END_OF_MAP; + +Return_pfn: + bm->cur.chunk = chunk; + bm->cur.bit = bit; + return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; +} + +/** + * snapshot_additional_pages - estimate the number of additional pages + * that will be needed for setting up the suspend image data structures + * for a given zone (usually the returned value is greater than the exact number) + */ + +unsigned int snapshot_additional_pages(struct zone *zone) +{ + unsigned int res; + + res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); + return res; +} + +/** + * pfn_is_nosave - check if a given pfn is in the 'nosave' section + */ + +static inline int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +/** + * saveable - Determine whether a page should be cloned or not. + * @pfn: The page + * + * We save a page if it isn't Nosave, and is not in the range of pages + * statically defined as 'unsaveable', and it + * isn't a part of a free chunk of pages. + */ -static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, - int safe_needed) +static struct page *saveable_page(unsigned long pfn) { - unsigned int num; - struct pbe *pblist, *pbe; + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; - if (!nr_pages) + page = pfn_to_page(pfn); + + if (PageNosave(page)) + return NULL; + if (PageReserved(page) && pfn_is_nosave(pfn)) + return NULL; + if (PageNosaveFree(page)) + return NULL; + + return page; +} + +unsigned int count_data_pages(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + unsigned int n = 0; - pblist = alloc_image_page(gfp_mask, safe_needed); - /* FIXME: rewrite this ugly loop */ - for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; - pbe = pbe->next, num += PBES_PER_PAGE) { - pbe += PB_PAGE_SKIP; - pbe->next = alloc_image_page(gfp_mask, safe_needed); + for_each_zone (zone) { + if (is_highmem(zone)) + continue; + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + n += !!saveable_page(pfn); } - if (!pbe) { /* get_zeroed_page() failed */ - free_pagedir(pblist, 1); - pblist = NULL; - } else - create_pbe_list(pblist, nr_pages); - return pblist; + return n; +} + +static inline void copy_data_page(long *dst, long *src) +{ + int n; + + /* copy_page and memcpy are not usable for copying task structs. */ + for (n = PAGE_SIZE / sizeof(long); n; n--) + *dst++ = *src++; +} + +static void +copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) +{ + struct zone *zone; + unsigned long pfn; + + for_each_zone (zone) { + unsigned long max_zone_pfn; + + if (is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_page(pfn)) + memory_bm_set_bit(orig_bm, pfn); + } + memory_bm_position_reset(orig_bm); + memory_bm_position_reset(copy_bm); + do { + pfn = memory_bm_next_pfn(orig_bm); + if (likely(pfn != BM_END_OF_MAP)) { + struct page *page; + void *src; + + page = pfn_to_page(pfn); + src = page_address(page); + page = pfn_to_page(memory_bm_next_pfn(copy_bm)); + copy_data_page(page_address(page), src); + } + } while (pfn != BM_END_OF_MAP); } /** - * Free pages we allocated for suspend. Suspend pages are alocated - * before atomic copy, so we need to free them after resume. + * swsusp_free - free pages allocated for the suspend. + * + * Suspend pages are allocated before the atomic copy is made, so we + * need to release them after the resume. */ void swsusp_free(void) { struct zone *zone; - unsigned long zone_pfn; + unsigned long pfn, max_zone_pfn; for_each_zone(zone) { - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { - struct page *page; - page = pfn_to_page(zone_pfn + zone->zone_start_pfn); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); ClearPageNosaveFree(page); @@ -399,7 +798,7 @@ void swsusp_free(void) } nr_copy_pages = 0; nr_meta_pages = 0; - pagedir_nosave = NULL; + restore_pblist = NULL; buffer = NULL; } @@ -414,46 +813,57 @@ void swsusp_free(void) static int enough_free_mem(unsigned int nr_pages) { struct zone *zone; - unsigned int n = 0; + unsigned int free = 0, meta = 0; for_each_zone (zone) - if (!is_highmem(zone)) - n += zone->free_pages; - pr_debug("swsusp: available memory: %u pages\n", n); - return n > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); -} + if (!is_highmem(zone)) { + free += zone->free_pages; + meta += snapshot_additional_pages(zone); + } -static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) -{ - struct pbe *p; + pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, meta, free); - for_each_pbe (p, pblist) { - p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); - if (!p->address) - return -ENOMEM; - } - return 0; + return free > nr_pages + PAGES_FOR_IO + meta; } -static struct pbe *swsusp_alloc(unsigned int nr_pages) +static int +swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, + unsigned int nr_pages) { - struct pbe *pblist; + int error; - if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { - printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); - return NULL; - } + error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free; - if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { - printk(KERN_ERR "suspend: Allocating image pages failed.\n"); - swsusp_free(); - return NULL; + error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD,
PG_ANY); + if (error) + goto Free; + + while (nr_pages-- > 0) { + struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); + if (!page) + goto Free; + + SetPageNosave(page); + SetPageNosaveFree(page); + memory_bm_set_bit(copy_bm, page_to_pfn(page)); } + return 0; - return pblist; +Free: + swsusp_free(); + return -ENOMEM; } +/* Memory bitmap used for marking saveable pages */ +static struct memory_bitmap orig_bm; +/* Memory bitmap used for marking allocated pages that will contain the copies + * of saveable pages + */ +static struct memory_bitmap copy_bm; + asmlinkage int swsusp_save(void) { unsigned int nr_pages; @@ -464,25 +874,19 @@ asmlinkage int swsusp_save(void) nr_pages = count_data_pages(); printk("swsusp: Need to copy %u pages\n", nr_pages); - pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n", - nr_pages, - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, - PAGES_FOR_IO, nr_free_pages()); - if (!enough_free_mem(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; } - pagedir_nosave = swsusp_alloc(nr_pages); - if (!pagedir_nosave) + if (swsusp_alloc(&orig_bm, ©_bm, nr_pages)) return -ENOMEM; /* During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ drain_local_pages(); - copy_data_pages(pagedir_nosave); + copy_data_pages(©_bm, &orig_bm); /* * End of critical section. From now on, we can write to memory, @@ -511,22 +915,20 @@ static void init_header(struct swsusp_info *info) } /** - * pack_orig_addresses - the .orig_address fields of the PBEs from the - * list starting at @pbe are stored in the array @buf[] (1 page) + * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm + * are stored in the array @buf[] (1 page at a time) */ -static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe) +static inline void +pack_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; - for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { - buf[j] = pbe->orig_address; - pbe = pbe->next; + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + buf[j] = memory_bm_next_pfn(bm); + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; } - if (!pbe) - for (; j < PAGE_SIZE / sizeof(long); j++) - buf[j] = 0; - return pbe; } /** @@ -553,37 +955,39 @@ static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pb int snapshot_read_next(struct snapshot_handle *handle, size_t count) { - if (handle->page > nr_meta_pages + nr_copy_pages) + if (handle->cur > nr_meta_pages + nr_copy_pages) return 0; + if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, 0); + buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } if (!handle->offset) { init_header((struct swsusp_info *)buffer); handle->buffer = buffer; - handle->pbe = pagedir_nosave; + memory_bm_position_reset(&orig_bm); + memory_bm_position_reset(©_bm); } - if (handle->prev < handle->page) { - if (handle->page <= nr_meta_pages) { - handle->pbe = pack_orig_addresses(buffer, handle->pbe); - if (!handle->pbe) - handle->pbe = pagedir_nosave; + if (handle->prev < handle->cur) { + if (handle->cur <= nr_meta_pages) { + memset(buffer, 0, PAGE_SIZE); + pack_pfns(buffer, &orig_bm); } else { - handle->buffer = (void *)handle->pbe->address; - handle->pbe = handle->pbe->next; + unsigned long pfn = memory_bm_next_pfn(©_bm); + + handle->buffer = page_address(pfn_to_page(pfn)); } - handle->prev = handle->page; + handle->prev = handle->cur; } - handle->buf_offset = handle->page_offset; - 
if (handle->page_offset + count >= PAGE_SIZE) { - count = PAGE_SIZE - handle->page_offset; - handle->page_offset = 0; - handle->page++; + handle->buf_offset = handle->cur_offset; + if (handle->cur_offset + count >= PAGE_SIZE) { + count = PAGE_SIZE - handle->cur_offset; + handle->cur_offset = 0; + handle->cur++; } else { - handle->page_offset += count; + handle->cur_offset += count; } handle->offset += count; return count; @@ -595,47 +999,50 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) * had been used before suspend */ -static int mark_unsafe_pages(struct pbe *pblist) +static int mark_unsafe_pages(struct memory_bitmap *bm) { struct zone *zone; - unsigned long zone_pfn; - struct pbe *p; - - if (!pblist) /* a sanity check */ - return -EINVAL; + unsigned long pfn, max_zone_pfn; /* Clear page flags */ for_each_zone (zone) { - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - if (pfn_valid(zone_pfn + zone->zone_start_pfn)) - ClearPageNosaveFree(pfn_to_page(zone_pfn + - zone->zone_start_pfn)); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) + ClearPageNosaveFree(pfn_to_page(pfn)); } - /* Mark orig addresses */ - for_each_pbe (p, pblist) { - if (virt_addr_valid(p->orig_address)) - SetPageNosaveFree(virt_to_page(p->orig_address)); - else - return -EFAULT; - } + /* Mark pages that correspond to the "original" pfns as "unsafe" */ + memory_bm_position_reset(bm); + do { + pfn = memory_bm_next_pfn(bm); + if (likely(pfn != BM_END_OF_MAP)) { + if (likely(pfn_valid(pfn))) + SetPageNosaveFree(pfn_to_page(pfn)); + else + return -EFAULT; + } + } while (pfn != BM_END_OF_MAP); - unsafe_pages = 0; + allocated_unsafe_pages = 0; return 0; } -static void copy_page_backup_list(struct pbe *dst, struct pbe *src) +static void +duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) { - /* We assume both lists contain the same number of elements */ - while (src) { - dst->orig_address = src->orig_address; - dst = dst->next; - src = src->next; + unsigned long pfn; + + memory_bm_position_reset(src); + pfn = memory_bm_next_pfn(src); + while (pfn != BM_END_OF_MAP) { + memory_bm_set_bit(dst, pfn); + pfn = memory_bm_next_pfn(src); } } -static int check_header(struct swsusp_info *info) +static inline int check_header(struct swsusp_info *info) { char *reason = NULL; @@ -662,19 +1069,14 @@ static int check_header(struct swsusp_info *info) * load header - check the image header and copy data from it */ -static int load_header(struct snapshot_handle *handle, - struct swsusp_info *info) +static int +load_header(struct swsusp_info *info) { int error; - struct pbe *pblist; + restore_pblist = NULL; error = check_header(info); if (!error) { - pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0); - if (!pblist) - return -ENOMEM; - pagedir_nosave = pblist; - handle->pbe = pblist; nr_copy_pages = info->image_pages; nr_meta_pages = info->pages - info->image_pages - 1; } @@ -682,113 +1084,137 @@ static int load_header(struct snapshot_handle *handle, } /** - * unpack_orig_addresses - copy the elements of @buf[] (1 page) to - * the PBEs in the list starting at @pbe + * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set + * the corresponding bit in the memory bitmap @bm */ -static inline struct pbe *unpack_orig_addresses(unsigned long *buf, - struct pbe *pbe) +static inline void +unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; - for (j = 0; 
j < PAGE_SIZE / sizeof(long) && pbe; j++) { - pbe->orig_address = buf[j]; - pbe = pbe->next; + for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { + if (unlikely(buf[j] == BM_END_OF_MAP)) + break; + + memory_bm_set_bit(bm, buf[j]); } - return pbe; } /** - * prepare_image - use metadata contained in the PBE list - * pointed to by pagedir_nosave to mark the pages that will - * be overwritten in the process of restoring the system - * memory state from the image ("unsafe" pages) and allocate - * memory for the image + * prepare_image - use the memory bitmap @bm to mark the pages that will + * be overwritten in the process of restoring the system memory state + * from the suspend image ("unsafe" pages) and allocate memory for the + * image. * - * The idea is to allocate the PBE list first and then - * allocate as many pages as it's needed for the image data, - * but not to assign these pages to the PBEs initially. - * Instead, we just mark them as allocated and create a list - * of "safe" which will be used later + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for the image data, but not to assign these + * pages to specific tasks initially. Instead, we just mark them as + * allocated and create a list of "safe" pages that will be used later. */ -struct safe_page { - struct safe_page *next; - char padding[PAGE_SIZE - sizeof(void *)]; -}; +#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) -static struct safe_page *safe_pages; +static struct linked_page *safe_pages_list; -static int prepare_image(struct snapshot_handle *handle) +static int +prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { - int error = 0; - unsigned int nr_pages = nr_copy_pages; - struct pbe *p, *pblist = NULL; + unsigned int nr_pages; + struct linked_page *sp_list, *lp; + int error; - p = pagedir_nosave; - error = mark_unsafe_pages(p); - if (!error) { - pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); - if (pblist) - copy_page_backup_list(pblist, p); - free_pagedir(p, 0); - if (!pblist) + error = mark_unsafe_pages(bm); + if (error) + goto Free; + + error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); + if (error) + goto Free; + + duplicate_memory_bitmap(new_bm, bm); + memory_bm_free(bm, PG_UNSAFE_KEEP); + /* Reserve some safe pages for potential later use. + * + * NOTE: This way we make sure there will be enough safe pages for the + * chain_alloc() in get_buffer(). It is a bit wasteful, but + * nr_copy_pages cannot be greater than 50% of the memory anyway. 
+ */ + sp_list = NULL; + /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ + nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); + while (nr_pages > 0) { + lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); + if (!lp) { error = -ENOMEM; + goto Free; + } + lp->next = sp_list; + sp_list = lp; + nr_pages--; } - safe_pages = NULL; - if (!error && nr_pages > unsafe_pages) { - nr_pages -= unsafe_pages; - while (nr_pages--) { - struct safe_page *ptr; - - ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); - if (!ptr) { - error = -ENOMEM; - break; - } - if (!PageNosaveFree(virt_to_page(ptr))) { - /* The page is "safe", add it to the list */ - ptr->next = safe_pages; - safe_pages = ptr; - } - /* Mark the page as allocated */ - SetPageNosave(virt_to_page(ptr)); - SetPageNosaveFree(virt_to_page(ptr)); + /* Preallocate memory for the image */ + safe_pages_list = NULL; + nr_pages = nr_copy_pages - allocated_unsafe_pages; + while (nr_pages > 0) { + lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); + if (!lp) { + error = -ENOMEM; + goto Free; + } + if (!PageNosaveFree(virt_to_page(lp))) { + /* The page is "safe", add it to the list */ + lp->next = safe_pages_list; + safe_pages_list = lp; } + /* Mark the page as allocated */ + SetPageNosave(virt_to_page(lp)); + SetPageNosaveFree(virt_to_page(lp)); + nr_pages--; } - if (!error) { - pagedir_nosave = pblist; - } else { - handle->pbe = NULL; - swsusp_free(); + /* Free the reserved safe pages so that chain_alloc() can use them */ + while (sp_list) { + lp = sp_list->next; + free_image_page(sp_list, PG_UNSAFE_CLEAR); + sp_list = lp; } + return 0; + +Free: + swsusp_free(); return error; } -static void *get_buffer(struct snapshot_handle *handle) +/** + * get_buffer - compute the address that snapshot_write_next() should + * set for its caller to write to. + */ + +static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) { - struct pbe *pbe = handle->pbe, *last = handle->last_pbe; - struct page *page = virt_to_page(pbe->orig_address); + struct pbe *pbe; + struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); - if (PageNosave(page) && PageNosaveFree(page)) { - /* - * We have allocated the "original" page frame and we can - * use it directly to store the read page + if (PageNosave(page) && PageNosaveFree(page)) + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. */ - pbe->address = 0; - if (last && last->next) - last->next = NULL; - return (void *)pbe->orig_address; - } - /* - * The "original" page frame has not been allocated and we have to - * use a "safe" page frame to store the read page + return page_address(page); + + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. 
*/ - pbe->address = (unsigned long)safe_pages; - safe_pages = safe_pages->next; - if (last) - last->next = pbe; - handle->last_pbe = pbe; + pbe = chain_alloc(ca, sizeof(struct pbe)); + if (!pbe) { + swsusp_free(); + return NULL; + } + pbe->orig_address = (unsigned long)page_address(page); + pbe->address = (unsigned long)safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->next = restore_pblist; + restore_pblist = pbe; return (void *)pbe->address; } @@ -816,46 +1242,60 @@ static void *get_buffer(struct snapshot_handle *handle) int snapshot_write_next(struct snapshot_handle *handle, size_t count) { + static struct chain_allocator ca; int error = 0; - if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages) + /* Check if we have already loaded the entire image */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) return 0; + if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, 0); + buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } if (!handle->offset) handle->buffer = buffer; - if (handle->prev < handle->page) { - if (!handle->prev) { - error = load_header(handle, (struct swsusp_info *)buffer); + handle->sync_read = 1; + if (handle->prev < handle->cur) { + if (handle->prev == 0) { + error = load_header(buffer); if (error) return error; + + error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); + if (error) + return error; + } else if (handle->prev <= nr_meta_pages) { - handle->pbe = unpack_orig_addresses(buffer, handle->pbe); - if (!handle->pbe) { - error = prepare_image(handle); + unpack_orig_pfns(buffer, ©_bm); + if (handle->prev == nr_meta_pages) { + error = prepare_image(&orig_bm, ©_bm); if (error) return error; - handle->pbe = pagedir_nosave; - handle->last_pbe = NULL; - handle->buffer = get_buffer(handle); + + chain_init(&ca, GFP_ATOMIC, PG_SAFE); + memory_bm_position_reset(&orig_bm); + restore_pblist = NULL; + handle->buffer = get_buffer(&orig_bm, &ca); + handle->sync_read = 0; + if (!handle->buffer) + return -ENOMEM; } } else { - handle->pbe = handle->pbe->next; - handle->buffer = get_buffer(handle); + handle->buffer = get_buffer(&orig_bm, &ca); + handle->sync_read = 0; } - handle->prev = handle->page; + handle->prev = handle->cur; } - handle->buf_offset = handle->page_offset; - if (handle->page_offset + count >= PAGE_SIZE) { - count = PAGE_SIZE - handle->page_offset; - handle->page_offset = 0; - handle->page++; + handle->buf_offset = handle->cur_offset; + if (handle->cur_offset + count >= PAGE_SIZE) { + count = PAGE_SIZE - handle->cur_offset; + handle->cur_offset = 0; + handle->cur++; } else { - handle->page_offset += count; + handle->cur_offset += count; } handle->offset += count; return count; @@ -863,6 +1303,13 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) int snapshot_image_loaded(struct snapshot_handle *handle) { - return !(!handle->pbe || handle->pbe->next || !nr_copy_pages || - handle->page <= nr_meta_pages + nr_copy_pages); + return !(!nr_copy_pages || + handle->cur <= nr_meta_pages + nr_copy_pages); +} + +void snapshot_free_unused_memory(struct snapshot_handle *handle) +{ + /* Free only if we have loaded the image entirely */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f1dd146bd64d..9b2ee5344dee 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -22,6 +22,7 @@ #include <linux/device.h> 
#include <linux/buffer_head.h> #include <linux/bio.h> +#include <linux/blkdev.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/pm.h> @@ -49,18 +50,16 @@ static int mark_swapfiles(swp_entry_t start) { int error; - rw_swap_page_sync(READ, - swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header)); + rw_swap_page_sync(READ, swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header), NULL); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); swsusp_header.image = start; - error = rw_swap_page_sync(WRITE, - swp_entry(root_swap, 0), - virt_to_page((unsigned long) - &swsusp_header)); + error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header), + NULL); } else { pr_debug("swsusp: Partition is not swap space.\n"); error = -ENODEV; @@ -88,16 +87,37 @@ static int swsusp_swap_check(void) /* This is called before saving image */ * write_page - Write one page to given swap location. * @buf: Address we're writing. * @offset: Offset of the swap page we're writing to. + * @bio_chain: Link the next write BIO here */ -static int write_page(void *buf, unsigned long offset) +static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) { swp_entry_t entry; int error = -ENOSPC; if (offset) { + struct page *page = virt_to_page(buf); + + if (bio_chain) { + /* + * Whether or not we successfully allocated a copy page, + * we take a ref on the page here. It gets undone in + * wait_on_bio_chain(). + */ + struct page *page_copy; + page_copy = alloc_page(GFP_ATOMIC); + if (page_copy == NULL) { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + get_page(page); + } else { + memcpy(page_address(page_copy), + page_address(page), PAGE_SIZE); + page = page_copy; + } + } entry = swp_entry(root_swap, offset); - error = rw_swap_page_sync(WRITE, entry, virt_to_page(buf)); + error = rw_swap_page_sync(WRITE, entry, page, bio_chain); } return error; } @@ -146,6 +166,26 @@ static void release_swap_writer(struct swap_map_handle *handle) handle->bitmap = NULL; } +static void show_speed(struct timeval *start, struct timeval *stop, + unsigned nr_pages, char *msg) +{ + s64 elapsed_centisecs64; + int centisecs; + int k; + int kps; + + elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); + centisecs = elapsed_centisecs64; + if (centisecs == 0) + centisecs = 1; /* avoid div-by-zero */ + k = nr_pages * (PAGE_SIZE / 1024); + kps = (k * 100) / centisecs; + printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, + centisecs / 100, centisecs % 100, + kps / 1000, (kps % 1000) / 10); +} + static int get_swap_writer(struct swap_map_handle *handle) { handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); @@ -165,37 +205,70 @@ static int get_swap_writer(struct swap_map_handle *handle) return 0; } -static int swap_write_page(struct swap_map_handle *handle, void *buf) +static int wait_on_bio_chain(struct bio **bio_chain) { - int error; + struct bio *bio; + struct bio *next_bio; + int ret = 0; + + if (bio_chain == NULL) + return 0; + + bio = *bio_chain; + if (bio == NULL) + return 0; + while (bio) { + struct page *page; + + next_bio = bio->bi_private; + page = bio->bi_io_vec[0].bv_page; + wait_on_page_locked(page); + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + put_page(page); + bio_put(bio); + 
bio = next_bio; + } + *bio_chain = NULL; + return ret; +} + +static int swap_write_page(struct swap_map_handle *handle, void *buf, + struct bio **bio_chain) +{ + int error = 0; unsigned long offset; if (!handle->cur) return -EINVAL; offset = alloc_swap_page(root_swap, handle->bitmap); - error = write_page(buf, offset); + error = write_page(buf, offset, bio_chain); if (error) return error; handle->cur->entries[handle->k++] = offset; if (handle->k >= MAP_PAGE_ENTRIES) { + error = wait_on_bio_chain(bio_chain); + if (error) + goto out; offset = alloc_swap_page(root_swap, handle->bitmap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap); + error = write_page(handle->cur, handle->cur_swap, NULL); if (error) - return error; + goto out; memset(handle->cur, 0, PAGE_SIZE); handle->cur_swap = offset; handle->k = 0; } - return 0; +out: + return error; } static int flush_swap_writer(struct swap_map_handle *handle) { if (handle->cur && handle->cur_swap) - return write_page(handle->cur, handle->cur_swap); + return write_page(handle->cur, handle->cur_swap, NULL); else return -EINVAL; } @@ -206,21 +279,29 @@ static int flush_swap_writer(struct swap_map_handle *handle) static int save_image(struct swap_map_handle *handle, struct snapshot_handle *snapshot, - unsigned int nr_pages) + unsigned int nr_to_write) { unsigned int m; int ret; int error = 0; + int nr_pages; + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; - printk("Saving image data pages (%u pages) ... ", nr_pages); - m = nr_pages / 100; + printk("Saving image data pages (%u pages) ... ", nr_to_write); + m = nr_to_write / 100; if (!m) m = 1; nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); do { ret = snapshot_read_next(snapshot, PAGE_SIZE); if (ret > 0) { - error = swap_write_page(handle, data_of(*snapshot)); + error = swap_write_page(handle, data_of(*snapshot), + &bio); if (error) break; if (!(nr_pages % m)) @@ -228,8 +309,13 @@ static int save_image(struct swap_map_handle *handle, nr_pages++; } } while (ret > 0); + err2 = wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!error) + error = err2; if (!error) printk("\b\b\b\bdone\n"); + show_speed(&start, &stop, nr_to_write, "Wrote"); return error; } @@ -245,8 +331,7 @@ static int enough_swap(unsigned int nr_pages) unsigned int free_swap = count_swap_pages(root_swap, 1); pr_debug("swsusp: free swap pages: %u\n", free_swap); - return free_swap > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); + return free_swap > nr_pages + PAGES_FOR_IO; } /** @@ -266,7 +351,8 @@ int swsusp_write(void) int error; if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n"); + printk(KERN_ERR "swsusp: Cannot find swap device, try " + "swapon -a.\n"); return error; } memset(&snapshot, 0, sizeof(struct snapshot_handle)); @@ -281,7 +367,7 @@ int swsusp_write(void) error = get_swap_writer(&handle); if (!error) { unsigned long start = handle.cur_swap; - error = swap_write_page(&handle, header); + error = swap_write_page(&handle, header, NULL); if (!error) error = save_image(&handle, &snapshot, header->pages - 1); @@ -298,27 +384,6 @@ int swsusp_write(void) return error; } -/* - * Using bio to read from swap. - * This code requires a bit more work than just using buffer heads - * but, it is the recommended way for 2.5/2.6. - * The following are to signal the beginning and end of I/O. 
Bios
- * finish asynchronously, while we want them to happen synchronously.
- * A simple atomic_t, and a wait loop take care of this problem.
- */
-
-static atomic_t io_done = ATOMIC_INIT(0);
-
-static int end_io(struct bio *bio, unsigned int num, int err)
-{
-	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-		printk(KERN_ERR "I/O error reading swsusp image.\n");
-		return -EIO;
-	}
-	atomic_set(&io_done, 0);
-	return 0;
-}
-
 static struct block_device *resume_bdev;
 
 /**
@@ -326,15 +391,15 @@ static struct block_device *resume_bdev;
  *	@rw:	READ or WRITE.
  *	@off:	physical offset of page.
  *	@page:	page we're reading or writing.
+ *	@bio_chain: list of pending bios (for async reading)
  *
  *	Straight from the textbook - allocate and initialize the bio.
- *	If we're writing, make sure the page is marked as dirty.
- *	Then submit it and wait.
+ *	If we're reading, make sure the page is marked as dirty.
+ *	Then submit it and, if @bio_chain == NULL, wait.
  */
-
-static int submit(int rw, pgoff_t page_off, void *page)
+static int submit(int rw, pgoff_t page_off, struct page *page,
+			struct bio **bio_chain)
 {
-	int error = 0;
 	struct bio *bio;
 
 	bio = bio_alloc(GFP_ATOMIC, 1);
@@ -342,33 +407,40 @@
 		return -ENOMEM;
 	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
 	bio->bi_bdev = resume_bdev;
-	bio->bi_end_io = end_io;
+	bio->bi_end_io = end_swap_bio_read;
 
-	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
-		printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
-		error = -EFAULT;
-		goto Done;
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
+		bio_put(bio);
+		return -EFAULT;
 	}
-	atomic_set(&io_done, 1);
-	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
-	while (atomic_read(&io_done))
-		yield();
-	if (rw == READ)
-		bio_set_pages_dirty(bio);
- Done:
-	bio_put(bio);
-	return error;
+
+	lock_page(page);
+	bio_get(bio);
+
+	if (bio_chain == NULL) {
+		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+		wait_on_page_locked(page);
+		if (rw == READ)
+			bio_set_pages_dirty(bio);
+		bio_put(bio);
+	} else {
+		get_page(page);
+		bio->bi_private = *bio_chain;
+		*bio_chain = bio;
+		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+	}
+	return 0;
 }
 
-static int bio_read_page(pgoff_t page_off, void *page)
+static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
 {
-	return submit(READ, page_off, page);
+	return submit(READ, page_off, virt_to_page(addr), bio_chain);
 }
 
-static int bio_write_page(pgoff_t page_off, void *page)
+static int bio_write_page(pgoff_t page_off, void *addr)
 {
-	return submit(WRITE, page_off, page);
+	return submit(WRITE, page_off, virt_to_page(addr), NULL);
 }
 
 /**
@@ -393,7 +465,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
 	if (!handle->cur)
 		return -ENOMEM;
-	error = bio_read_page(swp_offset(start), handle->cur);
+	error = bio_read_page(swp_offset(start), handle->cur, NULL);
 	if (error) {
 		release_swap_reader(handle);
 		return error;
@@ -402,7 +474,8 @@
 	return 0;
 }
 
-static int swap_read_page(struct swap_map_handle *handle, void *buf)
+static int swap_read_page(struct swap_map_handle *handle, void *buf,
+			struct bio **bio_chain)
 {
 	unsigned long offset;
 	int error;
@@ -412,16 +485,17 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf)
 	offset = handle->cur->entries[handle->k];
 	if (!offset)
 		return
-EFAULT; - error = bio_read_page(offset, buf); + error = bio_read_page(offset, buf, bio_chain); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { + error = wait_on_bio_chain(bio_chain); handle->k = 0; offset = handle->cur->next_swap; if (!offset) release_swap_reader(handle); - else - error = bio_read_page(offset, handle->cur); + else if (!error) + error = bio_read_page(offset, handle->cur, NULL); } return error; } @@ -434,33 +508,49 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf) static int load_image(struct swap_map_handle *handle, struct snapshot_handle *snapshot, - unsigned int nr_pages) + unsigned int nr_to_read) { unsigned int m; - int ret; int error = 0; + struct timeval start; + struct timeval stop; + struct bio *bio; + int err2; + unsigned nr_pages; - printk("Loading image data pages (%u pages) ... ", nr_pages); - m = nr_pages / 100; + printk("Loading image data pages (%u pages) ... ", nr_to_read); + m = nr_to_read / 100; if (!m) m = 1; nr_pages = 0; - do { - ret = snapshot_write_next(snapshot, PAGE_SIZE); - if (ret > 0) { - error = swap_read_page(handle, data_of(*snapshot)); - if (error) - break; - if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; - } - } while (ret > 0); + bio = NULL; + do_gettimeofday(&start); + for ( ; ; ) { + error = snapshot_write_next(snapshot, PAGE_SIZE); + if (error <= 0) + break; + error = swap_read_page(handle, data_of(*snapshot), &bio); + if (error) + break; + if (snapshot->sync_read) + error = wait_on_bio_chain(&bio); + if (error) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + err2 = wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!error) + error = err2; if (!error) { printk("\b\b\b\bdone\n"); + snapshot_free_unused_memory(snapshot); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; } + show_speed(&start, &stop, nr_to_read, "Read"); return error; } @@ -483,7 +573,7 @@ int swsusp_read(void) header = (struct swsusp_info *)data_of(snapshot); error = get_swap_reader(&handle, swsusp_header.image); if (!error) - error = swap_read_page(&handle, header); + error = swap_read_page(&handle, header, NULL); if (!error) error = load_image(&handle, &snapshot, header->pages - 1); release_swap_reader(&handle); @@ -509,7 +599,7 @@ int swsusp_check(void) if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); memset(&swsusp_header, 0, sizeof(swsusp_header)); - if ((error = bio_read_page(0, &swsusp_header))) + if ((error = bio_read_page(0, &swsusp_header, NULL))) return error; if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 17f669c83012..8ef677ea0cea 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -193,14 +193,13 @@ int swsusp_shrink_memory(void) printk("Shrinking memory... 
"); do { size = 2 * count_highmem_pages(); - size += size / 50 + count_data_pages(); - size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + - PAGES_FOR_IO; + size += size / 50 + count_data_pages() + PAGES_FOR_IO; tmp = size; for_each_zone (zone) if (!is_highmem(zone) && populated_zone(zone)) { tmp -= zone->free_pages; tmp += zone->lowmem_reserve[ZONE_NORMAL]; + tmp += snapshot_additional_pages(zone); } if (tmp > 0) { tmp = __shrink_memory(tmp); diff --git a/kernel/power/user.c b/kernel/power/user.c index 3f1539fbe48a..2e4499f3e4d9 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -19,6 +19,7 @@ #include <linux/swapops.h> #include <linux/pm.h> #include <linux/fs.h> +#include <linux/cpu.h> #include <asm/uaccess.h> @@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (data->frozen) break; down(&pm_sem); - disable_nonboot_cpus(); - if (freeze_processes()) { - thaw_processes(); - enable_nonboot_cpus(); - error = -EBUSY; + error = disable_nonboot_cpus(); + if (!error) { + error = freeze_processes(); + if (error) { + thaw_processes(); + error = -EBUSY; + } } + enable_nonboot_cpus(); up(&pm_sem); if (!error) data->frozen = 1; @@ -189,6 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } + snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); error = device_suspend(PMSG_FREEZE); diff --git a/kernel/printk.c b/kernel/printk.c index 1149365e989e..771f5e861bcd 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -721,6 +721,7 @@ int __init add_preferred_console(char *name, int idx, char *options) return 0; } +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /** * suspend_console - suspend the console subsystem * @@ -728,6 +729,7 @@ int __init add_preferred_console(char *name, int idx, char *options) */ void suspend_console(void) { + printk("Suspending console(s)\n"); acquire_console_sem(); console_suspended = 1; } @@ -737,6 +739,7 @@ void resume_console(void) console_suspended = 0; release_console_sem(); } +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /** * acquire_console_sem - lock the console system for exclusive use. 
diff --git a/kernel/profile.c b/kernel/profile.c index d5bd75e7501c..fb660c7d35ba 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -309,13 +309,17 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, node = cpu_to_node(cpu); per_cpu(cpu_profile_flip, cpu) = 0; if (!per_cpu(cpu_profile_hits, cpu)[1]) { - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) return NOTIFY_BAD; per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); } if (!per_cpu(cpu_profile_hits, cpu)[0]) { - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) goto out_free; per_cpu(cpu_profile_hits, cpu)[0] = page_address(page); @@ -491,12 +495,16 @@ static int __init create_hash_tables(void) int node = cpu_to_node(cpu); struct page *page; - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) goto out_cleanup; per_cpu(cpu_profile_hits, cpu)[1] = (struct profile_hit *)page_address(page); - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + 0); if (!page) goto out_cleanup; per_cpu(cpu_profile_hits, cpu)[0] diff --git a/kernel/sched.c b/kernel/sched.c index a234fbee1238..5c848fd4e461 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -238,6 +238,7 @@ struct rq { /* For active balancing */ int active_balance; int push_cpu; + int cpu; /* cpu of this runqueue */ struct task_struct *migration_thread; struct list_head migration_queue; @@ -267,6 +268,15 @@ struct rq { static DEFINE_PER_CPU(struct rq, runqueues); +static inline int cpu_of(struct rq *rq) +{ +#ifdef CONFIG_SMP + return rq->cpu; +#else + return 0; +#endif +} + /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. 
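
The sched.c change above stores the CPU number in each runqueue and wraps it in cpu_of(), so the load balancer can tell which CPU a busiest runqueue belongs to and drop it from the candidate set when it yields nothing. A condensed sketch of the retry loop the following hunks build around this helper; find_busiest_queue_in() and try_pull_tasks() are hypothetical stand-ins for the real find_busiest_group()/find_busiest_queue()/move_tasks() sequence:

	static int balance_with_retry(int this_cpu)
	{
		cpumask_t cpus = CPU_MASK_ALL;	/* candidate CPUs for balancing */
		struct rq *busiest;

	redo:
		busiest = find_busiest_queue_in(&cpus);		/* hypothetical */
		if (!busiest)
			return 0;
		if (!try_pull_tasks(this_cpu, busiest)) {	/* hypothetical */
			/* all tasks pinned or unmovable: drop this CPU, retry */
			cpu_clear(cpu_of(busiest), cpus);
			if (!cpus_empty(cpus))
				goto redo;
		}
		return 1;
	}

The point of the pattern is that a queue full of CPU-affine tasks no longer aborts balancing outright; the search simply continues over the remaining CPUs.
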
@@ -2211,7 +2221,8 @@ out: */ static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, - unsigned long *imbalance, enum idle_type idle, int *sd_idle) + unsigned long *imbalance, enum idle_type idle, int *sd_idle, + cpumask_t *cpus) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -2248,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, sum_weighted_load = sum_nr_running = avg_load = 0; for_each_cpu_mask(i, group->cpumask) { - struct rq *rq = cpu_rq(i); + struct rq *rq; + + if (!cpu_isset(i, *cpus)) + continue; + + rq = cpu_rq(i); if (*sd_idle && !idle_cpu(i)) *sd_idle = 0; @@ -2466,13 +2482,17 @@ ret: */ static struct rq * find_busiest_queue(struct sched_group *group, enum idle_type idle, - unsigned long imbalance) + unsigned long imbalance, cpumask_t *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; for_each_cpu_mask(i, group->cpumask) { + + if (!cpu_isset(i, *cpus)) + continue; + rq = cpu_rq(i); if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance) @@ -2511,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; unsigned long imbalance; struct rq *busiest; + cpumask_t cpus = CPU_MASK_ALL; if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) @@ -2518,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq, schedstat_inc(sd, lb_cnt[idle]); - group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle); +redo: + group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, + &cpus); if (!group) { schedstat_inc(sd, lb_nobusyg[idle]); goto out_balanced; } - busiest = find_busiest_queue(group, idle, imbalance); + busiest = find_busiest_queue(group, idle, imbalance, &cpus); if (!busiest) { schedstat_inc(sd, lb_nobusyq[idle]); goto out_balanced; @@ -2549,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq, double_rq_unlock(this_rq, busiest); /* All tasks on this runqueue were pinned by CPU affinity */ - if (unlikely(all_pinned)) + if (unlikely(all_pinned)) { + cpu_clear(cpu_of(busiest), cpus); + if (!cpus_empty(cpus)) + goto redo; goto out_balanced; + } } if (!nr_moved) { @@ -2639,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) unsigned long imbalance; int nr_moved = 0; int sd_idle = 0; + cpumask_t cpus = CPU_MASK_ALL; if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) sd_idle = 1; schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); - group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle); +redo: + group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, + &sd_idle, &cpus); if (!group) { schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); goto out_balanced; } - busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance); + busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance, + &cpus); if (!busiest) { schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); goto out_balanced; @@ -2668,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) minus_1_or_zero(busiest->nr_running), imbalance, sd, NEWLY_IDLE, NULL); spin_unlock(&busiest->lock); + + if (!nr_moved) { + cpu_clear(cpu_of(busiest), cpus); + if (!cpus_empty(cpus)) + goto redo; + } } if (!nr_moved) { @@ -6747,6 +6784,7 @@ void __init sched_init(void) rq->cpu_load[j] = 0; rq->active_balance = 0; rq->push_cpu = 0; + rq->cpu = i; rq->migration_thread = NULL; 
INIT_LIST_HEAD(&rq->migration_queue); #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc37138..fd43c3e6786b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -943,6 +943,17 @@ static ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = VM_MIN_SLAB, + .procname = "min_slab_ratio", + .data = &sysctl_min_slab_ratio, + .maxlen = sizeof(sysctl_min_slab_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, #endif #ifdef CONFIG_X86_32 { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 554ee688a9f8..3f21cc79a134 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -277,7 +277,7 @@ config DEBUG_HIGHMEM config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED depends on BUG - depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV + depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV default !EMBEDDED help Say Y here to make BUG() panics output the file name and line number @@ -315,7 +315,7 @@ config DEBUG_VM config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390) + depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32) default y if DEBUG_INFO && UML help If you say Y here the resulting kernel image will be slightly larger diff --git a/mm/Makefile b/mm/Makefile index 9dd824c11eeb..60c56c0b5e10 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o - +obj-$(CONFIG_SMP) += allocpercpu.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c new file mode 100644 index 000000000000..eaa9abeea536 --- /dev/null +++ b/mm/allocpercpu.c @@ -0,0 +1,129 @@ +/* + * linux/mm/allocpercpu.c + * + * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com> + */ +#include <linux/mm.h> +#include <linux/module.h> + +/** + * percpu_depopulate - depopulate per-cpu data for given cpu + * @__pdata: per-cpu data to depopulate + * @cpu: depopulate per-cpu data for this cpu + * + * Depopulating per-cpu data for a cpu going offline would be a typical + * use case. You need to register a cpu hotplug handler for that purpose. + */ +void percpu_depopulate(void *__pdata, int cpu) +{ + struct percpu_data *pdata = __percpu_disguise(__pdata); + if (pdata->ptrs[cpu]) { + kfree(pdata->ptrs[cpu]); + pdata->ptrs[cpu] = NULL; + } +} +EXPORT_SYMBOL_GPL(percpu_depopulate); + +/** + * percpu_depopulate_mask - depopulate per-cpu data for some cpu's + * @__pdata: per-cpu data to depopulate + * @mask: depopulate per-cpu data for cpu's selected through mask bits + */ +void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ + int cpu; + for_each_cpu_mask(cpu, *mask) + percpu_depopulate(__pdata, cpu); +} +EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); + +/** + * percpu_populate - populate per-cpu data for given cpu + * @__pdata: per-cpu data to populate further + * @size: size of per-cpu object + * @gfp: may sleep or not etc. + * @cpu: populate per-data for this cpu + * + * Populating per-cpu data for a cpu coming online would be a typical + * use case. You need to register a cpu hotplug handler for that purpose. + * Per-cpu object is populated with zeroed buffer. 
+ */
+void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+{
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	int node = cpu_to_node(cpu);
+
+	BUG_ON(pdata->ptrs[cpu]);
+	if (node_online(node)) {
+		/* FIXME: kzalloc_node(size, gfp, node) */
+		pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
+		if (pdata->ptrs[cpu])
+			memset(pdata->ptrs[cpu], 0, size);
+	} else
+		pdata->ptrs[cpu] = kzalloc(size, gfp);
+	return pdata->ptrs[cpu];
+}
+EXPORT_SYMBOL_GPL(percpu_populate);
+
+/**
+ * percpu_populate_mask - populate per-cpu data for more cpu's
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+			   cpumask_t *mask)
+{
+	cpumask_t populated = CPU_MASK_NONE;
+	int cpu;
+
+	for_each_cpu_mask(cpu, *mask)
+		if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
+			__percpu_depopulate_mask(__pdata, &populated);
+			return -ENOMEM;
+		} else
+			cpu_set(cpu, populated);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+/**
+ * percpu_alloc_mask - initial setup of per-cpu data
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Populating per-cpu data for all online cpu's would be a typical use case,
+ * which is simplified by the percpu_alloc() wrapper.
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+	void *__pdata = __percpu_disguise(pdata);
+
+	if (unlikely(!pdata))
+		return NULL;
+	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
+		return __pdata;
+	kfree(pdata);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
+
+/**
+ * percpu_free - final cleanup of per-cpu data
+ * @__pdata: object to clean up
+ *
+ * We simply clean up any per-cpu object left. No need for the client to
+ * track and specify through a bit mask which per-cpu objects are to be
+ * freed.
+ */
+void percpu_free(void *__pdata)
+{
+	__percpu_depopulate_mask(__pdata, &cpu_possible_map);
+	kfree(__percpu_disguise(__pdata));
+}
+EXPORT_SYMBOL_GPL(percpu_free);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 50353e0dac12..d53112fcb404 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -8,17 +8,15 @@
  * free memory collector. It's used to deal with reserved
  * system memory and memory holes as well.
*/ - -#include <linux/mm.h> -#include <linux/kernel_stat.h> -#include <linux/swap.h> -#include <linux/interrupt.h> #include <linux/init.h> +#include <linux/pfn.h> #include <linux/bootmem.h> -#include <linux/mmzone.h> #include <linux/module.h> -#include <asm/dma.h> + +#include <asm/bug.h> #include <asm/io.h> +#include <asm/processor.h> + #include "internal.h" /* @@ -41,7 +39,7 @@ unsigned long saved_max_pfn; #endif /* return the number of _pages_ that will be allocated for the boot bitmap */ -unsigned long __init bootmem_bootmap_pages (unsigned long pages) +unsigned long __init bootmem_bootmap_pages(unsigned long pages) { unsigned long mapsize; @@ -51,12 +49,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages) return mapsize; } + /* * link bdata in order */ -static void link_bootmem(bootmem_data_t *bdata) +static void __init link_bootmem(bootmem_data_t *bdata) { bootmem_data_t *ent; + if (list_empty(&bdata_list)) { list_add(&bdata->list, &bdata_list); return; @@ -69,22 +69,32 @@ static void link_bootmem(bootmem_data_t *bdata) } } list_add_tail(&bdata->list, &bdata_list); - return; } +/* + * Given an initialised bdata, it returns the size of the boot bitmap + */ +static unsigned long __init get_mapsize(bootmem_data_t *bdata) +{ + unsigned long mapsize; + unsigned long start = PFN_DOWN(bdata->node_boot_start); + unsigned long end = bdata->node_low_pfn; + + mapsize = ((end - start) + 7) / 8; + return ALIGN(mapsize, sizeof(long)); +} /* * Called once to set up the allocator itself. */ -static unsigned long __init init_bootmem_core (pg_data_t *pgdat, +static unsigned long __init init_bootmem_core(pg_data_t *pgdat, unsigned long mapstart, unsigned long start, unsigned long end) { bootmem_data_t *bdata = pgdat->bdata; - unsigned long mapsize = ((end - start)+7)/8; + unsigned long mapsize; - mapsize = ALIGN(mapsize, sizeof(long)); - bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); - bdata->node_boot_start = (start << PAGE_SHIFT); + bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); + bdata->node_boot_start = PFN_PHYS(start); bdata->node_low_pfn = end; link_bootmem(bdata); @@ -92,6 +102,7 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, * Initially all pages are reserved - setup_arch() has to * register free RAM areas explicitly. */ + mapsize = get_mapsize(bdata); memset(bdata->node_bootmem_map, 0xff, mapsize); return mapsize; @@ -102,22 +113,22 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, * might be used for boot-time allocations - or it might get added * to the free page pool later on. */ -static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { + unsigned long sidx, eidx; unsigned long i; + /* * round up, partially reserved pages are considered * fully reserved. 
*/ - unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; - unsigned long eidx = (addr + size - bdata->node_boot_start + - PAGE_SIZE-1)/PAGE_SIZE; - unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; - BUG_ON(!size); - BUG_ON(sidx >= eidx); - BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn); - BUG_ON(end > bdata->node_low_pfn); + BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); + BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); + + sidx = PFN_DOWN(addr - bdata->node_boot_start); + eidx = PFN_UP(addr + size - bdata->node_boot_start); for (i = sidx; i < eidx; i++) if (test_and_set_bit(i, bdata->node_bootmem_map)) { @@ -127,20 +138,18 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add } } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { + unsigned long sidx, eidx; unsigned long i; - unsigned long start; + /* * round down end of usable mem, partially free pages are * considered reserved. */ - unsigned long sidx; - unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; - unsigned long end = (addr + size)/PAGE_SIZE; - BUG_ON(!size); - BUG_ON(end > bdata->node_low_pfn); + BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn); if (addr < bdata->last_success) bdata->last_success = addr; @@ -148,8 +157,8 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, /* * Round up the beginning of the address. */ - start = (addr + PAGE_SIZE-1) / PAGE_SIZE; - sidx = start - (bdata->node_boot_start/PAGE_SIZE); + sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); + eidx = PFN_DOWN(addr + size - bdata->node_boot_start); for (i = sidx; i < eidx; i++) { if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) @@ -175,10 +184,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; + unsigned long i, start = 0, incr, eidx, end_pfn; void *ret; - if(!size) { + if (!size) { printk("__alloc_bootmem_core(): zero-sized request\n"); BUG(); } @@ -187,23 +196,22 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, if (limit && bdata->node_boot_start >= limit) return NULL; - limit >>=PAGE_SHIFT; + end_pfn = bdata->node_low_pfn; + limit = PFN_DOWN(limit); if (limit && end_pfn > limit) end_pfn = limit; - eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); offset = 0; - if (align && - (bdata->node_boot_start & (align - 1UL)) != 0) - offset = (align - (bdata->node_boot_start & (align - 1UL))); - offset >>= PAGE_SHIFT; + if (align && (bdata->node_boot_start & (align - 1UL)) != 0) + offset = align - (bdata->node_boot_start & (align - 1UL)); + offset = PFN_DOWN(offset); /* * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. 
*/ - if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < end_pfn)) { + if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) @@ -212,9 +220,8 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } else preferred = 0; - preferred = ALIGN(preferred, align) >> PAGE_SHIFT; - preferred += offset; - areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; + preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; + areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; incr = align >> PAGE_SHIFT ? : 1; restart_scan: @@ -229,7 +236,7 @@ restart_scan: for (j = i + 1; j < i + areasize; ++j) { if (j >= eidx) goto fail_block; - if (test_bit (j, bdata->node_bootmem_map)) + if (test_bit(j, bdata->node_bootmem_map)) goto fail_block; } start = i; @@ -245,7 +252,7 @@ restart_scan: return NULL; found: - bdata->last_success = start << PAGE_SHIFT; + bdata->last_success = PFN_PHYS(start); BUG_ON(start >= eidx); /* @@ -257,19 +264,21 @@ found: bdata->last_offset && bdata->last_pos+1 == start) { offset = ALIGN(bdata->last_offset, align); BUG_ON(offset > PAGE_SIZE); - remaining_size = PAGE_SIZE-offset; + remaining_size = PAGE_SIZE - offset; if (size < remaining_size) { areasize = 0; /* last_pos unchanged */ - bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); + bdata->last_offset = offset + size; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); } else { remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); - bdata->last_pos = start+areasize-1; + areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); + bdata->last_pos = start + areasize - 1; bdata->last_offset = remaining_size; } bdata->last_offset &= ~PAGE_MASK; @@ -282,7 +291,7 @@ found: /* * Reserve the area now: */ - for (i = start; i < start+areasize; i++) + for (i = start; i < start + areasize; i++) if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) BUG(); memset(ret, 0, size); @@ -303,8 +312,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) count = 0; /* first extant page of the node */ - pfn = bdata->node_boot_start >> PAGE_SHIFT; - idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + pfn = PFN_DOWN(bdata->node_boot_start); + idx = bdata->node_low_pfn - pfn; map = bdata->node_bootmem_map; /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ if (bdata->node_boot_start == 0 || @@ -333,7 +342,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) } } } else { - i+=BITS_PER_LONG; + i += BITS_PER_LONG; } pfn += BITS_PER_LONG; } @@ -345,9 +354,10 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) */ page = virt_to_page(bdata->node_bootmem_map); count = 0; - for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { - count++; + idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; + for (i = 0; i < idx; i++, page++) { __free_pages_bootmem(page, 0); + count++; } total += count; bdata->node_bootmem_map = NULL; @@ -355,64 +365,72 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) return total; } -unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned 
long freepfn, unsigned long startpfn, unsigned long endpfn) +unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, + unsigned long startpfn, unsigned long endpfn) { - return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); + return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); } -void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { reserve_bootmem_core(pgdat->bdata, physaddr, size); } -void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { free_bootmem_core(pgdat->bdata, physaddr, size); } -unsigned long __init free_all_bootmem_node (pg_data_t *pgdat) +unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { - return(free_all_bootmem_core(pgdat)); + return free_all_bootmem_core(pgdat); } -unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +unsigned long __init init_bootmem(unsigned long start, unsigned long pages) { max_low_pfn = pages; min_low_pfn = start; - return(init_bootmem_core(NODE_DATA(0), start, 0, pages)); + return init_bootmem_core(NODE_DATA(0), start, 0, pages); } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -void __init reserve_bootmem (unsigned long addr, unsigned long size) +void __init reserve_bootmem(unsigned long addr, unsigned long size) { reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -void __init free_bootmem (unsigned long addr, unsigned long size) +void __init free_bootmem(unsigned long addr, unsigned long size) { free_bootmem_core(NODE_DATA(0)->bdata, addr, size); } -unsigned long __init free_all_bootmem (void) +unsigned long __init free_all_bootmem(void) { - return(free_all_bootmem_core(NODE_DATA(0))); + return free_all_bootmem_core(NODE_DATA(0)); } -void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, 0); + if (ptr) + return ptr; + } return NULL; } -void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem(unsigned long size, unsigned long align, + unsigned long goal) { void *mem = __alloc_bootmem_nopanic(size,align,goal); + if (mem) return mem; /* @@ -424,29 +442,34 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned } -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal) +void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) { void *ptr; ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) - return (ptr); + return ptr; return __alloc_bootmem(size, align, goal); } -#define LOW32LIMIT 0xffffffff +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif -void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_low(unsigned long size, 
unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, - align, goal, LOW32LIMIT))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); + if (ptr) + return ptr; + } /* * Whoops, we cannot satisfy the allocation request. @@ -459,5 +482,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsig void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT); + return __alloc_bootmem_core(pgdat->bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); } diff --git a/mm/filemap.c b/mm/filemap.c index b9a60c43b61a..afcdc72b5e90 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x) EXPORT_SYMBOL(page_cache_alloc_cold); #endif +static int __sleep_on_page_lock(void *word) +{ + io_schedule(); + return 0; +} + /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of @@ -577,13 +583,24 @@ void fastcall __lock_page(struct page *page) } EXPORT_SYMBOL(__lock_page); +/* + * Variant of lock_page that does not require the caller to hold a reference + * on the page's mapping. + */ +void fastcall __lock_page_nosync(struct page *page) +{ + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, + TASK_UNINTERRUPTIBLE); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * - * A rather lightweight function, finding and getting a reference to a - * hashed page atomically. + * Is there a pagecache struct page at the given (mapping, offset) tuple? + * If yes, increment its refcount and return it; if no, return NULL. */ struct page * find_get_page(struct address_space *mapping, unsigned long offset) { @@ -970,7 +987,7 @@ page_not_up_to_date: /* Get exclusive access to the page ... */ lock_page(page); - /* Did it get unhashed before we got the lock? */ + /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); page_cache_release(page); @@ -1610,7 +1627,7 @@ no_cached_page: page_not_uptodate: lock_page(page); - /* Did it get unhashed while we waited for it? */ + /* Did it get truncated while we waited for it? 
*/ if (!page->mapping) { unlock_page(page); goto err; diff --git a/mm/fremap.c b/mm/fremap.c index 21b7d0cbc98c..aa30618ec6b2 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -79,9 +79,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter(mm, file_rss); flush_icache_page(vma, page); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); + pte_val = mk_pte(page, prot); + set_pte_at(mm, addr, pte, pte_val); page_add_file_rmap(page); - pte_val = *pte; update_mmu_cache(vma, addr, pte_val); lazy_mmu_prot_update(pte_val); err = 0; diff --git a/mm/highmem.c b/mm/highmem.c index 9b2a5403c447..ee5519b176ee 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) */ #ifdef CONFIG_HIGHMEM +unsigned long totalhigh_pages __read_mostly; + +unsigned int nr_free_highpages (void) +{ + pg_data_t *pgdat; + unsigned int pages = 0; + + for_each_online_pgdat(pgdat) + pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; + + return pages; +} + static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df499973255f..7c7d03dbf73d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, struct zone **z; for (z = zonelist->zones; *z; z++) { - nid = (*z)->zone_pgdat->node_id; + nid = zone_to_nid(*z); if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && !list_empty(&hugepage_freelists[nid])) break; @@ -177,7 +177,7 @@ static void update_and_free_page(struct page *page) { int i; nr_huge_pages--; - nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; + nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | @@ -191,7 +191,8 @@ static void update_and_free_page(struct page *page) #ifdef CONFIG_HIGHMEM static void try_to_free_low(unsigned long count) { - int i, nid; + int i; + for (i = 0; i < MAX_NUMNODES; ++i) { struct page *page, *next; list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { @@ -199,9 +200,8 @@ static void try_to_free_low(unsigned long count) continue; list_del(&page->lru); update_and_free_page(page); - nid = page_zone(page)->zone_pgdat->node_id; free_huge_pages--; - free_huge_pages_node[nid]--; + free_huge_pages_node[page_to_nid(page)]--; if (count >= nr_huge_pages) return; } diff --git a/mm/internal.h b/mm/internal.h index d20e3cc4aef0..d527b80b292f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); - BUG_ON(atomic_read(&page->_count)); + VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); + VM_BUG_ON(atomic_read(&page->_count)); set_page_count(page, 1); } diff --git a/mm/memory.c b/mm/memory.c index 109e9866237e..92a3ebd8d795 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -49,6 +49,7 @@ #include <linux/module.h> #include <linux/delayacct.h> #include <linux/init.h> +#include <linux/writeback.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -1226,7 +1227,12 @@ out: return retval; } -/* +/** + * vm_insert_page - insert single page into user vma + * @vma: user vma to map to + * @addr: target user address of this page + 
* @page: source kernel page + * * This allows drivers to insert individual pages they've allocated * into a user vma. * @@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } -/* Note: this is only safe if the mm semaphore is held when called. */ +/** + * remap_pfn_range - remap kernel memory to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @prot: page protection flags for this mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { @@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse, ret = VM_FAULT_MINOR; + int reuse = 0, ret = VM_FAULT_MINOR; + struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) goto gotten; - if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) == - (VM_SHARED|VM_WRITE))) { + /* + * Take out anonymous pages first, anonymous shared vmas are + * not dirty accountable. + */ + if (PageAnon(old_page)) { + if (!TestSetPageLocked(old_page)) { + reuse = can_share_swap_page(old_page); + unlock_page(old_page); + } + } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED))) { + /* + * Only catch write-faults on shared writable pages, + * read-only shared pages can get COWed by + * get_user_pages(.write=1, .force=1). + */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { /* * Notify the address space that the page is about to @@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) goto unlock; } - + dirty_page = old_page; + get_page(dirty_page); reuse = 1; - } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { - reuse = can_share_swap_page(old_page); - unlock_page(old_page); - } else { - reuse = 0; } if (reuse) { @@ -1566,6 +1592,10 @@ gotten: page_cache_release(old_page); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty_balance(dirty_page); + put_page(dirty_page); + } return ret; oom: if (old_page) @@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/* - * Handle all mappings that got truncated by a "truncate()" - * system call. +/** + * vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating * * NOTE! We have to be ready to update the memory sharing * between the file and the memory map for a potential last @@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) } EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */ -/* +/** + * swapin_readahead - swap in pages in hope we need them soon + * @entry: swap entry of this memory + * @addr: address to start + * @vma: user vma this addresses belong to + * * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... + * the 'original' request together with the readahead ones... 
* * This has been extended to use the NUMA policies from the mm triggering * the readahead. @@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int sequence = 0; int ret = VM_FAULT_MINOR; int anon = 0; + struct page *dirty_page = NULL; pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); @@ -2192,6 +2229,10 @@ retry: } else { inc_mm_counter(mm, file_rss); page_add_file_rmap(new_page); + if (write_access) { + dirty_page = new_page; + get_page(dirty_page); + } } } else { /* One of our sibling threads was faster, back out. */ @@ -2204,6 +2245,10 @@ retry: lazy_mmu_prot_update(entry); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty_balance(dirty_page); + put_page(dirty_page); + } return ret; oom: page_cache_release(new_page); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a9963ceddd65..38f89650bc84 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache; /* Highest zone. An specific allocation for a zone below that is not policied. */ -int policy_zone = ZONE_DMA; +enum zone_type policy_zone = ZONE_DMA; struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ @@ -137,7 +137,8 @@ static int mpol_check_policy(int mode, nodemask_t *nodes) static struct zonelist *bind_zonelist(nodemask_t *nodes) { struct zonelist *zl; - int num, max, nd, k; + int num, max, nd; + enum zone_type k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); @@ -148,12 +149,16 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. */ - for (k = policy_zone; k >= 0; k--) { + k = policy_zone; + while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; if (z->present_pages > 0) zl->zones[num++] = z; } + if (k == 0) + break; + k--; } zl->zones[num] = NULL; return zl; @@ -482,7 +487,7 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes) switch (p->policy) { case MPOL_BIND: for (i = 0; p->v.zonelist->zones[i]; i++) - node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, + node_set(zone_to_nid(p->v.zonelist->zones[i]), *nodes); break; case MPOL_DEFAULT: @@ -1140,7 +1145,7 @@ unsigned slab_node(struct mempolicy *policy) * Follow bind policy behavior and start allocation at the * first node. 
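The rewritten bind_zonelist() loop above looks clumsy until you notice that k is now an enum zone_type, which the compiler is free to represent as an unsigned type: the old "for (k = policy_zone; k >= 0; k--)" would then never terminate, because "k >= 0" is always true for unsigned k. A standalone demonstration of the pitfall and of the while/break replacement pattern (plain userspace C, illustrative only):

    #include <stdio.h>

    int main(void)
    {
            unsigned int k, n = 3;

            /* for (k = n; k >= 0; k--) would print 3,2,1,0 and then wrap
             * to UINT_MAX: the loop condition can never become false. */
            k = n;
            while (1) {
                    printf("visiting zone index %u\n", k);
                    if (k == 0)
                            break;
                    k--;
            }
            return 0;
    }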
*/ - return policy->v.zonelist->zones[0]->zone_pgdat->node_id; + return zone_to_nid(policy->v.zonelist->zones[0]); case MPOL_PREFERRED: if (policy->v.preferred_node >= 0) @@ -1285,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_task_memory_state(); - if (!pol || in_interrupt()) + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); @@ -1644,7 +1649,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) nodes_clear(nodes); for (z = pol->v.zonelist->zones; *z; z++) - node_set((*z)->zone_pgdat->node_id, nodes); + node_set(zone_to_nid(*z), nodes); nodes_remap(tmp, nodes, *mpolmask, *newmask); nodes = tmp; diff --git a/mm/migrate.c b/mm/migrate.c index 3f1e0c2c942c..20a8c2687b1e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -741,7 +741,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, *result = &pm->status; - return alloc_pages_node(pm->node, GFP_HIGHUSER, 0); + return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0); } /* diff --git a/mm/mmap.c b/mm/mmap.c index d799d896d74a..eea8eefd51a8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -116,7 +116,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root @@ -1105,12 +1105,6 @@ munmap_back: goto free_vma; } - /* Don't make the VMA automatically writable if it's shared, but the - * backer wishes to know when pages are first written to */ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) - vma->vm_page_prot = - protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; - /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) * that memory reservation must be checked; but that reservation @@ -1128,6 +1122,10 @@ munmap_back: pgoff = vma->vm_pgoff; vm_flags = vma->vm_flags; + if (vma_wants_writenotify(vma)) + vma->vm_page_prot = + protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + if (!file || !vma_merge(mm, prev, addr, vma->vm_end, vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { file = vma->vm_file; diff --git a/mm/mprotect.c b/mm/mprotect.c index 638edabaff71..955f9d0e38aa 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -27,7 +27,8 @@ #include <asm/tlbflush.h> static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pte_t *pte, oldpte; spinlock_t *ptl; @@ -42,7 +43,14 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, * bits by wiping the pte and then setting the new pte * into place. */ - ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot); + ptent = ptep_get_and_clear(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + /* + * Avoid taking write faults for pages we know to be + * dirty. 
+ */ + if (dirty_accountable && pte_dirty(ptent)) + ptent = pte_mkwrite(ptent); set_pte_at(mm, addr, pte, ptent); lazy_mmu_prot_update(ptent); #ifdef CONFIG_MIGRATION @@ -66,7 +74,8 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, } static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pmd_t *pmd; unsigned long next; @@ -76,12 +85,13 @@ static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - change_pte_range(mm, pmd, addr, next, newprot); + change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable); } while (pmd++, addr = next, addr != end); } static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pud_t *pud; unsigned long next; @@ -91,12 +101,13 @@ static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - change_pmd_range(mm, pud, addr, next, newprot); + change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable); } while (pud++, addr = next, addr != end); } static void change_protection(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; @@ -110,7 +121,7 @@ static void change_protection(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - change_pud_range(mm, pgd, addr, next, newprot); + change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable); } while (pgd++, addr = next, addr != end); flush_tlb_range(vma, start, end); } @@ -123,10 +134,9 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long oldflags = vma->vm_flags; long nrpages = (end - start) >> PAGE_SHIFT; unsigned long charged = 0; - unsigned int mask; - pgprot_t newprot; pgoff_t pgoff; int error; + int dirty_accountable = 0; if (newflags == oldflags) { *pprev = vma; @@ -176,24 +186,23 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, } success: - /* Don't make the VMA automatically writable if it's shared, but the - * backer wishes to know when pages are first written to */ - mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED; - if (vma->vm_ops && vma->vm_ops->page_mkwrite) - mask &= ~VM_SHARED; - - newprot = protection_map[newflags & mask]; - /* * vm_flags and vm_page_prot are protected by the mmap_sem * held in write mode. 
*/ vma->vm_flags = newflags; - vma->vm_page_prot = newprot; + vma->vm_page_prot = protection_map[newflags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + if (vma_wants_writenotify(vma)) { + vma->vm_page_prot = protection_map[newflags & + (VM_READ|VM_WRITE|VM_EXEC)]; + dirty_accountable = 1; + } + if (is_vm_hugetlb_page(vma)) - hugetlb_change_protection(vma, start, end, newprot); + hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else - change_protection(vma, start, end, newprot); + change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/msync.c b/mm/msync.c index d083544df21b..358d73cf7b78 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -7,149 +7,33 @@ /* * The msync() system call. */ -#include <linux/slab.h> -#include <linux/pagemap.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/hugetlb.h> -#include <linux/writeback.h> #include <linux/file.h> #include <linux/syscalls.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> - -static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end) -{ - pte_t *pte; - spinlock_t *ptl; - int progress = 0; - unsigned long ret = 0; - -again: - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - do { - struct page *page; - - if (progress >= 64) { - progress = 0; - if (need_resched() || need_lockbreak(ptl)) - break; - } - progress++; - if (!pte_present(*pte)) - continue; - if (!pte_maybe_dirty(*pte)) - continue; - page = vm_normal_page(vma, addr, *pte); - if (!page) - continue; - if (ptep_clear_flush_dirty(vma, addr, pte) || - page_test_and_clear_dirty(page)) - ret += set_page_dirty(page); - progress += 3; - } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap_unlock(pte - 1, ptl); - cond_resched(); - if (addr != end) - goto again; - return ret; -} - -static inline unsigned long msync_pmd_range(struct vm_area_struct *vma, - pud_t *pud, unsigned long addr, unsigned long end) -{ - pmd_t *pmd; - unsigned long next; - unsigned long ret = 0; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - ret += msync_pte_range(vma, pmd, addr, next); - } while (pmd++, addr = next, addr != end); - return ret; -} - -static inline unsigned long msync_pud_range(struct vm_area_struct *vma, - pgd_t *pgd, unsigned long addr, unsigned long end) -{ - pud_t *pud; - unsigned long next; - unsigned long ret = 0; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - ret += msync_pmd_range(vma, pud, addr, next); - } while (pud++, addr = next, addr != end); - return ret; -} - -static unsigned long msync_page_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end) -{ - pgd_t *pgd; - unsigned long next; - unsigned long ret = 0; - - /* For hugepages we can't go walking the page table normally, - * but that's ok, hugetlbfs is memory based, so we don't need - * to do anything more on an msync(). 
- */ - if (vma->vm_flags & VM_HUGETLB) - return 0; - - BUG_ON(addr >= end); - pgd = pgd_offset(vma->vm_mm, addr); - flush_cache_range(vma, addr, end); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - ret += msync_pud_range(vma, pgd, addr, next); - } while (pgd++, addr = next, addr != end); - return ret; -} - /* * MS_SYNC syncs the entire file - including mappings. * - * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just - * marks the relevant pages dirty. The application may now run fsync() to + * MS_ASYNC does not start I/O (it used to, up to 2.5.67). + * Nor does it mark the relevant pages dirty (it used to, up to 2.6.17). + * Now it doesn't do anything, since dirty pages are properly tracked. + * + * The application may now run fsync() to * write out the dirty pages and wait on the writeout and check the result. * Or the application may run fadvise(FADV_DONTNEED) against the fd to start * async writeout immediately. * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to * applications. */
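With the page-table walk gone, the user-visible contract is exactly what the comment says: MS_ASYNC becomes a no-op because dirty pages are already tracked as they are written, and MS_SYNC boils down to do_fsync() on each mapped file. A small userspace program exercising it; 'testfile' is an arbitrary illustrative name:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("testfile", O_RDWR | O_CREAT, 0644);
            char *map;

            if (fd < 0 || ftruncate(fd, 4096) < 0)
                    exit(1);
            map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (map == MAP_FAILED)
                    exit(1);

            strcpy(map, "hello");           /* dirties the page through the mapping */
            if (msync(map, 4096, MS_SYNC))  /* writes it out and waits */
                    perror("msync");

            munmap(map, 4096);
            close(fd);
            return 0;
    }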
-static int msync_interval(struct vm_area_struct *vma, unsigned long addr, - unsigned long end, int flags, - unsigned long *nr_pages_dirtied) -{ - struct file *file = vma->vm_file; - - if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED)) - return -EBUSY; - - if (file && (vma->vm_flags & VM_SHARED)) - *nr_pages_dirtied = msync_page_range(vma, addr, end); - return 0; -} - asmlinkage long sys_msync(unsigned long start, size_t len, int flags) { unsigned long end; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int unmapped_error = 0; int error = -EINVAL; - int done = 0; if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) goto out; @@ -169,64 +53,50 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) * If the interval [start,end) covers some unmapped address ranges, * just ignore them, but return -ENOMEM at the end. */ - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (!vma) { - error = -ENOMEM; - goto out_unlock; - } - do { - unsigned long nr_pages_dirtied = 0; + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + for (;;) { struct file *file; + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + goto out_unlock; /* Here start < vma->vm_end. */ if (start < vma->vm_start) { - unmapped_error = -ENOMEM; start = vma->vm_start; + if (start >= end) + goto out_unlock; + unmapped_error = -ENOMEM; } /* Here vma->vm_start <= start < vma->vm_end. */ - if (end <= vma->vm_end) { - if (start < end) { - error = msync_interval(vma, start, end, flags, - &nr_pages_dirtied); - if (error) - goto out_unlock; - } - error = unmapped_error; - done = 1; - } else { - /* Here vma->vm_start <= start < vma->vm_end < end. */ - error = msync_interval(vma, start, vma->vm_end, flags, - &nr_pages_dirtied); - if (error) - goto out_unlock; + if ((flags & MS_INVALIDATE) && + (vma->vm_flags & VM_LOCKED)) { + error = -EBUSY; + goto out_unlock; } file = vma->vm_file; start = vma->vm_end; - if ((flags & MS_ASYNC) && file && nr_pages_dirtied) { - get_file(file); - up_read(&current->mm->mmap_sem); - balance_dirty_pages_ratelimited_nr(file->f_mapping, - nr_pages_dirtied); - fput(file); - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, start); - } else if ((flags & MS_SYNC) && file && + if ((flags & MS_SYNC) && file && (vma->vm_flags & VM_SHARED)) { get_file(file); - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); error = do_fsync(file, 0); fput(file); - down_read(&current->mm->mmap_sem); - if (error) - goto out_unlock; - vma = find_vma(current->mm, start); + if (error || start >= end) + goto out; + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); } else { + if (start >= end) { + error = 0; + goto out_unlock; + } vma = vma->vm_next; } - } while (vma && !done); + } out_unlock: - up_read(&current->mm->mmap_sem); + up_read(&mm->mmap_sem); out: - return error; + return error ? : unmapped_error; } diff --git a/mm/nommu.c b/mm/nommu.c index c576df71e3bb..d99dea31e443 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1133,7 +1133,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b9af136e5cfa..bada3d03119f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -21,6 +21,8 @@ #include <linux/timex.h> #include <linux/jiffies.h> #include <linux/cpuset.h> +#include <linux/module.h> +#include <linux/notifier.h> int sysctl_panic_on_oom; /* #define DEBUG */ @@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) } /* + * swapoff can easily use up all memory, so kill those first. + */ + if (p->flags & PF_SWAPOFF) + return ULONG_MAX; + + /* * The memory size of the process is the basis for the badness. */ points = mm->total_vm; @@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) points /= 4; /* + * If p's nodes don't overlap ours, it may still help to kill p + * because p may have allocated or otherwise mapped memory on + * this node before. However it will be less likely. + */ + if (!cpuset_excl_nodes_overlap(p)) + points /= 8; + + /* * Adjust the score by oomkilladj. */ if (p->oomkilladj) { @@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) for (z = zonelist->zones; *z; z++) if (cpuset_zone_allowed(*z, gfp_mask)) - node_clear((*z)->zone_pgdat->node_id, - nodes); + node_clear(zone_to_nid(*z), nodes); else return CONSTRAINT_CPUSET; @@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) unsigned long points; int releasing; + /* skip kernel threads */ + if (!p->mm) + continue; /* skip the init task with pid == 1 */ if (p->pid == 1) continue; - if (p->oomkilladj == OOM_DISABLE) - continue; - /* If p's nodes don't overlap ours, it won't help to kill p. */ - if (!cpuset_excl_nodes_overlap(p)) - continue; /* * This is in the process of releasing memory so wait for it * to finish before killing some other task by mistake.
+ * + * However, if p is the current task, we allow the 'kill' to + * go ahead if it is exiting: this will simply set TIF_MEMDIE, + * which will allow it to gain access to memory reserves in + * the process of exiting and releasing its resources. + * Otherwise we could get an OOM deadlock. */ releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || p->flags & PF_EXITING; - if (releasing && !(p->flags & PF_DEAD)) + if (releasing) { + /* PF_DEAD tasks have already released their mm */ + if (p->flags & PF_DEAD) + continue; + if (p->flags & PF_EXITING && p == current) { + chosen = p; + *ppoints = ULONG_MAX; + break; + } return ERR_PTR(-1UL); - if (p->flags & PF_SWAPOFF) - return p; + } + if (p->oomkilladj == OOM_DISABLE) + continue; points = badness(p, uptime.tv_sec); if (points > *ppoints || !chosen) { @@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) } /** - * We must be careful though to never send SIGKILL a process with - * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that - * we select a process with CAP_SYS_RAW_IO set). + * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO + * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO + * set. */ static void __oom_kill_task(struct task_struct *p, const char *message) { @@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message) return; } task_unlock(p); - printk(KERN_ERR "%s: Killed process %d (%s).\n", + + if (message) { + printk(KERN_ERR "%s: Killed process %d (%s).\n", message, p->pid, p->comm); + } /* * We give our sacrificial lamb high priority and access to @@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, struct task_struct *c; struct list_head *tsk; - printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " - "children.\n", p->pid, p->comm, points); + /* + * If the task is already exiting, don't alarm the sysadmin or kill + * its children or threads, just set TIF_MEMDIE so it can die quickly + */ + if (p->flags & PF_EXITING) { + __oom_kill_task(p, NULL); + return 0; + } + + printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li" + " and children.\n", p->pid, p->comm, points); /* Try to kill a child first */ list_for_each(tsk, &p->children) { c = list_entry(tsk, struct task_struct, sibling); @@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, return oom_kill_task(p, message); } +static BLOCKING_NOTIFIER_HEAD(oom_notify_list); + +int register_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&oom_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_oom_notifier); + +int unregister_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&oom_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_oom_notifier); + /** * out_of_memory - kill the "best" process when we run out of memory * @@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { struct task_struct *p; unsigned long points = 0; + unsigned long freed = 0; + + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory back in the last second. 
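register_oom_notifier() gives other subsystems one last chance to hand memory back before a victim is chosen: out_of_memory() sums whatever the chain reports into 'freed' and returns early if anything came back. A registration sketch; the declaration appears to live in linux/swap.h in trees of this vintage, and my_driver_shrink_cache() is a hypothetical stand-in for real cache-shedding code:

    #include <linux/notifier.h>
    #include <linux/swap.h>

    static int shed_cache(struct notifier_block *nb, unsigned long dummy,
                          void *parm)
    {
            unsigned long *freed = parm;

            *freed += my_driver_shrink_cache();     /* hypothetical helper */
            return NOTIFY_OK;
    }

    static struct notifier_block shed_cache_nb = {
            .notifier_call = shed_cache,
    };

    /* in the driver's init path: register_oom_notifier(&shed_cache_nb); */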
*/ + return; if (printk_ratelimit()) { - printk("oom-killer: gfp_mask=0x%x, order=%d\n", - gfp_mask, order); + printk(KERN_WARNING "%s invoked oom-killer: " + "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", + current->comm, gfp_mask, order, current->oomkilladj); dump_stack(); show_mem(); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 77a0bc4e261a..555752907dc3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -23,6 +23,7 @@ #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/mpage.h> +#include <linux/rmap.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/smp.h> @@ -243,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } +void set_page_dirty_balance(struct page *page) +{ + if (set_page_dirty(page)) { + struct address_space *mapping = page_mapping(page); + + if (mapping) + balance_dirty_pages_ratelimited(mapping); + } +} + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied @@ -550,7 +561,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; @@ -690,7 +701,7 @@ int set_page_dirty_lock(struct page *page) { int ret; - lock_page(page); + lock_page_nosync(page); ret = set_page_dirty(page); unlock_page(page); return ret; @@ -712,9 +723,15 @@ int test_clear_page_dirty(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - if (mapping_cap_account_dirty(mapping)) - __dec_zone_page_state(page, NR_FILE_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); + /* + * We can continue to use `mapping' here because the + * page is locked, which pins the address_space + */ + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } write_unlock_irqrestore(&mapping->tree_lock, flags); @@ -744,8 +761,10 @@ int clear_page_dirty_for_io(struct page *page) if (mapping) { if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3b5358a0561f..9810f0a60db7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -51,7 +51,6 @@ EXPORT_SYMBOL(node_online_map); nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); unsigned long totalram_pages __read_mostly; -unsigned long totalhigh_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; long nr_swap_pages; int percpu_pagelist_fraction; @@ -69,7 +68,15 @@ static void __free_pages_ok(struct page *page, unsigned int order); * TBD: should special case ZONE_DMA32 machines here - in those we normally * don't need any ZONE_NORMAL reservation */ -int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { + 256, +#ifdef CONFIG_ZONE_DMA32 + 256, +#endif +#ifdef CONFIG_HIGHMEM + 32 +#endif +}; EXPORT_SYMBOL(totalram_pages); @@ -80,7 +87,17 @@ EXPORT_SYMBOL(totalram_pages); struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; EXPORT_SYMBOL(zone_table); -static char 
*zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; +static char *zone_names[MAX_NR_ZONES] = { + "DMA", +#ifdef CONFIG_ZONE_DMA32 + "DMA32", +#endif + "Normal", +#ifdef CONFIG_HIGHMEM + "HighMem" +#endif +}; + int min_free_kbytes = 1024; unsigned long __meminitdata nr_kernel_pages; @@ -127,7 +144,6 @@ static int bad_range(struct zone *zone, struct page *page) return 0; } - #else static inline int bad_range(struct zone *zone, struct page *page) { @@ -218,12 +234,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; - BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); + VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); /* * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO * and __GFP_HIGHMEM from hard or soft interrupt context. */ - BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); + VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); for (i = 0; i < (1 << order); i++) clear_highpage(page + i); } @@ -347,8 +363,8 @@ static inline void __free_one_page(struct page *page, page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - BUG_ON(page_idx & (order_size - 1)); - BUG_ON(bad_range(zone, page)); + VM_BUG_ON(page_idx & (order_size - 1)); + VM_BUG_ON(bad_range(zone, page)); zone->free_pages += order_size; while (order < MAX_ORDER-1) { @@ -421,7 +437,7 @@ static void free_pages_bulk(struct zone *zone, int count, while (count--) { struct page *page; - BUG_ON(list_empty(list)); + VM_BUG_ON(list_empty(list)); page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); @@ -432,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count, static void free_one_page(struct zone *zone, struct page *page, int order) { - LIST_HEAD(list); - list_add(&page->lru, &list); - free_pages_bulk(zone, 1, &list, order); + spin_lock(&zone->lock); + zone->all_unreclaimable = 0; + zone->pages_scanned = 0; + __free_one_page(page, zone ,order); + spin_unlock(&zone->lock); } static void __free_pages_ok(struct page *page, unsigned int order) @@ -512,7 +530,7 @@ static inline void expand(struct zone *zone, struct page *page, area--; high--; size >>= 1; - BUG_ON(bad_range(zone, &page[size])); + VM_BUG_ON(bad_range(zone, &page[size])); list_add(&page[size].lru, &area->free_list); area->nr_free++; set_page_order(&page[size], high); @@ -615,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, #ifdef CONFIG_NUMA /* * Called from the slab reaper to drain pagesets on a particular node that - * belong to the currently executing processor. + * belongs to the currently executing processor. * Note that this function must be called with the thread pinned to * a single processor. 
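The wholesale BUG_ON to VM_BUG_ON conversion in these allocator hot paths is a debug/production split: VM_BUG_ON, introduced elsewhere in this series, only expands to a real check when CONFIG_DEBUG_VM is set and costs nothing otherwise. Roughly (a sketch of the intended definition; see include/linux/mm.h in the matching tree):

    #ifdef CONFIG_DEBUG_VM
    #define VM_BUG_ON(cond) BUG_ON(cond)
    #else
    #define VM_BUG_ON(cond) do { } while (0)
    #endif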
*/ void drain_node_pages(int nodeid) { - int i, z; + int i; + enum zone_type z; unsigned long flags; for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = NODE_DATA(nodeid)->node_zones + z; struct per_cpu_pageset *pset; + if (!populated_zone(zone)) + continue; + pset = zone_pcp(zone, smp_processor_id()); for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { struct per_cpu_pages *pcp; @@ -672,7 +694,8 @@ static void __drain_pages(unsigned int cpu) void mark_free_pages(struct zone *zone) { - unsigned long zone_pfn, flags; + unsigned long pfn, max_zone_pfn; + unsigned long flags; int order; struct list_head *curr; @@ -680,18 +703,25 @@ void mark_free_pages(struct zone *zone) return; spin_lock_irqsave(&zone->lock, flags); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); + + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + if (!PageNosave(page)) + ClearPageNosaveFree(page); + } for (order = MAX_ORDER - 1; order >= 0; --order) list_for_each(curr, &zone->free_area[order].free_list) { - unsigned long start_pfn, i; + unsigned long i; - start_pfn = page_to_pfn(list_entry(curr, struct page, lru)); + pfn = page_to_pfn(list_entry(curr, struct page, lru)); + for (i = 0; i < (1UL << order); i++) + SetPageNosaveFree(pfn_to_page(pfn + i)); + } - for (i=0; i < (1<<order); i++) - SetPageNosaveFree(pfn_to_page(start_pfn+i)); - } spin_unlock_irqrestore(&zone->lock, flags); } @@ -761,8 +791,8 @@ void split_page(struct page *page, unsigned int order) { int i; - BUG_ON(PageCompound(page)); - BUG_ON(!page_count(page)); + VM_BUG_ON(PageCompound(page)); + VM_BUG_ON(!page_count(page)); for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); } @@ -809,7 +839,7 @@ again: local_irq_restore(flags); put_cpu(); - BUG_ON(bad_range(zone, page)); + VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) goto again; return page; @@ -870,32 +900,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, struct zone **z = zonelist->zones; struct page *page = NULL; int classzone_idx = zone_idx(*z); + struct zone *zone; /* * Go through the zonelist once, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 
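Note how the mark_free_pages() rewrite above walks raw page frame numbers instead of zone-relative indices; that makes the pfn_valid() check mandatory, because sparse memory maps have holes for which no struct page exists and pfn_to_page() is undefined there. The same walk, in isolation, as an illustrative helper:

    #include <linux/mm.h>

    /* Visit every valid page of a zone, skipping holes. */
    static void for_each_zone_page(struct zone *zone,
                                   void (*fn)(struct page *))
    {
            unsigned long pfn;
            unsigned long max_zone_pfn = zone->zone_start_pfn +
                                         zone->spanned_pages;

            for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
                    if (!pfn_valid(pfn))    /* hole: no struct page here */
                            continue;
                    fn(pfn_to_page(pfn));
            }
    }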
*/ do { + zone = *z; + if (unlikely((gfp_mask & __GFP_THISNODE) && + zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) + break; if ((alloc_flags & ALLOC_CPUSET) && - !cpuset_zone_allowed(*z, gfp_mask)) + !cpuset_zone_allowed(zone, gfp_mask)) continue; if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; if (alloc_flags & ALLOC_WMARK_MIN) - mark = (*z)->pages_min; + mark = zone->pages_min; else if (alloc_flags & ALLOC_WMARK_LOW) - mark = (*z)->pages_low; + mark = zone->pages_low; else - mark = (*z)->pages_high; - if (!zone_watermark_ok(*z, order, mark, + mark = zone->pages_high; + if (!zone_watermark_ok(zone , order, mark, classzone_idx, alloc_flags)) if (!zone_reclaim_mode || - !zone_reclaim(*z, gfp_mask, order)) + !zone_reclaim(zone, gfp_mask, order)) continue; } - page = buffered_rmqueue(zonelist, *z, order, gfp_mask); + page = buffered_rmqueue(zonelist, zone, order, gfp_mask); if (page) { break; } @@ -1083,7 +1118,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) * get_zeroed_page() returns a 32-bit address, which cannot represent * a highmem page */ - BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); + VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); page = alloc_pages(gfp_mask | __GFP_ZERO, 0); if (page) @@ -1116,7 +1151,7 @@ EXPORT_SYMBOL(__free_pages); fastcall void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { - BUG_ON(!virt_addr_valid((void *)addr)); + VM_BUG_ON(!virt_addr_valid((void *)addr)); __free_pages(virt_to_page((void *)addr), order); } } @@ -1142,7 +1177,8 @@ EXPORT_SYMBOL(nr_free_pages); #ifdef CONFIG_NUMA unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) { - unsigned int i, sum = 0; + unsigned int sum = 0; + enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) sum += pgdat->node_zones[i].free_pages; @@ -1186,24 +1222,10 @@ unsigned int nr_free_pagecache_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); } - -#ifdef CONFIG_HIGHMEM -unsigned int nr_free_highpages (void) -{ - pg_data_t *pgdat; - unsigned int pages = 0; - - for_each_online_pgdat(pgdat) - pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; - - return pages; -} -#endif - #ifdef CONFIG_NUMA static void show_node(struct zone *zone) { - printk("Node %d ", zone->zone_pgdat->node_id); + printk("Node %ld ", zone_to_nid(zone)); } #else #define show_node(zone) do { } while (0) @@ -1215,13 +1237,8 @@ void si_meminfo(struct sysinfo *val) val->sharedram = 0; val->freeram = nr_free_pages(); val->bufferram = nr_blockdev_pages(); -#ifdef CONFIG_HIGHMEM val->totalhigh = totalhigh_pages; val->freehigh = nr_free_highpages(); -#else - val->totalhigh = 0; - val->freehigh = 0; -#endif val->mem_unit = PAGE_SIZE; } @@ -1234,8 +1251,13 @@ void si_meminfo_node(struct sysinfo *val, int nid) val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); +#ifdef CONFIG_HIGHMEM val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; +#else + val->totalhigh = 0; + val->freehigh = 0; +#endif val->mem_unit = PAGE_SIZE; } #endif @@ -1282,10 +1304,6 @@ void show_free_areas(void) get_zone_counts(&active, &inactive, &free); - printk("Free pages: %11ukB (%ukB HighMem)\n", - K(nr_free_pages()), - K(nr_free_highpages())); - printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", active, @@ -1294,7 +1312,8 @@ void show_free_areas(void) global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), nr_free_pages(), - 
global_page_state(NR_SLAB), + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), global_page_state(NR_PAGETABLE)); @@ -1360,39 +1379,25 @@ void show_free_areas(void) * Add all populated zones of a node to the zonelist. */ static int __meminit build_zonelists_node(pg_data_t *pgdat, - struct zonelist *zonelist, int nr_zones, int zone_type) + struct zonelist *zonelist, int nr_zones, enum zone_type zone_type) { struct zone *zone; - BUG_ON(zone_type > ZONE_HIGHMEM); + BUG_ON(zone_type >= MAX_NR_ZONES); + zone_type++; do { + zone_type--; zone = pgdat->node_zones + zone_type; if (populated_zone(zone)) { -#ifndef CONFIG_HIGHMEM - BUG_ON(zone_type > ZONE_NORMAL); -#endif zonelist->zones[nr_zones++] = zone; check_highest_zone(zone_type); } - zone_type--; - } while (zone_type >= 0); + } while (zone_type); return nr_zones; } -static inline int highest_zone(int zone_bits) -{ - int res = ZONE_NORMAL; - if (zone_bits & (__force int)__GFP_HIGHMEM) - res = ZONE_HIGHMEM; - if (zone_bits & (__force int)__GFP_DMA32) - res = ZONE_DMA32; - if (zone_bits & (__force int)__GFP_DMA) - res = ZONE_DMA; - return res; -} - #ifdef CONFIG_NUMA #define MAX_NODE_LOAD (num_online_nodes()) static int __meminitdata node_load[MAX_NUMNODES]; @@ -1458,13 +1463,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, k, node, local_node; + int j, node, local_node; + enum zone_type i; int prev_node, load; struct zonelist *zonelist; nodemask_t used_mask; /* initialize zonelists */ - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = pgdat->node_zonelists + i; zonelist->zones[0] = NULL; } @@ -1494,13 +1500,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat) node_load[node] += load; prev_node = node; load--; - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++); - k = highest_zone(i); - - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); zonelist->zones[j] = NULL; } } @@ -1510,17 +1514,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat) static void __meminit build_zonelists(pg_data_t *pgdat) { - int i, j, k, node, local_node; + int node, local_node; + enum zone_type i,j; local_node = pgdat->node_id; - for (i = 0; i < GFP_ZONETYPES; i++) { + for (i = 0; i < MAX_NR_ZONES; i++) { struct zonelist *zonelist; zonelist = pgdat->node_zonelists + i; - j = 0; - k = highest_zone(i); - j = build_zonelists_node(pgdat, zonelist, j, k); + j = build_zonelists_node(pgdat, zonelist, 0, i); /* * Now we build the zonelist so that it contains the zones * of all the other nodes. 
@@ -1532,12 +1535,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat) for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); } for (node = 0; node < local_node; node++) { if (!node_online(node)) continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); } zonelist->zones[j] = NULL; @@ -1643,7 +1646,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long realtotalpages, totalpages = 0; - int i; + enum zone_type i; for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; @@ -1698,8 +1701,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) -void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, - unsigned long size) +void zonetable_add(struct zone *zone, int nid, enum zone_type zid, + unsigned long pfn, unsigned long size) { unsigned long snum = pfn_to_section_nr(pfn); unsigned long end = pfn_to_section_nr(pfn + size); @@ -1845,8 +1848,10 @@ static inline void free_zone_pagesets(int cpu) for_each_zone(zone) { struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + /* Free per_cpu_pageset if it is slab allocated */ + if (pset != &boot_pageset[cpu]) + kfree(pset); zone_pcp(zone, cpu) = NULL; - kfree(pset); } } @@ -1981,7 +1986,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, static void __meminit free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { - unsigned long j; + enum zone_type j; int nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; int ret; @@ -1999,15 +2004,16 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, if (zholes_size) realsize -= zholes_size[j]; - if (j < ZONE_HIGHMEM) + if (!is_highmem_idx(j)) nr_kernel_pages += realsize; nr_all_pages += realsize; zone->spanned_pages = size; zone->present_pages = realsize; #ifdef CONFIG_NUMA - zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) + zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; + zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; #endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); @@ -2129,7 +2135,7 @@ static void calculate_totalreserve_pages(void) { struct pglist_data *pgdat; unsigned long reserve_pages = 0; - int i, j; + enum zone_type i, j; for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { @@ -2162,7 +2168,7 @@ static void calculate_totalreserve_pages(void) static void setup_per_zone_lowmem_reserve(void) { struct pglist_data *pgdat; - int j, idx; + enum zone_type j, idx; for_each_online_pgdat(pgdat) { for (j = 0; j < MAX_NR_ZONES; j++) { @@ -2171,9 +2177,12 @@ static void setup_per_zone_lowmem_reserve(void) zone->lowmem_reserve[j] = 0; - for (idx = j-1; idx >= 0; idx--) { + idx = j; + while (idx) { struct zone *lower_zone; + idx--; + if (sysctl_lowmem_reserve_ratio[idx] < 1) sysctl_lowmem_reserve_ratio[idx] = 1; @@ -2314,10 +2323,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, return rc; for_each_zone(zone) - zone->min_unmapped_ratio = (zone->present_pages * + zone->min_unmapped_pages = (zone->present_pages * sysctl_min_unmapped_ratio) / 100; return 0; } + 
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_slab_pages = (zone->present_pages * + sysctl_min_slab_ratio) / 100; + return 0; +} #endif /* diff --git a/mm/page_io.c b/mm/page_io.c index 88029948d00a..d4840ecbf8f9 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -52,14 +52,29 @@ static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err) if (bio->bi_size) return 1; - if (!uptodate) + if (!uptodate) { SetPageError(page); + /* + * We failed to write the page out to swap-space. + * Re-dirty the page in order to avoid it being reclaimed. + * Also print a dire warning that things will go BAD (tm) + * very quickly. + * + * Also clear PG_reclaim to avoid rotate_reclaimable_page() + */ + set_page_dirty(page); + printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_sector); + ClearPageReclaim(page); + } end_page_writeback(page); bio_put(bio); return 0; } -static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) +int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; @@ -70,6 +85,10 @@ static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err) if (!uptodate) { SetPageError(page); ClearPageUptodate(page); + printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_sector); } else { SetPageUptodate(page); } @@ -137,10 +156,12 @@ out: * We use end_swap_bio_read() even for writes, because it happens to do what * we want. 
*/ -int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) +int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain) { struct bio *bio; int ret = 0; + int bio_rw; lock_page(page); @@ -151,11 +172,22 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page) goto out; } - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - wait_on_page_locked(page); - - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; + bio_rw = rw; + if (!bio_chain) + bio_rw |= (1 << BIO_RW_SYNC); + if (bio_chain) + bio_get(bio); + submit_bio(bio_rw, bio); + if (bio_chain == NULL) { + wait_on_page_locked(page); + + if (!PageUptodate(page) || PageError(page)) + ret = -EIO; + } + if (bio_chain) { + bio->bi_private = *bio_chain; + *bio_chain = bio; + } out: return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 40158b59729e..e2155d791d99 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked) return referenced; } +static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte, entry; + spinlock_t *ptl; + int ret = 0; + + address = vma_address(page, vma); + if (address == -EFAULT) + goto out; + + pte = page_check_address(page, mm, address, &ptl); + if (!pte) + goto out; + + if (!pte_dirty(*pte) && !pte_write(*pte)) + goto unlock; + + entry = ptep_get_and_clear(mm, address, pte); + entry = pte_mkclean(entry); + entry = pte_wrprotect(entry); + ptep_establish(vma, address, pte, entry); + lazy_mmu_prot_update(entry); + ret = 1; + +unlock: + pte_unmap_unlock(pte, ptl); +out: + return ret; +} + +static int page_mkclean_file(struct address_space *mapping, struct page *page) +{ + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int ret = 0; + + BUG_ON(PageAnon(page)); + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + if (vma->vm_flags & VM_SHARED) + ret += page_mkclean_one(page, vma); + } + spin_unlock(&mapping->i_mmap_lock); + return ret; +} + +int page_mkclean(struct page *page) +{ + int ret = 0; + + BUG_ON(!PageLocked(page)); + + if (page_mapped(page)) { + struct address_space *mapping = page_mapping(page); + if (mapping) + ret = page_mkclean_file(mapping, page); + } + + return ret; +} + /** * page_set_anon_rmap - setup new anonymous rmap * @page: the page to add the mapping to diff --git a/mm/shmem.c b/mm/shmem.c index db21c51531ca..8631be45b40d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -45,6 +45,7 @@ #include <linux/namei.h> #include <linux/ctype.h> #include <linux/migrate.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/div64.h> diff --git a/mm/slab.c b/mm/slab.c index 21ba06035700..7a48eb1a60c8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); -static void enable_cpucache(struct kmem_cache *cachep); +static int enable_cpucache(struct kmem_cache *cachep); static void cache_reap(void *unused); /* @@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = { #endif }; +#define BAD_ALIEN_MAGIC 0x01020304ul + #ifdef CONFIG_LOCKDEP /* @@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = { * The locking for this is tricky in that it nests within the locks * of all other slabs in a few 
places; to deal with this special * locking we put on-slab caches into a separate lock-class. + * + * We set lock class for alien array caches which are up during init. + * The lock annotation will be lost if all cpus of a node goes down and + * then comes back up during hotplug */ -static struct lock_class_key on_slab_key; +static struct lock_class_key on_slab_l3_key; +static struct lock_class_key on_slab_alc_key; + +static inline void init_lock_keys(void) -static inline void init_lock_keys(struct cache_sizes *s) { int q; - - for (q = 0; q < MAX_NUMNODES; q++) { - if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep)) - continue; - lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock, - &on_slab_key); + struct cache_sizes *s = malloc_sizes; + + while (s->cs_size != ULONG_MAX) { + for_each_node(q) { + struct array_cache **alc; + int r; + struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; + if (!l3 || OFF_SLAB(s->cs_cachep)) + continue; + lockdep_set_class(&l3->list_lock, &on_slab_l3_key); + alc = l3->alien; + /* + * FIXME: This check for BAD_ALIEN_MAGIC + * should go away when common slab code is taught to + * work even without alien caches. + * Currently, non NUMA code returns BAD_ALIEN_MAGIC + * for alloc_alien_cache, + */ + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) + continue; + for_each_node(r) { + if (alc[r]) + lockdep_set_class(&alc[r]->lock, + &on_slab_alc_key); + } + } + s++; } } - #else -static inline void init_lock_keys(struct cache_sizes *s) +static inline void init_lock_keys(void) { } #endif - - /* Guard access to the cache-chain. */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; /* - * vm_enough_memory() looks at this to determine how many slab-allocated pages - * are possibly freeable under pressure - * - * SLAB_RECLAIM_ACCOUNT turns this on per-slab - */ -atomic_t slab_reclaim_pages; - -/* * chicken and egg problem: delay the per-cpu array allocation * until the general caches are up. */ @@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, return csizep->cs_cachep; } -struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) +static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } -EXPORT_SYMBOL(kmem_find_general_cachep); static size_t slab_mgmt_size(size_t nr_objs, size_t align) { @@ -1092,7 +1109,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) static inline struct array_cache **alloc_alien_cache(int node, int limit) { - return (struct array_cache **) 0x01020304ul; + return (struct array_cache **)BAD_ALIEN_MAGIC; } static inline void free_alien_cache(struct array_cache **ac_ptr) @@ -1422,7 +1439,6 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL, NULL); } - init_lock_keys(sizes); sizes->cs_dmacachep = kmem_cache_create(names->name_dma, sizes->cs_size, @@ -1491,10 +1507,15 @@ void __init kmem_cache_init(void) struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) - enable_cpucache(cachep); + if (enable_cpucache(cachep)) + BUG(); mutex_unlock(&cache_chain_mutex); } + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + + /* Done! 
*/ g_cpucache_up = FULL; @@ -1551,8 +1572,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_add(nr_pages, &slab_reclaim_pages); - add_zone_page_state(page_zone(page), NR_SLAB, nr_pages); + add_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_pages); + else + add_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_pages); for (i = 0; i < nr_pages; i++) __SetPageSlab(page + i); return page_address(page); @@ -1567,7 +1591,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; - sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed); + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + sub_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_freed); + else + sub_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); __ClearPageSlab(page); @@ -1576,8 +1605,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; free_pages((unsigned long)addr, cachep->gfporder); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); } static void kmem_rcu_free(struct rcu_head *head) @@ -1834,6 +1861,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) } } +static void __kmem_cache_destroy(struct kmem_cache *cachep) +{ + int i; + struct kmem_list3 *l3; + + for_each_online_cpu(i) + kfree(cachep->array[i]); + + /* NUMA: free the list3 structures */ + for_each_online_node(i) { + l3 = cachep->nodelists[i]; + if (l3) { + kfree(l3->shared); + free_alien_cache(l3->alien); + kfree(l3); + } + } + kmem_cache_free(&cache_cache, cachep); +} + + /** * calculate_slab_order - calculate size (page order) of slabs * @cachep: pointer to the cache that is being created @@ -1904,12 +1952,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, return left_over; } -static void setup_cpu_cache(struct kmem_cache *cachep) +static int setup_cpu_cache(struct kmem_cache *cachep) { - if (g_cpucache_up == FULL) { - enable_cpucache(cachep); - return; - } + if (g_cpucache_up == FULL) + return enable_cpucache(cachep); + if (g_cpucache_up == NONE) { /* * Note: the first kmem_cache_create must create the cache @@ -1956,6 +2003,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep) cpu_cache_get(cachep)->touched = 0; cachep->batchcount = 1; cachep->limit = BOOT_CPUCACHE_ENTRIES; + return 0; } /** @@ -2097,6 +2145,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, } else { ralign = BYTES_PER_WORD; } + + /* + * Redzoning and user store require word alignment. Note this will be + * overridden by architecture or caller mandated alignment if either + * is greater than BYTES_PER_WORD. + */ + if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) + ralign = BYTES_PER_WORD; + /* 2) arch mandated alignment: disables debug if necessary */ if (ralign < ARCH_SLAB_MINALIGN) { ralign = ARCH_SLAB_MINALIGN; @@ -2110,8 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } /* - * 4) Store it. Note that the debug code below can reduce - * the alignment to BYTES_PER_WORD. + * 4) Store it. 
*/ align = ralign; @@ -2123,20 +2179,19 @@ kmem_cache_create (const char *name, size_t size, size_t align, #if DEBUG cachep->obj_size = size; + /* + * Both debugging options require word-alignment which is calculated + * into align above. + */ if (flags & SLAB_RED_ZONE) { - /* redzoning only works with word aligned caches */ - align = BYTES_PER_WORD; - /* add space for red zone words */ cachep->obj_offset += BYTES_PER_WORD; size += 2 * BYTES_PER_WORD; } if (flags & SLAB_STORE_USER) { - /* user store requires word alignment and - * one word storage behind the end of the real - * object. + /* user store requires one word storage behind the end of + * the real object. */ - align = BYTES_PER_WORD; size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) @@ -2200,14 +2255,26 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->gfpflags |= GFP_DMA; cachep->buffer_size = size; - if (flags & CFLGS_OFF_SLAB) + if (flags & CFLGS_OFF_SLAB) { cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); + /* + * This is a possibility for one of the malloc_sizes caches. + * But since we go off slab only for object size greater than + * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, + * this should not happen at all. + * But leave a BUG_ON for some lucky dude. + */ + BUG_ON(!cachep->slabp_cache); + } cachep->ctor = ctor; cachep->dtor = dtor; cachep->name = name; - - setup_cpu_cache(cachep); + if (setup_cpu_cache(cachep)) { + __kmem_cache_destroy(cachep); + cachep = NULL; + goto oops; + } /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); @@ -2389,9 +2456,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); */ int kmem_cache_destroy(struct kmem_cache *cachep) { - int i; - struct kmem_list3 *l3; - BUG_ON(!cachep || in_interrupt()); /* Don't let CPUs to come and go */ @@ -2417,25 +2481,23 @@ int kmem_cache_destroy(struct kmem_cache *cachep) if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) synchronize_rcu(); - for_each_online_cpu(i) - kfree(cachep->array[i]); - - /* NUMA: free the list3 structures */ - for_each_online_node(i) { - l3 = cachep->nodelists[i]; - if (l3) { - kfree(l3->shared); - free_alien_cache(l3->alien); - kfree(l3); - } - } - kmem_cache_free(&cache_cache, cachep); + __kmem_cache_destroy(cachep); unlock_cpu_hotplug(); return 0; } EXPORT_SYMBOL(kmem_cache_destroy); -/* Get the memory for a slab management obj. */ +/* + * Get the memory for a slab management obj. + * For a slab cache when the slab descriptor is off-slab, slab descriptors + * always come from malloc_sizes caches. The slab descriptor cannot + * come from the same cache which is getting created because, + * when we are searching for an appropriate cache for these + * descriptors in kmem_cache_create, we search through the malloc_sizes array. + * If we are creating a malloc_sizes cache here it would not be visible to + * kmem_find_general_cachep till the initialization is complete. + * Hence we cannot have slabp_cache same as the original cache. 
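Since setup_cpu_cache() can now fail, kmem_cache_create() cleans up with __kmem_cache_destroy() and returns NULL rather than a half-initialized cache, so callers must check the result unless they pass SLAB_PANIC. A usage sketch against the six-argument signature of this era; SLAB_RECLAIM_ACCOUNT ties the cache into the NR_SLAB_RECLAIMABLE accounting introduced above:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/slab.h>

    struct foo { int a, b; };
    static struct kmem_cache *foo_cache;

    static int __init foo_init(void)
    {
            /* pages of this cache count as NR_SLAB_RECLAIMABLE */
            foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
                                          0, SLAB_RECLAIM_ACCOUNT,
                                          NULL, NULL);
            if (!foo_cache)         /* now a real possibility */
                    return -ENOMEM;
            return 0;
    }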
+ */ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, int colour_off, gfp_t local_flags, int nodeid) @@ -3119,6 +3181,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, if (slabp->inuse == 0) { if (l3->free_objects > l3->free_limit) { l3->free_objects -= cachep->num; + /* No need to drop any previously held + * lock here, even if we have a off-slab slab + * descriptor it is guaranteed to come from + * a different cache, refer to comments before + * alloc_slabmgmt. + */ slab_destroy(cachep, slabp); } else { list_add(&slabp->list, &l3->slabs_free); @@ -3317,7 +3385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, gfp_t flags, int node) +void *__kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *cachep; @@ -3326,7 +3394,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node) return NULL; return kmem_cache_alloc_node(cachep, flags, node); } -EXPORT_SYMBOL(kmalloc_node); +EXPORT_SYMBOL(__kmalloc_node); #endif /** @@ -3370,55 +3438,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) EXPORT_SYMBOL(__kmalloc_track_caller); #endif -#ifdef CONFIG_SMP -/** - * __alloc_percpu - allocate one copy of the object for every present - * cpu in the system, zeroing them. - * Objects should be dereferenced using the per_cpu_ptr macro only. - * - * @size: how many bytes of memory are required. - */ -void *__alloc_percpu(size_t size) -{ - int i; - struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); - - if (!pdata) - return NULL; - - /* - * Cannot use for_each_online_cpu since a cpu may come online - * and we have no way of figuring out how to fix the array - * that we have allocated then.... - */ - for_each_possible_cpu(i) { - int node = cpu_to_node(i); - - if (node_online(node)) - pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node); - else - pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); - - if (!pdata->ptrs[i]) - goto unwind_oom; - memset(pdata->ptrs[i], 0, size); - } - - /* Catch derefs w/o wrappers */ - return (void *)(~(unsigned long)pdata); - -unwind_oom: - while (--i >= 0) { - if (!cpu_possible(i)) - continue; - kfree(pdata->ptrs[i]); - } - kfree(pdata); - return NULL; -} -EXPORT_SYMBOL(__alloc_percpu); -#endif - /** * kmem_cache_free - Deallocate an object * @cachep: The cache the allocation was from. @@ -3464,29 +3483,6 @@ void kfree(const void *objp) } EXPORT_SYMBOL(kfree); -#ifdef CONFIG_SMP -/** - * free_percpu - free previously allocated percpu memory - * @objp: pointer returned by alloc_percpu. - * - * Don't free memory not originally allocated by alloc_percpu() - * The complemented objp is to check for that. - */ -void free_percpu(const void *objp) -{ - int i; - struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); - - /* - * We allocate for all cpus so we cannot use for online cpu here. 
- */ - for_each_possible_cpu(i) - kfree(p->ptrs[i]); - kfree(p); -} -EXPORT_SYMBOL(free_percpu); -#endif - unsigned int kmem_cache_size(struct kmem_cache *cachep) { return obj_size(cachep); @@ -3603,22 +3599,26 @@ static void do_ccupdate_local(void *info) static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared) { - struct ccupdate_struct new; - int i, err; + struct ccupdate_struct *new; + int i; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; - memset(&new.new, 0, sizeof(new.new)); for_each_online_cpu(i) { - new.new[i] = alloc_arraycache(cpu_to_node(i), limit, + new->new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); - if (!new.new[i]) { + if (!new->new[i]) { for (i--; i >= 0; i--) - kfree(new.new[i]); + kfree(new->new[i]); + kfree(new); return -ENOMEM; } } - new.cachep = cachep; + new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); + on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); check_irq_on(); cachep->batchcount = batchcount; @@ -3626,7 +3626,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, cachep->shared = shared; for_each_online_cpu(i) { - struct array_cache *ccold = new.new[i]; + struct array_cache *ccold = new->new[i]; if (!ccold) continue; spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); @@ -3634,18 +3634,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); kfree(ccold); } - - err = alloc_kmemlist(cachep); - if (err) { - printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", - cachep->name, -err); - BUG(); - } - return 0; + kfree(new); + return alloc_kmemlist(cachep); } /* Called with cache_chain_mutex held always */ -static void enable_cpucache(struct kmem_cache *cachep) +static int enable_cpucache(struct kmem_cache *cachep) { int err; int limit, shared; @@ -3697,6 +3691,7 @@ static void enable_cpucache(struct kmem_cache *cachep) if (err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", cachep->name, -err); + return err; } /* @@ -4157,6 +4152,7 @@ static int leaks_show(struct seq_file *m, void *p) show_symbol(m, n[2*i+2]); seq_putc(m, '\n'); } + return 0; } diff --git a/mm/slob.c b/mm/slob.c index 7b52b20b9607..20188627347c 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -339,52 +339,3 @@ void kmem_cache_init(void) mod_timer(&slob_timer, jiffies + HZ); } - -atomic_t slab_reclaim_pages = ATOMIC_INIT(0); -EXPORT_SYMBOL(slab_reclaim_pages); - -#ifdef CONFIG_SMP - -void *__alloc_percpu(size_t size) -{ - int i; - struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); - - if (!pdata) - return NULL; - - for_each_possible_cpu(i) { - pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); - if (!pdata->ptrs[i]) - goto unwind_oom; - memset(pdata->ptrs[i], 0, size); - } - - /* Catch derefs w/o wrappers */ - return (void *) (~(unsigned long) pdata); - -unwind_oom: - while (--i >= 0) { - if (!cpu_possible(i)) - continue; - kfree(pdata->ptrs[i]); - } - kfree(pdata); - return NULL; -} -EXPORT_SYMBOL(__alloc_percpu); - -void -free_percpu(const void *objp) -{ - int i; - struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); - - for_each_possible_cpu(i) - kfree(p->ptrs[i]); - - kfree(p); -} -EXPORT_SYMBOL(free_percpu); - -#endif diff --git a/mm/swap.c b/mm/swap.c index 687686a61f7c..2e0e871f542f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -34,6 +34,25 @@ /* How many pages do we try to swap or page in/out together? 
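The do_tune_cpucache() rework above swaps an on-stack ccupdate_struct for a kzalloc()'d one. The struct carries one array_cache pointer per CPU (NR_CPUS slots in the real structure, an assumption here since the declaration is outside this hunk), so on big-iron configs the old local variable alone could eat most of a small kernel stack. The shape of the fix:

#include <stdlib.h>

#define NR_CPUS 1024			/* large SMP .config */

struct kmem_cache;
struct array_cache;

struct ccupdate_struct {
	struct kmem_cache *cachep;
	struct array_cache *new[NR_CPUS];	/* 8 KiB of pointers on LP64 */
};

static int tune_sketch(struct kmem_cache *cachep)
{
	/*
	 * Before: "struct ccupdate_struct new;" lived on the stack --
	 * roughly the size of an entire kernel thread stack with the
	 * config above.  After: allocate zeroed from the heap and free
	 * it on every exit path, as the new code does.
	 */
	struct ccupdate_struct *new = calloc(1, sizeof(*new));

	if (!new)
		return -1;			/* -ENOMEM upstream */
	new->cachep = cachep;
	/* ... fill new->new[] per CPU, broadcast, swap, free old ... */
	free(new);
	return 0;
}

int main(void)
{
	return tune_sketch(NULL) ? 1 : 0;
}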
*/ int page_cluster; +/* + * This path almost never happens for VM activity - pages are normally + * freed via pagevecs. But it gets used by networking. + */ +static void fastcall __page_cache_release(struct page *page) +{ + if (PageLRU(page)) { + unsigned long flags; + struct zone *zone = page_zone(page); + + spin_lock_irqsave(&zone->lru_lock, flags); + VM_BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irqrestore(&zone->lru_lock, flags); + } + free_hot_page(page); +} + static void put_compound_page(struct page *page) { page = (struct page *)page_private(page); @@ -223,26 +242,6 @@ int lru_add_drain_all(void) #endif /* - * This path almost never happens for VM activity - pages are normally - * freed via pagevecs. But it gets used by networking. - */ -void fastcall __page_cache_release(struct page *page) -{ - if (PageLRU(page)) { - unsigned long flags; - struct zone *zone = page_zone(page); - - spin_lock_irqsave(&zone->lru_lock, flags); - BUG_ON(!PageLRU(page)); - __ClearPageLRU(page); - del_page_from_lru(zone, page); - spin_unlock_irqrestore(&zone->lru_lock, flags); - } - free_hot_page(page); -} -EXPORT_SYMBOL(__page_cache_release); - -/* * Batched page_cache_release(). Decrement the reference count on all the * passed pages. If it fell to zero then remove the page from the LRU and * free it. @@ -284,7 +283,7 @@ void release_pages(struct page **pages, int nr, int cold) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); del_page_from_lru(zone, page); } @@ -337,7 +336,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); if (put_page_testzero(page)) pagevec_add(&pages_to_free, page); } @@ -364,7 +363,7 @@ void __pagevec_lru_add(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); add_page_to_inactive_list(zone, page); } @@ -391,9 +390,9 @@ void __pagevec_lru_add_active(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(PageActive(page)); + VM_BUG_ON(PageActive(page)); SetPageActive(page); add_page_to_active_list(zone, page); } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 266162d2ba28..9aad8b0cc6ee 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -24,6 +24,9 @@ DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; +static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, + int node); + static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) { pte_t *pte; @@ -478,8 +481,8 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. 
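The BUG_ON()-to-VM_BUG_ON() conversions that follow in mm/swap.c and mm/vmscan.c trade unconditional assertions for ones that vanish unless the kernel is built with CONFIG_DEBUG_VM. The macro itself is not part of this hunk, so the definition below is an assumed sketch of this kernel generation, not a quotation:

/* Assumed shape of the macro (see include/linux/mm.h of this era): */
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond)	BUG_ON(cond)
#else
#define VM_BUG_ON(cond)	do { } while (0)	/* compiles away */
#endif

/* So on production builds an LRU hot-path check like
 *	VM_BUG_ON(!PageLRU(page));
 * costs nothing, while debug builds keep the full sanity check. */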
*/ -void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, - int node) +static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, + int node) { struct vm_struct *area; @@ -493,7 +496,6 @@ void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, return __vmalloc_area_node(area, gfp_mask, prot, node); } -EXPORT_SYMBOL(__vmalloc_node); void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 5d4c4d02254d..87779dda4ec6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -62,6 +62,8 @@ struct scan_control { int swap_cluster_max; int swappiness; + + int all_unreclaimable; }; /* @@ -377,8 +379,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) int remove_mapping(struct address_space *mapping, struct page *page) { - if (!mapping) - return 0; /* truncate got there first */ + BUG_ON(!PageLocked(page)); + BUG_ON(mapping != page_mapping(page)); write_lock_irq(&mapping->tree_lock); @@ -440,7 +442,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (TestSetPageLocked(page)) goto keep; - BUG_ON(PageActive(page)); + VM_BUG_ON(PageActive(page)); sc->nr_scanned++; @@ -547,7 +549,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto free_it; } - if (!remove_mapping(mapping, page)) + if (!mapping || !remove_mapping(mapping, page)) goto keep_locked; free_it: @@ -564,7 +566,7 @@ keep_locked: unlock_page(page); keep: list_add(&page->lru, &ret_pages); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); } list_splice(&ret_pages, page_list); if (pagevec_count(&freed_pvec)) @@ -603,7 +605,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - BUG_ON(!PageLRU(page)); + VM_BUG_ON(!PageLRU(page)); list_del(&page->lru); target = src; @@ -674,7 +676,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, */ while (!list_empty(&page_list)) { page = lru_to_page(&page_list); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); list_del(&page->lru); if (PageActive(page)) @@ -695,6 +697,11 @@ done: return nr_reclaimed; } +static inline int zone_is_near_oom(struct zone *zone) +{ + return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; +} + /* * This moves pages from the active list to the inactive list. * @@ -730,6 +737,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, long distress; long swap_tendency; + if (zone_is_near_oom(zone)) + goto force_reclaim_mapped; + /* * `distress' is a measure of how much trouble we're having * reclaiming pages. 0 -> no problems. 100 -> great trouble. @@ -765,6 +775,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * memory onto the inactive list. 
*/ if (swap_tendency >= 100) +force_reclaim_mapped: reclaim_mapped = 1; } @@ -797,9 +808,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, while (!list_empty(&l_inactive)) { page = lru_to_page(&l_inactive); prefetchw_prev_lru_page(page, &l_inactive, flags); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(!PageActive(page)); + VM_BUG_ON(!PageActive(page)); ClearPageActive(page); list_move(&page->lru, &zone->inactive_list); @@ -827,9 +838,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, while (!list_empty(&l_active)) { page = lru_to_page(&l_active); prefetchw_prev_lru_page(page, &l_active, flags); - BUG_ON(PageLRU(page)); + VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - BUG_ON(!PageActive(page)); + VM_BUG_ON(!PageActive(page)); list_move(&page->lru, &zone->active_list); pgmoved++; if (!pagevec_add(&pvec, page)) { @@ -925,6 +936,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, unsigned long nr_reclaimed = 0; int i; + sc->all_unreclaimable = 1; for (i = 0; zones[i] != NULL; i++) { struct zone *zone = zones[i]; @@ -941,6 +953,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones, if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ + sc->all_unreclaimable = 0; + nr_reclaimed += shrink_zone(priority, zone, sc); } return nr_reclaimed; @@ -1021,6 +1035,9 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) if (sc.nr_scanned && priority < DEF_PRIORITY - 2) blk_congestion_wait(WRITE, HZ/10); } + /* top priority shrink_caches still had more to do? don't OOM, then */ + if (!sc.all_unreclaimable) + ret = 1; out: for (i = 0; zones[i] != 0; i++) { struct zone *zone = zones[i]; @@ -1153,7 +1170,7 @@ scan: if (zone->all_unreclaimable) continue; if (nr_slab == 0 && zone->pages_scanned >= - (zone->nr_active + zone->nr_inactive) * 4) + (zone->nr_active + zone->nr_inactive) * 6) zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and @@ -1361,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) for_each_zone(zone) lru_pages += zone->nr_active + zone->nr_inactive; - nr_slab = global_page_state(NR_SLAB); + nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); /* If slab caches are huge, it's better to hit them first */ while (nr_slab >= lru_pages) { reclaim_state.reclaimed_slab = 0; @@ -1510,7 +1527,6 @@ int zone_reclaim_mode __read_mostly; #define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ #define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ -#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */ /* * Priority for ZONE_RECLAIM. This determines the fraction of pages @@ -1526,6 +1542,12 @@ int zone_reclaim_mode __read_mostly; int sysctl_min_unmapped_ratio = 1; /* + * If the number of slab pages in a zone grows beyond this percentage then + * slab reclaim needs to occur. + */ +int sysctl_min_slab_ratio = 5; + +/* * Try to free up some pages from this zone through reclaim. 
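Both tunables above are percentages, while the reclaim code compares page counts (zone->min_unmapped_pages, zone->min_slab_pages). The sysctl handlers that bridge the two are outside this hunk, so the conversion below is an assumption modelled on the stated ratio semantics, not quoted code:

#include <stdio.h>

static unsigned long ratio_to_pages(unsigned long zone_present_pages,
				    unsigned int ratio_percent)
{
	/* Assumed formula; the real sysctl handler is not in this hunk. */
	return zone_present_pages * ratio_percent / 100;
}

int main(void)
{
	/* A 4 GiB zone of 4 KiB pages with the default min_slab_ratio: */
	printf("min_slab_pages = %lu\n", ratio_to_pages(1048576UL, 5));
	return 0;
}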
*/ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) @@ -1544,6 +1566,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .gfp_mask = gfp_mask, .swappiness = vm_swappiness, }; + unsigned long slab_reclaimable; disable_swap_token(); cond_resched(); @@ -1556,29 +1579,43 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - /* - * Free memory by calling shrink zone with increasing priorities - * until we have enough memory freed. - */ - priority = ZONE_RECLAIM_PRIORITY; - do { - nr_reclaimed += shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && nr_reclaimed < nr_pages); + if (zone_page_state(zone, NR_FILE_PAGES) - + zone_page_state(zone, NR_FILE_MAPPED) > + zone->min_unmapped_pages) { + /* + * Free memory by calling shrink zone with increasing + * priorities until we have enough memory freed. + */ + priority = ZONE_RECLAIM_PRIORITY; + do { + nr_reclaimed += shrink_zone(priority, zone, &sc); + priority--; + } while (priority >= 0 && nr_reclaimed < nr_pages); + } - if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { + slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE); + if (slab_reclaimable > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how - * many pages were freed in this zone. So we just shake the slab - * a bit and then go off node for this particular allocation - * despite possibly having freed enough memory to allocate in - * this zone. If we freed local memory then the next - * allocations will be local again. + * many pages were freed in this zone. So we take the current + * number of slab pages and shake the slab until it is reduced + * by the same nr_pages that we used for reclaiming unmapped + * pages. * - * shrink_slab will free memory on all zones and may take - * a long time. + * Note that shrink_slab will free memory on all zones and may + * take a long time. + */ + while (shrink_slab(sc.nr_scanned, gfp_mask, order) && + zone_page_state(zone, NR_SLAB_RECLAIMABLE) > + slab_reclaimable - nr_pages) + ; + + /* + * Update nr_reclaimed by the number of slab pages we + * reclaimed from this zone. */ - shrink_slab(sc.nr_scanned, gfp_mask, order); + nr_reclaimed += slab_reclaimable - + zone_page_state(zone, NR_SLAB_RECLAIMABLE); } p->reclaim_state = NULL; @@ -1592,7 +1629,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Zone reclaim reclaims unmapped file backed pages. + * Zone reclaim reclaims unmapped file backed pages and + * slab pages if we are over the defined limits. * * A small portion of unmapped file backed pages is needed for * file I/O otherwise pages read by file I/O will be immediately @@ -1601,7 +1639,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * unmapped file backed pages. */ if (zone_page_state(zone, NR_FILE_PAGES) - - zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages + && zone_page_state(zone, NR_SLAB_RECLAIMABLE) + <= zone->min_slab_pages) return 0; /* @@ -1621,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * over remote processors and spread off node memory allocations * as wide as possible. 
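Condensing the slab pass above: keep calling the shrinker until it reports no progress or the zone's reclaimable-slab count has dropped by nr_pages, then credit the observed delta to nr_reclaimed. A runnable restatement with the kernel calls abstracted behind function pointers (slab_pages() stands in for zone_page_state(zone, NR_SLAB_RECLAIMABLE), shake() for shrink_slab(); both names are hypothetical):

#include <stdio.h>

static unsigned long slab_pass(unsigned long nr_pages,
			       unsigned long (*slab_pages)(void),
			       int (*shake)(void))
{
	unsigned long before = slab_pages();

	/*
	 * As in the original, 'before - nr_pages' may wrap when
	 * nr_pages exceeds 'before'; the loop then ends only when the
	 * shrinker stops making progress.
	 */
	while (shake() && slab_pages() > before - nr_pages)
		;

	/* Pages actually given back by the shrinker in this zone. */
	return before - slab_pages();
}

/* Toy zone: 100 reclaimable slab pages, shrinker frees 10 per call. */
static unsigned long fake_slab = 100;

static unsigned long fake_slab_pages(void)
{
	return fake_slab;
}

static int fake_shake(void)
{
	if (fake_slab >= 10)
		fake_slab -= 10;
	return fake_slab > 0;
}

int main(void)
{
	/* Asked for 32 pages; expect ~40 freed (shrinker granularity). */
	printf("reclaimed %lu slab pages\n",
	       slab_pass(32, fake_slab_pages, fake_shake));
	return 0;
}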
*/ - node_id = zone->zone_pgdat->node_id; + node_id = zone_to_nid(zone); mask = node_to_cpumask(node_id); if (!cpus_empty(mask) && node_id != numa_node_id()) return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index c1b5f4106b38..490d8c1a0ded 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -321,6 +321,9 @@ void refresh_cpu_vm_stats(int cpu) for_each_zone(zone) { struct per_cpu_pageset *pcp; + if (!populated_zone(zone)) + continue; + pcp = zone_pcp(zone, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) @@ -435,12 +438,28 @@ struct seq_operations fragmentation_op = { .show = frag_show, }; +#ifdef CONFIG_ZONE_DMA32 +#define TEXT_FOR_DMA32(xx) xx "_dma32", +#else +#define TEXT_FOR_DMA32(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define TEXT_FOR_HIGHMEM(xx) xx "_high", +#else +#define TEXT_FOR_HIGHMEM(xx) +#endif + +#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ + TEXT_FOR_HIGHMEM(xx) + static char *vmstat_text[] = { /* Zoned VM counters */ "nr_anon_pages", "nr_mapped", "nr_file_pages", - "nr_slab", + "nr_slab_reclaimable", + "nr_slab_unreclaimable", "nr_page_table_pages", "nr_dirty", "nr_writeback", @@ -462,10 +481,7 @@ static char *vmstat_text[] = { "pswpin", "pswpout", - "pgalloc_dma", - "pgalloc_dma32", - "pgalloc_normal", - "pgalloc_high", + TEXTS_FOR_ZONES("pgalloc") "pgfree", "pgactivate", @@ -474,25 +490,10 @@ static char *vmstat_text[] = { "pgfault", "pgmajfault", - "pgrefill_dma", - "pgrefill_dma32", - "pgrefill_normal", - "pgrefill_high", - - "pgsteal_dma", - "pgsteal_dma32", - "pgsteal_normal", - "pgsteal_high", - - "pgscan_kswapd_dma", - "pgscan_kswapd_dma32", - "pgscan_kswapd_normal", - "pgscan_kswapd_high", - - "pgscan_direct_dma", - "pgscan_direct_dma32", - "pgscan_direct_normal", - "pgscan_direct_high", + TEXTS_FOR_ZONES("pgrefill") + TEXTS_FOR_ZONES("pgsteal") + TEXTS_FOR_ZONES("pgscan_kswapd") + TEXTS_FOR_ZONES("pgscan_direct") "pginodesteal", "slabs_scanned", diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 859e3359fcda..e2a095d0fd80 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -40,6 +40,22 @@ config IP_DCCP_DEBUG Just say N. +config NET_DCCPPROBE + tristate "DCCP connection probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to DCCP connection + state in response to incoming packets. It is used for debugging + DCCP congestion avoidance modules. If you don't understand + what was just said, you don't need it: say N. + + Documentation on how to use the packet generator can be found + at http://linux-net.osdl.org/index.php/DccpProbe + + To compile this code as a module, choose M here: the + module will be called dccp_probe. 
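The TEXTS_FOR_ZONES() macro family introduced in the mm/vmstat.c hunk above is easy to misread because of the string pasting and the trailing commas hidden inside the helpers. Spelling out one expansion (CONFIG_HIGHMEM=y, CONFIG_ZONE_DMA32 unset):

/* Given the definitions above: */
TEXTS_FOR_ZONES("pgalloc")

/* step 1: macro substitution (TEXT_FOR_DMA32 expands to nothing) */
"pgalloc" "_dma", "pgalloc" "_normal", "pgalloc" "_high",

/* step 2: adjacent string literals paste together */
"pgalloc_dma", "pgalloc_normal", "pgalloc_high",

/* With CONFIG_ZONE_DMA32=y, "pgalloc_dma32" appears after
 * "pgalloc_dma", restoring the four-name lists the patch deletes. */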
+ + endmenu endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 7696e219b05d..17ed99c46617 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o +obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o +dccp_probe-y := probe.o obj-y += ccids/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 457dd3db7f41..2efb505aeb35 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -808,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } static struct ccid_operations ccid2 = { - .ccid_id = 2, + .ccid_id = DCCPC_CCID2, .ccid_name = "ccid2", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 195aa9566228..67d2dc0e7c67 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1240,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, } static struct ccid_operations ccid3 = { - .ccid_id = 3, + .ccid_id = DCCPC_CCID3, .ccid_name = "ccid3", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9a1a76a7dc41..66be29b6f508 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -56,9 +56,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; diff --git a/net/dccp/probe.c b/net/dccp/probe.c new file mode 100644 index 000000000000..146496fce2e2 --- /dev/null +++ b/net/dccp/probe.c @@ -0,0 +1,198 @@ +/* + * dccp_probe - Observe the DCCP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * Modified for DCCP from Stephen Hemminger's code + * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/dccp.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/vmalloc.h> + +#include "dccp.h" +#include "ccid.h" +#include "ccids/ccid3.h" + +static int port; + +static int bufsize = 64 * 1024; + +static const char procname[] = "dccpprobe"; + +struct { + struct kfifo *fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timeval tstart; +} dccpw; + +static void printl(const char *fmt, ...) 
+{ + va_list args; + int len; + struct timeval now; + char tbuf[256]; + + va_start(args, fmt); + do_gettimeofday(&now); + + now.tv_sec -= dccpw.tstart.tv_sec; + now.tv_usec -= dccpw.tstart.tv_usec; + if (now.tv_usec < 0) { + --now.tv_sec; + now.tv_usec += 1000000; + } + + len = sprintf(tbuf, "%lu.%06lu ", + (unsigned long) now.tv_sec, + (unsigned long) now.tv_usec); + len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); + va_end(args); + + kfifo_put(dccpw.fifo, tbuf, len); + wake_up(&dccpw.wait); +} + +static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + const struct dccp_minisock *dmsk = dccp_msk(sk); + const struct inet_sock *inet = inet_sk(sk); + const struct ccid3_hc_tx_sock *hctx; + + if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + hctx = ccid3_hc_tx_sk(sk); + else + hctx = NULL; + + if (port == 0 || ntohs(inet->dport) == port || + ntohs(inet->sport) == port) { + if (hctx) + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size, + hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi); + else + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size); + } + + jprobe_return(); + return 0; +} + +static struct jprobe dccp_send_probe = { + .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, }, + .entry = (kprobe_opcode_t *)&jdccp_sendmsg, +}; + +static int dccpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(dccpw.fifo); + do_gettimeofday(&dccpw.tstart); + return 0; +} + +static ssize_t dccpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf || len < 0) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(dccpw.wait, + __kfifo_len(dccpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_get(dccpw.fifo, tbuf, len); + error = copy_to_user(buf, tbuf, cnt); + +out_free: + vfree(tbuf); + + return error ? 
error : cnt; +} + +static struct file_operations dccpprobe_fops = { + .owner = THIS_MODULE, + .open = dccpprobe_open, + .read = dccpprobe_read, +}; + +static __init int dccpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&dccpw.wait); + spin_lock_init(&dccpw.lock); + dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); + + if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + goto err0; + + ret = register_jprobe(&dccp_send_probe); + if (ret) + goto err1; + + pr_info("DCCP watch registered (port=%d)\n", port); + return 0; +err1: + proc_net_remove(procname); +err0: + kfifo_free(dccpw.fifo); + return ret; +} +module_init(dccpprobe_init); + +static __exit void dccpprobe_exit(void) +{ + kfifo_free(dccpw.fifo); + proc_net_remove(procname); + unregister_jprobe(&dccp_send_probe); + +} +module_exit(dccpprobe_exit); + +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>"); +MODULE_DESCRIPTION("DCCP snooper"); +MODULE_LICENSE("GPL"); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 962df0ea31aa..72cbdcfc2c65 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -217,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; + dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; return 0; @@ -267,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - /* - * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) - * before calling listen() - */ - if (dccp_service_not_initialized(sk)) - return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -540,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len, int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); - if (dccp_service_not_initialized(sk)) - goto out; - if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 1650b64415aa..30af4a4dfcc8 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -448,24 +448,22 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG -config TCP_CONG_ADVANCED +menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- Support for selection of various TCP congestion control modules. Nearly all users can safely say no here, and a safe default - selection will be made (BIC-TCP with new Reno as a fallback). + selection will be made (CUBIC with new Reno as a fallback). If unsure, say N. -# TCP Reno is builtin (required as fallback) -menu "TCP congestion control" - depends on TCP_CONG_ADVANCED +if TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" - default y + default m ---help--- BIC-TCP is a sender-side only change that ensures a linear RTT fairness under large windows while offering both scalability and @@ -479,7 +477,7 @@ config TCP_CONG_BIC config TCP_CONG_CUBIC tristate "CUBIC TCP" - default m + default y ---help--- This is version 2.0 of BIC-TCP which uses a cubic growth function among other techniques. @@ -574,12 +572,49 @@ config TCP_CONG_VENO loss packets. 
See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf -endmenu +choice + prompt "Default TCP congestion control" + default DEFAULT_CUBIC + help + Select the TCP congestion control that will be used by default + for all connections. -config TCP_CONG_BIC + config DEFAULT_BIC + bool "Bic" if TCP_CONG_BIC=y + + config DEFAULT_CUBIC + bool "Cubic" if TCP_CONG_CUBIC=y + + config DEFAULT_HTCP + bool "Htcp" if TCP_CONG_HTCP=y + + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + + config DEFAULT_WESTWOOD + bool "Westwood" if TCP_CONG_WESTWOOD=y + + config DEFAULT_RENO + bool "Reno" + +endchoice + +endif + +config TCP_CONG_CUBIC tristate depends on !TCP_CONG_ADVANCED default y +config DEFAULT_TCP_CONG + string + default "bic" if DEFAULT_BIC + default "cubic" if DEFAULT_CUBIC + default "htcp" if DEFAULT_HTCP + default "vegas" if DEFAULT_VEGAS + default "westwood" if DEFAULT_WESTWOOD + default "reno" if DEFAULT_RENO + default "cubic" + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 80a2a0911b49..e6ce0b3ba62a 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -259,7 +259,7 @@ void cipso_v4_cache_invalidate(void) u32 iter; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { - spin_lock(&cipso_v4_cache[iter].lock); + spin_lock_bh(&cipso_v4_cache[iter].lock); list_for_each_entry_safe(entry, tmp_entry, &cipso_v4_cache[iter].list, list) { @@ -267,7 +267,7 @@ void cipso_v4_cache_invalidate(void) cipso_v4_cache_entry_free(entry); } cipso_v4_cache[iter].size = 0; - spin_unlock(&cipso_v4_cache[iter].lock); + spin_unlock_bh(&cipso_v4_cache[iter].lock); } return; @@ -309,7 +309,7 @@ static int cipso_v4_cache_check(const unsigned char *key, hash = cipso_v4_map_cache_hash(key, key_len); bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); + spin_lock_bh(&cipso_v4_cache[bkt].lock); list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { if (entry->hash == hash && entry->key_len == key_len && @@ -318,7 +318,7 @@ static int cipso_v4_cache_check(const unsigned char *key, secattr->cache.free = entry->lsm_data.free; secattr->cache.data = entry->lsm_data.data; if (prev_entry == NULL) { - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } @@ -333,12 +333,12 @@ static int cipso_v4_cache_check(const unsigned char *key, &prev_entry->list); } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } prev_entry = entry; } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return -ENOENT; } @@ -387,7 +387,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, entry->lsm_data.data = secattr->cache.data; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); + spin_lock_bh(&cipso_v4_cache[bkt].lock); if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; @@ -398,7 +398,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache_entry_free(old_entry); } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -530,197 +530,42 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) } /** - * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff - * @headroom: the amount of headroom to allocate for the sk_buff + * cipso_v4_doi_walk - Iterate through 
the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function * * Description: - * Dump a list of all the configured DOI values into a sk_buff. The returned - * sk_buff has room at the front of the sk_buff for @headroom bytes. See - * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This - * function may fail if another process is changing the DOI list at the same - * time. Returns a pointer to a sk_buff on success, NULL on error. + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. * */ -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; + int ret_val = -ENOENT; u32 doi_cnt = 0; - ssize_t buf_len; + struct cipso_v4_doi *iter_doi; - buf_len = NETLBL_LEN_U32; rcu_read_lock(); - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - doi_cnt += 1; - buf_len += 2 * NETLBL_LEN_U32; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_all_failure; - - if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) - goto doi_dump_all_failure; - buf_len -= NETLBL_LEN_U32; - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - if (buf_len < 2 * NETLBL_LEN_U32) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_all_failure; - buf_len -= 2 * NETLBL_LEN_U32; - } - rcu_read_unlock(); - - return skb; - -doi_dump_all_failure: - rcu_read_unlock(); - kfree(skb); - return NULL; -} - -/** - * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff - * @doi: the DOI value - * @headroom: the amount of headroom to allocate for the sk_buff - * - * Description: - * Lookup the DOI definition matching @doi and dump it's contents into a - * sk_buff. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message - * format. This function may fail if another process is changing the DOI list - * at the same time. Returns a pointer to a sk_buff on success, NULL on error. 
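cipso_v4_doi_walk() turns the old one-shot dump into a resumable visitor: skip the first *skip_cnt valid definitions, call back for each one after that, stop early on a negative return, and write the resume point back through @skip_cnt. A minimal caller with a hypothetical counting callback, to show the calling convention (the real user, the LISTALL handler below, serializes each DOI into a netlink message instead):

/* Hypothetical callback: tally valid DOI definitions. */
static int count_doi_cb(struct cipso_v4_doi *doi_def, void *arg)
{
	(*(u32 *)arg)++;
	return 0;	/* a negative return would stop the walk early */
}

static u32 count_all_dois(void)
{
	u32 skip = 0;	/* 0: start at the first definition */
	u32 count = 0;

	/* Returns the last callback value, or -ENOENT if nothing past
	 * 'skip' was visited; 'skip' now holds how far the walk got. */
	cipso_v4_doi_walk(&skip, count_doi_cb, &count);
	return count;
}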
- * - */ -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; - u32 tag_cnt = 0; - u32 lvl_cnt = 0; - u32 cat_cnt = 0; - ssize_t buf_len; - ssize_t tmp; - - rcu_read_lock(); - iter = cipso_v4_doi_getdef(doi); - if (iter == NULL) - goto doi_dump_failure; - buf_len = NETLBL_LEN_U32; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - buf_len += NETLBL_LEN_U32; - while(tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - break; - case CIPSO_V4_MAP_STD: - buf_len += 3 * NETLBL_LEN_U32; - while (tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - lvl_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - cat_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; + list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) + if (iter_doi->valid) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; } - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_failure; - - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - if (iter != cipso_v4_doi_getdef(doi)) - goto doi_dump_failure; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; } - break; - case CIPSO_V4_MAP_STD: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) - goto doi_dump_failure; - buf_len -= 3 * NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u8(skb, - NLA_U8, - iter->map.std->lvl.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u16(skb, - NLA_U16, - iter->map.std->cat.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - rcu_read_unlock(); - - return skb; -doi_dump_failure: +doi_walk_return: 
rcu_read_unlock(); - kfree(skb); - return NULL; + *skip_cnt = doi_cnt; + return ret_val; } /** @@ -1486,43 +1331,40 @@ socket_setattr_failure: } /** - * cipso_v4_socket_getattr - Get the security attributes from a socket - * @sock: the socket + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock * @secattr: the security attributes * * Description: - * Query @sock to see if there is a CIPSO option attached to the socket and if - * there is return the CIPSO security attributes in @secattr. Returns zero on - * success and negative values on failure. + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. * */ -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; - struct sock *sk; struct inet_sock *sk_inet; unsigned char *cipso_ptr; u32 doi; struct cipso_v4_doi *doi_def; - sk = sock->sk; - lock_sock(sk); sk_inet = inet_sk(sk); if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - goto socket_getattr_return; + return -ENOMSG; cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - sizeof(struct iphdr); ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); if (ret_val == 0) - goto socket_getattr_return; + return ret_val; doi = ntohl(*(u32 *)&cipso_ptr[2]); rcu_read_lock(); doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { rcu_read_unlock(); - goto socket_getattr_return; + return -ENOMSG; } switch (cipso_ptr[6]) { case CIPSO_V4_TAG_RBITMAP: @@ -1533,8 +1375,29 @@ int cipso_v4_socket_getattr(const struct socket *sock, } rcu_read_unlock(); -socket_getattr_return: - release_sock(sk); + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. 
+ * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + lock_sock(sock->sk); + ret_val = cipso_v4_sock_getattr(sock->sk, secattr); + release_sock(sock->sk); + return ret_val; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 19b2071ff319..e82a5be894b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, return ret; } +static int __init tcp_congestion_default(void) +{ + return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); +} + +late_initcall(tcp_congestion_default); ctl_table ipv4_table[] = { { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7ff2e4273a7c..af0aca1e6be6 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) printk(KERN_NOTICE "TCP %s already registered\n", ca->name); ret = -EEXIST; } else { - list_add_rcu(&ca->list, &tcp_cong_list); + list_add_tail_rcu(&ca->list, &tcp_cong_list); printk(KERN_INFO "TCP %s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig index fe23cb7f1e87..9f7121ae13e9 100644 --- a/net/netlabel/Kconfig +++ b/net/netlabel/Kconfig @@ -9,6 +9,9 @@ config NETLABEL ---help--- NetLabel provides support for explicit network packet labeling protocols such as CIPSO and RIPSO. For more information see - Documentation/netlabel. + Documentation/netlabel as well as the NetLabel SourceForge project + for configuration tools and additional documentation. + + * http://netlabel.sf.net If you are unsure, say N. diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index a4f40adc447b..4125a55f469f 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -41,15 +41,37 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" +/* Argument struct for cipso_v4_doi_walk() */ +struct netlbl_cipsov4_doiwalk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_CIPSOV4_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { + [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, + [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED }, +}; /* * Helper Functions @@ -81,6 +103,41 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) kfree(ptr); } +/** + * netlbl_cipsov4_add_common - Parse the common sections of a ADD message + * @info: the Generic NETLINK info block + * @doi_def: the CIPSO V4 DOI definition + * + * Description: + * Parse the common sections of a ADD message and fill in the related 
values + * in @doi_def. Returns zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_add_common(struct genl_info *info, + struct cipso_v4_doi *doi_def) +{ + struct nlattr *nla; + int nla_rem; + u32 iter = 0; + + doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; + + nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) + if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + if (iter > CIPSO_V4_TAG_MAXCNT) + return -EINVAL; + doi_def->tags[iter++] = nla_get_u8(nla); + } + if (iter < CIPSO_V4_TAG_MAXCNT) + doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + + return 0; +} /* * NetLabel Command Handlers @@ -88,9 +145,7 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) /** * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition - * @doi: the DOI value - * @msg: the ADD message data - * @msg_size: the size of the ADD message buffer + * @info: the Generic NETLINK info block * * Description: * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message @@ -98,29 +153,28 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) * error. * */ -static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) +static int netlbl_cipsov4_add_std(struct genl_info *info) { int ret_val = -EINVAL; - int msg_len = msg_size; - u32 num_tags; - u32 num_lvls; - u32 num_cats; struct cipso_v4_doi *doi_def = NULL; - u32 iter; - u32 tmp_val_a; - u32 tmp_val_b; + struct nlattr *nla_a; + struct nlattr *nla_b; + int nla_a_rem; + int nla_b_rem; - if (msg_len < NETLBL_LEN_U32) - goto add_std_failure; - num_tags = netlbl_getinc_u32(&msg, &msg_len); - if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) - goto add_std_failure; + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_TAGLST] || + !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) + return -EINVAL; + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); - if (doi_def == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } + if (doi_def == NULL) + return -ENOMEM; doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); if (doi_def->map.std == NULL) { ret_val = -ENOMEM; @@ -128,28 +182,32 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) } doi_def->type = CIPSO_V4_MAP_STD; - for (iter = 0; iter < num_tags; iter++) { - if (msg_len < NETLBL_LEN_U8) - goto add_std_failure; - doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - break; - default: - goto add_std_failure; - } - } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; - - if (msg_len < 6 * NETLBL_LEN_U32) + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) goto add_std_failure; - num_lvls = netlbl_getinc_u32(&msg, &msg_len); - if (num_lvls == 0) - goto add_std_failure; - doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len); - if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS) + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + nla_for_each_nested(nla_b, nla_a, nla_b_rem) + switch (nla_b->nla_type) { + case NLBL_CIPSOV4_A_MLSLVLLOC: + if (nla_get_u32(nla_b) >= + 
doi_def->map.std->lvl.local_size) + doi_def->map.std->lvl.local_size = + nla_get_u32(nla_b) + 1; + break; + case NLBL_CIPSOV4_A_MLSLVLREM: + if (nla_get_u32(nla_b) >= + doi_def->map.std->lvl.cipso_size) + doi_def->map.std->lvl.cipso_size = + nla_get_u32(nla_b) + 1; + break; + } + } + if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS || + doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) goto add_std_failure; doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, sizeof(u32), @@ -158,9 +216,6 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) ret_val = -ENOMEM; goto add_std_failure; } - doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len); - if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) - goto add_std_failure; doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, sizeof(u32), GFP_KERNEL); @@ -168,68 +223,101 @@ static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) ret_val = -ENOMEM; goto add_std_failure; } + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + struct nlattr *lvl_loc; + struct nlattr *lvl_rem; + + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + + lvl_loc = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSLVLLOC); + lvl_rem = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSLVLREM); + if (lvl_loc == NULL || lvl_rem == NULL) + goto add_std_failure; + doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] = + nla_get_u32(lvl_rem); + doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] = + nla_get_u32(lvl_loc); + } - num_cats = netlbl_getinc_u32(&msg, &msg_len); - doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len); - if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS) - goto add_std_failure; - doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size, + if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + nla_for_each_nested(nla_b, nla_a, nla_b_rem) + switch (nla_b->nla_type) { + case NLBL_CIPSOV4_A_MLSCATLOC: + if (nla_get_u32(nla_b) >= + doi_def->map.std->cat.local_size) + doi_def->map.std->cat.local_size = + nla_get_u32(nla_b) + 1; + break; + case NLBL_CIPSOV4_A_MLSCATREM: + if (nla_get_u32(nla_b) >= + doi_def->map.std->cat.cipso_size) + doi_def->map.std->cat.cipso_size = + nla_get_u32(nla_b) + 1; + break; + } + } + if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS || + doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) + goto add_std_failure; + doi_def->map.std->cat.local = kcalloc( + doi_def->map.std->cat.local_size, sizeof(u32), GFP_KERNEL); - if (doi_def->map.std->cat.local == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len); - if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) - goto add_std_failure; - doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size, + if (doi_def->map.std->cat.local == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + 
doi_def->map.std->cat.cipso = kcalloc( + doi_def->map.std->cat.cipso_size, sizeof(u32), GFP_KERNEL); - if (doi_def->map.std->cat.cipso == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - - if (msg_len < - num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) + - num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16)) - goto add_std_failure; - - for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) - doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; - for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) - doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; - for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) - doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; - for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) - doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; - - for (iter = 0; iter < num_lvls; iter++) { - tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); - tmp_val_b = netlbl_getinc_u8(&msg, &msg_len); - - if (tmp_val_a >= doi_def->map.std->lvl.local_size || - tmp_val_b >= doi_def->map.std->lvl.cipso_size) - goto add_std_failure; - - doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a; - doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b; - } - - for (iter = 0; iter < num_cats; iter++) { - tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); - tmp_val_b = netlbl_getinc_u16(&msg, &msg_len); - - if (tmp_val_a >= doi_def->map.std->cat.local_size || - tmp_val_b >= doi_def->map.std->cat.cipso_size) + if (doi_def->map.std->cat.cipso == NULL) { + ret_val = -ENOMEM; goto add_std_failure; - - doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a; - doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b; + } + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + struct nlattr *cat_loc; + struct nlattr *cat_rem; + + cat_loc = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSCATLOC); + cat_rem = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSCATREM); + if (cat_loc == NULL || cat_rem == NULL) + goto add_std_failure; + doi_def->map.std->cat.local[ + nla_get_u32(cat_loc)] = + nla_get_u32(cat_rem); + doi_def->map.std->cat.cipso[ + nla_get_u32(cat_rem)] = + nla_get_u32(cat_loc); + } } - doi_def->doi = doi; ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) goto add_std_failure; @@ -243,9 +331,7 @@ add_std_failure: /** * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition - * @doi: the DOI value - * @msg: the ADD message data - * @msg_size: the size of the ADD message buffer + * @info: the Generic NETLINK info block * * Description: * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message @@ -253,52 +339,31 @@ add_std_failure: * error. 
* */ -static int netlbl_cipsov4_add_pass(u32 doi, - struct nlattr *msg, - size_t msg_size) +static int netlbl_cipsov4_add_pass(struct genl_info *info) { - int ret_val = -EINVAL; - int msg_len = msg_size; - u32 num_tags; + int ret_val; struct cipso_v4_doi *doi_def = NULL; - u32 iter; - if (msg_len < NETLBL_LEN_U32) - goto add_pass_failure; - num_tags = netlbl_getinc_u32(&msg, &msg_len); - if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) - goto add_pass_failure; + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_TAGLST]) + return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); - if (doi_def == NULL) { - ret_val = -ENOMEM; - goto add_pass_failure; - } + if (doi_def == NULL) + return -ENOMEM; doi_def->type = CIPSO_V4_MAP_PASS; - for (iter = 0; iter < num_tags; iter++) { - if (msg_len < NETLBL_LEN_U8) - goto add_pass_failure; - doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - break; - default: - goto add_pass_failure; - } - } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_pass_failure; - doi_def->doi = doi; ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) goto add_pass_failure; return 0; add_pass_failure: - if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); + netlbl_cipsov4_doi_free(&doi_def->rcu); return ret_val; } @@ -316,34 +381,21 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - u32 doi; u32 map_type; - int msg_len = netlbl_netlink_payload_len(skb); - struct nlattr *msg = netlbl_netlink_payload_data(skb); - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto add_return; - if (msg_len < 2 * NETLBL_LEN_U32) - goto add_return; + if (!info->attrs[NLBL_CIPSOV4_A_MTYPE]) + return -EINVAL; - doi = netlbl_getinc_u32(&msg, &msg_len); - map_type = netlbl_getinc_u32(&msg, &msg_len); + map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (map_type) { case CIPSO_V4_MAP_STD: - ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len); + ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: - ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len); + ret_val = netlbl_cipsov4_add_pass(info); break; } -add_return: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); return ret_val; } @@ -353,84 +405,239 @@ add_return: * @info: the Generic NETLINK info block * * Description: - * Process a user generated LIST message and respond accordingly. Returns - * zero on success and negative values on error. + * Process a user generated LIST message and respond accordingly. While the + * response message generated by the kernel is straightforward, determining + * before hand the size of the buffer to allocate is not (we have to generate + * the message to know the size). In order to keep this function sane what we + * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in + * that size, if we fail then we restart with a larger buffer and try again. + * We continue in this manner until we hit a limit of failed attempts then we + * give up and just send an error message. Returns zero on success and + * negative values on error. 
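The comment above describes a guess-and-grow strategy: allocate NLMSG_GOODSIZE, try to build the reply, and on overflow restart with a larger multiple until a retry cap. Its skeleton, with build_reply() as a hypothetical stand-in for the header plus nla_put/nest calls (returning nonzero when the buffer was too small):

#include <stdlib.h>

static int build_with_retry(int (*build_reply)(void *buf, size_t len))
{
	const size_t goodsize = 4096;		/* NLMSG_GOODSIZE stand-in */
	unsigned int mult;

	for (mult = 1; mult <= 4; mult++) {	/* the cap is a guesstimate,
						 * as the code itself notes */
		void *buf = malloc(goodsize * mult);

		if (!buf)
			return -1;		/* -ENOMEM */
		if (build_reply(buf, goodsize * mult) == 0) {
			/* success: the real code genlmsg_unicast()s here */
			free(buf);
			return 0;
		}
		free(buf);			/* too small: grow, go again */
	}
	return -2;				/* give up, report an error */
}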
* */ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; + int ret_val; + struct sk_buff *ans_skb = NULL; + u32 nlsze_mult = 1; + void *data; u32 doi; - struct nlattr *msg = netlbl_netlink_payload_data(skb); - struct sk_buff *ans_skb; + struct nlattr *nla_a; + struct nlattr *nla_b; + struct cipso_v4_doi *doi_def; + u32 iter; - if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) { + ret_val = -EINVAL; goto list_failure; + } - doi = nla_get_u32(msg); - ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN)); +list_start: + ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL); if (ans_skb == NULL) { ret_val = -ENOMEM; goto list_failure; } - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_LIST); + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_cipsov4_gnl_family.id, + 0, + NLBL_CIPSOV4_C_LIST); + if (data == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } + + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + ret_val = -EINVAL; + goto list_failure; + } + + ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); + if (ret_val != 0) + goto list_failure_lock; + + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_failure_lock; + } + for (iter = 0; + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID; + iter++) { + ret_val = nla_put_u8(ans_skb, + NLBL_CIPSOV4_A_TAG, + doi_def->tags[iter]); + if (ret_val != 0) + goto list_failure_lock; + } + nla_nest_end(ans_skb, nla_a); + + switch (doi_def->type) { + case CIPSO_V4_MAP_STD: + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_failure_lock; + } + for (iter = 0; + iter < doi_def->map.std->lvl.local_size; + iter++) { + if (doi_def->map.std->lvl.local[iter] == + CIPSO_V4_INV_LVL) + continue; + + nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL); + if (nla_b == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSLVLLOC, + iter); + if (ret_val != 0) + goto list_retry; + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSLVLREM, + doi_def->map.std->lvl.local[iter]); + if (ret_val != 0) + goto list_retry; + nla_nest_end(ans_skb, nla_b); + } + nla_nest_end(ans_skb, nla_a); + + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + for (iter = 0; + iter < doi_def->map.std->cat.local_size; + iter++) { + if (doi_def->map.std->cat.local[iter] == + CIPSO_V4_INV_CAT) + continue; + + nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT); + if (nla_b == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSCATLOC, + iter); + if (ret_val != 0) + goto list_retry; + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSCATREM, + doi_def->map.std->cat.local[iter]); + if (ret_val != 0) + goto list_retry; + nla_nest_end(ans_skb, nla_b); + } + nla_nest_end(ans_skb, nla_a); + + break; + } + rcu_read_unlock(); - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); if (ret_val != 0) goto list_failure; return 0; +list_retry: + /* XXX - this limit is a guesstimate */ + if 
(nlsze_mult < 4) { + rcu_read_unlock(); + kfree_skb(ans_skb); + nlsze_mult++; + goto list_start; + } +list_failure_lock: + rcu_read_unlock(); list_failure: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); + kfree_skb(ans_skb); + return ret_val; +} + +/** + * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL + * @doi_def: the CIPSOv4 DOI definition + * @arg: the netlbl_cipsov4_doiwalk_arg structure + * + * Description: + * This function is designed to be used as a callback to the + * cipso_v4_doi_walk() function for use in generating a response for a LISTALL + * message. Returns the size of the message on success, negative values on + * failure. + * + */ +static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; + void *data; + + data = netlbl_netlink_hdr_put(cb_arg->skb, + NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, + netlbl_cipsov4_gnl_family.id, + NLM_F_MULTI, + NLBL_CIPSOV4_C_LISTALL); + if (data == NULL) + goto listall_cb_failure; + + ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi); + if (ret_val != 0) + goto listall_cb_failure; + ret_val = nla_put_u32(cb_arg->skb, + NLBL_CIPSOV4_A_MTYPE, + doi_def->type); + if (ret_val != 0) + goto listall_cb_failure; + + return genlmsg_end(cb_arg->skb, data); + +listall_cb_failure: + genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_cipsov4_listall - Handle a LISTALL message * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block + * @cb: the NETLINK callback * * Description: * Process a user generated LISTALL message and respond accordingly. Returns * zero on success and negative values on error. 
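The list handler above deliberately over-allocates on retry rather than pre-computing the reply size: when the nla_put_*() calls overflow the skb, the partial buffer is freed, the size multiplier is bumped, and the whole reply is rebuilt. Reduced to its essentials the pattern looks like the sketch below; fill_reply() is a hypothetical stand-in for the attribute writes, and the cap of 4 mirrors the guesstimate in the code above.

/* Minimal sketch of the grow-and-retry reply pattern used by
 * netlbl_cipsov4_list(); fill_reply() is a hypothetical helper that
 * returns non-zero when the attributes do not fit. */
static int reply_with_retry(struct genl_info *info)
{
	struct sk_buff *ans_skb;
	u32 mult = 1;
	int ret_val;

retry:
	ans_skb = nlmsg_new(NLMSG_GOODSIZE * mult, GFP_KERNEL);
	if (ans_skb == NULL)
		return -ENOMEM;
	ret_val = fill_reply(ans_skb, info);
	if (ret_val != 0) {
		kfree_skb(ans_skb);
		if (mult < 4) {		/* same guesstimate cap as above */
			mult++;
			goto retry;
		}
		return ret_val;
	}
	return genlmsg_unicast(ans_skb, info->snd_pid);
}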
* */ -static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info) +static int netlbl_cipsov4_listall(struct sk_buff *skb, + struct netlink_callback *cb) { - int ret_val = -EINVAL; - struct sk_buff *ans_skb; + struct netlbl_cipsov4_doiwalk_arg cb_arg; + int doi_skip = cb->args[0]; - ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) { - ret_val = -ENOMEM; - goto listall_failure; - } - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_LISTALL); + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto listall_failure; - - return 0; + cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg); -listall_failure: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); - return ret_val; + cb->args[0] = doi_skip; + return skb->len; } /** @@ -445,27 +652,14 @@ listall_failure: */ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { - int ret_val; + int ret_val = -EINVAL; u32 doi; - struct nlattr *msg = netlbl_netlink_payload_data(skb); - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto remove_return; - - if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto remove_return; + if (info->attrs[NLBL_CIPSOV4_A_DOI]) { + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); } - doi = nla_get_u32(msg); - ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); - -remove_return: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); return ret_val; } @@ -475,14 +669,16 @@ remove_return: static struct genl_ops netlbl_cipsov4_genl_c_add = { .cmd = NLBL_CIPSOV4_C_ADD, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_add, .dumpit = NULL, }; static struct genl_ops netlbl_cipsov4_genl_c_remove = { .cmd = NLBL_CIPSOV4_C_REMOVE, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }; @@ -490,6 +686,7 @@ static struct genl_ops netlbl_cipsov4_genl_c_remove = { static struct genl_ops netlbl_cipsov4_genl_c_list = { .cmd = NLBL_CIPSOV4_C_LIST, .flags = 0, + .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_list, .dumpit = NULL, }; @@ -497,8 +694,9 @@ static struct genl_ops netlbl_cipsov4_genl_c_list = { static struct genl_ops netlbl_cipsov4_genl_c_listall = { .cmd = NLBL_CIPSOV4_C_LISTALL, .flags = 0, - .doit = netlbl_cipsov4_listall, - .dumpit = NULL, + .policy = netlbl_cipsov4_genl_policy, + .doit = NULL, + .dumpit = netlbl_cipsov4_listall, }; /* diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index 4c6ff4b93004..f03cf9b78286 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -34,175 +34,71 @@ #include <net/netlabel.h> /* - * The following NetLabel payloads are supported by the CIPSO subsystem, all - * of which are preceeded by the nlmsghdr struct. + * The following NetLabel payloads are supported by the CIPSO subsystem. * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. + * o ADD: + * Sent by an application to add a new DOI mapping table. 
* - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ + * Required attributes: * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values + * NLBL_CIPSOV4_A_DOI + * NLBL_CIPSOV4_A_MTYPE + * NLBL_CIPSOV4_A_TAGLST * - * o ADD: - * Sent by an application to add a new DOI mapping table, after completion - * of the task the kernel should ACK this message. - * - * +---------------+--------------------+---------------------+ - * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ... - * +---------------+--------------------+---------------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ - * - * +-------------- ---- --- -- - - * | mapping data - * +-------------- ---- --- -- - - * - * DOI: the DOI value - * map type: the mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * tag count: the number of tags, must be greater than zero - * tag: the CIPSO tag for the DOI, tags listed first are given - * higher priorirty when sending packets - * mapping data: specific to the map type (see below) - * - * CIPSO_V4_MAP_STD - * - * +------------------+-----------------------+----------------------+ - * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ... - * +------------------+-----------------------+----------------------+ - * - * +----------------------+---------------------+---------------------+ - * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ... - * +----------------------+---------------------+---------------------+ - * - * +--------------------------+-------------------------+ - * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated - * +--------------------------+-------------------------+ - * - * +-----------------------------+-----------------------------+ - * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated - * +-----------------------------+-----------------------------+ - * - * levels: the number of level mappings - * max l level: the highest local level - * max r level: the highest remote/CIPSO level - * categories: the number of category mappings - * max l cat: the highest local category - * max r cat: the highest remote/CIPSO category - * local level: the local part of a level mapping - * CIPSO level: the remote/CIPSO part of a level mapping - * local category: the local part of a category mapping - * CIPSO category: the remote/CIPSO part of a category mapping - * - * CIPSO_V4_MAP_PASS - * - * No mapping data is needed for this map type. + * If using CIPSO_V4_MAP_STD the following attributes are required: + * + * NLBL_CIPSOV4_A_MLSLVLLST + * NLBL_CIPSOV4_A_MLSCATLST + * + * If using CIPSO_V4_MAP_PASS no additional attributes are required. * * o REMOVE: * Sent by an application to remove a specific DOI mapping table from the - * CIPSO V4 system. The kernel should ACK this message. + * CIPSO V4 system. * - * +---------------+ - * | DOI (32 bits) | - * +---------------+ + * Required attributes: * - * DOI: the DOI value + * NLBL_CIPSOV4_A_DOI * * o LIST: - * Sent by an application to list the details of a DOI definition. The - * kernel should send an ACK on error or a response as indicated below. The - * application generated message format is shown below. + * Sent by an application to list the details of a DOI definition. 
On + * success the kernel should send a response using the following format. * - * +---------------+ - * | DOI (32 bits) | - * +---------------+ + * Required attributes: * - * DOI: the DOI value + * NLBL_CIPSOV4_A_DOI * * The valid response message format depends on the type of the DOI mapping, - * the known formats are shown below. - * - * +--------------------+ - * | map type (32 bits) | ... - * +--------------------+ - * - * map type: the DOI mapping table type (defined in the cipso_ipv4.h - * header as CIPSO_V4_MAP_*) - * - * (map type == CIPSO_V4_MAP_STD) - * - * +----------------+------------------+----------------------+ - * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ... - * +----------------+------------------+----------------------+ + * the defined formats are shown below. * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ + * Required attributes: * - * +--------------------------+-------------------------+ - * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated - * +--------------------------+-------------------------+ + * NLBL_CIPSOV4_A_MTYPE + * NLBL_CIPSOV4_A_TAGLST * - * +-----------------------------+-----------------------------+ - * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated - * +-----------------------------+-----------------------------+ + * If using CIPSO_V4_MAP_STD the following attributes are required: * - * tags: the number of CIPSO tag types - * levels: the number of level mappings - * categories: the number of category mappings - * tag: the tag number, tags listed first are given higher - * priority when sending packets - * local level: the local part of a level mapping - * CIPSO level: the remote/CIPSO part of a level mapping - * local category: the local part of a category mapping - * CIPSO category: the remote/CIPSO part of a category mapping + * NLBL_CIPSOV4_A_MLSLVLLST + * NLBL_CIPSOV4_A_MLSCATLST * - * (map type == CIPSO_V4_MAP_PASS) - * - * +----------------+ - * | tags (32 bits) | ... - * +----------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ - * - * tags: the number of CIPSO tag types - * tag: the tag number, tags listed first are given higher - * priority when sending packets + * If using CIPSO_V4_MAP_PASS no additional attributes are required. * * o LISTALL: * This message is sent by an application to list the valid DOIs on the - * system. There is no payload and the kernel should respond with an ACK - * or the following message. - * - * +---------------------+------------------+-----------------------+ - * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) | - * +---------------------+------------------+-----------------------+ + * system. When sent by an application there is no payload and the + * NLM_F_DUMP flag should be set. The kernel should respond with a series of + * the following messages. * - * +-----------------------+ - * | map type #X (32 bits) | ... 
- * +-----------------------+ + * Required attributes: * - * DOI count: the number of DOIs - * DOI: the DOI value - * map type: the DOI mapping table type (defined in the cipso_ipv4.h - * header as CIPSO_V4_MAP_*) + * NLBL_CIPSOV4_A_DOI + * NLBL_CIPSOV4_A_MTYPE * */ /* NetLabel CIPSOv4 commands */ enum { NLBL_CIPSOV4_C_UNSPEC, - NLBL_CIPSOV4_C_ACK, NLBL_CIPSOV4_C_ADD, NLBL_CIPSOV4_C_REMOVE, NLBL_CIPSOV4_C_LIST, @@ -211,6 +107,59 @@ enum { }; #define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) +/* NetLabel CIPSOv4 attributes */ +enum { + NLBL_CIPSOV4_A_UNSPEC, + NLBL_CIPSOV4_A_DOI, + /* (NLA_U32) + * the DOI value */ + NLBL_CIPSOV4_A_MTYPE, + /* (NLA_U32) + * the mapping table type (defined in the cipso_ipv4.h header as + * CIPSO_V4_MAP_*) */ + NLBL_CIPSOV4_A_TAG, + /* (NLA_U8) + * a CIPSO tag type, meant to be used within a NLBL_CIPSOV4_A_TAGLST + * attribute */ + NLBL_CIPSOV4_A_TAGLST, + /* (NLA_NESTED) + * the CIPSO tag list for the DOI, there must be at least one + * NLBL_CIPSOV4_A_TAG attribute, tags listed first are given higher + * priority when sending packets */ + NLBL_CIPSOV4_A_MLSLVLLOC, + /* (NLA_U32) + * the local MLS sensitivity level */ + NLBL_CIPSOV4_A_MLSLVLREM, + /* (NLA_U32) + * the remote MLS sensitivity level */ + NLBL_CIPSOV4_A_MLSLVL, + /* (NLA_NESTED) + * a MLS sensitivity level mapping, must contain only one attribute of + * each of the following types: NLBL_CIPSOV4_A_MLSLVLLOC and + * NLBL_CIPSOV4_A_MLSLVLREM */ + NLBL_CIPSOV4_A_MLSLVLLST, + /* (NLA_NESTED) + * the CIPSO level mappings, there must be at least one + * NLBL_CIPSOV4_A_MLSLVL attribute */ + NLBL_CIPSOV4_A_MLSCATLOC, + /* (NLA_U32) + * the local MLS category */ + NLBL_CIPSOV4_A_MLSCATREM, + /* (NLA_U32) + * the remote MLS category */ + NLBL_CIPSOV4_A_MLSCAT, + /* (NLA_NESTED) + * a MLS category mapping, must contain only one attribute of each of + * the following types: NLBL_CIPSOV4_A_MLSCATLOC and + * NLBL_CIPSOV4_A_MLSCATREM */ + NLBL_CIPSOV4_A_MLSCATLST, + /* (NLA_NESTED) + * the CIPSO category mappings, there must be at least one + * NLBL_CIPSOV4_A_MLSCAT attribute */ + __NLBL_CIPSOV4_A_MAX, +}; +#define NLBL_CIPSOV4_A_MAX (__NLBL_CIPSOV4_A_MAX - 1) + /* NetLabel protocol functions */ int netlbl_cipsov4_genl_init(void); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 0489a1378101..f56d7a8ac7b7 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -354,160 +354,51 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) } /** - * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff + * netlbl_domhsh_walk - Iterate through the domain mapping hash table + * @skip_bkt: the number of buckets to skip at the start + * @skip_chain: the number of entries to skip in the first iterated bucket + * @callback: callback for each entry + * @cb_arg: argument for the callback function * * Description: - * Dump the domain hash table into a buffer suitable for returning to an - * application in response to a NetLabel management DOMAIN message. This - * function may fail if another process is growing the hash table at the same - * time. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a - * pointer to a sk_buff on success, NULL on error. + * Iterate over the domain mapping hash table, skipping the first @skip_bkt + buckets and @skip_chain entries.
For each entry in the table call + * @callback, if @callback returns a negative value stop 'walking' through the + * table and return. Updates the values in @skip_bkt and @skip_chain on + * return. Returns zero on success, negative values on failure. * */ -struct sk_buff *netlbl_domhsh_dump(size_t headroom) +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - ssize_t buf_len; - u32 bkt_iter; - u32 dom_cnt = 0; - struct netlbl_domhsh_tbl *hsh_tbl; - struct netlbl_dom_map *list_iter; - ssize_t tmp_len; + int ret_val = -ENOENT; + u32 iter_bkt; + struct netlbl_dom_map *iter_entry; + u32 chain_cnt = 0; - buf_len = NETLBL_LEN_U32; rcu_read_lock(); - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - buf_len += NETLBL_LEN_U32 + - nla_total_size(strlen(list_iter->domain) + 1); - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - dom_cnt++; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_failure; - - if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32; - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - tmp_len = nla_total_size(strlen(list_iter->domain) + - 1); - if (buf_len < NETLBL_LEN_U32 + tmp_len) - goto dump_failure; - if (nla_put_string(skb, - NLA_STRING, - list_iter->domain) != 0) - goto dump_failure; - if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32 + tmp_len; - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->type) != 0) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->doi) != 0) - goto dump_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; + for (iter_bkt = *skip_bkt; + iter_bkt < rcu_dereference(netlbl_domhsh)->size; + iter_bkt++, chain_cnt = 0) { + list_for_each_entry_rcu(iter_entry, + &netlbl_domhsh->tbl[iter_bkt], + list) + if (iter_entry->valid) { + if (chain_cnt++ < *skip_chain) + continue; + ret_val = callback(iter_entry, cb_arg); + if (ret_val < 0) { + chain_cnt--; + goto walk_return; + } } - } - rcu_read_unlock(); - - return skb; - -dump_failure: - rcu_read_unlock(); - kfree_skb(skb); - return NULL; -} - -/** - * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff - * - * Description: - * Dump the default domain mapping into a buffer suitable for returning to an - * application in response to a NetLabel management DEFDOMAIN message. This - * function may fail if another process is changing the default domain mapping - * at the same time. The returned sk_buff has room at the front of the - * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message - * format. Returns a pointer to a sk_buff on success, NULL on error.
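For comparison, the userspace side of the attribute-based interface defined above is plain generic netlink. A minimal sketch of a LIST request using the libnl-3 API (an assumption, as are the family name string "NLBL_CIPSOv4" and protocol version 1; the kernel registers these through NETLBL_NLTYPE_CIPSOV4_NAME and NETLBL_PROTO_VERSION, whose values are not shown in this patch):

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

/* Request one DOI definition via NLBL_CIPSOV4_C_LIST; the reply carries
 * NLBL_CIPSOV4_A_MTYPE, NLBL_CIPSOV4_A_TAGLST and, for standard
 * mappings, the nested level/category lists described above. */
static int cipsov4_list(uint32_t doi)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	int family, ret_val;

	sk = nl_socket_alloc();
	if (sk == NULL)
		return -ENOMEM;
	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "NLBL_CIPSOv4");	/* assumed name */
	if (family < 0) {
		nl_socket_free(sk);
		return family;
	}
	msg = nlmsg_alloc();
	if (msg == NULL) {
		nl_socket_free(sk);
		return -ENOMEM;
	}
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NLBL_CIPSOV4_C_LIST, 1);		/* assumed version */
	nla_put_u32(msg, NLBL_CIPSOV4_A_DOI, doi);	/* the required attribute */
	ret_val = nl_send_auto_complete(sk, msg);
	if (ret_val >= 0)
		ret_val = nl_recvmsgs_default(sk);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return ret_val;
}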
- * - */ -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom) -{ - struct sk_buff *skb; - ssize_t buf_len; - struct netlbl_dom_map *entry; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - entry = rcu_dereference(netlbl_domhsh_def); - if (entry != NULL) - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_default_failure; - - if (entry != rcu_dereference(netlbl_domhsh_def)) - goto dump_default_failure; - if (entry != NULL) { - if (nla_put_u32(skb, NLA_U32, entry->type) != 0) - goto dump_default_failure; - buf_len -= NETLBL_LEN_U32; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->type) != 0) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->doi) != 0) - goto dump_default_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; - } - } else - nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE); - rcu_read_unlock(); - - return skb; + } -dump_default_failure: +walk_return: rcu_read_unlock(); - kfree_skb(skb); - return NULL; + *skip_bkt = iter_bkt; + *skip_chain = chain_cnt; + return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 99a2287de246..02af72a7877c 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -61,7 +61,9 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); int netlbl_domhsh_remove_default(void); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct sk_buff *netlbl_domhsh_dump(size_t headroom); -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg); #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 0fd8aaafe23f..54fb7de3c2b1 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -85,6 +85,29 @@ socket_setattr_return: } /** + * netlbl_sock_getattr - Determine the security attributes of a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Examines the given sock to see if any NetLabel style labeling has been + * applied to the sock, if so it parses the socket label and returns the + * security attributes in @secattr. Returns zero on success, negative values + * on failure.
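The walker's cursor-in-caller design is what makes the dump restartable: the callback sees only valid entries, a negative return stops the walk mid-table, and the updated cursor comes back to the caller. A hypothetical consumer makes the contract concrete:

/* Hypothetical netlbl_domhsh_walk() consumer: count the mappings. */
static int domhsh_count_cb(struct netlbl_dom_map *entry, void *arg)
{
	u32 *count = arg;

	(*count)++;
	return 0;	/* non-negative: keep walking */
}

static u32 domhsh_count(void)
{
	u32 skip_bkt = 0;	/* cursor in, updated value out */
	u32 skip_chain = 0;
	u32 count = 0;

	/* Returns -ENOENT once the table is exhausted; a dumpit handler
	 * instead parks the two cursor values in cb->args[0] and
	 * cb->args[1] between invocations, as netlbl_mgmt_listall()
	 * does later in this patch. */
	netlbl_domhsh_walk(&skip_bkt, &skip_chain, domhsh_count_cb, &count);
	return count;
}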
+ * + */ +int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_sock_getattr(sk, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** * netlbl_socket_getattr - Determine the security attributes of a socket * @sock: the socket * @secattr: the security attributes diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 85bc11a1fc46..8626c9f678eb 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -42,15 +42,29 @@ #include "netlabel_user.h" #include "netlabel_mgmt.h" +/* Argument struct for netlbl_domhsh_walk() */ +struct netlbl_domhsh_walk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_mgmt_gnl_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_MGMT_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { + [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING }, + [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 }, + [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 }, + [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 }, +}; /* * NetLabel Command Handlers @@ -70,97 +84,62 @@ static struct genl_family netlbl_mgmt_gnl_family = { static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - u32 count; struct netlbl_dom_map *entry = NULL; - u32 iter; + size_t tmp_size; u32 tmp_val; - int tmp_size; - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) + if (!info->attrs[NLBL_MGMT_A_DOMAIN] || + !info->attrs[NLBL_MGMT_A_PROTOCOL]) goto add_failure; - if (msg_len < NETLBL_LEN_U32) + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; goto add_failure; - count = netlbl_getinc_u32(&msg_ptr, &msg_len); + } + entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); - for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) { - if (msg_len <= 0) { - ret_val = -EINVAL; - goto add_failure; - } - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } - tmp_size = nla_len(msg_ptr); - if (tmp_size <= 0 || tmp_size > msg_len) { - ret_val = -EINVAL; - goto add_failure; - } - entry->domain = kmalloc(tmp_size, GFP_KERNEL); - if (entry->domain == NULL) { - ret_val = -ENOMEM; + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = netlbl_domhsh_add(entry); + break; + case NETLBL_NLTYPE_CIPSOV4: + if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto add_failure; - } - nla_strlcpy(entry->domain, msg_ptr, tmp_size); - entry->domain[tmp_size - 1] = '\0'; - msg_ptr = nla_next(msg_ptr, &msg_len); - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto add_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - entry->type = tmp_val; - switch (tmp_val) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (msg_len < 
NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto add_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - /* We should be holding a rcu_read_lock() here - * while we hold the result but since the entry - * will always be deleted when the CIPSO DOI - * is deleted we aren't going to keep the lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); - ret_val = -EINVAL; - goto add_failure; - } - ret_val = netlbl_domhsh_add(entry); + tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); + /* We should be holding a rcu_read_lock() here while we hold + * the result but since the entry will always be deleted when + * the CIPSO DOI is deleted we aren't going to keep the + * lock. */ + rcu_read_lock(); + entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (entry->type_def.cipsov4 == NULL) { rcu_read_unlock(); - break; - default: - ret_val = -EINVAL; - } - if (ret_val != 0) goto add_failure; + } + ret_val = netlbl_domhsh_add(entry); + rcu_read_unlock(); + break; + default: + goto add_failure; } + if (ret_val != 0) + goto add_failure; - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - NETLBL_E_OK); return 0; add_failure: if (entry) kfree(entry->domain); kfree(entry); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); return ret_val; } @@ -176,87 +155,98 @@ add_failure: */ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - u32 count; - u32 iter; - int tmp_size; - unsigned char *domain; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto remove_return; + char *domain; - if (msg_len < NETLBL_LEN_U32) - goto remove_return; - count = netlbl_getinc_u32(&msg_ptr, &msg_len); + if (!info->attrs[NLBL_MGMT_A_DOMAIN]) + return -EINVAL; - for (iter = 0; iter < count && msg_len > 0; iter++) { - if (msg_len <= 0) { - ret_val = -EINVAL; - goto remove_return; - } - tmp_size = nla_len(msg_ptr); - domain = nla_data(msg_ptr); - if (tmp_size <= 0 || tmp_size > msg_len || - domain[tmp_size - 1] != '\0') { - ret_val = -EINVAL; - goto remove_return; - } - ret_val = netlbl_domhsh_remove(domain); + domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); + return netlbl_domhsh_remove(domain); +} + +/** + * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL + * @entry: the domain mapping hash table entry + * @arg: the netlbl_domhsh_walk_arg structure + * + * Description: + * This function is designed to be used as a callback to the + * netlbl_domhsh_walk() function for use in generating a response for a LISTALL + * message. Returns the size of the message on success, negative values on + * failure. 
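Since each callback invocation emits one self-contained message flagged NLM_F_MULTI, userspace reads a LISTALL dump as a multipart stream terminated by NLMSG_DONE. A sketch of the receive side, again assuming libnl-3 and using the NLBL_MGMT_A_* attribute ids defined in netlabel_mgmt.h below:

#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>

/* Per-message callback for an NLBL_MGMT_C_LISTALL dump; register it
 * with nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM,
 * mgmt_listall_cb, NULL) before calling nl_recvmsgs_default(). */
static int mgmt_listall_cb(struct nl_msg *msg, void *arg)
{
	struct nlattr *attrs[NLBL_MGMT_A_MAX + 1];

	if (genlmsg_parse(nlmsg_hdr(msg), 0, attrs, NLBL_MGMT_A_MAX, NULL))
		return NL_SKIP;
	if (attrs[NLBL_MGMT_A_DOMAIN] && attrs[NLBL_MGMT_A_PROTOCOL])
		printf("domain \"%s\" -> protocol %u\n",
		       nla_get_string(attrs[NLBL_MGMT_A_DOMAIN]),
		       nla_get_u32(attrs[NLBL_MGMT_A_PROTOCOL]));
	return NL_OK;	/* the stream ends with an NLMSG_DONE message */
}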
+ * + */ +static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_domhsh_walk_arg *cb_arg = arg; + void *data; + + data = netlbl_netlink_hdr_put(cb_arg->skb, + NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, + netlbl_mgmt_gnl_family.id, + NLM_F_MULTI, + NLBL_MGMT_C_LISTALL); + if (data == NULL) + goto listall_cb_failure; + + ret_val = nla_put_string(cb_arg->skb, + NLBL_MGMT_A_DOMAIN, + entry->domain); + if (ret_val != 0) + goto listall_cb_failure; + ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + goto listall_cb_failure; + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(cb_arg->skb, + NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); if (ret_val != 0) - goto remove_return; - msg_ptr = nla_next(msg_ptr, &msg_len); + goto listall_cb_failure; + break; } - ret_val = 0; + cb_arg->seq++; + return genlmsg_end(cb_arg->skb, data); -remove_return: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); +listall_cb_failure: + genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** - * netlbl_mgmt_list - Handle a LIST message + * netlbl_mgmt_listall - Handle a LISTALL message * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block + * @cb: the NETLINK callback * * Description: - * Process a user generated LIST message and dumps the domain hash table in a - * form suitable for use in a kernel generated LIST message. Returns zero on - * success, negative values on failure. + * Process a user generated LISTALL message and dumps the domain hash table in + * a form suitable for use in a kernel generated LISTALL message. Returns zero + * on success, negative values on failure. 
* */ -static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_listall(struct sk_buff *skb, + struct netlink_callback *cb) { - int ret_val = -ENOMEM; - struct sk_buff *ans_skb; - - ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) - goto list_failure; - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_LIST); - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto list_failure; - - return 0; - -list_failure: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; + struct netlbl_domhsh_walk_arg cb_arg; + u32 skip_bkt = cb->args[0]; + u32 skip_chain = cb->args[1]; + + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; + + netlbl_domhsh_walk(&skip_bkt, + &skip_chain, + netlbl_mgmt_listall_cb, + &cb_arg); + + cb->args[0] = skip_bkt; + cb->args[1] = skip_chain; + return skb->len; } /** @@ -272,68 +262,51 @@ list_failure: static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); struct netlbl_dom_map *entry = NULL; u32 tmp_val; - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto adddef_failure; - - if (msg_len < NETLBL_LEN_U32) + if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) goto adddef_failure; - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; goto adddef_failure; } + entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - entry->type = tmp_val; switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: ret_val = netlbl_domhsh_add_default(entry); break; case NETLBL_NLTYPE_CIPSOV4: - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; + if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto adddef_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - /* We should be holding a rcu_read_lock here while we - * hold the result but since the entry will always be - * deleted when the CIPSO DOI is deleted we are going - * to skip the lock. */ + + tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); + /* We should be holding a rcu_read_lock() here while we hold + * the result but since the entry will always be deleted when + * the CIPSO DOI is deleted we aren't going to keep the + * lock. 
*/ rcu_read_lock(); entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); if (entry->type_def.cipsov4 == NULL) { rcu_read_unlock(); - ret_val = -EINVAL; goto adddef_failure; } ret_val = netlbl_domhsh_add_default(entry); rcu_read_unlock(); break; default: - ret_val = -EINVAL; + goto adddef_failure; } if (ret_val != 0) goto adddef_failure; - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - NETLBL_E_OK); return 0; adddef_failure: kfree(entry); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); return ret_val; } @@ -349,20 +322,7 @@ adddef_failure: */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - int ret_val; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto removedef_return; - - ret_val = netlbl_domhsh_remove_default(); - -removedef_return: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; + return netlbl_domhsh_remove_default(); } /** @@ -379,88 +339,131 @@ removedef_return: static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) { int ret_val = -ENOMEM; - struct sk_buff *ans_skb; + struct sk_buff *ans_skb = NULL; + void *data; + struct netlbl_dom_map *entry; - ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN)); + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (ans_skb == NULL) + return -ENOMEM; + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_mgmt_gnl_family.id, + 0, + NLBL_MGMT_C_LISTDEF); + if (data == NULL) goto listdef_failure; - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_LISTDEF); - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + rcu_read_lock(); + entry = netlbl_domhsh_getentry(NULL); + if (entry == NULL) { + ret_val = -ENOENT; + goto listdef_failure_lock; + } + ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); if (ret_val != 0) - goto listdef_failure; + goto listdef_failure_lock; + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(ans_skb, + NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + if (ret_val != 0) + goto listdef_failure_lock; + break; + } + rcu_read_unlock(); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + if (ret_val != 0) + goto listdef_failure; return 0; +listdef_failure_lock: + rcu_read_unlock(); listdef_failure: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); + kfree_skb(ans_skb); return ret_val; } /** - * netlbl_mgmt_modules - Handle a MODULES message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block + * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response + * @skb: the skb to write to + * @seq: the NETLINK sequence number + * @cb: the NETLINK callback + * @protocol: the NetLabel protocol to use in the message * * Description: - * Process a user generated MODULES message and respond accordingly. + * This function is to be used in conjunction with netlbl_mgmt_protocols() to + * answer an application's PROTOCOLS message. Returns the size of the message + * on success, negative values on failure.
* */ -static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, + struct netlink_callback *cb, + u32 protocol) { int ret_val = -ENOMEM; - size_t data_size; - u32 mod_count; - struct sk_buff *ans_skb = NULL; - - /* unlabeled + cipsov4 */ - mod_count = 2; - - data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32; - ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (ans_skb == NULL) - goto modules_failure; - - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_MODULES) == NULL) - goto modules_failure; - - ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count); - if (ret_val != 0) - goto modules_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED); + void *data; + + data = netlbl_netlink_hdr_put(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + netlbl_mgmt_gnl_family.id, + NLM_F_MULTI, + NLBL_MGMT_C_PROTOCOLS); + if (data == NULL) + goto protocols_cb_failure; + + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol); if (ret_val != 0) - goto modules_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4); - if (ret_val != 0) - goto modules_failure; - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto modules_failure; + goto protocols_cb_failure; - return 0; + return genlmsg_end(skb, data); -modules_failure: - kfree_skb(ans_skb); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); +protocols_cb_failure: + genlmsg_cancel(skb, data); return ret_val; } /** + * netlbl_mgmt_protocols - Handle a PROTOCOLS message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated PROTOCOLS message and respond accordingly. 
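netlbl_mgmt_protocols() below is about the smallest possible dumpit handler: a fixed two-item list whose resume point lives in cb->args[0]. The general contract it follows is sketched here; emit_one() and NUM_ITEMS are hypothetical placeholders:

/* General shape of a genetlink dumpit: the netlink core calls it
 * repeatedly until it makes no progress, and cb->args[] persists
 * between calls. */
static int example_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	u32 done = cb->args[0];		/* resume where the last call stopped */

	while (done < NUM_ITEMS) {
		if (emit_one(skb, cb, done) < 0)
			break;		/* skb full, resume here next time */
		done++;
	}

	cb->args[0] = done;
	return skb->len;		/* no progress (0) ends the dump */
}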
+ * + */ +static int netlbl_mgmt_protocols(struct sk_buff *skb, + struct netlink_callback *cb) +{ + u32 protos_sent = cb->args[0]; + + if (protos_sent == 0) { + if (netlbl_mgmt_protocols_cb(skb, + cb, + NETLBL_NLTYPE_UNLABELED) < 0) + goto protocols_return; + protos_sent++; + } + if (protos_sent == 1) { + if (netlbl_mgmt_protocols_cb(skb, + cb, + NETLBL_NLTYPE_CIPSOV4) < 0) + goto protocols_return; + protos_sent++; + } + +protocols_return: + cb->args[0] = protos_sent; + return skb->len; +} + +/** * netlbl_mgmt_version - Handle a VERSION message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block @@ -474,35 +477,35 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) { int ret_val = -ENOMEM; struct sk_buff *ans_skb = NULL; + void *data; - ans_skb = netlbl_netlink_alloc_skb(0, - GENL_HDRLEN + NETLBL_LEN_U32, - GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (ans_skb == NULL) - goto version_failure; - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_VERSION) == NULL) + return -ENOMEM; + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_mgmt_gnl_family.id, + 0, + NLBL_MGMT_C_VERSION); + if (data == NULL) goto version_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION); + ret_val = nla_put_u32(ans_skb, + NLBL_MGMT_A_VERSION, + NETLBL_PROTO_VERSION); if (ret_val != 0) goto version_failure; - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); if (ret_val != 0) goto version_failure; - return 0; version_failure: kfree_skb(ans_skb); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); return ret_val; } @@ -513,35 +516,40 @@ version_failure: static struct genl_ops netlbl_mgmt_genl_c_add = { .cmd = NLBL_MGMT_C_ADD, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_add, .dumpit = NULL, }; static struct genl_ops netlbl_mgmt_genl_c_remove = { .cmd = NLBL_MGMT_C_REMOVE, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_remove, .dumpit = NULL, }; -static struct genl_ops netlbl_mgmt_genl_c_list = { - .cmd = NLBL_MGMT_C_LIST, +static struct genl_ops netlbl_mgmt_genl_c_listall = { + .cmd = NLBL_MGMT_C_LISTALL, .flags = 0, - .doit = netlbl_mgmt_list, - .dumpit = NULL, + .policy = netlbl_mgmt_genl_policy, + .doit = NULL, + .dumpit = netlbl_mgmt_listall, }; static struct genl_ops netlbl_mgmt_genl_c_adddef = { .cmd = NLBL_MGMT_C_ADDDEF, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }; static struct genl_ops netlbl_mgmt_genl_c_removedef = { .cmd = NLBL_MGMT_C_REMOVEDEF, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }; @@ -549,20 +557,23 @@ static struct genl_ops netlbl_mgmt_genl_c_removedef = { static struct genl_ops netlbl_mgmt_genl_c_listdef = { .cmd = NLBL_MGMT_C_LISTDEF, .flags = 0, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }; -static struct genl_ops netlbl_mgmt_genl_c_modules = { - .cmd = NLBL_MGMT_C_MODULES, +static struct genl_ops netlbl_mgmt_genl_c_protocols = { + .cmd = NLBL_MGMT_C_PROTOCOLS, .flags = 0, - .doit = netlbl_mgmt_modules, - .dumpit = NULL, + .policy = netlbl_mgmt_genl_policy, + .doit = NULL, + .dumpit = 
netlbl_mgmt_protocols, }; static struct genl_ops netlbl_mgmt_genl_c_version = { .cmd = NLBL_MGMT_C_VERSION, .flags = 0, + .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_version, .dumpit = NULL, }; @@ -596,7 +607,7 @@ int netlbl_mgmt_genl_init(void) if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_list); + &netlbl_mgmt_genl_c_listall); if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, @@ -612,7 +623,7 @@ int netlbl_mgmt_genl_init(void) if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_modules); + &netlbl_mgmt_genl_c_protocols); if (ret_val != 0) return ret_val; ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index fd6c6acbfa08..3642d3bfc8eb 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -34,212 +34,137 @@ #include <net/netlabel.h> /* - * The following NetLabel payloads are supported by the management interface, - * all of which are preceeded by the nlmsghdr struct. - * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. - * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values + * The following NetLabel payloads are supported by the management interface. * * o ADD: * Sent by an application to add a domain mapping to the NetLabel system. - * The kernel should respond with an ACK. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ - * - * domains: the number of domains in the message - * - * +--------------------------+-------------------------+ - * | domain string (variable) | protocol type (32 bits) | ... - * +--------------------------+-------------------------+ * - * +-------------- ---- --- -- - - * | mapping data ... repeated - * +-------------- ---- --- -- - + * Required attributes: * - * domain string: the domain string, NULL terminated - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * NLBL_MGMT_A_DOMAIN + * NLBL_MGMT_A_PROTOCOL * - * NETLBL_NLTYPE_UNLABELED + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * No mapping data for this protocol type. + * NLBL_MGMT_A_CV4DOI * - * NETLBL_NLTYPE_CIPSOV4 - * - * +---------------+ - * | doi (32 bits) | - * +---------------+ - * - * doi: the CIPSO DOI value + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * * o REMOVE: * Sent by an application to remove a domain mapping from the NetLabel - * system. The kernel should ACK this message. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ + * system. * - * domains: the number of domains in the message + * Required attributes: * - * +--------------------------+ - * | domain string (variable) | ... - * +--------------------------+ + * NLBL_MGMT_A_DOMAIN * - * domain string: the domain string, NULL terminated - * - * o LIST: + * o LISTALL: * This message can be sent either from an application or by the kernel in - * response to an application generated LIST message. 
When sent by an - * application there is no payload. The kernel should respond to a LIST - * message either with a LIST message on success or an ACK message on - * failure. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ - * - * domains: the number of domains in the message + * response to an application generated LISTALL message. When sent by an + * application there is no payload and the NLM_F_DUMP flag should be set. + * The kernel should respond with a series of the following messages. * - * +--------------------------+ - * | domain string (variable) | ... - * +--------------------------+ + * Required attributes: * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - + * NLBL_MGMT_A_DOMAIN + * NLBL_MGMT_A_PROTOCOL * - * domain string: the domain string, NULL terminated - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * NETLBL_NLTYPE_UNLABELED + * NLBL_MGMT_A_CV4DOI * - * No mapping data for this protocol type. - * - * NETLBL_NLTYPE_CIPSOV4 - * - * +----------------+---------------+ - * | type (32 bits) | doi (32 bits) | - * +----------------+---------------+ - * - * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * doi: the CIPSO DOI value + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * * o ADDDEF: * Sent by an application to set the default domain mapping for the NetLabel - * system. The kernel should respond with an ACK. + * system. * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - + * Required attributes: * - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * NLBL_MGMT_A_PROTOCOL * - * NETLBL_NLTYPE_UNLABELED + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * No mapping data for this protocol type. + * NLBL_MGMT_A_CV4DOI * - * NETLBL_NLTYPE_CIPSOV4 - * - * +---------------+ - * | doi (32 bits) | - * +---------------+ - * - * doi: the CIPSO DOI value + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * * o REMOVEDEF: * Sent by an application to remove the default domain mapping from the - * NetLabel system, there is no payload. The kernel should ACK this message. + * NetLabel system, there is no payload. * * o LISTDEF: * This message can be sent either from an application or by the kernel in * response to an application generated LISTDEF message. When sent by an - * application there is no payload. The kernel should respond to a - * LISTDEF message either with a LISTDEF message on success or an ACK message - * on failure. - * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - + * application there is no payload. On success the kernel should send a + * response using the following format. * - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) + * Required attributes: * - * NETLBL_NLTYPE_UNLABELED + * NLBL_MGMT_A_PROTOCOL * - * No mapping data for this protocol type. 
+ * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * - * NETLBL_NLTYPE_CIPSOV4 + * NLBL_MGMT_A_CV4DOI * - * +----------------+---------------+ - * | type (32 bits) | doi (32 bits) | - * +----------------+---------------+ + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. * - * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * doi: the CIPSO DOI value + * o PROTOCOLS: + * Sent by an application to request a list of configured NetLabel protocols + * in the kernel. When sent by an application there is no payload and the + * NLM_F_DUMP flag should be set. The kernel should respond with a series of + * the following messages. * - * o MODULES: - * Sent by an application to request a list of configured NetLabel modules - * in the kernel. When sent by an application there is no payload. + * Required attributes: * - * +-------------------+ - * | modules (32 bits) | ... - * +-------------------+ - * - * modules: the number of modules in the message, if this is an application - * generated message and the value is zero then return a list of - * the configured modules - * - * +------------------+ - * | module (32 bits) | ... repeated - * +------------------+ - * - * module: the module number as defined by NETLBL_NLTYPE_* + * NLBL_MGMT_A_PROTOCOL * * o VERSION: - * Sent by an application to request the NetLabel version string. When sent - * by an application there is no payload. This message type is also used by - * the kernel to respond to an VERSION request. + * Sent by an application to request the NetLabel version. When sent by an + * application there is no payload. This message type is also used by the + * kernel to respond to a VERSION request. * - * +-------------------+ - * | version (32 bits) | - * +-------------------+ + * Required attributes: * - * version: the protocol version number + * NLBL_MGMT_A_VERSION * */ /* NetLabel Management commands */ enum { NLBL_MGMT_C_UNSPEC, - NLBL_MGMT_C_ACK, NLBL_MGMT_C_ADD, NLBL_MGMT_C_REMOVE, - NLBL_MGMT_C_LIST, + NLBL_MGMT_C_LISTALL, NLBL_MGMT_C_ADDDEF, NLBL_MGMT_C_REMOVEDEF, NLBL_MGMT_C_LISTDEF, - NLBL_MGMT_C_MODULES, + NLBL_MGMT_C_PROTOCOLS, NLBL_MGMT_C_VERSION, __NLBL_MGMT_C_MAX, }; #define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) +/* NetLabel Management attributes */ +enum { + NLBL_MGMT_A_UNSPEC, + NLBL_MGMT_A_DOMAIN, + /* (NLA_NUL_STRING) + * the NULL terminated LSM domain string */ + NLBL_MGMT_A_PROTOCOL, + /* (NLA_U32) + * the NetLabel protocol type (defined by NETLBL_NLTYPE_*) */ + NLBL_MGMT_A_VERSION, + /* (NLA_U32) + * the NetLabel protocol version number (defined by + * NETLBL_PROTO_VERSION) */ + NLBL_MGMT_A_CV4DOI, + /* (NLA_U32) + * the CIPSOv4 DOI value */ + __NLBL_MGMT_A_MAX, +}; +#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) + /* NetLabel protocol functions */ int netlbl_mgmt_genl_init(void); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 785f4960e0d3..440f5c4e1e2d 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -55,9 +55,13 @@ static struct genl_family netlbl_unlabel_gnl_family = { .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_UNLABEL_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { + [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, +}; /* * NetLabel Command Handlers @@ -75,31 +79,18 @@ static struct genl_family 
netlbl_unlabel_gnl_family = { */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { - int ret_val; - struct nlattr *data = netlbl_netlink_payload_data(skb); - u32 value; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - return ret_val; + int ret_val = -EINVAL; + u8 value; - if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) { - value = nla_get_u32(data); + if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { + value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { atomic_set(&netlabel_unlabel_accept_flg, value); - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - NETLBL_E_OK); - return 0; + ret_val = 0; } } - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - EINVAL); - return -EINVAL; + return ret_val; } /** @@ -114,39 +105,39 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) */ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -ENOMEM; + int ret_val = -EINVAL; struct sk_buff *ans_skb; + void *data; - ans_skb = netlbl_netlink_alloc_skb(0, - GENL_HDRLEN + NETLBL_LEN_U32, - GFP_KERNEL); + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (ans_skb == NULL) goto list_failure; - - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_LIST) == NULL) + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_unlabel_gnl_family.id, + 0, + NLBL_UNLABEL_C_LIST); + if (data == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } - ret_val = nla_put_u32(ans_skb, - NLA_U32, - atomic_read(&netlabel_unlabel_accept_flg)); + ret_val = nla_put_u8(ans_skb, + NLBL_UNLABEL_A_ACPTFLG, + atomic_read(&netlabel_unlabel_accept_flg)); if (ret_val != 0) goto list_failure; - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); if (ret_val != 0) goto list_failure; - return 0; list_failure: - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - -ret_val); + kfree_skb(ans_skb); return ret_val; } @@ -157,7 +148,8 @@ list_failure: static struct genl_ops netlbl_unlabel_genl_c_accept = { .cmd = NLBL_UNLABEL_C_ACCEPT, - .flags = 0, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_accept, .dumpit = NULL, }; @@ -165,6 +157,7 @@ static struct genl_ops netlbl_unlabel_genl_c_accept = { static struct genl_ops netlbl_unlabel_genl_c_list = { .cmd = NLBL_UNLABEL_C_LIST, .flags = 0, + .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_list, .dumpit = NULL, }; @@ -218,10 +211,8 @@ int netlbl_unlabel_genl_init(void) */ int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) { - if (atomic_read(&netlabel_unlabel_accept_flg) == 1) { - memset(secattr, 0, sizeof(*secattr)); - return 0; - } + if (atomic_read(&netlabel_unlabel_accept_flg) == 1) + return netlbl_secattr_init(secattr); return -ENOMSG; } diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index f300e54e14b6..c2917fbb42cf 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -36,56 +36,47 @@ /* * The following NetLabel payloads are supported by the Unlabeled subsystem. * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. 
- * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values - * * o ACCEPT * This message is sent from an application to specify if the kernel should * allow unlabeled packets to pass if they do not match any of the static * mappings defined in the unlabeled module. * - * +-----------------+ - * | allow (32 bits) | - * +-----------------+ + * Required attributes: * - * allow: if true (1) then allow the packets to pass, if false (0) then - * reject the packets + * NLBL_UNLABEL_A_ACPTFLG * * o LIST * This message can be sent either from an application or by the kernel in * response to an application generated LIST message. When sent by an * application there is no payload. The kernel should respond to a LIST - * message either with a LIST message on success or an ACK message on - * failure. + * message with a LIST message on success. * - * +-----------------------+ - * | accept flag (32 bits) | - * +-----------------------+ + * Required attributes: * - * accept flag: if true (1) then unlabeled packets are allowed to pass, - * if false (0) then unlabeled packets are rejected + * NLBL_UNLABEL_A_ACPTFLG * */ /* NetLabel Unlabeled commands */ enum { NLBL_UNLABEL_C_UNSPEC, - NLBL_UNLABEL_C_ACK, NLBL_UNLABEL_C_ACCEPT, NLBL_UNLABEL_C_LIST, __NLBL_UNLABEL_C_MAX, }; #define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) +/* NetLabel Unlabeled attributes */ +enum { + NLBL_UNLABEL_A_UNSPEC, + NLBL_UNLABEL_A_ACPTFLG, + /* (NLA_U8) + * if true then unlabeled packets are allowed to pass, else unlabeled + * packets are rejected */ + __NLBL_UNLABEL_A_MAX, +}; +#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1) + /* NetLabel protocol functions */ int netlbl_unlabel_genl_init(void); diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 73cbe66e42ff..eeb7d768d2bb 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -74,85 +74,3 @@ int netlbl_netlink_init(void) return 0; } - -/* - * NetLabel Common Protocol Functions - */ - -/** - * netlbl_netlink_send_ack - Send an ACK message - * @info: the generic NETLINK information - * @genl_family: the generic NETLINK family ID value - * @ack_cmd: the generic NETLINK family ACK command value - * @ret_code: return code to use - * - * Description: - * This function sends an ACK message to the sender of the NETLINK message - * specified by @info. - * - */ -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code) -{ - size_t data_size; - struct sk_buff *skb; - - data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32; - skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (skb == NULL) - return; - - if (netlbl_netlink_hdr_put(skb, - info->snd_pid, - 0, - genl_family, - ack_cmd) == NULL) - goto send_ack_failure; - - if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0) - goto send_ack_failure; - if (nla_put_u32(skb, NLA_U32, ret_code) != 0) - goto send_ack_failure; - - netlbl_netlink_snd(skb, info->snd_pid); - return; - -send_ack_failure: - kfree_skb(skb); -} - -/* - * NETLINK I/O Functions - */ - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: destination PID - * - * Description: - * Sends a unicast NetLabel message over the NETLINK socket. 
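With the ACK command gone, toggling the accept flag from userspace is a single ACCEPT request, and the result comes back through the standard netlink ACK/ERR mechanism instead of a NetLabel-specific reply. A libnl-3 sketch (the library choice and the family resolution are assumptions; the family registers under the NETLBL_NLTYPE_UNLABELED_NAME string, whose value is not shown in this patch):

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>

/* Enable (1) or disable (0) acceptance of unlabeled packets; with
 * GENL_ADMIN_PERM now set on the op, the caller needs CAP_NET_ADMIN. */
static int unlbl_set_accept(struct nl_sock *sk, int family, uint8_t allow)
{
	struct nl_msg *msg;
	int ret_val;

	msg = nlmsg_alloc();
	if (msg == NULL)
		return -ENOMEM;
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NLBL_UNLABEL_C_ACCEPT, 1);	/* assumed version */
	nla_put_u8(msg, NLBL_UNLABEL_A_ACPTFLG, allow ? 1 : 0);
	ret_val = nl_send_auto_complete(sk, msg);
	if (ret_val >= 0)
		ret_val = nl_wait_for_ack(sk);	/* standard netlink ACK */
	nlmsg_free(msg);
	return ret_val;
}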
- * - */ -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) -{ - return genlmsg_unicast(skb, pid); -} - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: sending PID - * @group: multicast group id - * - * Description: - * Sends a multicast NetLabel message over the NETLINK socket to all members - * of @group except @pid. - * - */ -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) -{ - return genlmsg_multicast(skb, pid, group, GFP_KERNEL); -} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 385a6c7488c6..3f9386b917df 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -41,72 +41,6 @@ /* NetLabel NETLINK helper functions */ /** - * netlbl_netlink_cap_check - Check the NETLINK msg capabilities - * @skb: the NETLINK buffer - * @req_cap: the required capability - * - * Description: - * Check the NETLINK buffer's capabilities against the required capabilities. - * Returns zero on success, negative values on failure. - * - */ -static inline int netlbl_netlink_cap_check(const struct sk_buff *skb, - kernel_cap_t req_cap) -{ - if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap)) - return 0; - return -EPERM; -} - -/** - * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u8 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len) -{ - u8 val = nla_get_u8(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u16 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len) -{ - u16 val = nla_get_u16(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u32 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len) -{ - u32 val = nla_get_u32(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff * @skb: the packet * @pid: the PID of the receipient @@ -124,6 +58,7 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, u32 pid, u32 seq, int type, + int flags, u8 cmd) { return genlmsg_put(skb, @@ -131,85 +66,13 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, seq, type, 0, - 0, + flags, cmd, NETLBL_PROTO_VERSION); } -/** - * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff - * @skb: the packet - * @pid: the PID of the receipient - * @seq: the sequence number - * @type: the generic NETLINK message family type - * @cmd: command - * - * Description: - * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr - * struct to the packet. 
- * - */ -static inline void netlbl_netlink_hdr_push(struct sk_buff *skb, - u32 pid, - u32 seq, - int type, - u8 cmd) - -{ - struct nlmsghdr *nlh; - struct genlmsghdr *hdr; - - nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN)); - nlh->nlmsg_type = type; - nlh->nlmsg_len = skb->len; - nlh->nlmsg_flags = 0; - nlh->nlmsg_pid = pid; - nlh->nlmsg_seq = seq; - - hdr = nlmsg_data(nlh); - hdr->cmd = cmd; - hdr->version = NETLBL_PROTO_VERSION; - hdr->reserved = 0; -} - -/** - * netlbl_netlink_payload_len - Return the length of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns the length of the NetLabel payload. - * - */ -static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb) -{ - return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN; -} - -/** - * netlbl_netlink_payload_data - Returns a pointer to the start of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns a pointer to the start of the NetLabel payload. - * - */ -static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb) -{ - return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) + - GENL_HDRLEN; -} - -/* NetLabel common protocol functions */ - -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code); - /* NetLabel NETLINK I/O functions */ int netlbl_netlink_init(void); -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid); -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group); #endif diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 814ddc42f1f4..293dbd6246c1 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -124,3 +124,40 @@ config SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT If you are unsure what to do here, select N. +config SECURITY_SELINUX_POLICYDB_VERSION_MAX + bool "NSA SELinux maximum supported policy format version" + depends on SECURITY_SELINUX + default n + help + This option enables the maximum policy format version supported + by SELinux to be set to a particular value. This value is reported + to userspace via /selinux/policyvers and used at policy load time. + It can be adjusted downward to support legacy userland (init) that + does not correctly handle kernels that support newer policy versions. + + Examples: + For the Fedora Core 3 or 4 Linux distributions, enable this option + and set the value via the next option. For Fedora Core 5 and later, + do not enable this option. + + If you are unsure how to answer this question, answer N. + +config SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE + int "NSA SELinux maximum supported policy format version value" + depends on SECURITY_SELINUX_POLICYDB_VERSION_MAX + range 15 21 + default 19 + help + This option sets the value for the maximum policy format version + supported by SELinux. + + Examples: + For Fedora Core 3, use 18. + For Fedora Core 4, use 19. + + If you are unsure how to answer this question, look for the + policy format version supported by your policy toolchain, by + running 'checkpolicy -V'. Or look at what policy you have + installed under /etc/selinux/$SELINUXTYPE/policy, where + SELINUXTYPE is defined in your /etc/selinux/config.
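The cap configured above is visible at run time: the kernel reports the value through /selinux/policyvers and enforces it when a policy image is loaded, per the help text. As a quick sanity check, a minimal userspace sketch for reading it back might look as follows; it assumes selinuxfs is mounted at /selinux, and the program itself is illustrative, not part of this patch.

	/* Illustrative only: print the maximum policy format version the
	 * running kernel advertises via selinuxfs. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/selinux/policyvers", "r");
		unsigned int vers;

		if (!f) {
			perror("/selinux/policyvers");
			return 1;
		}
		if (fscanf(f, "%u", &vers) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		printf("maximum supported policy version: %u\n", vers);
		return 0;
	}

As the help text suggests, legacy init programs key off this value when choosing which policy file to load, which is why capping it downward can keep older distributions booting.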
+ diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 9d7737db5e51..b6f96943be1f 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -21,19 +21,10 @@ #include "security.h" #include "objsec.h" -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { - struct task_security_struct *tsec = tsk->security; if (selinux_enabled) - *ctxid = tsec->sid; - else - *ctxid = 0; -} - -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) -{ - if (selinux_enabled) - return security_sid_to_context(ctxid, ctx, ctxlen); + return security_sid_to_context(sid, ctx, ctxlen); else { *ctx = NULL; *ctxlen = 0; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5a66c4c09f7a..e4d81a42fca4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -51,7 +51,6 @@ #include <net/ip.h> /* for sysctl_local_port_range[] */ #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <asm/ioctls.h> #include <linux/bitops.h> #include <linux/interrupt.h> @@ -71,6 +70,7 @@ #include <linux/audit.h> #include <linux/string.h> #include <linux/selinux.h> +#include <linux/mutex.h> #include "avc.h" #include "objsec.h" @@ -185,7 +185,7 @@ static int inode_alloc_security(struct inode *inode) return -ENOMEM; memset(isec, 0, sizeof(*isec)); - init_MUTEX(&isec->sem); + mutex_init(&isec->lock); INIT_LIST_HEAD(&isec->list); isec->inode = inode; isec->sid = SECINITSID_UNLABELED; @@ -242,7 +242,7 @@ static int superblock_alloc_security(struct super_block *sb) if (!sbsec) return -ENOMEM; - init_MUTEX(&sbsec->sem); + mutex_init(&sbsec->lock); INIT_LIST_HEAD(&sbsec->list); INIT_LIST_HEAD(&sbsec->isec_head); spin_lock_init(&sbsec->isec_lock); @@ -594,7 +594,7 @@ static int superblock_doinit(struct super_block *sb, void *data) struct inode *inode = root->d_inode; int rc = 0; - down(&sbsec->sem); + mutex_lock(&sbsec->lock); if (sbsec->initialized) goto out; @@ -689,7 +689,7 @@ next_inode: } spin_unlock(&sbsec->isec_lock); out: - up(&sbsec->sem); + mutex_unlock(&sbsec->lock); return rc; } @@ -843,15 +843,13 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent char *context = NULL; unsigned len = 0; int rc = 0; - int hold_sem = 0; if (isec->initialized) goto out; - down(&isec->sem); - hold_sem = 1; + mutex_lock(&isec->lock); if (isec->initialized) - goto out; + goto out_unlock; sbsec = inode->i_sb->s_security; if (!sbsec->initialized) { @@ -862,7 +860,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (list_empty(&isec->list)) list_add(&isec->list, &sbsec->isec_head); spin_unlock(&sbsec->isec_lock); - goto out; + goto out_unlock; } switch (sbsec->behavior) { @@ -885,7 +883,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent printk(KERN_WARNING "%s: no dentry for dev=%s " "ino=%ld\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino); - goto out; + goto out_unlock; } len = INITCONTEXTLEN; @@ -893,7 +891,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (!context) { rc = -ENOMEM; dput(dentry); - goto out; + goto out_unlock; } rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, context, len); @@ -903,7 +901,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent NULL, 0); if (rc < 0) { dput(dentry); - goto out; + goto out_unlock; } kfree(context); len = rc; @@ 
-911,7 +909,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (!context) { rc = -ENOMEM; dput(dentry); - goto out; + goto out_unlock; } rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, @@ -924,7 +922,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent "%d for dev=%s ino=%ld\n", __FUNCTION__, -rc, inode->i_sb->s_id, inode->i_ino); kfree(context); - goto out; + goto out_unlock; } /* Map ENODATA to the default file SID */ sid = sbsec->def_sid; @@ -960,7 +958,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent isec->sclass, &sid); if (rc) - goto out; + goto out_unlock; isec->sid = sid; break; case SECURITY_FS_USE_MNTPOINT: @@ -978,7 +976,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent isec->sclass, &sid); if (rc) - goto out; + goto out_unlock; isec->sid = sid; } } @@ -987,12 +985,11 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent isec->initialized = 1; +out_unlock: + mutex_unlock(&isec->lock); out: if (isec->sclass == SECCLASS_FILE) isec->sclass = inode_mode_to_security_class(inode->i_mode); - - if (hold_sem) - up(&isec->sem); return rc; } @@ -1364,25 +1361,6 @@ static inline u32 file_to_av(struct file *file) return av; } -/* Set an inode's SID to a specified value. */ -static int inode_security_set_sid(struct inode *inode, u32 sid) -{ - struct inode_security_struct *isec = inode->i_security; - struct superblock_security_struct *sbsec = inode->i_sb->s_security; - - if (!sbsec->initialized) { - /* Defer initialization to selinux_complete_init. */ - return 0; - } - - down(&isec->sem); - isec->sclass = inode_mode_to_security_class(inode->i_mode); - isec->sid = sid; - isec->initialized = 1; - up(&isec->sem); - return 0; -} - /* Hook functions begin here. */ static int selinux_ptrace(struct task_struct *parent, struct task_struct *child) @@ -1711,10 +1689,12 @@ static inline void flush_unauthorized_files(struct files_struct * files) { struct avc_audit_data ad; struct file *file, *devnull = NULL; - struct tty_struct *tty = current->signal->tty; + struct tty_struct *tty; struct fdtable *fdt; long j = -1; + mutex_lock(&tty_mutex); + tty = current->signal->tty; if (tty) { file_list_lock(); file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); @@ -1734,6 +1714,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) } file_list_unlock(); } + mutex_unlock(&tty_mutex); /* Revalidate access to inherited open files. */ @@ -2091,7 +2072,13 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, } } - inode_security_set_sid(inode, newsid); + /* Possibly defer initialization to selinux_complete_init. 
*/ + if (sbsec->initialized) { + struct inode_security_struct *isec = inode->i_security; + isec->sclass = inode_mode_to_security_class(inode->i_mode); + isec->sid = newsid; + isec->initialized = 1; + } if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT) return -EOPNOTSUPP; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 0a39bfd1319f..ef2267fea8bd 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -44,7 +44,7 @@ struct inode_security_struct { u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ unsigned char initialized; /* initialization flag */ - struct semaphore sem; + struct mutex lock; unsigned char inherit; /* inherit SID from parent entry */ }; @@ -63,7 +63,7 @@ struct superblock_security_struct { unsigned int behavior; /* labeling behavior */ unsigned char initialized; /* initialization flag */ unsigned char proc; /* proc fs */ - struct semaphore sem; + struct mutex lock; struct list_head isec_head; spinlock_t isec_lock; }; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 911954a692fa..1ef79172cc8c 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -24,10 +24,15 @@ #define POLICYDB_VERSION_VALIDATETRANS 19 #define POLICYDB_VERSION_MLS 19 #define POLICYDB_VERSION_AVTAB 20 +#define POLICYDB_VERSION_RANGETRANS 21 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_AVTAB +#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX +#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE +#else +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_RANGETRANS +#endif extern int selinux_enabled; extern int selinux_mls_enabled; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 119bd6078ba1..c713af23250a 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -530,22 +530,21 @@ int mls_compute_sid(struct context *scontext, u32 specified, struct context *newcontext) { + struct range_trans *rtr; + if (!selinux_mls_enabled) return 0; switch (specified) { case AVTAB_TRANSITION: - if (tclass == SECCLASS_PROCESS) { - struct range_trans *rangetr; - /* Look for a range transition rule. */ - for (rangetr = policydb.range_tr; rangetr; - rangetr = rangetr->next) { - if (rangetr->dom == scontext->type && - rangetr->type == tcontext->type) { - /* Set the range from the rule */ - return mls_range_set(newcontext, - &rangetr->range); - } + /* Look for a range transition rule. 
*/ + for (rtr = policydb.range_tr; rtr; rtr = rtr->next) { + if (rtr->source_type == scontext->type && + rtr->target_type == tcontext->type && + rtr->target_class == tclass) { + /* Set the range from the rule */ + return mls_range_set(newcontext, + &rtr->target_range); } } /* Fallthrough */ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index f03960e697ce..b18895302555 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -96,6 +96,11 @@ static struct policydb_compat_info policydb_compat[] = { .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, + { + .version = POLICYDB_VERSION_RANGETRANS, + .sym_num = SYM_NUM, + .ocon_num = OCON_NUM, + }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) @@ -645,15 +650,15 @@ void policydb_destroy(struct policydb *p) for (rt = p->range_tr; rt; rt = rt -> next) { if (lrt) { - ebitmap_destroy(&lrt->range.level[0].cat); - ebitmap_destroy(&lrt->range.level[1].cat); + ebitmap_destroy(&lrt->target_range.level[0].cat); + ebitmap_destroy(&lrt->target_range.level[1].cat); kfree(lrt); } lrt = rt; } if (lrt) { - ebitmap_destroy(&lrt->range.level[0].cat); - ebitmap_destroy(&lrt->range.level[1].cat); + ebitmap_destroy(&lrt->target_range.level[0].cat); + ebitmap_destroy(&lrt->target_range.level[1].cat); kfree(lrt); } @@ -1829,6 +1834,7 @@ int policydb_read(struct policydb *p, void *fp) } if (p->policyvers >= POLICYDB_VERSION_MLS) { + int new_rangetr = p->policyvers >= POLICYDB_VERSION_RANGETRANS; rc = next_entry(buf, fp, sizeof(u32)); if (rc < 0) goto bad; @@ -1847,9 +1853,16 @@ int policydb_read(struct policydb *p, void *fp) rc = next_entry(buf, fp, (sizeof(u32) * 2)); if (rc < 0) goto bad; - rt->dom = le32_to_cpu(buf[0]); - rt->type = le32_to_cpu(buf[1]); - rc = mls_read_range_helper(&rt->range, fp); + rt->source_type = le32_to_cpu(buf[0]); + rt->target_type = le32_to_cpu(buf[1]); + if (new_rangetr) { + rc = next_entry(buf, fp, sizeof(u32)); + if (rc < 0) + goto bad; + rt->target_class = le32_to_cpu(buf[0]); + } else + rt->target_class = SECCLASS_PROCESS; + rc = mls_read_range_helper(&rt->target_range, fp); if (rc) goto bad; lrt = rt; diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index b1340711f721..8319d5ff5944 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -106,9 +106,10 @@ struct cat_datum { }; struct range_trans { - u32 dom; /* current process domain */ - u32 type; /* program executable type */ - struct mls_range range; /* new range */ + u32 source_type; + u32 target_type; + u32 target_class; + struct mls_range target_range; struct range_trans *next; }; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 7eb69a602d8f..0c219a1b3243 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2003,7 +2003,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, return rc; } -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { @@ -2026,11 +2026,11 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, goto out; } - ctxt = sidtab_search(&sidtab, ctxid); + ctxt = sidtab_search(&sidtab, sid); if (!ctxt) { audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR, "selinux_audit_rule_match: unrecognized SID %d\n", - ctxid); + sid); match = -ENOENT; goto out; } @@ -2502,14 +2502,24 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct 
socket *sock) { struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + u32 nlbl_peer_sid; sksec->sclass = isec->sclass; if (sk->sk_family != PF_INET) return; + netlbl_secattr_init(&secattr); + if (netlbl_sock_getattr(sk, &secattr) == 0 && + selinux_netlbl_secattr_to_sid(NULL, + &secattr, + sksec->sid, + &nlbl_peer_sid) == 0) + sksec->peer_sid = nlbl_peer_sid; + netlbl_secattr_destroy(&secattr, 0); + sksec->nlbl_state = NLBL_REQUIRE; - sksec->peer_sid = sksec->sid; /* Try to set the NetLabel on the socket to save time later, if we fail * here we will pick up the pieces in later calls to @@ -2568,7 +2578,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask) sock = SOCKET_I(inode); isec = inode->i_security; sksec = sock->sk->sk_security; - down(&isec->sem); + mutex_lock(&isec->lock); if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && (mask & (MAY_WRITE | MAY_APPEND)))) { lock_sock(sock->sk); @@ -2576,7 +2586,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask) release_sock(sock->sk); } else rc = 0; - up(&isec->sem); + mutex_unlock(&isec->lock); return rc; } @@ -2601,7 +2611,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, u32 netlbl_sid; u32 recv_perm; - rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid); + rc = selinux_netlbl_skbuff_getsid(skb, SECINITSID_NETMSG, &netlbl_sid); if (rc != 0) return rc; @@ -2610,13 +2620,13 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, switch (sksec->sclass) { case SECCLASS_UDP_SOCKET: - recv_perm = UDP_SOCKET__RECV_MSG; + recv_perm = UDP_SOCKET__RECVFROM; break; case SECCLASS_TCP_SOCKET: - recv_perm = TCP_SOCKET__RECV_MSG; + recv_perm = TCP_SOCKET__RECVFROM; break; default: - recv_perm = RAWIP_SOCKET__RECV_MSG; + recv_perm = RAWIP_SOCKET__RECVFROM; } rc = avc_has_perm(sksec->sid, diff --git a/sound/oss/au1550_ac97.c b/sound/oss/au1550_ac97.c index 4cdb86252d67..219795171c71 100644 --- a/sound/oss/au1550_ac97.c +++ b/sound/oss/au1550_ac97.c @@ -719,8 +719,7 @@ prog_dmabuf_dac(struct au1550_state *s) } -static void -dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static void dac_dma_interrupt(int irq, void *dev_id) { struct au1550_state *s = (struct au1550_state *) dev_id; struct dmabuf *db = &s->dma_dac; @@ -754,8 +753,7 @@ dac_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) } -static void -adc_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static void adc_dma_interrupt(int irq, void *dev_id) { struct au1550_state *s = (struct au1550_state *)dev_id; struct dmabuf *dp = &s->dma_adc; diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c index 2bd8e40b8541..be0bd503f013 100644 --- a/sound/sparc/amd7930.c +++ b/sound/sparc/amd7930.c @@ -755,7 +755,7 @@ static struct snd_pcm_ops snd_amd7930_capture_ops = { .pointer = snd_amd7930_capture_pointer, }; -static int __init snd_amd7930_pcm(struct snd_amd7930 *amd) +static int __devinit snd_amd7930_pcm(struct snd_amd7930 *amd) { struct snd_pcm *pcm; int err; @@ -870,7 +870,7 @@ static int snd_amd7930_put_volume(struct snd_kcontrol *kctl, struct snd_ctl_elem return change; } -static struct snd_kcontrol_new amd7930_controls[] __initdata = { +static struct snd_kcontrol_new amd7930_controls[] __devinitdata = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitor Volume", @@ -900,7 +900,7 @@ static struct snd_kcontrol_new amd7930_controls[] __initdata = { }, }; -static int __init 
snd_amd7930_mixer(struct snd_amd7930 *amd) +static int __devinit snd_amd7930_mixer(struct snd_amd7930 *amd) { struct snd_card *card; int idx, err; @@ -945,11 +945,11 @@ static struct snd_device_ops snd_amd7930_dev_ops = { .dev_free = snd_amd7930_dev_free, }; -static int __init snd_amd7930_create(struct snd_card *card, - struct resource *rp, - unsigned int reg_size, - int irq, int dev, - struct snd_amd7930 **ramd) +static int __devinit snd_amd7930_create(struct snd_card *card, + struct resource *rp, + unsigned int reg_size, + int irq, int dev, + struct snd_amd7930 **ramd) { unsigned long flags; struct snd_amd7930 *amd; @@ -1013,7 +1013,7 @@ static int __init snd_amd7930_create(struct snd_card *card, return 0; } -static int __init amd7930_attach_common(struct resource *rp, int irq) +static int __devinit amd7930_attach_common(struct resource *rp, int irq) { static int dev_num; struct snd_card *card; @@ -1065,7 +1065,7 @@ out_err: return err; } -static int __init amd7930_obio_attach(struct device_node *dp) +static int __devinit amd7930_obio_attach(struct device_node *dp) { struct linux_prom_registers *regs; struct linux_prom_irqs *irqp;
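The au1550_ac97 conversion above tracks the tree-wide rework of interrupt handlers that drops the struct pt_regs * argument; handlers that still need the trapped register state call get_irq_regs() instead. The DBDMA callbacks in that hunk keep their void return type, but the two-argument shape is the same one used by handlers registered through request_irq(). A minimal sketch of that shape, with every identifier below illustrative rather than taken from either driver:

	#include <linux/interrupt.h>

	struct example_state { int busy; };	/* stand-in device state */

	/* Two-argument handler per the new convention; dev_id is the
	 * cookie that was passed to request_irq(). */
	static irqreturn_t example_interrupt(int irq, void *dev_id)
	{
		struct example_state *s = dev_id;

		s->busy = 0;	/* acknowledge and service the device here */
		return IRQ_HANDLED;
	}

	/* registration, typically from a probe routine:
	 *	err = request_irq(irq, example_interrupt, IRQF_DISABLED,
	 *			  "example", s);
	 */

The __init to __devinit shifts in the amd7930 hunks are a separate cleanup: probe paths marked __devinit may run after boot-time init memory has been discarded, which avoids section-mismatch problems for hotplugged devices.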